diff options
539 files changed, 28180 insertions, 16853 deletions
diff --git a/Documentation/devicetree/bindings/net/can/ifi_canfd.txt b/Documentation/devicetree/bindings/net/can/ifi_canfd.txt new file mode 100644 index 000000000000..20ea5c70ab82 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/ifi_canfd.txt @@ -0,0 +1,15 @@ +IFI CANFD controller +-------------------- + +Required properties: + - compatible: Should be "ifi,canfd-1.0" + - reg: Should contain CAN controller registers location and length + - interrupts: Should contain IRQ line for the CAN controller + +Example: + + canfd0: canfd@ff220000 { + compatible = "ifi,canfd-1.0"; + reg = <0xff220000 0x00001000>; + interrupts = <0 43 0>; + }; diff --git a/Documentation/devicetree/bindings/net/can/sja1000.txt b/Documentation/devicetree/bindings/net/can/sja1000.txt index b4a6d53fb01a..ac3160eca96a 100644 --- a/Documentation/devicetree/bindings/net/can/sja1000.txt +++ b/Documentation/devicetree/bindings/net/can/sja1000.txt @@ -2,7 +2,7 @@ Memory mapped SJA1000 CAN controller from NXP (formerly Philips) Required properties: -- compatible : should be "nxp,sja1000". +- compatible : should be one of "nxp,sja1000", "technologic,sja1000". - reg : should specify the chip select, address offset and size required to map the registers of the SJA1000. The size is usually 0x80. @@ -14,6 +14,7 @@ Optional properties: - reg-io-width : Specify the size (in bytes) of the IO accesses that should be performed on the device. Valid value is 1, 2 or 4. + This property is ignored for technologic version. Default to 1 (8 bits). - nxp,external-clock-frequency : Frequency of the external oscillator diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt index a9eb611bee68..a4799fff0d1f 100644 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ b/Documentation/devicetree/bindings/net/fsl-fec.txt @@ -12,6 +12,9 @@ Optional properties: only if property "phy-reset-gpios" is available. Missing the property will have the duration be 1 millisecond. Numbers greater than 1000 are invalid and 1 millisecond will be used instead. +- phy-reset-active-low : If present then the reset sequence using the GPIO + specified in the "phy-reset-gpios" property is reversed (H=reset state, + L=operation state). - phy-supply : regulator that powers the Ethernet PHY. - phy-handle : phandle to the PHY device connected to this device. - fixed-link : Assume a fixed link. See fixed-link.txt in the same directory. diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index d2e243b1ec0e..b5a42df4c928 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -25,6 +25,8 @@ Required properties: Optional properties for PHY child node: - reset-gpios : Should specify the gpio for phy reset +- magic-packet : If present, indicates that the hardware supports waking + up via magic packet. Examples: diff --git a/Documentation/devicetree/bindings/net/micrel-ks8995.txt b/Documentation/devicetree/bindings/net/micrel-ks8995.txt new file mode 100644 index 000000000000..281bc2498d12 --- /dev/null +++ b/Documentation/devicetree/bindings/net/micrel-ks8995.txt @@ -0,0 +1,20 @@ +Micrel KS8995 SPI controlled Ethernet Switch families + +Required properties (according to spi-bus.txt): +- compatible: either "micrel,ks8995", "micrel,ksz8864" or "micrel,ksz8795" + +Optional properties: +- reset-gpios : phandle of gpio that will be used to reset chip during probe + +Example: + +spi-master { + ... + switch@0 { + compatible = "micrel,ksz8795"; + + reg = <0>; + spi-max-frequency = <50000000>; + reset-gpios = <&gpio0 46 GPIO_ACTIVE_LOW>; + }; +}; diff --git a/Documentation/devicetree/bindings/net/wireless/ti,wlcore,spi.txt b/Documentation/devicetree/bindings/net/wireless/ti,wlcore,spi.txt new file mode 100644 index 000000000000..9180724e182c --- /dev/null +++ b/Documentation/devicetree/bindings/net/wireless/ti,wlcore,spi.txt @@ -0,0 +1,36 @@ +* Texas Instruments wl1271 wireless lan controller + +The wl1271 chip can be connected via SPI or via SDIO. This +document describes the binding for the SPI connected chip. + +Required properties: +- compatible : Should be "ti,wl1271" +- reg : Chip select address of device +- spi-max-frequency : Maximum SPI clocking speed of device in Hz +- ref-clock-frequency : Reference clock frequency +- interrupt-parent, interrupts : + Should contain parameters for 1 interrupt line. + Interrupt parameters: parent, line number, type. +- vwlan-supply : Point the node of the regulator that powers/enable the wl1271 chip + +Optional properties: +- clock-xtal : boolean, clock is generated from XTAL + +- Please consult Documentation/devicetree/bindings/spi/spi-bus.txt + for optional SPI connection related properties, + +Examples: + +&spi1 { + wl1271@1 { + compatible = "ti,wl1271"; + + reg = <1>; + spi-max-frequency = <48000000>; + clock-xtal; + ref-clock-frequency = <38400000>; + interrupt-parent = <&gpio3>; + interrupts = <8 IRQ_TYPE_LEVEL_HIGH>; + vwlan-supply = <&vwlan_fixed>; + }; +}; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 72e2c5a2b327..c6134dcd2e04 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -111,6 +111,7 @@ hp Hewlett Packard i2se I2SE GmbH ibm International Business Machines (IBM) idt Integrated Device Technologies, Inc. +ifi Ingenieurburo Fur Ic-Technologie (I/F/I) iom Iomega Corporation img Imagination Technologies Ltd. ingenic Ingenic Semiconductor diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index df27a1a50776..415154a487d0 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -44,6 +44,8 @@ can.txt - documentation on CAN protocol family. cdc_mbim.txt - 3G/LTE USB modem (Mobile Broadband Interface Model) +checksum-offloads.txt + - Explanation of checksum offloads; LCO, RCO cops.txt - info on the COPS LocalTalk Linux driver cs89x0.txt diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index ff23b755f5e4..1b5e7a7f2185 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt @@ -187,7 +187,7 @@ interfaces to the kernel module settings. For more information, please see the manpage (man batctl). -batctl is available on http://www.open-mesh.org/ +batctl is available on https://www.open-mesh.org/ CONTACT diff --git a/Documentation/networking/checksum-offloads.txt b/Documentation/networking/checksum-offloads.txt new file mode 100644 index 000000000000..de2a327766a7 --- /dev/null +++ b/Documentation/networking/checksum-offloads.txt @@ -0,0 +1,119 @@ +Checksum Offloads in the Linux Networking Stack + + +Introduction +============ + +This document describes a set of techniques in the Linux networking stack + to take advantage of checksum offload capabilities of various NICs. + +The following technologies are described: + * TX Checksum Offload + * LCO: Local Checksum Offload + * RCO: Remote Checksum Offload + +Things that should be documented here but aren't yet: + * RX Checksum Offload + * CHECKSUM_UNNECESSARY conversion + + +TX Checksum Offload +=================== + +The interface for offloading a transmit checksum to a device is explained + in detail in comments near the top of include/linux/skbuff.h. +In brief, it allows to request the device fill in a single ones-complement + checksum defined by the sk_buff fields skb->csum_start and + skb->csum_offset. The device should compute the 16-bit ones-complement + checksum (i.e. the 'IP-style' checksum) from csum_start to the end of the + packet, and fill in the result at (csum_start + csum_offset). +Because csum_offset cannot be negative, this ensures that the previous + value of the checksum field is included in the checksum computation, thus + it can be used to supply any needed corrections to the checksum (such as + the sum of the pseudo-header for UDP or TCP). +This interface only allows a single checksum to be offloaded. Where + encapsulation is used, the packet may have multiple checksum fields in + different header layers, and the rest will have to be handled by another + mechanism such as LCO or RCO. +No offloading of the IP header checksum is performed; it is always done in + software. This is OK because when we build the IP header, we obviously + have it in cache, so summing it isn't expensive. It's also rather short. +The requirements for GSO are more complicated, because when segmenting an + encapsulated packet both the inner and outer checksums may need to be + edited or recomputed for each resulting segment. See the skbuff.h comment + (section 'E') for more details. + +A driver declares its offload capabilities in netdev->hw_features; see + Documentation/networking/netdev-features for more. Note that a device + which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start + and csum_offset given in the SKB; if it tries to deduce these itself in + hardware (as some NICs do) the driver should check that the values in the + SKB match those which the hardware will deduce, and if not, fall back to + checksumming in software instead (with skb_checksum_help or one of the + skb_csum_off_chk* functions as mentioned in include/linux/skbuff.h). This + is a pain, but that's what you get when hardware tries to be clever. + +The stack should, for the most part, assume that checksum offload is + supported by the underlying device. The only place that should check is + validate_xmit_skb(), and the functions it calls directly or indirectly. + That function compares the offload features requested by the SKB (which + may include other offloads besides TX Checksum Offload) and, if they are + not supported or enabled on the device (determined by netdev->features), + performs the corresponding offload in software. In the case of TX + Checksum Offload, that means calling skb_checksum_help(skb). + + +LCO: Local Checksum Offload +=========================== + +LCO is a technique for efficiently computing the outer checksum of an + encapsulated datagram when the inner checksum is due to be offloaded. +The ones-complement sum of a correctly checksummed TCP or UDP packet is + equal to the sum of the pseudo header, because everything else gets + 'cancelled out' by the checksum field. This is because the sum was + complemented before being written to the checksum field. +More generally, this holds in any case where the 'IP-style' ones complement + checksum is used, and thus any checksum that TX Checksum Offload supports. +That is, if we have set up TX Checksum Offload with a start/offset pair, we + know that _after the device has filled in that checksum_, the ones + complement sum from csum_start to the end of the packet will be equal to + _whatever value we put in the checksum field beforehand_. This allows us + to compute the outer checksum without looking at the payload: we simply + stop summing when we get to csum_start, then add the 16-bit word at + (csum_start + csum_offset). +Then, when the true inner checksum is filled in (either by hardware or by + skb_checksum_help()), the outer checksum will become correct by virtue of + the arithmetic. + +LCO is performed by the stack when constructing an outer UDP header for an + encapsulation such as VXLAN or GENEVE, in udp_set_csum(). Similarly for + the IPv6 equivalents, in udp6_set_csum(). +It is also performed when constructing an IPv4 GRE header, in + net/ipv4/ip_gre.c:build_header(). It is *not* currently performed when + constructing an IPv6 GRE header; the GRE checksum is computed over the + whole packet in net/ipv6/ip6_gre.c:ip6gre_xmit2(), but it should be + possible to use LCO here as IPv6 GRE still uses an IP-style checksum. +All of the LCO implementations use a helper function lco_csum(), in + include/linux/skbuff.h. + +LCO can safely be used for nested encapsulations; in this case, the outer + encapsulation layer will sum over both its own header and the 'middle' + header. This does mean that the 'middle' header will get summed multiple + times, but there doesn't seem to be a way to avoid that without incurring + bigger costs (e.g. in SKB bloat). + + +RCO: Remote Checksum Offload +============================ + +RCO is a technique for eliding the inner checksum of an encapsulated + datagram, allowing the outer checksum to be offloaded. It does, however, + involve a change to the encapsulation protocols, which the receiver must + also support. For this reason, it is disabled by default. +RCO is detailed in the following Internet-Drafts: +https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00 +https://tools.ietf.org/html/draft-herbert-vxlan-rco-00 +In Linux, RCO is implemented individually in each encapsulation protocol, + and most tunnel types have flags controlling its use. For instance, VXLAN + has the flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that + RCO should be used when transmitting to a given remote destination. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 73b36d7c7b0d..24ce97f42d35 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1216,6 +1216,19 @@ promote_secondaries - BOOLEAN promote a corresponding secondary IP address instead of removing all the corresponding secondary IP addresses. +drop_unicast_in_l2_multicast - BOOLEAN + Drop any unicast IP packets that are received in link-layer + multicast (or broadcast) frames. + This behavior (for multicast) is actually a SHOULD in RFC + 1122, but is disabled by default for compatibility reasons. + Default: off (0) + +drop_gratuitous_arp - BOOLEAN + Drop all gratuitous ARP frames, for example if there's a known + good ARP proxy on the network and such frames need not be used + (or in the case of 802.11, must not be used to prevent attacks.) + Default: off (0) + tag - INTEGER Allows you to write a number, which can be used as required. @@ -1661,6 +1674,19 @@ stable_secret - IPv6 address By default the stable secret is unset. +drop_unicast_in_l2_multicast - BOOLEAN + Drop any unicast IPv6 packets that are received in link-layer + multicast (or broadcast) frames. + + By default this is turned off. + +drop_unsolicited_na - BOOLEAN + Drop all unsolicited neighbor advertisements, for example if there's + a known good NA proxy on the network and such frames need not be used + (or in the case of 802.11, must not be used to prevent attacks.) + + By default this is turned off. + icmp/*: ratelimit - INTEGER Limit the maximal rates for sending ICMPv6 packets. diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt deleted file mode 100644 index 54f10478e8e3..000000000000 --- a/Documentation/networking/netlink_mmap.txt +++ /dev/null @@ -1,332 +0,0 @@ -This file documents how to use memory mapped I/O with netlink. - -Author: Patrick McHardy <kaber@trash.net> - -Overview --------- - -Memory mapped netlink I/O can be used to increase throughput and decrease -overhead of unicast receive and transmit operations. Some netlink subsystems -require high throughput, these are mainly the netfilter subsystems -nfnetlink_queue and nfnetlink_log, but it can also help speed up large -dump operations of f.i. the routing database. - -Memory mapped netlink I/O used two circular ring buffers for RX and TX which -are mapped into the processes address space. - -The RX ring is used by the kernel to directly construct netlink messages into -user-space memory without copying them as done with regular socket I/O, -additionally as long as the ring contains messages no recvmsg() or poll() -syscalls have to be issued by user-space to get more message. - -The TX ring is used to process messages directly from user-space memory, the -kernel processes all messages contained in the ring using a single sendmsg() -call. - -Usage overview --------------- - -In order to use memory mapped netlink I/O, user-space needs three main changes: - -- ring setup -- conversion of the RX path to get messages from the ring instead of recvmsg() -- conversion of the TX path to construct messages into the ring - -Ring setup is done using setsockopt() to provide the ring parameters to the -kernel, then a call to mmap() to map the ring into the processes address space: - -- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, ¶ms, sizeof(params)); -- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, ¶ms, sizeof(params)); -- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) - -Usage of either ring is optional, but even if only the RX ring is used the -mapping still needs to be writable in order to update the frame status after -processing. - -Conversion of the reception path involves calling poll() on the file -descriptor, once the socket is readable the frames from the ring are -processed in order until no more messages are available, as indicated by -a status word in the frame header. - -On kernel side, in order to make use of memory mapped I/O on receive, the -originating netlink subsystem needs to support memory mapped I/O, otherwise -it will use an allocated socket buffer as usual and the contents will be - copied to the ring on transmission, nullifying most of the performance gains. -Dumps of kernel databases automatically support memory mapped I/O. - -Conversion of the transmit path involves changing message construction to -use memory from the TX ring instead of (usually) a buffer declared on the -stack and setting up the frame header appropriately. Optionally poll() can -be used to wait for free frames in the TX ring. - -Structured and definitions for using memory mapped I/O are contained in -<linux/netlink.h>. - -RX and TX rings ----------------- - -Each ring contains a number of continuous memory blocks, containing frames of -fixed size dependent on the parameters used for ring setup. - -Ring: [ block 0 ] - [ frame 0 ] - [ frame 1 ] - [ block 1 ] - [ frame 2 ] - [ frame 3 ] - ... - [ block n ] - [ frame 2 * n ] - [ frame 2 * n + 1 ] - -The blocks are only visible to the kernel, from the point of view of user-space -the ring just contains the frames in a continuous memory zone. - -The ring parameters used for setting up the ring are defined as follows: - -struct nl_mmap_req { - unsigned int nm_block_size; - unsigned int nm_block_nr; - unsigned int nm_frame_size; - unsigned int nm_frame_nr; -}; - -Frames are grouped into blocks, where each block is a continuous region of memory -and holds nm_block_size / nm_frame_size frames. The total number of frames in -the ring is nm_frame_nr. The following invariants hold: - -- frames_per_block = nm_block_size / nm_frame_size - -- nm_frame_nr = frames_per_block * nm_block_nr - -Some parameters are constrained, specifically: - -- nm_block_size must be a multiple of the architectures memory page size. - The getpagesize() function can be used to get the page size. - -- nm_frame_size must be equal or larger to NL_MMAP_HDRLEN, IOW a frame must be - able to hold at least the frame header - -- nm_frame_size must be smaller or equal to nm_block_size - -- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT - -- nm_frame_nr must equal the actual number of frames as specified above. - -When the kernel can't allocate physically continuous memory for a ring block, -it will fall back to use physically discontinuous memory. This might affect -performance negatively, in order to avoid this the nm_frame_size parameter -should be chosen to be as small as possible for the required frame size and -the number of blocks should be increased instead. - -Ring frames ------------- - -Each frames contain a frame header, consisting of a synchronization word and some -meta-data, and the message itself. - -Frame: [ header message ] - -The frame header is defined as follows: - -struct nl_mmap_hdr { - unsigned int nm_status; - unsigned int nm_len; - __u32 nm_group; - /* credentials */ - __u32 nm_pid; - __u32 nm_uid; - __u32 nm_gid; -}; - -- nm_status is used for synchronizing processing between the kernel and user- - space and specifies ownership of the frame as well as the operation to perform - -- nm_len contains the length of the message contained in the data area - -- nm_group specified the destination multicast group of message - -- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending - process. These values correspond to the data available using SOCK_PASSCRED in - the SCM_CREDENTIALS cmsg. - -The possible values in the status word are: - -- NL_MMAP_STATUS_UNUSED: - RX ring: frame belongs to the kernel and contains no message - for user-space. Approriate action is to invoke poll() - to wait for new messages. - - TX ring: frame belongs to user-space and can be used for - message construction. - -- NL_MMAP_STATUS_RESERVED: - RX ring only: frame is currently used by the kernel for message - construction and contains no valid message yet. - Appropriate action is to invoke poll() to wait for - new messages. - -- NL_MMAP_STATUS_VALID: - RX ring: frame contains a valid message. Approriate action is - to process the message and release the frame back to - the kernel by setting the status to - NL_MMAP_STATUS_UNUSED or queue the frame by setting the - status to NL_MMAP_STATUS_SKIP. - - TX ring: the frame contains a valid message from user-space to - be processed by the kernel. After completing processing - the kernel will release the frame back to user-space by - setting the status to NL_MMAP_STATUS_UNUSED. - -- NL_MMAP_STATUS_COPY: - RX ring only: a message is ready to be processed but could not be - stored in the ring, either because it exceeded the - frame size or because the originating subsystem does - not support memory mapped I/O. Appropriate action is - to invoke recvmsg() to receive the message and release - the frame back to the kernel by setting the status to - NL_MMAP_STATUS_UNUSED. - -- NL_MMAP_STATUS_SKIP: - RX ring only: user-space queued the message for later processing, but - processed some messages following it in the ring. The - kernel should skip this frame when looking for unused - frames. - -The data area of a frame begins at a offset of NL_MMAP_HDRLEN relative to the -frame header. - -TX limitations --------------- - -As of Jan 2015 the message is always copied from the ring frame to an -allocated buffer due to unresolved security concerns. -See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX."). - -Example -------- - -Ring setup: - - unsigned int block_size = 16 * getpagesize(); - struct nl_mmap_req req = { - .nm_block_size = block_size, - .nm_block_nr = 64, - .nm_frame_size = 16384, - .nm_frame_nr = 64 * block_size / 16384, - }; - unsigned int ring_size; - void *rx_ring, *tx_ring; - - /* Configure ring parameters */ - if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) - exit(1); - if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) - exit(1) - - /* Calculate size of each individual ring */ - ring_size = req.nm_block_nr * req.nm_block_size; - - /* Map RX/TX rings. The TX ring is located after the RX ring */ - rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - if ((long)rx_ring == -1L) - exit(1); - tx_ring = rx_ring + ring_size: - -Message reception: - -This example assumes some ring parameters of the ring setup are available. - - unsigned int frame_offset = 0; - struct nl_mmap_hdr *hdr; - struct nlmsghdr *nlh; - unsigned char buf[16384]; - ssize_t len; - - while (1) { - struct pollfd pfds[1]; - - pfds[0].fd = fd; - pfds[0].events = POLLIN | POLLERR; - pfds[0].revents = 0; - - if (poll(pfds, 1, -1) < 0 && errno != -EINTR) - exit(1); - - /* Check for errors. Error handling omitted */ - if (pfds[0].revents & POLLERR) - <handle error> - - /* If no new messages, poll again */ - if (!(pfds[0].revents & POLLIN)) - continue; - - /* Process all frames */ - while (1) { - /* Get next frame header */ - hdr = rx_ring + frame_offset; - - if (hdr->nm_status == NL_MMAP_STATUS_VALID) { - /* Regular memory mapped frame */ - nlh = (void *)hdr + NL_MMAP_HDRLEN; - len = hdr->nm_len; - - /* Release empty message immediately. May happen - * on error during message construction. - */ - if (len == 0) - goto release; - } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) { - /* Frame queued to socket receive queue */ - len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); - if (len <= 0) - break; - nlh = buf; - } else - /* No more messages to process, continue polling */ - break; - - process_msg(nlh); -release: - /* Release frame back to the kernel */ - hdr->nm_status = NL_MMAP_STATUS_UNUSED; - - /* Advance frame offset to next frame */ - frame_offset = (frame_offset + frame_size) % ring_size; - } - } - -Message transmission: - -This example assumes some ring parameters of the ring setup are available. -A single message is constructed and transmitted, to send multiple messages -at once they would be constructed in consecutive frames before a final call -to sendto(). - - unsigned int frame_offset = 0; - struct nl_mmap_hdr *hdr; - struct nlmsghdr *nlh; - struct sockaddr_nl addr = { - .nl_family = AF_NETLINK, - }; - - hdr = tx_ring + frame_offset; - if (hdr->nm_status != NL_MMAP_STATUS_UNUSED) - /* No frame available. Use poll() to avoid. */ - exit(1); - - nlh = (void *)hdr + NL_MMAP_HDRLEN; - - /* Build message */ - build_message(nlh); - - /* Fill frame header: length and status need to be set */ - hdr->nm_len = nlh->nlmsg_len; - hdr->nm_status = NL_MMAP_STATUS_VALID; - - if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0) - exit(1); - - /* Advance frame offset to next frame */ - frame_offset = (frame_offset + frame_size) % ring_size; diff --git a/MAINTAINERS b/MAINTAINERS index 2fa3303ea625..27393cff1707 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2151,7 +2151,7 @@ M: Marek Lindner <mareklindner@neomailbox.ch> M: Simon Wunderlich <sw@simonwunderlich.de> M: Antonio Quartulli <a@unstable.cc> L: b.a.t.m.a.n@lists.open-mesh.org -W: http://www.open-mesh.org/ +W: https://www.open-mesh.org/ S: Maintained F: net/batman-adv/ @@ -2421,6 +2421,7 @@ F: include/linux/bcm963xx_nvram.h F: include/linux/bcm963xx_tag.h BROADCOM TG3 GIGABIT ETHERNET DRIVER +M: Siva Reddy Kallam <siva.kallam@broadcom.com> M: Prashant Sreedharan <prashant@broadcom.com> M: Michael Chan <mchan@broadcom.com> L: netdev@vger.kernel.org diff --git a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi index 5d87a3dc44b8..278f106a0054 100644 --- a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi +++ b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi @@ -621,7 +621,13 @@ <0x0 0x1f600000 0x0 0Xd100>, <0x0 0x20000000 0x0 0X220000>; interrupts = <0 108 4>, - <0 109 4>; + <0 109 4>, + <0 110 4>, + <0 111 4>, + <0 112 4>, + <0 113 4>, + <0 114 4>, + <0 115 4>; port-id = <1>; dma-coherent; clocks = <&xge1clk 0>; diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index fe30f7671ea3..cafb2c2715fb 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -958,7 +958,13 @@ <0x0 0x18000000 0x0 0X200>; reg-names = "enet_csr", "ring_csr", "ring_cmd"; interrupts = <0x0 0x60 0x4>, - <0x0 0x61 0x4>; + <0x0 0x61 0x4>, + <0x0 0x62 0x4>, + <0x0 0x63 0x4>, + <0x0 0x64 0x4>, + <0x0 0x65 0x4>, + <0x0 0x66 0x4>, + <0x0 0x67 0x4>; dma-coherent; clocks = <&xge0clk 0>; /* mac address will be overwritten by the bootloader */ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 70bbe39043a9..7c247e7404be 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -37,7 +37,7 @@ print_context_stack_bp(struct thread_info *tinfo, /* Generic stack tracer with callbacks */ struct stacktrace_ops { - void (*address)(void *data, unsigned long address, int reliable); + int (*address)(void *data, unsigned long address, int reliable); /* On negative return stop dumping */ int (*stack)(void *data, char *name); walk_stack_t walk_stack; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1b443db2db50..d276b31ca473 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2180,11 +2180,11 @@ static int backtrace_stack(void *data, char *name) return 0; } -static void backtrace_address(void *data, unsigned long addr, int reliable) +static int backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - perf_callchain_store(entry, addr); + return perf_callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 9c30acfadae2..0d1ff4b407d4 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -135,7 +135,8 @@ print_context_stack_bp(struct thread_info *tinfo, if (!__kernel_text_address(addr)) break; - ops->address(data, addr, 1); + if (ops->address(data, addr, 1)) + break; frame = frame->next_frame; ret_addr = &frame->return_address; print_ftrace_graph_addr(addr, data, ops, tinfo, graph); @@ -154,10 +155,11 @@ static int print_trace_stack(void *data, char *name) /* * Print one address/symbol entries per line. */ -static void print_trace_address(void *data, unsigned long addr, int reliable) +static int print_trace_address(void *data, unsigned long addr, int reliable) { touch_nmi_watchdog(); printk_stack_address(addr, reliable, data); + return 0; } static const struct stacktrace_ops print_trace_ops = { diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index fdd0c6430e5a..9ee98eefc44d 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -14,30 +14,34 @@ static int save_stack_stack(void *data, char *name) return 0; } -static void +static int __save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) { struct stack_trace *trace = data; #ifdef CONFIG_FRAME_POINTER if (!reliable) - return; + return 0; #endif if (nosched && in_sched_functions(addr)) - return; + return 0; if (trace->skip > 0) { trace->skip--; - return; + return 0; } - if (trace->nr_entries < trace->max_entries) + if (trace->nr_entries < trace->max_entries) { trace->entries[trace->nr_entries++] = addr; + return 0; + } else { + return -1; /* no more room, stop walking the stack */ + } } -static void save_stack_address(void *data, unsigned long addr, int reliable) +static int save_stack_address(void *data, unsigned long addr, int reliable) { return __save_stack_address(data, addr, reliable, false); } -static void +static int save_stack_address_nosched(void *data, unsigned long addr, int reliable) { return __save_stack_address(data, addr, reliable, true); diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 4e664bdb535a..cb31a4440e58 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -23,12 +23,13 @@ static int backtrace_stack(void *data, char *name) return 0; } -static void backtrace_address(void *data, unsigned long addr, int reliable) +static int backtrace_address(void *data, unsigned long addr, int reliable) { unsigned int *depth = data; if ((*depth)--) oprofile_add_trace(addr); + return 0; } static struct stacktrace_ops backtrace_ops = { diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 38f156745d53..7e4ddfb076d3 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -48,7 +48,6 @@ void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc); void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); #ifdef CONFIG_BCMA_DRIVER_MIPS -void bcma_chipco_serial_init(struct bcma_drv_cc *cc); extern struct platform_device bcma_pflash_dev; #endif /* CONFIG_BCMA_DRIVER_MIPS */ diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index b7c8a8d4e6d1..b0f44a2937b9 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -15,6 +15,8 @@ #include <linux/platform_device.h> #include <linux/bcma/bcma.h> +static void bcma_chipco_serial_init(struct bcma_drv_cc *cc); + static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset, u32 mask, u32 value) { @@ -115,6 +117,8 @@ int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc) void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc) { + struct bcma_bus *bus = cc->core->bus; + if (cc->early_setup_done) return; @@ -129,6 +133,9 @@ void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc) if (cc->capabilities & BCMA_CC_CAP_PMU) bcma_pmu_early_init(cc); + if (IS_BUILTIN(CONFIG_BCM47XX) && bus->hosttype == BCMA_HOSTTYPE_SOC) + bcma_chipco_serial_init(cc); + cc->early_setup_done = true; } @@ -185,11 +192,12 @@ u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks) ticks = 2; else if (ticks > maxt) ticks = maxt; - bcma_cc_write32(cc, BCMA_CC_PMU_WATCHDOG, ticks); + bcma_pmu_write32(cc, BCMA_CC_PMU_WATCHDOG, ticks); } else { struct bcma_bus *bus = cc->core->bus; if (bus->chipinfo.id != BCMA_CHIP_ID_BCM4707 && + bus->chipinfo.id != BCMA_CHIP_ID_BCM47094 && bus->chipinfo.id != BCMA_CHIP_ID_BCM53018) bcma_core_set_clockmode(cc->core, ticks ? BCMA_CLKMODE_FAST : BCMA_CLKMODE_DYNAMIC); @@ -314,9 +322,9 @@ u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value) return res; } -#ifdef CONFIG_BCMA_DRIVER_MIPS -void bcma_chipco_serial_init(struct bcma_drv_cc *cc) +static void bcma_chipco_serial_init(struct bcma_drv_cc *cc) { +#if IS_BUILTIN(CONFIG_BCM47XX) unsigned int irq; u32 baud_base; u32 i; @@ -358,5 +366,5 @@ void bcma_chipco_serial_init(struct bcma_drv_cc *cc) ports[i].baud_base = baud_base; ports[i].reg_shift = 0; } +#endif /* CONFIG_BCM47XX */ } -#endif /* CONFIG_BCMA_DRIVER_MIPS */ diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c index fe0d48cb1778..f1eb4d3e1d57 100644 --- a/drivers/bcma/driver_chipcommon_pmu.c +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -15,44 +15,44 @@ u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset) { - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PLLCTL_ADDR); - return bcma_cc_read32(cc, BCMA_CC_PLLCTL_DATA); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); + return bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); } EXPORT_SYMBOL_GPL(bcma_chipco_pll_read); void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value) { - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PLLCTL_ADDR); - bcma_cc_write32(cc, BCMA_CC_PLLCTL_DATA, value); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, value); } EXPORT_SYMBOL_GPL(bcma_chipco_pll_write); void bcma_chipco_pll_maskset(struct bcma_drv_cc *cc, u32 offset, u32 mask, u32 set) { - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PLLCTL_ADDR); - bcma_cc_maskset32(cc, BCMA_CC_PLLCTL_DATA, mask, set); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); + bcma_pmu_maskset32(cc, BCMA_CC_PMU_PLLCTL_DATA, mask, set); } EXPORT_SYMBOL_GPL(bcma_chipco_pll_maskset); void bcma_chipco_chipctl_maskset(struct bcma_drv_cc *cc, u32 offset, u32 mask, u32 set) { - bcma_cc_write32(cc, BCMA_CC_CHIPCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_CHIPCTL_ADDR); - bcma_cc_maskset32(cc, BCMA_CC_CHIPCTL_DATA, mask, set); + bcma_pmu_write32(cc, BCMA_CC_PMU_CHIPCTL_ADDR, offset); + bcma_pmu_read32(cc, BCMA_CC_PMU_CHIPCTL_ADDR); + bcma_pmu_maskset32(cc, BCMA_CC_PMU_CHIPCTL_DATA, mask, set); } EXPORT_SYMBOL_GPL(bcma_chipco_chipctl_maskset); void bcma_chipco_regctl_maskset(struct bcma_drv_cc *cc, u32 offset, u32 mask, u32 set) { - bcma_cc_write32(cc, BCMA_CC_REGCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_REGCTL_ADDR); - bcma_cc_maskset32(cc, BCMA_CC_REGCTL_DATA, mask, set); + bcma_pmu_write32(cc, BCMA_CC_PMU_REGCTL_ADDR, offset); + bcma_pmu_read32(cc, BCMA_CC_PMU_REGCTL_ADDR); + bcma_pmu_maskset32(cc, BCMA_CC_PMU_REGCTL_DATA, mask, set); } EXPORT_SYMBOL_GPL(bcma_chipco_regctl_maskset); @@ -60,18 +60,18 @@ static u32 bcma_pmu_xtalfreq(struct bcma_drv_cc *cc) { u32 ilp_ctl, alp_hz; - if (!(bcma_cc_read32(cc, BCMA_CC_PMU_STAT) & + if (!(bcma_pmu_read32(cc, BCMA_CC_PMU_STAT) & BCMA_CC_PMU_STAT_EXT_LPO_AVAIL)) return 0; - bcma_cc_write32(cc, BCMA_CC_PMU_XTAL_FREQ, - BIT(BCMA_CC_PMU_XTAL_FREQ_MEASURE_SHIFT)); + bcma_pmu_write32(cc, BCMA_CC_PMU_XTAL_FREQ, + BIT(BCMA_CC_PMU_XTAL_FREQ_MEASURE_SHIFT)); usleep_range(1000, 2000); - ilp_ctl = bcma_cc_read32(cc, BCMA_CC_PMU_XTAL_FREQ); + ilp_ctl = bcma_pmu_read32(cc, BCMA_CC_PMU_XTAL_FREQ); ilp_ctl &= BCMA_CC_PMU_XTAL_FREQ_ILPCTL_MASK; - bcma_cc_write32(cc, BCMA_CC_PMU_XTAL_FREQ, 0); + bcma_pmu_write32(cc, BCMA_CC_PMU_XTAL_FREQ, 0); alp_hz = ilp_ctl * 32768 / 4; return (alp_hz + 50000) / 100000 * 100; @@ -127,8 +127,8 @@ static void bcma_pmu2_pll_init0(struct bcma_drv_cc *cc, u32 xtalfreq) mask = (u32)~(BCMA_RES_4314_HT_AVAIL | BCMA_RES_4314_MACPHY_CLK_AVAIL); - bcma_cc_mask32(cc, BCMA_CC_PMU_MINRES_MSK, mask); - bcma_cc_mask32(cc, BCMA_CC_PMU_MAXRES_MSK, mask); + bcma_pmu_mask32(cc, BCMA_CC_PMU_MINRES_MSK, mask); + bcma_pmu_mask32(cc, BCMA_CC_PMU_MAXRES_MSK, mask); bcma_wait_value(cc->core, BCMA_CLKCTLST, BCMA_CLKCTLST_HAVEHT, 0, 20000); break; @@ -140,7 +140,7 @@ static void bcma_pmu2_pll_init0(struct bcma_drv_cc *cc, u32 xtalfreq) /* Flush */ if (cc->pmu.rev >= 2) - bcma_cc_set32(cc, BCMA_CC_PMU_CTL, BCMA_CC_PMU_CTL_PLL_UPD); + bcma_pmu_set32(cc, BCMA_CC_PMU_CTL, BCMA_CC_PMU_CTL_PLL_UPD); /* TODO: Do we need to update OTP? */ } @@ -195,9 +195,9 @@ static void bcma_pmu_resources_init(struct bcma_drv_cc *cc) /* Set the resource masks. */ if (min_msk) - bcma_cc_write32(cc, BCMA_CC_PMU_MINRES_MSK, min_msk); + bcma_pmu_write32(cc, BCMA_CC_PMU_MINRES_MSK, min_msk); if (max_msk) - bcma_cc_write32(cc, BCMA_CC_PMU_MAXRES_MSK, max_msk); + bcma_pmu_write32(cc, BCMA_CC_PMU_MAXRES_MSK, max_msk); /* * Add some delay; allow resources to come up and settle. @@ -269,23 +269,33 @@ static void bcma_pmu_workarounds(struct bcma_drv_cc *cc) void bcma_pmu_early_init(struct bcma_drv_cc *cc) { + struct bcma_bus *bus = cc->core->bus; u32 pmucap; - pmucap = bcma_cc_read32(cc, BCMA_CC_PMU_CAP); + if (cc->core->id.rev >= 35 && + cc->capabilities_ext & BCMA_CC_CAP_EXT_AOB_PRESENT) { + cc->pmu.core = bcma_find_core(bus, BCMA_CORE_PMU); + if (!cc->pmu.core) + bcma_warn(bus, "Couldn't find expected PMU core"); + } + if (!cc->pmu.core) + cc->pmu.core = cc->core; + + pmucap = bcma_pmu_read32(cc, BCMA_CC_PMU_CAP); cc->pmu.rev = (pmucap & BCMA_CC_PMU_CAP_REVISION); - bcma_debug(cc->core->bus, "Found rev %u PMU (capabilities 0x%08X)\n", - cc->pmu.rev, pmucap); + bcma_debug(bus, "Found rev %u PMU (capabilities 0x%08X)\n", cc->pmu.rev, + pmucap); } void bcma_pmu_init(struct bcma_drv_cc *cc) { if (cc->pmu.rev == 1) - bcma_cc_mask32(cc, BCMA_CC_PMU_CTL, - ~BCMA_CC_PMU_CTL_NOILPONW); + bcma_pmu_mask32(cc, BCMA_CC_PMU_CTL, + ~BCMA_CC_PMU_CTL_NOILPONW); else - bcma_cc_set32(cc, BCMA_CC_PMU_CTL, - BCMA_CC_PMU_CTL_NOILPONW); + bcma_pmu_set32(cc, BCMA_CC_PMU_CTL, + BCMA_CC_PMU_CTL_NOILPONW); bcma_pmu_pll_init(cc); bcma_pmu_resources_init(cc); @@ -472,8 +482,8 @@ u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc) static void bcma_pmu_spuravoid_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value) { - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset); - bcma_cc_write32(cc, BCMA_CC_PLLCTL_DATA, value); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, value); } void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid) @@ -497,20 +507,20 @@ void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid) bus->chipinfo.id == BCMA_CHIP_ID_BCM53572) ? 6 : 0; /* RMW only the P1 divider */ - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, BCMA_CC_PMU_PLL_CTL0 + phypll_offset); - tmp = bcma_cc_read32(cc, BCMA_CC_PLLCTL_DATA); + tmp = bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); tmp &= (~(BCMA_CC_PMU1_PLL0_PC0_P1DIV_MASK)); tmp |= (bcm5357_bcm43236_p1div[spuravoid] << BCMA_CC_PMU1_PLL0_PC0_P1DIV_SHIFT); - bcma_cc_write32(cc, BCMA_CC_PLLCTL_DATA, tmp); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, tmp); /* RMW only the int feedback divider */ - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, BCMA_CC_PMU_PLL_CTL2 + phypll_offset); - tmp = bcma_cc_read32(cc, BCMA_CC_PLLCTL_DATA); + tmp = bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); tmp &= ~(BCMA_CC_PMU1_PLL0_PC2_NDIV_INT_MASK); tmp |= (bcm5357_bcm43236_ndiv[spuravoid]) << BCMA_CC_PMU1_PLL0_PC2_NDIV_INT_SHIFT; - bcma_cc_write32(cc, BCMA_CC_PLLCTL_DATA, tmp); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, tmp); tmp = BCMA_CC_PMU_CTL_PLL_UPD; break; @@ -646,7 +656,7 @@ void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid) break; } - tmp |= bcma_cc_read32(cc, BCMA_CC_PMU_CTL); - bcma_cc_write32(cc, BCMA_CC_PMU_CTL, tmp); + tmp |= bcma_pmu_read32(cc, BCMA_CC_PMU_CTL); + bcma_pmu_write32(cc, BCMA_CC_PMU_CTL, tmp); } EXPORT_SYMBOL_GPL(bcma_pmu_spuravoid_pllupdate); diff --git a/drivers/bcma/driver_chipcommon_sflash.c b/drivers/bcma/driver_chipcommon_sflash.c index 7e11ef4cb7db..04d706ca5f43 100644 --- a/drivers/bcma/driver_chipcommon_sflash.c +++ b/drivers/bcma/driver_chipcommon_sflash.c @@ -38,6 +38,7 @@ static const struct bcma_sflash_tbl_e bcma_sflash_st_tbl[] = { { "M25P32", 0x15, 0x10000, 64, }, { "M25P64", 0x16, 0x10000, 128, }, { "M25FL128", 0x17, 0x10000, 256, }, + { "MX25L25635F", 0x18, 0x10000, 512, }, { NULL }, }; diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c index 98067f757fb0..771a2a253440 100644 --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c @@ -192,6 +192,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) case BCMA_CHIP_ID_BCM4707: case BCMA_CHIP_ID_BCM5357: case BCMA_CHIP_ID_BCM53572: + case BCMA_CHIP_ID_BCM47094: chip->ngpio = 32; break; default: diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 24424f3fef96..a40a203314db 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -337,12 +337,9 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore) void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { - struct bcma_bus *bus = mcore->core->bus; - if (mcore->early_setup_done) return; - bcma_chipco_serial_init(&bus->drv_cc); bcma_core_mips_flash_detect(mcore); mcore->early_setup_done = true; diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index 0856189c065f..cae5385cf499 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -294,7 +294,7 @@ static const struct pci_device_id bcma_pci_bridge_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4358) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4359) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4360) }, - { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4365) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_DELL, 0x0016) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a0) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a9) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43aa) }, diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index df806b9c5490..4a2d1b235fb5 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -98,6 +98,9 @@ static const struct bcma_device_id_name bcma_bcm_device_names[] = { { BCMA_CORE_SHIM, "SHIM" }, { BCMA_CORE_PCIE2, "PCIe Gen2" }, { BCMA_CORE_ARM_CR4, "ARM CR4" }, + { BCMA_CORE_GCI, "GCI" }, + { BCMA_CORE_CMEM, "CNDS DDR2/3 memory controller" }, + { BCMA_CORE_ARM_CA7, "ARM CA7" }, { BCMA_CORE_DEFAULT, "Default" }, }; @@ -315,6 +318,8 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, switch (core->id.id) { case BCMA_CORE_4706_MAC_GBIT_COMMON: case BCMA_CORE_NS_CHIPCOMMON_B: + case BCMA_CORE_PMU: + case BCMA_CORE_GCI: /* Not used yet: case BCMA_CORE_OOB_ROUTER: */ break; default: diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index cd2ff5f9518a..8cd7b5e4b03d 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -485,12 +485,19 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) unsigned int flowclen = 80; struct fw_flowc_wr *flowc; int i; + u16 vlan = ep->l2t->vlan; + int nparams; + + if (vlan == CPL_L2T_VLAN_NONE) + nparams = 8; + else + nparams = 9; skb = get_skb(skb, flowclen, GFP_KERNEL); flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | - FW_FLOWC_WR_NPARAMS_V(8)); + FW_FLOWC_WR_NPARAMS_V(nparams)); flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen, 16)) | FW_WR_FLOWID_V(ep->hwtid)); @@ -511,9 +518,17 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = cpu_to_be32(ep->emss); - /* Pad WR to 16 byte boundary */ - flowc->mnemval[8].mnemonic = 0; - flowc->mnemval[8].val = 0; + if (nparams == 9) { + u16 pri; + + pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; + flowc->mnemval[8].val = cpu_to_be32(pri); + } else { + /* Pad WR to 16 byte boundary */ + flowc->mnemval[8].mnemonic = 0; + flowc->mnemval[8].val = 0; + } for (i = 0; i < 9; i++) { flowc->mnemval[i].r4[0] = 0; flowc->mnemval[i].r4[1] = 0; @@ -710,7 +725,7 @@ static int send_connect(struct c4iw_ep *ep) L2T_IDX_V(ep->l2t->idx) | TX_CHAN_V(ep->tx_chan) | SMAC_SEL_V(ep->smac_idx) | - DSCP_V(ep->tos) | + DSCP_V(ep->tos >> 2) | ULP_MODE_V(ULP_MODE_TCPDDP) | RCV_BUFSIZ_V(win); opt2 = RX_CHANNEL_V(0) | @@ -1864,7 +1879,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) L2T_IDX_V(ep->l2t->idx) | TX_CHAN_V(ep->tx_chan) | SMAC_SEL_V(ep->smac_idx) | - DSCP_V(ep->tos) | + DSCP_V(ep->tos >> 2) | ULP_MODE_V(ULP_MODE_TCPDDP) | RCV_BUFSIZ_V(win)); req->tcb.opt2 = (__force __be32) (PACE_V(1) | @@ -1928,7 +1943,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, struct dst_entry *dst, struct c4iw_dev *cdev, - bool clear_mpa_v1, enum chip_type adapter_type) + bool clear_mpa_v1, enum chip_type adapter_type, u8 tos) { struct neighbour *n; int err, step; @@ -1958,7 +1973,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, - n, pdev, 0); + n, pdev, rt_tos2priority(tos)); if (!ep->l2t) goto out; ep->mtu = pdev->mtu; @@ -2041,7 +2056,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) if (ep->com.cm_id->local_addr.ss_family == AF_INET) { ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, 0); + raddr->sin_port, ep->com.cm_id->tos); iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { @@ -2058,7 +2073,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) goto fail3; } err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false, - ep->com.dev->rdev.lldi.adapter_type); + ep->com.dev->rdev.lldi.adapter_type, + ep->com.cm_id->tos); if (err) { pr_err("%s - cannot alloc l2e.\n", __func__); goto fail4; @@ -2069,7 +2085,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) ep->l2t->idx); state_set(&ep->com, CONNECTING); - ep->tos = 0; + ep->tos = ep->com.cm_id->tos; /* send connect request to rnic */ err = send_connect(ep); @@ -2391,6 +2407,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) u16 peer_mss = ntohs(req->tcpopt.mss); int iptype; unsigned short hdrs; + u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); parent_ep = lookup_stid(t, stid); if (!parent_ep) { @@ -2399,8 +2416,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) } if (state_read(&parent_ep->com) != LISTEN) { - printk(KERN_ERR "%s - listening ep not in LISTEN\n", - __func__); + PDBG("%s - listening ep not in LISTEN\n", __func__); goto reject; } @@ -2415,7 +2431,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) ntohs(peer_port), peer_mss); dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid))); + tos); } else { PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" , __func__, parent_ep, hwtid, @@ -2441,7 +2457,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) } err = import_ep(child_ep, iptype, peer_ip, dst, dev, false, - parent_ep->com.dev->rdev.lldi.adapter_type); + parent_ep->com.dev->rdev.lldi.adapter_type, tos); if (err) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -2509,7 +2525,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_get_ep(&parent_ep->com); child_ep->parent_ep = parent_ep; - child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); + child_ep->tos = tos; child_ep->dst = dst; child_ep->hwtid = hwtid; @@ -3203,7 +3219,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ra, ntohs(raddr->sin_port)); ep->dst = find_route(dev, laddr->sin_addr.s_addr, raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, 0); + raddr->sin_port, cm_id->tos); } else { iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -3234,7 +3250,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, - ep->com.dev->rdev.lldi.adapter_type); + ep->com.dev->rdev.lldi.adapter_type, cm_id->tos); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); goto fail3; @@ -3245,7 +3261,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->l2t->idx); state_set(&ep->com, CONNECTING); - ep->tos = 0; + ep->tos = cm_id->tos; /* send connect request to rnic */ err = send_connect(ep); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e99345eb875a..dadf5f1855b3 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1621,7 +1621,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, unsigned int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; - struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; + struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm; + struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL; PDBG("%s ib_pd %p\n", __func__, pd); @@ -1706,29 +1707,30 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, goto err2; if (udata) { - mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); - if (!mm1) { + sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL); + if (!sq_key_mm) { ret = -ENOMEM; goto err3; } - mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); - if (!mm2) { + rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); + if (!rq_key_mm) { ret = -ENOMEM; goto err4; } - mm3 = kmalloc(sizeof *mm3, GFP_KERNEL); - if (!mm3) { + sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); + if (!sq_db_key_mm) { ret = -ENOMEM; goto err5; } - mm4 = kmalloc(sizeof *mm4, GFP_KERNEL); - if (!mm4) { + rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); + if (!rq_db_key_mm) { ret = -ENOMEM; goto err6; } if (t4_sq_onchip(&qhp->wq.sq)) { - mm5 = kmalloc(sizeof *mm5, GFP_KERNEL); - if (!mm5) { + ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), + GFP_KERNEL); + if (!ma_sync_key_mm) { ret = -ENOMEM; goto err7; } @@ -1743,7 +1745,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, uresp.rq_size = qhp->wq.rq.size; uresp.rq_memsize = qhp->wq.rq.memsize; spin_lock(&ucontext->mmap_lock); - if (mm5) { + if (ma_sync_key_mm) { uresp.ma_sync_key = ucontext->key; ucontext->key += PAGE_SIZE; } else { @@ -1761,28 +1763,29 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) goto err8; - mm1->key = uresp.sq_key; - mm1->addr = qhp->wq.sq.phys_addr; - mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); - insert_mmap(ucontext, mm1); - mm2->key = uresp.rq_key; - mm2->addr = virt_to_phys(qhp->wq.rq.queue); - mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); - insert_mmap(ucontext, mm2); - mm3->key = uresp.sq_db_gts_key; - mm3->addr = (__force unsigned long)qhp->wq.sq.bar2_pa; - mm3->len = PAGE_SIZE; - insert_mmap(ucontext, mm3); - mm4->key = uresp.rq_db_gts_key; - mm4->addr = (__force unsigned long)qhp->wq.rq.bar2_pa; - mm4->len = PAGE_SIZE; - insert_mmap(ucontext, mm4); - if (mm5) { - mm5->key = uresp.ma_sync_key; - mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0) - + PCIE_MA_SYNC_A) & PAGE_MASK; - mm5->len = PAGE_SIZE; - insert_mmap(ucontext, mm5); + sq_key_mm->key = uresp.sq_key; + sq_key_mm->addr = qhp->wq.sq.phys_addr; + sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); + insert_mmap(ucontext, sq_key_mm); + rq_key_mm->key = uresp.rq_key; + rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); + rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, rq_key_mm); + sq_db_key_mm->key = uresp.sq_db_gts_key; + sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa; + sq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, sq_db_key_mm); + rq_db_key_mm->key = uresp.rq_db_gts_key; + rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa; + rq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, rq_db_key_mm); + if (ma_sync_key_mm) { + ma_sync_key_mm->key = uresp.ma_sync_key; + ma_sync_key_mm->addr = + (pci_resource_start(rhp->rdev.lldi.pdev, 0) + + PCIE_MA_SYNC_A) & PAGE_MASK; + ma_sync_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, ma_sync_key_mm); } } qhp->ibqp.qp_num = qhp->wq.sq.qid; @@ -1795,15 +1798,15 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->wq.rq.memsize, attrs->cap.max_recv_wr); return &qhp->ibqp; err8: - kfree(mm5); + kfree(ma_sync_key_mm); err7: - kfree(mm4); + kfree(rq_db_key_mm); err6: - kfree(mm3); + kfree(sq_db_key_mm); err5: - kfree(mm2); + kfree(rq_key_mm); err4: - kfree(mm1); + kfree(sq_key_mm); err3: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); err2: diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig index 846dc97cf260..7964eba8e7ed 100644 --- a/drivers/infiniband/hw/nes/Kconfig +++ b/drivers/infiniband/hw/nes/Kconfig @@ -2,7 +2,6 @@ config INFINIBAND_NES tristate "NetEffect RNIC Driver" depends on PCI && INET && INFINIBAND select LIBCRC32C - select INET_LRO ---help--- This is the RDMA Network Interface Card (RNIC) driver for NetEffect Ethernet Cluster Server Adapters. diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 4713dd7ed764..a1c6481d8038 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -35,18 +35,11 @@ #include <linux/moduleparam.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include <linux/ip.h> -#include <linux/tcp.h> #include <linux/if_vlan.h> -#include <linux/inet_lro.h> #include <linux/slab.h> #include "nes.h" -static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; -module_param(nes_lro_max_aggr, uint, 0444); -MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation"); - static int wide_ppm_offset; module_param(wide_ppm_offset, int, 0644); MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm"); @@ -1642,25 +1635,6 @@ static void nes_rq_wqes_timeout(unsigned long parm) } -static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr, - void **tcph, u64 *hdr_flags, void *priv) -{ - unsigned int ip_len; - struct iphdr *iph; - skb_reset_network_header(skb); - iph = ip_hdr(skb); - if (iph->protocol != IPPROTO_TCP) - return -1; - ip_len = ip_hdrlen(skb); - skb_set_transport_header(skb, ip_len); - *tcph = tcp_hdr(skb); - - *hdr_flags = LRO_IPV4 | LRO_TCP; - *iphdr = iph; - return 0; -} - - /** * nes_init_nic_qp */ @@ -1895,14 +1869,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) return -ENOMEM; } - nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr; - nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; - nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; - nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; - nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; - nesvnic->lro_mgr.dev = netdev; - nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; - nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; return 0; } @@ -2809,13 +2775,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) u16 pkt_type; u16 rqes_processed = 0; u8 sq_cqes = 0; - u8 nes_use_lro = 0; head = cq->cq_head; cq_size = cq->cq_size; cq->cqes_pending = 1; - if (nesvnic->netdev->features & NETIF_F_LRO) - nes_use_lro = 1; do { if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) & NES_NIC_CQE_VALID) { @@ -2950,10 +2913,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag); } - if (nes_use_lro) - lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); - else - netif_receive_skb(rx_skb); + napi_gro_receive(&nesvnic->napi, rx_skb); skip_rx_indicate0: ; @@ -2984,8 +2944,6 @@ skip_rx_indicate0: } while (1); - if (nes_use_lro) - lro_flush_all(&nesvnic->lro_mgr); if (sq_cqes) { barrier(); /* restart the queue if it had been stopped */ diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index c9080208aad2..1b66ef1e9937 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -33,8 +33,6 @@ #ifndef __NES_HW_H #define __NES_HW_H -#include <linux/inet_lro.h> - #define NES_PHY_TYPE_CX4 1 #define NES_PHY_TYPE_1G 2 #define NES_PHY_TYPE_ARGUS 4 @@ -1049,8 +1047,6 @@ struct nes_hw_tune_timer { #define NES_TIMER_ENABLE_LIMIT 4 #define NES_MAX_LINK_INTERRUPTS 128 #define NES_MAX_LINK_CHECK 200 -#define NES_MAX_LRO_DESCRIPTORS 32 -#define NES_LRO_MAX_AGGR 64 struct nes_adapter { u64 fw_ver; @@ -1263,9 +1259,6 @@ struct nes_vnic { u8 next_qp_nic_index; u8 of_device_registered; u8 rdma_enabled; - u32 lro_max_aggr; - struct net_lro_mgr lro_mgr; - struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS]; struct timer_list event_timer; enum ib_event_type delayed_event; enum ib_event_type last_dispatched_event; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 6a0bdfa0ce2e..3ea9e055fdd3 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1085,9 +1085,6 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { "Free 4Kpbls", "Free 256pbls", "Timer Inits", - "LRO aggregated", - "LRO flushed", - "LRO no_desc", "PAU CreateQPs", "PAU DestroyQPs", }; @@ -1302,9 +1299,6 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, target_stat_values[++index] = nesadapter->free_4kpbl; target_stat_values[++index] = nesadapter->free_256pbl; target_stat_values[++index] = int_mod_timer_init; - target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated; - target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed; - target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc; target_stat_values[++index] = atomic_read(&pau_qps_created); target_stat_values[++index] = atomic_read(&pau_qps_destroyed); } @@ -1709,7 +1703,6 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->hw_features |= NETIF_F_TSO; netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX; - netdev->hw_features |= NETIF_F_LRO; nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," " nic_index = %d, logical_port = %d, mac_index = %d.\n", diff --git a/drivers/isdn/hardware/eicon/debug.c b/drivers/isdn/hardware/eicon/debug.c index b5226af6ddec..576b7b4a3278 100644 --- a/drivers/isdn/hardware/eicon/debug.c +++ b/drivers/isdn/hardware/eicon/debug.c @@ -192,8 +192,6 @@ static diva_os_spin_lock_t dbg_q_lock; static diva_os_spin_lock_t dbg_adapter_lock; static int dbg_q_busy; static volatile dword dbg_sequence; -static dword start_sec; -static dword start_usec; /* INTERFACE: @@ -215,8 +213,6 @@ int diva_maint_init(byte *base, unsigned long length, int do_init) { dbg_base = base; - diva_os_get_time(&start_sec, &start_usec); - *(dword *)base = (dword)DBG_MAGIC; /* Store Magic */ base += sizeof(dword); length -= sizeof(dword); diff --git a/drivers/isdn/hardware/eicon/divamnt.c b/drivers/isdn/hardware/eicon/divamnt.c index 48db08d0bb3d..0de29b7b712f 100644 --- a/drivers/isdn/hardware/eicon/divamnt.c +++ b/drivers/isdn/hardware/eicon/divamnt.c @@ -45,7 +45,6 @@ char *DRIVERRELEASE_MNT = "2.0"; static wait_queue_head_t msgwaitq; static unsigned long opened; -static struct timeval start_time; extern int mntfunc_init(int *, void **, unsigned long); extern void mntfunc_finit(void); @@ -88,28 +87,12 @@ int diva_os_copy_from_user(void *os_handle, void *dst, const void __user *src, */ void diva_os_get_time(dword *sec, dword *usec) { - struct timeval tv; - - do_gettimeofday(&tv); - - if (tv.tv_sec > start_time.tv_sec) { - if (start_time.tv_usec > tv.tv_usec) { - tv.tv_sec--; - tv.tv_usec += 1000000; - } - *sec = (dword) (tv.tv_sec - start_time.tv_sec); - *usec = (dword) (tv.tv_usec - start_time.tv_usec); - } else if (tv.tv_sec == start_time.tv_sec) { - *sec = 0; - if (start_time.tv_usec < tv.tv_usec) { - *usec = (dword) (tv.tv_usec - start_time.tv_usec); - } else { - *usec = 0; - } - } else { - *sec = (dword) tv.tv_sec; - *usec = (dword) tv.tv_usec; - } + struct timespec64 time; + + ktime_get_ts64(&time); + + *sec = (dword) time.tv_sec; + *usec = (dword) (time.tv_nsec / NSEC_PER_USEC); } /* @@ -213,7 +196,6 @@ static int __init maint_init(void) int ret = 0; void *buffer = NULL; - do_gettimeofday(&start_time); init_waitqueue_head(&msgwaitq); printk(KERN_INFO "%s\n", DRIVERNAME); diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 4cbb8b27a891..b9304a295f86 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -357,6 +357,14 @@ static u8 __get_duplex(struct port *port) return retval; } +static void __ad_actor_update_port(struct port *port) +{ + const struct bonding *bond = bond_get_bond_by_slave(port->slave); + + port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; + port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority; +} + /* Conversions */ /** @@ -1963,9 +1971,7 @@ void bond_3ad_bind_slave(struct slave *slave) port->actor_admin_port_key = bond->params.ad_user_port_key << 6; ad_update_actor_keys(port, false); /* actor system is the bond's system */ - port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; - port->actor_system_priority = - BOND_AD_INFO(bond).system.sys_priority; + __ad_actor_update_port(port); /* tx timer(to verify that no more than MAX_TX_IN_SECOND * lacpdu's are sent in one second) */ @@ -2148,6 +2154,38 @@ out: } /** + * bond_3ad_update_ad_actor_settings - reflect change of actor settings to ports + * @bond: bonding struct to work on + * + * If an ad_actor setting gets changed we need to update the individual port + * settings so the bond device will use the new values when it gets upped. + */ +void bond_3ad_update_ad_actor_settings(struct bonding *bond) +{ + struct list_head *iter; + struct slave *slave; + + ASSERT_RTNL(); + + BOND_AD_INFO(bond).system.sys_priority = bond->params.ad_actor_sys_prio; + if (is_zero_ether_addr(bond->params.ad_actor_system)) + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->dev->dev_addr); + else + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->params.ad_actor_system); + + spin_lock_bh(&bond->mode_lock); + bond_for_each_slave(bond, slave, iter) { + struct port *port = &(SLAVE_AD_INFO(slave))->port; + + __ad_actor_update_port(port); + port->ntt = true; + } + spin_unlock_bh(&bond->mode_lock); +} + +/** * bond_3ad_state_machine_handler - handle state machines timeout * @bond: bonding struct to work on * diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index bb9e9fc45e1b..c5ac160a8ae9 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -159,7 +159,7 @@ static int tlb_initialize(struct bonding *bond) new_hashtbl = kzalloc(size, GFP_KERNEL); if (!new_hashtbl) - return -1; + return -ENOMEM; spin_lock_bh(&bond->mode_lock); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b7f1a9919033..a6527d5b3269 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -620,8 +620,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, static void bond_set_dev_addr(struct net_device *bond_dev, struct net_device *slave_dev) { - netdev_dbg(bond_dev, "bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n", - bond_dev, slave_dev, slave_dev->addr_len); + netdev_dbg(bond_dev, "bond_dev=%p slave_dev=%p slave_dev->name=%s slave_dev->addr_len=%d\n", + bond_dev, slave_dev, slave_dev->name, slave_dev->addr_len); memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); bond_dev->addr_assign_type = NET_ADDR_STOLEN; call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); @@ -930,11 +930,10 @@ void bond_select_active_slave(struct bonding *bond) if (!rv) return; - if (netif_carrier_ok(bond->dev)) { + if (netif_carrier_ok(bond->dev)) netdev_info(bond->dev, "first active interface up!\n"); - } else { + else netdev_info(bond->dev, "now running without any active interface!\n"); - } } } @@ -1180,9 +1179,8 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) } } - if (bond_should_deliver_exact_match(skb, slave, bond)) { + if (bond_should_deliver_exact_match(skb, slave, bond)) return RX_HANDLER_EXACT; - } skb->dev = bond->dev; @@ -1243,7 +1241,7 @@ static struct slave *bond_alloc_slave(struct bonding *bond) { struct slave *slave = NULL; - slave = kzalloc(sizeof(struct slave), GFP_KERNEL); + slave = kzalloc(sizeof(*slave), GFP_KERNEL); if (!slave) return NULL; @@ -1383,8 +1381,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (slave_dev->flags & IFF_UP) { netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n", slave_dev->name); - res = -EPERM; - goto err_undo_flags; + return -EPERM; } /* set bonding device ether type by slave - bonding netdevices are @@ -1404,8 +1401,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) res = notifier_to_errno(res); if (res) { netdev_err(bond_dev, "refused to change device type\n"); - res = -EBUSY; - goto err_undo_flags; + return -EBUSY; } /* Flush unicast and multicast addresses */ @@ -1425,8 +1421,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } else if (bond_dev->type != slave_dev->type) { netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n", slave_dev->name, slave_dev->type, bond_dev->type); - res = -EINVAL; - goto err_undo_flags; + return -EINVAL; } if (slave_ops->ndo_set_mac_address == NULL) { @@ -3327,6 +3322,7 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, stats->rx_bytes += sstats->rx_bytes - pstats->rx_bytes; stats->rx_errors += sstats->rx_errors - pstats->rx_errors; stats->rx_dropped += sstats->rx_dropped - pstats->rx_dropped; + stats->rx_nohandler += sstats->rx_nohandler - pstats->rx_nohandler; stats->tx_packets += sstats->tx_packets - pstats->tx_packets;; stats->tx_bytes += sstats->tx_bytes - pstats->tx_bytes; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 55e93b6b6d21..577e57cad1dc 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -402,7 +402,6 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .id = BOND_OPT_AD_ACTOR_SYS_PRIO, .name = "ad_actor_sys_prio", .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), - .flags = BOND_OPTFLAG_IFDOWN, .values = bond_ad_actor_sys_prio_tbl, .set = bond_option_ad_actor_sys_prio_set, }, @@ -410,7 +409,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .id = BOND_OPT_AD_ACTOR_SYSTEM, .name = "ad_actor_system", .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), - .flags = BOND_OPTFLAG_RAWVAL | BOND_OPTFLAG_IFDOWN, + .flags = BOND_OPTFLAG_RAWVAL, .set = bond_option_ad_actor_system_set, }, [BOND_OPT_AD_USER_PORT_KEY] = { @@ -1392,6 +1391,8 @@ static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, newval->value); bond->params.ad_actor_sys_prio = newval->value; + bond_3ad_update_ad_actor_settings(bond); + return 0; } @@ -1418,6 +1419,8 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, netdev_info(bond->dev, "Setting ad_actor_system to %pM\n", mac); ether_addr_copy(bond->params.ad_actor_system, mac); + bond_3ad_update_ad_actor_settings(bond); + return 0; err: diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 6d04183ed955..164ccdeca663 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -70,13 +70,6 @@ config CAN_AT91 This is a driver for the SoC CAN controller in Atmel's AT91SAM9263 and AT91SAM9X5 processors. -config CAN_TI_HECC - depends on ARM - tristate "TI High End CAN Controller" - ---help--- - Driver for TI HECC (High End CAN Controller) module found on many - TI devices. The device specifications are available from www.ti.com - config CAN_BFIN depends on BF534 || BF536 || BF537 || BF538 || BF539 || BF54x tristate "Analog Devices Blackfin on-chip CAN" @@ -86,30 +79,12 @@ config CAN_BFIN To compile this driver as a module, choose M here: the module will be called bfin_can. -config CAN_JANZ_ICAN3 - tristate "Janz VMOD-ICAN3 Intelligent CAN controller" - depends on MFD_JANZ_CMODIO - ---help--- - Driver for Janz VMOD-ICAN3 Intelligent CAN controller module, which - connects to a MODULbus carrier board. - - This driver can also be built as a module. If so, the module will be - called janz-ican3.ko. - config CAN_FLEXCAN tristate "Support for Freescale FLEXCAN based chips" depends on ARM || PPC ---help--- Say Y here if you want to support for Freescale FlexCAN. -config PCH_CAN - tristate "Intel EG20T PCH CAN controller" - depends on PCI && (X86_32 || COMPILE_TEST) - ---help--- - This driver is for PCH CAN of Topcliff (Intel EG20T PCH) which - is an IOH for x86 embedded processor (Intel Atom E6xx series). - This driver can access CAN bus. - config CAN_GRCAN tristate "Aeroflex Gaisler GRCAN and GRHCAN CAN devices" depends on OF && HAS_DMA @@ -119,6 +94,16 @@ config CAN_GRCAN endian syntheses of the cores would need some modifications on the hardware level to work. +config CAN_JANZ_ICAN3 + tristate "Janz VMOD-ICAN3 Intelligent CAN controller" + depends on MFD_JANZ_CMODIO + ---help--- + Driver for Janz VMOD-ICAN3 Intelligent CAN controller module, which + connects to a MODULbus carrier board. + + This driver can also be built as a module. If so, the module will be + called janz-ican3.ko. + config CAN_RCAR tristate "Renesas R-Car CAN controller" depends on ARM @@ -139,6 +124,13 @@ config CAN_SUN4I To compile this driver as a module, choose M here: the module will be called sun4i_can. +config CAN_TI_HECC + depends on ARM + tristate "TI High End CAN Controller" + ---help--- + Driver for TI HECC (High End CAN Controller) module found on many + TI devices. The device specifications are available from www.ti.com + config CAN_XILINXCAN tristate "Xilinx CAN" depends on ARCH_ZYNQ || ARM64 || MICROBLAZE || COMPILE_TEST @@ -147,22 +139,24 @@ config CAN_XILINXCAN Xilinx CAN driver. This driver supports both soft AXI CAN IP and Zynq CANPS IP. -source "drivers/net/can/mscan/Kconfig" - -source "drivers/net/can/sja1000/Kconfig" +config PCH_CAN + tristate "Intel EG20T PCH CAN controller" + depends on PCI && (X86_32 || COMPILE_TEST) + ---help--- + This driver is for PCH CAN of Topcliff (Intel EG20T PCH) which + is an IOH for x86 embedded processor (Intel Atom E6xx series). + This driver can access CAN bus. source "drivers/net/can/c_can/Kconfig" - -source "drivers/net/can/m_can/Kconfig" - source "drivers/net/can/cc770/Kconfig" - +source "drivers/net/can/ifi_canfd/Kconfig" +source "drivers/net/can/m_can/Kconfig" +source "drivers/net/can/mscan/Kconfig" +source "drivers/net/can/sja1000/Kconfig" +source "drivers/net/can/softing/Kconfig" source "drivers/net/can/spi/Kconfig" - source "drivers/net/can/usb/Kconfig" -source "drivers/net/can/softing/Kconfig" - endif config CAN_DEBUG_DEVICES diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index 1f21cef1d458..e3db0c807f55 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -14,21 +14,22 @@ obj-y += spi/ obj-y += usb/ obj-y += softing/ -obj-$(CONFIG_CAN_SJA1000) += sja1000/ -obj-$(CONFIG_CAN_MSCAN) += mscan/ -obj-$(CONFIG_CAN_C_CAN) += c_can/ -obj-$(CONFIG_CAN_M_CAN) += m_can/ -obj-$(CONFIG_CAN_CC770) += cc770/ obj-$(CONFIG_CAN_AT91) += at91_can.o -obj-$(CONFIG_CAN_TI_HECC) += ti_hecc.o obj-$(CONFIG_CAN_BFIN) += bfin_can.o -obj-$(CONFIG_CAN_JANZ_ICAN3) += janz-ican3.o +obj-$(CONFIG_CAN_CC770) += cc770/ +obj-$(CONFIG_CAN_C_CAN) += c_can/ obj-$(CONFIG_CAN_FLEXCAN) += flexcan.o -obj-$(CONFIG_PCH_CAN) += pch_can.o obj-$(CONFIG_CAN_GRCAN) += grcan.o +obj-$(CONFIG_CAN_IFI_CANFD) += ifi_canfd/ +obj-$(CONFIG_CAN_JANZ_ICAN3) += janz-ican3.o +obj-$(CONFIG_CAN_MSCAN) += mscan/ +obj-$(CONFIG_CAN_M_CAN) += m_can/ obj-$(CONFIG_CAN_RCAR) += rcar_can.o +obj-$(CONFIG_CAN_SJA1000) += sja1000/ obj-$(CONFIG_CAN_SUN4I) += sun4i_can.o +obj-$(CONFIG_CAN_TI_HECC) += ti_hecc.o obj-$(CONFIG_CAN_XILINXCAN) += xilinx_can.o +obj-$(CONFIG_PCH_CAN) += pch_can.o subdir-ccflags-y += -D__CHECK_ENDIAN__ subdir-ccflags-$(CONFIG_CAN_DEBUG_DEVICES) += -DDEBUG diff --git a/drivers/net/can/ifi_canfd/Kconfig b/drivers/net/can/ifi_canfd/Kconfig new file mode 100644 index 000000000000..9e8934ff63a7 --- /dev/null +++ b/drivers/net/can/ifi_canfd/Kconfig @@ -0,0 +1,8 @@ +config CAN_IFI_CANFD + depends on HAS_IOMEM + tristate "IFI CAN_FD IP" + ---help--- + This driver adds support for the I/F/I CAN_FD soft IP block + connected to the "platform bus" (Linux abstraction for directly + to the processor attached devices). The CAN_FD is most often + synthesised into an FPGA or CPLD. diff --git a/drivers/net/can/ifi_canfd/Makefile b/drivers/net/can/ifi_canfd/Makefile new file mode 100644 index 000000000000..b229960cdf39 --- /dev/null +++ b/drivers/net/can/ifi_canfd/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the IFI CANFD controller driver. +# + +obj-$(CONFIG_CAN_IFI_CANFD) += ifi_canfd.o diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c new file mode 100644 index 000000000000..0d1c164374b7 --- /dev/null +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -0,0 +1,917 @@ +/* + * CAN bus driver for IFI CANFD controller + * + * Copyright (C) 2016 Marek Vasut <marex@denx.de> + * + * Details about this controller can be found at + * http://www.ifi-pld.de/IP/CANFD/canfd.html + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <linux/can/dev.h> + +#define IFI_CANFD_STCMD 0x0 +#define IFI_CANFD_STCMD_HARDRESET 0xDEADCAFD +#define IFI_CANFD_STCMD_ENABLE BIT(0) +#define IFI_CANFD_STCMD_ERROR_ACTIVE BIT(2) +#define IFI_CANFD_STCMD_ERROR_PASSIVE BIT(3) +#define IFI_CANFD_STCMD_BUSOFF BIT(4) +#define IFI_CANFD_STCMD_BUSMONITOR BIT(16) +#define IFI_CANFD_STCMD_LOOPBACK BIT(18) +#define IFI_CANFD_STCMD_DISABLE_CANFD BIT(24) +#define IFI_CANFD_STCMD_ENABLE_ISO BIT(25) +#define IFI_CANFD_STCMD_NORMAL_MODE ((u32)BIT(31)) + +#define IFI_CANFD_RXSTCMD 0x4 +#define IFI_CANFD_RXSTCMD_REMOVE_MSG BIT(0) +#define IFI_CANFD_RXSTCMD_RESET BIT(7) +#define IFI_CANFD_RXSTCMD_EMPTY BIT(8) +#define IFI_CANFD_RXSTCMD_OVERFLOW BIT(13) + +#define IFI_CANFD_TXSTCMD 0x8 +#define IFI_CANFD_TXSTCMD_ADD_MSG BIT(0) +#define IFI_CANFD_TXSTCMD_HIGH_PRIO BIT(1) +#define IFI_CANFD_TXSTCMD_RESET BIT(7) +#define IFI_CANFD_TXSTCMD_EMPTY BIT(8) +#define IFI_CANFD_TXSTCMD_FULL BIT(12) +#define IFI_CANFD_TXSTCMD_OVERFLOW BIT(13) + +#define IFI_CANFD_INTERRUPT 0xc +#define IFI_CANFD_INTERRUPT_ERROR_WARNING ((u32)BIT(1)) +#define IFI_CANFD_INTERRUPT_TXFIFO_EMPTY BIT(16) +#define IFI_CANFD_INTERRUPT_TXFIFO_REMOVE BIT(22) +#define IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY BIT(24) +#define IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY_PER BIT(25) +#define IFI_CANFD_INTERRUPT_SET_IRQ ((u32)BIT(31)) + +#define IFI_CANFD_IRQMASK 0x10 +#define IFI_CANFD_IRQMASK_SET_ERR BIT(7) +#define IFI_CANFD_IRQMASK_SET_TS BIT(15) +#define IFI_CANFD_IRQMASK_TXFIFO_EMPTY BIT(16) +#define IFI_CANFD_IRQMASK_SET_TX BIT(23) +#define IFI_CANFD_IRQMASK_RXFIFO_NEMPTY BIT(24) +#define IFI_CANFD_IRQMASK_SET_RX ((u32)BIT(31)) + +#define IFI_CANFD_TIME 0x14 +#define IFI_CANFD_FTIME 0x18 +#define IFI_CANFD_TIME_TIMEB_OFF 0 +#define IFI_CANFD_TIME_TIMEA_OFF 8 +#define IFI_CANFD_TIME_PRESCALE_OFF 16 +#define IFI_CANFD_TIME_SJW_OFF_ISO 25 +#define IFI_CANFD_TIME_SJW_OFF_BOSCH 28 +#define IFI_CANFD_TIME_SET_SJW_BOSCH BIT(6) +#define IFI_CANFD_TIME_SET_TIMEB_BOSCH BIT(7) +#define IFI_CANFD_TIME_SET_PRESC_BOSCH BIT(14) +#define IFI_CANFD_TIME_SET_TIMEA_BOSCH BIT(15) + +#define IFI_CANFD_TDELAY 0x1c + +#define IFI_CANFD_ERROR 0x20 +#define IFI_CANFD_ERROR_TX_OFFSET 0 +#define IFI_CANFD_ERROR_TX_MASK 0xff +#define IFI_CANFD_ERROR_RX_OFFSET 16 +#define IFI_CANFD_ERROR_RX_MASK 0xff + +#define IFI_CANFD_ERRCNT 0x24 + +#define IFI_CANFD_SUSPEND 0x28 + +#define IFI_CANFD_REPEAT 0x2c + +#define IFI_CANFD_TRAFFIC 0x30 + +#define IFI_CANFD_TSCONTROL 0x34 + +#define IFI_CANFD_TSC 0x38 + +#define IFI_CANFD_TST 0x3c + +#define IFI_CANFD_RES1 0x40 + +#define IFI_CANFD_RES2 0x44 + +#define IFI_CANFD_PAR 0x48 + +#define IFI_CANFD_CANCLOCK 0x4c + +#define IFI_CANFD_SYSCLOCK 0x50 + +#define IFI_CANFD_VER 0x54 + +#define IFI_CANFD_IP_ID 0x58 +#define IFI_CANFD_IP_ID_VALUE 0xD073CAFD + +#define IFI_CANFD_TEST 0x5c + +#define IFI_CANFD_RXFIFO_TS_63_32 0x60 + +#define IFI_CANFD_RXFIFO_TS_31_0 0x64 + +#define IFI_CANFD_RXFIFO_DLC 0x68 +#define IFI_CANFD_RXFIFO_DLC_DLC_OFFSET 0 +#define IFI_CANFD_RXFIFO_DLC_DLC_MASK 0xf +#define IFI_CANFD_RXFIFO_DLC_RTR BIT(4) +#define IFI_CANFD_RXFIFO_DLC_EDL BIT(5) +#define IFI_CANFD_RXFIFO_DLC_BRS BIT(6) +#define IFI_CANFD_RXFIFO_DLC_ESI BIT(7) +#define IFI_CANFD_RXFIFO_DLC_OBJ_OFFSET 8 +#define IFI_CANFD_RXFIFO_DLC_OBJ_MASK 0x1ff +#define IFI_CANFD_RXFIFO_DLC_FNR_OFFSET 24 +#define IFI_CANFD_RXFIFO_DLC_FNR_MASK 0xff + +#define IFI_CANFD_RXFIFO_ID 0x6c +#define IFI_CANFD_RXFIFO_ID_ID_OFFSET 0 +#define IFI_CANFD_RXFIFO_ID_ID_STD_MASK 0x3ff +#define IFI_CANFD_RXFIFO_ID_ID_XTD_MASK 0x1fffffff +#define IFI_CANFD_RXFIFO_ID_IDE BIT(29) + +#define IFI_CANFD_RXFIFO_DATA 0x70 /* 0x70..0xac */ + +#define IFI_CANFD_TXFIFO_SUSPEND_US 0xb0 + +#define IFI_CANFD_TXFIFO_REPEATCOUNT 0xb4 + +#define IFI_CANFD_TXFIFO_DLC 0xb8 +#define IFI_CANFD_TXFIFO_DLC_DLC_OFFSET 0 +#define IFI_CANFD_TXFIFO_DLC_DLC_MASK 0xf +#define IFI_CANFD_TXFIFO_DLC_RTR BIT(4) +#define IFI_CANFD_TXFIFO_DLC_EDL BIT(5) +#define IFI_CANFD_TXFIFO_DLC_BRS BIT(6) +#define IFI_CANFD_TXFIFO_DLC_FNR_OFFSET 24 +#define IFI_CANFD_TXFIFO_DLC_FNR_MASK 0xff + +#define IFI_CANFD_TXFIFO_ID 0xbc +#define IFI_CANFD_TXFIFO_ID_ID_OFFSET 0 +#define IFI_CANFD_TXFIFO_ID_ID_STD_MASK 0x3ff +#define IFI_CANFD_TXFIFO_ID_ID_XTD_MASK 0x1fffffff +#define IFI_CANFD_TXFIFO_ID_IDE BIT(29) + +#define IFI_CANFD_TXFIFO_DATA 0xc0 /* 0xb0..0xfc */ + +#define IFI_CANFD_FILTER_MASK(n) (0x800 + ((n) * 8) + 0) +#define IFI_CANFD_FILTER_MASK_EXT BIT(29) +#define IFI_CANFD_FILTER_MASK_EDL BIT(30) +#define IFI_CANFD_FILTER_MASK_VALID ((u32)BIT(31)) + +#define IFI_CANFD_FILTER_IDENT(n) (0x800 + ((n) * 8) + 4) +#define IFI_CANFD_FILTER_IDENT_IDE BIT(29) +#define IFI_CANFD_FILTER_IDENT_CANFD BIT(30) +#define IFI_CANFD_FILTER_IDENT_VALID ((u32)BIT(31)) + +/* IFI CANFD private data structure */ +struct ifi_canfd_priv { + struct can_priv can; /* must be the first member */ + struct napi_struct napi; + struct net_device *ndev; + void __iomem *base; +}; + +static void ifi_canfd_irq_enable(struct net_device *ndev, bool enable) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + u32 enirq = 0; + + if (enable) { + enirq = IFI_CANFD_IRQMASK_TXFIFO_EMPTY | + IFI_CANFD_IRQMASK_RXFIFO_NEMPTY; + } + + writel(IFI_CANFD_IRQMASK_SET_ERR | + IFI_CANFD_IRQMASK_SET_TS | + IFI_CANFD_IRQMASK_SET_TX | + IFI_CANFD_IRQMASK_SET_RX | enirq, + priv->base + IFI_CANFD_IRQMASK); +} + +static void ifi_canfd_read_fifo(struct net_device *ndev) +{ + struct net_device_stats *stats = &ndev->stats; + struct ifi_canfd_priv *priv = netdev_priv(ndev); + struct canfd_frame *cf; + struct sk_buff *skb; + const u32 rx_irq_mask = IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY | + IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY_PER; + u32 rxdlc, rxid; + u32 dlc, id; + int i; + + rxdlc = readl(priv->base + IFI_CANFD_RXFIFO_DLC); + if (rxdlc & IFI_CANFD_RXFIFO_DLC_EDL) + skb = alloc_canfd_skb(ndev, &cf); + else + skb = alloc_can_skb(ndev, (struct can_frame **)&cf); + + if (!skb) { + stats->rx_dropped++; + return; + } + + dlc = (rxdlc >> IFI_CANFD_RXFIFO_DLC_DLC_OFFSET) & + IFI_CANFD_RXFIFO_DLC_DLC_MASK; + if (rxdlc & IFI_CANFD_RXFIFO_DLC_EDL) + cf->len = can_dlc2len(dlc); + else + cf->len = get_can_dlc(dlc); + + rxid = readl(priv->base + IFI_CANFD_RXFIFO_ID); + id = (rxid >> IFI_CANFD_RXFIFO_ID_ID_OFFSET); + if (id & IFI_CANFD_RXFIFO_ID_IDE) + id &= IFI_CANFD_RXFIFO_ID_ID_XTD_MASK; + else + id &= IFI_CANFD_RXFIFO_ID_ID_STD_MASK; + cf->can_id = id; + + if (rxdlc & IFI_CANFD_RXFIFO_DLC_ESI) { + cf->flags |= CANFD_ESI; + netdev_dbg(ndev, "ESI Error\n"); + } + + if (!(rxdlc & IFI_CANFD_RXFIFO_DLC_EDL) && + (rxdlc & IFI_CANFD_RXFIFO_DLC_RTR)) { + cf->can_id |= CAN_RTR_FLAG; + } else { + if (rxdlc & IFI_CANFD_RXFIFO_DLC_BRS) + cf->flags |= CANFD_BRS; + + for (i = 0; i < cf->len; i += 4) { + *(u32 *)(cf->data + i) = + readl(priv->base + IFI_CANFD_RXFIFO_DATA + i); + } + } + + /* Remove the packet from FIFO */ + writel(IFI_CANFD_RXSTCMD_REMOVE_MSG, priv->base + IFI_CANFD_RXSTCMD); + writel(rx_irq_mask, priv->base + IFI_CANFD_INTERRUPT); + + stats->rx_packets++; + stats->rx_bytes += cf->len; + + netif_receive_skb(skb); +} + +static int ifi_canfd_do_rx_poll(struct net_device *ndev, int quota) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + u32 pkts = 0; + u32 rxst; + + rxst = readl(priv->base + IFI_CANFD_RXSTCMD); + if (rxst & IFI_CANFD_RXSTCMD_EMPTY) { + netdev_dbg(ndev, "No messages in RX FIFO\n"); + return 0; + } + + for (;;) { + if (rxst & IFI_CANFD_RXSTCMD_EMPTY) + break; + if (quota <= 0) + break; + + ifi_canfd_read_fifo(ndev); + quota--; + pkts++; + rxst = readl(priv->base + IFI_CANFD_RXSTCMD); + } + + if (pkts) + can_led_event(ndev, CAN_LED_EVENT_RX); + + return pkts; +} + +static int ifi_canfd_handle_lost_msg(struct net_device *ndev) +{ + struct net_device_stats *stats = &ndev->stats; + struct sk_buff *skb; + struct can_frame *frame; + + netdev_err(ndev, "RX FIFO overflow, message(s) lost.\n"); + + stats->rx_errors++; + stats->rx_over_errors++; + + skb = alloc_can_err_skb(ndev, &frame); + if (unlikely(!skb)) + return 0; + + frame->can_id |= CAN_ERR_CRTL; + frame->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; + + netif_receive_skb(skb); + + return 1; +} + +static int ifi_canfd_get_berr_counter(const struct net_device *ndev, + struct can_berr_counter *bec) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + u32 err; + + err = readl(priv->base + IFI_CANFD_ERROR); + bec->rxerr = (err >> IFI_CANFD_ERROR_RX_OFFSET) & + IFI_CANFD_ERROR_RX_MASK; + bec->txerr = (err >> IFI_CANFD_ERROR_TX_OFFSET) & + IFI_CANFD_ERROR_TX_MASK; + + return 0; +} + +static int ifi_canfd_handle_state_change(struct net_device *ndev, + enum can_state new_state) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + struct can_frame *cf; + struct sk_buff *skb; + struct can_berr_counter bec; + + switch (new_state) { + case CAN_STATE_ERROR_ACTIVE: + /* error warning state */ + priv->can.can_stats.error_warning++; + priv->can.state = CAN_STATE_ERROR_WARNING; + break; + case CAN_STATE_ERROR_PASSIVE: + /* error passive state */ + priv->can.can_stats.error_passive++; + priv->can.state = CAN_STATE_ERROR_PASSIVE; + break; + case CAN_STATE_BUS_OFF: + /* bus-off state */ + priv->can.state = CAN_STATE_BUS_OFF; + ifi_canfd_irq_enable(ndev, 0); + priv->can.can_stats.bus_off++; + can_bus_off(ndev); + break; + default: + break; + } + + /* propagate the error condition to the CAN stack */ + skb = alloc_can_err_skb(ndev, &cf); + if (unlikely(!skb)) + return 0; + + ifi_canfd_get_berr_counter(ndev, &bec); + + switch (new_state) { + case CAN_STATE_ERROR_ACTIVE: + /* error warning state */ + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = (bec.txerr > bec.rxerr) ? + CAN_ERR_CRTL_TX_WARNING : + CAN_ERR_CRTL_RX_WARNING; + cf->data[6] = bec.txerr; + cf->data[7] = bec.rxerr; + break; + case CAN_STATE_ERROR_PASSIVE: + /* error passive state */ + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE; + if (bec.txerr > 127) + cf->data[1] |= CAN_ERR_CRTL_TX_PASSIVE; + cf->data[6] = bec.txerr; + cf->data[7] = bec.rxerr; + break; + case CAN_STATE_BUS_OFF: + /* bus-off state */ + cf->can_id |= CAN_ERR_BUSOFF; + break; + default: + break; + } + + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; + netif_receive_skb(skb); + + return 1; +} + +static int ifi_canfd_handle_state_errors(struct net_device *ndev, u32 stcmd) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + int work_done = 0; + u32 isr; + + /* + * The ErrWarn condition is a little special, since the bit is + * located in the INTERRUPT register instead of STCMD register. + */ + isr = readl(priv->base + IFI_CANFD_INTERRUPT); + if ((isr & IFI_CANFD_INTERRUPT_ERROR_WARNING) && + (priv->can.state != CAN_STATE_ERROR_WARNING)) { + /* Clear the interrupt */ + writel(IFI_CANFD_INTERRUPT_ERROR_WARNING, + priv->base + IFI_CANFD_INTERRUPT); + netdev_dbg(ndev, "Error, entered warning state\n"); + work_done += ifi_canfd_handle_state_change(ndev, + CAN_STATE_ERROR_WARNING); + } + + if ((stcmd & IFI_CANFD_STCMD_ERROR_PASSIVE) && + (priv->can.state != CAN_STATE_ERROR_PASSIVE)) { + netdev_dbg(ndev, "Error, entered passive state\n"); + work_done += ifi_canfd_handle_state_change(ndev, + CAN_STATE_ERROR_PASSIVE); + } + + if ((stcmd & IFI_CANFD_STCMD_BUSOFF) && + (priv->can.state != CAN_STATE_BUS_OFF)) { + netdev_dbg(ndev, "Error, entered bus-off state\n"); + work_done += ifi_canfd_handle_state_change(ndev, + CAN_STATE_BUS_OFF); + } + + return work_done; +} + +static int ifi_canfd_poll(struct napi_struct *napi, int quota) +{ + struct net_device *ndev = napi->dev; + struct ifi_canfd_priv *priv = netdev_priv(ndev); + const u32 stcmd_state_mask = IFI_CANFD_STCMD_ERROR_PASSIVE | + IFI_CANFD_STCMD_BUSOFF; + int work_done = 0; + + u32 stcmd = readl(priv->base + IFI_CANFD_STCMD); + u32 rxstcmd = readl(priv->base + IFI_CANFD_STCMD); + + /* Handle bus state changes */ + if ((stcmd & stcmd_state_mask) || + ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) == 0)) + work_done += ifi_canfd_handle_state_errors(ndev, stcmd); + + /* Handle lost messages on RX */ + if (rxstcmd & IFI_CANFD_RXSTCMD_OVERFLOW) + work_done += ifi_canfd_handle_lost_msg(ndev); + + /* Handle normal messages on RX */ + if (!(rxstcmd & IFI_CANFD_RXSTCMD_EMPTY)) + work_done += ifi_canfd_do_rx_poll(ndev, quota - work_done); + + if (work_done < quota) { + napi_complete(napi); + ifi_canfd_irq_enable(ndev, 1); + } + + return work_done; +} + +static irqreturn_t ifi_canfd_isr(int irq, void *dev_id) +{ + struct net_device *ndev = (struct net_device *)dev_id; + struct ifi_canfd_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + const u32 rx_irq_mask = IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY | + IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY_PER; + const u32 tx_irq_mask = IFI_CANFD_INTERRUPT_TXFIFO_EMPTY | + IFI_CANFD_INTERRUPT_TXFIFO_REMOVE; + const u32 clr_irq_mask = ~(IFI_CANFD_INTERRUPT_SET_IRQ | + IFI_CANFD_INTERRUPT_ERROR_WARNING); + u32 isr; + + isr = readl(priv->base + IFI_CANFD_INTERRUPT); + + /* No interrupt */ + if (isr == 0) + return IRQ_NONE; + + /* Clear all pending interrupts but ErrWarn */ + writel(clr_irq_mask, priv->base + IFI_CANFD_INTERRUPT); + + /* RX IRQ, start NAPI */ + if (isr & rx_irq_mask) { + ifi_canfd_irq_enable(ndev, 0); + napi_schedule(&priv->napi); + } + + /* TX IRQ */ + if (isr & tx_irq_mask) { + stats->tx_bytes += can_get_echo_skb(ndev, 0); + stats->tx_packets++; + can_led_event(ndev, CAN_LED_EVENT_TX); + netif_wake_queue(ndev); + } + + return IRQ_HANDLED; +} + +static const struct can_bittiming_const ifi_canfd_bittiming_const = { + .name = KBUILD_MODNAME, + .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ + .tseg1_max = 64, + .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_max = 16, + .sjw_max = 16, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + +static const struct can_bittiming_const ifi_canfd_data_bittiming_const = { + .name = KBUILD_MODNAME, + .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ + .tseg1_max = 16, + .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 32, + .brp_inc = 1, +}; + +static void ifi_canfd_set_bittiming(struct net_device *ndev) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + const struct can_bittiming *bt = &priv->can.bittiming; + const struct can_bittiming *dbt = &priv->can.data_bittiming; + u16 brp, sjw, tseg1, tseg2; + u32 noniso_arg = 0; + u32 time_off; + + if (priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO) { + noniso_arg = IFI_CANFD_TIME_SET_TIMEB_BOSCH | + IFI_CANFD_TIME_SET_TIMEA_BOSCH | + IFI_CANFD_TIME_SET_PRESC_BOSCH | + IFI_CANFD_TIME_SET_SJW_BOSCH; + time_off = IFI_CANFD_TIME_SJW_OFF_BOSCH; + } else { + time_off = IFI_CANFD_TIME_SJW_OFF_ISO; + } + + /* Configure bit timing */ + brp = bt->brp - 1; + sjw = bt->sjw - 1; + tseg1 = bt->prop_seg + bt->phase_seg1 - 1; + tseg2 = bt->phase_seg2 - 1; + writel((tseg2 << IFI_CANFD_TIME_TIMEB_OFF) | + (tseg1 << IFI_CANFD_TIME_TIMEA_OFF) | + (brp << IFI_CANFD_TIME_PRESCALE_OFF) | + (sjw << time_off), + priv->base + IFI_CANFD_TIME); + + /* Configure data bit timing */ + brp = dbt->brp - 1; + sjw = dbt->sjw - 1; + tseg1 = dbt->prop_seg + dbt->phase_seg1 - 1; + tseg2 = dbt->phase_seg2 - 1; + writel((tseg2 << IFI_CANFD_TIME_TIMEB_OFF) | + (tseg1 << IFI_CANFD_TIME_TIMEA_OFF) | + (brp << IFI_CANFD_TIME_PRESCALE_OFF) | + (sjw << time_off) | + noniso_arg, + priv->base + IFI_CANFD_FTIME); +} + +static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id, + const u32 mask, const u32 ident) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + + writel(mask, priv->base + IFI_CANFD_FILTER_MASK(id)); + writel(ident, priv->base + IFI_CANFD_FILTER_IDENT(id)); +} + +static void ifi_canfd_set_filters(struct net_device *ndev) +{ + /* Receive all CAN frames (standard ID) */ + ifi_canfd_set_filter(ndev, 0, + IFI_CANFD_FILTER_MASK_VALID | + IFI_CANFD_FILTER_MASK_EXT, + IFI_CANFD_FILTER_IDENT_VALID); + + /* Receive all CAN frames (extended ID) */ + ifi_canfd_set_filter(ndev, 1, + IFI_CANFD_FILTER_MASK_VALID | + IFI_CANFD_FILTER_MASK_EXT, + IFI_CANFD_FILTER_IDENT_VALID | + IFI_CANFD_FILTER_IDENT_IDE); + + /* Receive all CANFD frames */ + ifi_canfd_set_filter(ndev, 2, + IFI_CANFD_FILTER_MASK_VALID | + IFI_CANFD_FILTER_MASK_EDL | + IFI_CANFD_FILTER_MASK_EXT, + IFI_CANFD_FILTER_IDENT_VALID | + IFI_CANFD_FILTER_IDENT_CANFD | + IFI_CANFD_FILTER_IDENT_IDE); +} + +static void ifi_canfd_start(struct net_device *ndev) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + u32 stcmd; + + /* Reset the IP */ + writel(IFI_CANFD_STCMD_HARDRESET, priv->base + IFI_CANFD_STCMD); + writel(0, priv->base + IFI_CANFD_STCMD); + + ifi_canfd_set_bittiming(ndev); + ifi_canfd_set_filters(ndev); + + /* Reset FIFOs */ + writel(IFI_CANFD_RXSTCMD_RESET, priv->base + IFI_CANFD_RXSTCMD); + writel(0, priv->base + IFI_CANFD_RXSTCMD); + writel(IFI_CANFD_TXSTCMD_RESET, priv->base + IFI_CANFD_TXSTCMD); + writel(0, priv->base + IFI_CANFD_TXSTCMD); + + /* Repeat transmission until successful */ + writel(0, priv->base + IFI_CANFD_REPEAT); + writel(0, priv->base + IFI_CANFD_SUSPEND); + + /* Clear all pending interrupts */ + writel((u32)(~IFI_CANFD_INTERRUPT_SET_IRQ), + priv->base + IFI_CANFD_INTERRUPT); + + stcmd = IFI_CANFD_STCMD_ENABLE | IFI_CANFD_STCMD_NORMAL_MODE; + + if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) + stcmd |= IFI_CANFD_STCMD_BUSMONITOR; + + if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) + stcmd |= IFI_CANFD_STCMD_LOOPBACK; + + if (priv->can.ctrlmode & CAN_CTRLMODE_FD) + stcmd |= IFI_CANFD_STCMD_ENABLE_ISO; + + if (!(priv->can.ctrlmode & (CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO))) + stcmd |= IFI_CANFD_STCMD_DISABLE_CANFD; + + priv->can.state = CAN_STATE_ERROR_ACTIVE; + + ifi_canfd_irq_enable(ndev, 1); + + /* Enable controller */ + writel(stcmd, priv->base + IFI_CANFD_STCMD); +} + +static void ifi_canfd_stop(struct net_device *ndev) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + + /* Reset the IP */ + writel(IFI_CANFD_STCMD_HARDRESET, priv->base + IFI_CANFD_STCMD); + + /* Mask all interrupts */ + writel(~0, priv->base + IFI_CANFD_IRQMASK); + + /* Clear all pending interrupts */ + writel((u32)(~IFI_CANFD_INTERRUPT_SET_IRQ), + priv->base + IFI_CANFD_INTERRUPT); + + /* Set the state as STOPPED */ + priv->can.state = CAN_STATE_STOPPED; +} + +static int ifi_canfd_set_mode(struct net_device *ndev, enum can_mode mode) +{ + switch (mode) { + case CAN_MODE_START: + ifi_canfd_start(ndev); + netif_wake_queue(ndev); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int ifi_canfd_open(struct net_device *ndev) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + int ret; + + ret = open_candev(ndev); + if (ret) { + netdev_err(ndev, "Failed to open CAN device\n"); + return ret; + } + + /* Register interrupt handler */ + ret = request_irq(ndev->irq, ifi_canfd_isr, IRQF_SHARED, + ndev->name, ndev); + if (ret < 0) { + netdev_err(ndev, "Failed to request interrupt\n"); + goto err_irq; + } + + ifi_canfd_start(ndev); + + can_led_event(ndev, CAN_LED_EVENT_OPEN); + napi_enable(&priv->napi); + netif_start_queue(ndev); + + return 0; +err_irq: + close_candev(ndev); + return ret; +} + +static int ifi_canfd_close(struct net_device *ndev) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + + netif_stop_queue(ndev); + napi_disable(&priv->napi); + + ifi_canfd_stop(ndev); + + free_irq(ndev->irq, ndev); + + close_candev(ndev); + + can_led_event(ndev, CAN_LED_EVENT_STOP); + + return 0; +} + +static netdev_tx_t ifi_canfd_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + struct ifi_canfd_priv *priv = netdev_priv(ndev); + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + u32 txst, txid; + u32 txdlc = 0; + int i; + + if (can_dropped_invalid_skb(ndev, skb)) + return NETDEV_TX_OK; + + /* Check if the TX buffer is full */ + txst = readl(priv->base + IFI_CANFD_TXSTCMD); + if (txst & IFI_CANFD_TXSTCMD_FULL) { + netif_stop_queue(ndev); + netdev_err(ndev, "BUG! TX FIFO full when queue awake!\n"); + return NETDEV_TX_BUSY; + } + + netif_stop_queue(ndev); + + if (cf->can_id & CAN_EFF_FLAG) { + txid = cf->can_id & CAN_EFF_MASK; + txid |= IFI_CANFD_TXFIFO_ID_IDE; + } else { + txid = cf->can_id & CAN_SFF_MASK; + } + + if (priv->can.ctrlmode & (CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO)) { + if (can_is_canfd_skb(skb)) { + txdlc |= IFI_CANFD_TXFIFO_DLC_EDL; + if (cf->flags & CANFD_BRS) + txdlc |= IFI_CANFD_TXFIFO_DLC_BRS; + } + } + + if (cf->can_id & CAN_RTR_FLAG) + txdlc |= IFI_CANFD_TXFIFO_DLC_RTR; + + /* message ram configuration */ + writel(txid, priv->base + IFI_CANFD_TXFIFO_ID); + writel(txdlc, priv->base + IFI_CANFD_TXFIFO_DLC); + + for (i = 0; i < cf->len; i += 4) { + writel(*(u32 *)(cf->data + i), + priv->base + IFI_CANFD_TXFIFO_DATA + i); + } + + writel(0, priv->base + IFI_CANFD_TXFIFO_REPEATCOUNT); + writel(0, priv->base + IFI_CANFD_TXFIFO_SUSPEND_US); + + can_put_echo_skb(skb, ndev, 0); + + /* Start the transmission */ + writel(IFI_CANFD_TXSTCMD_ADD_MSG, priv->base + IFI_CANFD_TXSTCMD); + + return NETDEV_TX_OK; +} + +static const struct net_device_ops ifi_canfd_netdev_ops = { + .ndo_open = ifi_canfd_open, + .ndo_stop = ifi_canfd_close, + .ndo_start_xmit = ifi_canfd_start_xmit, + .ndo_change_mtu = can_change_mtu, +}; + +static int ifi_canfd_plat_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct net_device *ndev; + struct ifi_canfd_priv *priv; + struct resource *res; + void __iomem *addr; + int irq, ret; + u32 id; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + addr = devm_ioremap_resource(dev, res); + irq = platform_get_irq(pdev, 0); + if (IS_ERR(addr) || irq < 0) + return -EINVAL; + + id = readl(addr + IFI_CANFD_IP_ID); + if (id != IFI_CANFD_IP_ID_VALUE) { + dev_err(dev, "This block is not IFI CANFD, id=%08x\n", id); + return -EINVAL; + } + + ndev = alloc_candev(sizeof(*priv), 1); + if (!ndev) + return -ENOMEM; + + ndev->irq = irq; + ndev->flags |= IFF_ECHO; /* we support local echo */ + ndev->netdev_ops = &ifi_canfd_netdev_ops; + + priv = netdev_priv(ndev); + priv->ndev = ndev; + priv->base = addr; + + netif_napi_add(ndev, &priv->napi, ifi_canfd_poll, 64); + + priv->can.state = CAN_STATE_STOPPED; + + priv->can.clock.freq = readl(addr + IFI_CANFD_SYSCLOCK); + + priv->can.bittiming_const = &ifi_canfd_bittiming_const; + priv->can.data_bittiming_const = &ifi_canfd_data_bittiming_const; + priv->can.do_set_mode = ifi_canfd_set_mode; + priv->can.do_get_berr_counter = ifi_canfd_get_berr_counter; + + /* IFI CANFD can do both Bosch FD and ISO FD */ + priv->can.ctrlmode = CAN_CTRLMODE_FD; + + /* IFI CANFD can do both Bosch FD and ISO FD */ + priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | + CAN_CTRLMODE_LISTENONLY | + CAN_CTRLMODE_FD | + CAN_CTRLMODE_FD_NON_ISO; + + platform_set_drvdata(pdev, ndev); + SET_NETDEV_DEV(ndev, dev); + + ret = register_candev(ndev); + if (ret) { + dev_err(dev, "Failed to register (ret=%d)\n", ret); + goto err_reg; + } + + devm_can_led_init(ndev); + + dev_info(dev, "Driver registered: regs=%p, irq=%d, clock=%d\n", + priv->base, ndev->irq, priv->can.clock.freq); + + return 0; + +err_reg: + free_candev(ndev); + return ret; +} + +static int ifi_canfd_plat_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + + unregister_candev(ndev); + platform_set_drvdata(pdev, NULL); + free_candev(ndev); + + return 0; +} + +static const struct of_device_id ifi_canfd_of_table[] = { + { .compatible = "ifi,canfd-1.0", .data = NULL }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, ifi_canfd_of_table); + +static struct platform_driver ifi_canfd_plat_driver = { + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = ifi_canfd_of_table, + }, + .probe = ifi_canfd_plat_probe, + .remove = ifi_canfd_plat_remove, +}; + +module_platform_driver(ifi_canfd_plat_driver); + +MODULE_AUTHOR("Marek Vasut <marex@denx.de>"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CAN bus driver for IFI CANFD controller"); diff --git a/drivers/net/can/sja1000/sja1000_platform.c b/drivers/net/can/sja1000/sja1000_platform.c index 0552ed46a206..dc9c6db96c3c 100644 --- a/drivers/net/can/sja1000/sja1000_platform.c +++ b/drivers/net/can/sja1000/sja1000_platform.c @@ -27,6 +27,7 @@ #include <linux/can/platform/sja1000.h> #include <linux/io.h> #include <linux/of.h> +#include <linux/of_device.h> #include <linux/of_irq.h> #include "sja1000.h" @@ -40,6 +41,15 @@ MODULE_DESCRIPTION("Socket-CAN driver for SJA1000 on the platform bus"); MODULE_ALIAS("platform:" DRV_NAME); MODULE_LICENSE("GPL v2"); +struct sja1000_of_data { + size_t priv_sz; + int (*init)(struct sja1000_priv *priv, struct device_node *of); +}; + +struct technologic_priv { + spinlock_t io_lock; +}; + static u8 sp_read_reg8(const struct sja1000_priv *priv, int reg) { return ioread8(priv->reg_base + reg); @@ -70,6 +80,43 @@ static void sp_write_reg32(const struct sja1000_priv *priv, int reg, u8 val) iowrite8(val, priv->reg_base + reg * 4); } +static u8 sp_technologic_read_reg16(const struct sja1000_priv *priv, int reg) +{ + struct technologic_priv *tp = priv->priv; + unsigned long flags; + u8 val; + + spin_lock_irqsave(&tp->io_lock, flags); + iowrite16(reg, priv->reg_base + 0); + val = ioread16(priv->reg_base + 2); + spin_unlock_irqrestore(&tp->io_lock, flags); + + return val; +} + +static void sp_technologic_write_reg16(const struct sja1000_priv *priv, + int reg, u8 val) +{ + struct technologic_priv *tp = priv->priv; + unsigned long flags; + + spin_lock_irqsave(&tp->io_lock, flags); + iowrite16(reg, priv->reg_base + 0); + iowrite16(val, priv->reg_base + 2); + spin_unlock_irqrestore(&tp->io_lock, flags); +} + +static int sp_technologic_init(struct sja1000_priv *priv, struct device_node *of) +{ + struct technologic_priv *tp = priv->priv; + + priv->read_reg = sp_technologic_read_reg16; + priv->write_reg = sp_technologic_write_reg16; + spin_lock_init(&tp->io_lock); + + return 0; +} + static void sp_populate(struct sja1000_priv *priv, struct sja1000_platform_data *pdata, unsigned long resource_mem_flags) @@ -154,6 +201,18 @@ static void sp_populate_of(struct sja1000_priv *priv, struct device_node *of) priv->cdr |= CDR_CBP; /* default */ } +static struct sja1000_of_data technologic_data = { + .priv_sz = sizeof(struct technologic_priv), + .init = sp_technologic_init, +}; + +static const struct of_device_id sp_of_table[] = { + { .compatible = "nxp,sja1000", .data = NULL, }, + { .compatible = "technologic,sja1000", .data = &technologic_data, }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, sp_of_table); + static int sp_probe(struct platform_device *pdev) { int err, irq = 0; @@ -163,6 +222,9 @@ static int sp_probe(struct platform_device *pdev) struct resource *res_mem, *res_irq = NULL; struct sja1000_platform_data *pdata; struct device_node *of = pdev->dev.of_node; + const struct of_device_id *of_id; + const struct sja1000_of_data *of_data = NULL; + size_t priv_sz = 0; pdata = dev_get_platdata(&pdev->dev); if (!pdata && !of) { @@ -191,7 +253,13 @@ static int sp_probe(struct platform_device *pdev) if (!irq && !res_irq) return -ENODEV; - dev = alloc_sja1000dev(0); + of_id = of_match_device(sp_of_table, &pdev->dev); + if (of_id && of_id->data) { + of_data = of_id->data; + priv_sz = of_data->priv_sz; + } + + dev = alloc_sja1000dev(priv_sz); if (!dev) return -ENOMEM; priv = netdev_priv(dev); @@ -208,10 +276,17 @@ static int sp_probe(struct platform_device *pdev) dev->irq = irq; priv->reg_base = addr; - if (of) + if (of) { sp_populate_of(priv, of); - else + + if (of_data && of_data->init) { + err = of_data->init(priv, of); + if (err) + goto exit_free; + } + } else { sp_populate(priv, pdata, res_mem->flags); + } platform_set_drvdata(pdev, dev); SET_NETDEV_DEV(dev, &pdev->dev); @@ -242,12 +317,6 @@ static int sp_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id sp_of_table[] = { - {.compatible = "nxp,sja1000"}, - {}, -}; -MODULE_DEVICE_TABLE(of, sp_of_table); - static struct platform_driver sp_driver = { .probe = sp_probe, .remove = sp_remove, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index b6fa89102526..bbef95973c27 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -6,7 +6,7 @@ * * License 1: GPLv2 * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ * * License 2: Modified BSD * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -768,12 +768,16 @@ #define MTL_Q_TQDR 0x08 #define MTL_Q_RQOMR 0x40 #define MTL_Q_RQMPOCR 0x44 -#define MTL_Q_RQDR 0x4c +#define MTL_Q_RQDR 0x48 #define MTL_Q_RQFCR 0x50 #define MTL_Q_IER 0x70 #define MTL_Q_ISR 0x74 /* MTL queue register entry bit positions and sizes */ +#define MTL_Q_RQDR_PRXQ_INDEX 16 +#define MTL_Q_RQDR_PRXQ_WIDTH 14 +#define MTL_Q_RQDR_RXQSTS_INDEX 4 +#define MTL_Q_RQDR_RXQSTS_WIDTH 2 #define MTL_Q_RQFCR_RFA_INDEX 1 #define MTL_Q_RQFCR_RFA_WIDTH 6 #define MTL_Q_RQFCR_RFD_INDEX 17 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c b/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c index a6b9899e285f..895d35639129 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c @@ -6,7 +6,7 @@ * * License 1: GPLv2 * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ * * License 2: Modified BSD * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -146,6 +146,7 @@ static int xgbe_dcb_ieee_setets(struct net_device *netdev, { struct xgbe_prv_data *pdata = netdev_priv(netdev); unsigned int i, tc_ets, tc_ets_weight; + u8 max_tc = 0; tc_ets = 0; tc_ets_weight = 0; @@ -157,12 +158,9 @@ static int xgbe_dcb_ieee_setets(struct net_device *netdev, netif_dbg(pdata, drv, netdev, "PRIO%u: TC=%hhu\n", i, ets->prio_tc[i]); - if ((ets->tc_tx_bw[i] || ets->tc_tsa[i]) && - (i >= pdata->hw_feat.tc_cnt)) - return -EINVAL; - - if (ets->prio_tc[i] >= pdata->hw_feat.tc_cnt) - return -EINVAL; + max_tc = max_t(u8, max_tc, ets->prio_tc[i]); + if ((ets->tc_tx_bw[i] || ets->tc_tsa[i])) + max_tc = max_t(u8, max_tc, i); switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_STRICT: @@ -171,15 +169,28 @@ static int xgbe_dcb_ieee_setets(struct net_device *netdev, tc_ets = 1; tc_ets_weight += ets->tc_tx_bw[i]; break; - default: + netif_err(pdata, drv, netdev, + "unsupported TSA algorithm (%hhu)\n", + ets->tc_tsa[i]); return -EINVAL; } } + /* Check maximum traffic class requested */ + if (max_tc >= pdata->hw_feat.tc_cnt) { + netif_err(pdata, drv, netdev, + "exceeded number of supported traffic classes\n"); + return -EINVAL; + } + /* Weights must add up to 100% */ - if (tc_ets && (tc_ets_weight != 100)) + if (tc_ets && (tc_ets_weight != 100)) { + netif_err(pdata, drv, netdev, + "sum of ETS algorithm weights is not 100 (%u)\n", + tc_ets_weight); return -EINVAL; + } if (!pdata->ets) { pdata->ets = devm_kzalloc(pdata->dev, sizeof(*pdata->ets), @@ -188,6 +199,7 @@ static int xgbe_dcb_ieee_setets(struct net_device *netdev, return -ENOMEM; } + pdata->num_tcs = max_tc + 1; memcpy(pdata->ets, ets, sizeof(*pdata->ets)); pdata->hw_if.config_dcb_tc(pdata); @@ -221,6 +233,13 @@ static int xgbe_dcb_ieee_setpfc(struct net_device *netdev, "cap=%hhu, en=%#hhx, mbc=%hhu, delay=%hhu\n", pfc->pfc_cap, pfc->pfc_en, pfc->mbc, pfc->delay); + /* Check PFC for supported number of traffic classes */ + if (pfc->pfc_en & ~((1 << pdata->hw_feat.tc_cnt) - 1)) { + netif_err(pdata, drv, netdev, + "PFC requested for unsupported traffic class\n"); + return -EINVAL; + } + if (!pdata->pfc) { pdata->pfc = devm_kzalloc(pdata->dev, sizeof(*pdata->pfc), GFP_KERNEL); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index f6a7161e3b85..1babcc11a248 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -6,7 +6,7 @@ * * License 1: GPLv2 * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ * * License 2: Modified BSD * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -518,13 +518,45 @@ static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata) static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) { + struct ieee_pfc *pfc = pdata->pfc; + struct ieee_ets *ets = pdata->ets; unsigned int max_q_count, q_count; unsigned int reg, reg_val; unsigned int i; /* Set MTL flow control */ - for (i = 0; i < pdata->rx_q_count; i++) - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 1); + for (i = 0; i < pdata->rx_q_count; i++) { + unsigned int ehfc = 0; + + if (pfc && ets) { + unsigned int prio; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + unsigned int tc; + + /* Does this queue handle the priority? */ + if (pdata->prio2q_map[prio] != i) + continue; + + /* Get the Traffic Class for this priority */ + tc = ets->prio_tc[prio]; + + /* Check if flow control should be enabled */ + if (pfc->pfc_en & (1 << tc)) { + ehfc = 1; + break; + } + } + } else { + ehfc = 1; + } + + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, ehfc); + + netif_dbg(pdata, drv, pdata->netdev, + "flow control %s for RXq%u\n", + ehfc ? "enabled" : "disabled", i); + } /* Set MAC flow control */ max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; @@ -702,6 +734,113 @@ static int xgbe_set_xgmii_speed(struct xgbe_prv_data *pdata) return 0; } +static int xgbe_enable_rx_vlan_stripping(struct xgbe_prv_data *pdata) +{ + /* Put the VLAN tag in the Rx descriptor */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLRXS, 1); + + /* Don't check the VLAN type */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, DOVLTC, 1); + + /* Check only C-TAG (0x8100) packets */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ERSVLM, 0); + + /* Don't consider an S-TAG (0x88A8) packet as a VLAN packet */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ESVL, 0); + + /* Enable VLAN tag stripping */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLS, 0x3); + + return 0; +} + +static int xgbe_disable_rx_vlan_stripping(struct xgbe_prv_data *pdata) +{ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLS, 0); + + return 0; +} + +static int xgbe_enable_rx_vlan_filtering(struct xgbe_prv_data *pdata) +{ + /* Enable VLAN filtering */ + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 1); + + /* Enable VLAN Hash Table filtering */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VTHM, 1); + + /* Disable VLAN tag inverse matching */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VTIM, 0); + + /* Only filter on the lower 12-bits of the VLAN tag */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ETV, 1); + + /* In order for the VLAN Hash Table filtering to be effective, + * the VLAN tag identifier in the VLAN Tag Register must not + * be zero. Set the VLAN tag identifier to "1" to enable the + * VLAN Hash Table filtering. This implies that a VLAN tag of + * 1 will always pass filtering. + */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VL, 1); + + return 0; +} + +static int xgbe_disable_rx_vlan_filtering(struct xgbe_prv_data *pdata) +{ + /* Disable VLAN filtering */ + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 0); + + return 0; +} + +static u32 xgbe_vid_crc32_le(__le16 vid_le) +{ + u32 poly = 0xedb88320; /* CRCPOLY_LE */ + u32 crc = ~0; + u32 temp = 0; + unsigned char *data = (unsigned char *)&vid_le; + unsigned char data_byte = 0; + int i, bits; + + bits = get_bitmask_order(VLAN_VID_MASK); + for (i = 0; i < bits; i++) { + if ((i % 8) == 0) + data_byte = data[i / 8]; + + temp = ((crc & 1) ^ data_byte) & 1; + crc >>= 1; + data_byte >>= 1; + + if (temp) + crc ^= poly; + } + + return crc; +} + +static int xgbe_update_vlan_hash_table(struct xgbe_prv_data *pdata) +{ + u32 crc; + u16 vid; + __le16 vid_le; + u16 vlan_hash_table = 0; + + /* Generate the VLAN Hash Table value */ + for_each_set_bit(vid, pdata->active_vlans, VLAN_N_VID) { + /* Get the CRC32 value of the VLAN ID */ + vid_le = cpu_to_le16(vid); + crc = bitrev32(~xgbe_vid_crc32_le(vid_le)) >> 28; + + vlan_hash_table |= (1 << crc); + } + + /* Set the VLAN Hash Table filtering register */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANHTR, VLHT, vlan_hash_table); + + return 0; +} + static int xgbe_set_promiscuous_mode(struct xgbe_prv_data *pdata, unsigned int enable) { @@ -714,6 +853,14 @@ static int xgbe_set_promiscuous_mode(struct xgbe_prv_data *pdata, enable ? "entering" : "leaving"); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, val); + /* Hardware will still perform VLAN filtering in promiscuous mode */ + if (enable) { + xgbe_disable_rx_vlan_filtering(pdata); + } else { + if (pdata->netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + xgbe_enable_rx_vlan_filtering(pdata); + } + return 0; } @@ -875,6 +1022,7 @@ static int xgbe_config_rx_mode(struct xgbe_prv_data *pdata) static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad, int mmd_reg) { + unsigned long flags; unsigned int mmd_address; int mmd_data; @@ -892,10 +1040,10 @@ static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad, * register offsets must therefore be adjusted by left shifting the * offset 2 bits and reading 32 bits of data. */ - mutex_lock(&pdata->xpcs_mutex); + spin_lock_irqsave(&pdata->xpcs_lock, flags); XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8); mmd_data = XPCS_IOREAD(pdata, (mmd_address & 0xff) << 2); - mutex_unlock(&pdata->xpcs_mutex); + spin_unlock_irqrestore(&pdata->xpcs_lock, flags); return mmd_data; } @@ -904,6 +1052,7 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, int mmd_reg, int mmd_data) { unsigned int mmd_address; + unsigned long flags; if (mmd_reg & MII_ADDR_C45) mmd_address = mmd_reg & ~MII_ADDR_C45; @@ -919,10 +1068,10 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, * register offsets must therefore be adjusted by left shifting the * offset 2 bits and reading 32 bits of data. */ - mutex_lock(&pdata->xpcs_mutex); + spin_lock_irqsave(&pdata->xpcs_lock, flags); XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8); XPCS_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data); - mutex_unlock(&pdata->xpcs_mutex); + spin_unlock_irqrestore(&pdata->xpcs_lock, flags); } static int xgbe_tx_complete(struct xgbe_ring_desc *rdesc) @@ -944,116 +1093,6 @@ static int xgbe_enable_rx_csum(struct xgbe_prv_data *pdata) return 0; } -static int xgbe_enable_rx_vlan_stripping(struct xgbe_prv_data *pdata) -{ - /* Put the VLAN tag in the Rx descriptor */ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLRXS, 1); - - /* Don't check the VLAN type */ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, DOVLTC, 1); - - /* Check only C-TAG (0x8100) packets */ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ERSVLM, 0); - - /* Don't consider an S-TAG (0x88A8) packet as a VLAN packet */ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ESVL, 0); - - /* Enable VLAN tag stripping */ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLS, 0x3); - - return 0; -} - -static int xgbe_disable_rx_vlan_stripping(struct xgbe_prv_data *pdata) -{ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLS, 0); - - return 0; -} - -static int xgbe_enable_rx_vlan_filtering(struct xgbe_prv_data *pdata) -{ - /* Enable VLAN filtering */ - XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 1); - - /* Enable VLAN Hash Table filtering */ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VTHM, 1); - - /* Disable VLAN tag inverse matching */ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VTIM, 0); - - /* Only filter on the lower 12-bits of the VLAN tag */ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ETV, 1); - - /* In order for the VLAN Hash Table filtering to be effective, - * the VLAN tag identifier in the VLAN Tag Register must not - * be zero. Set the VLAN tag identifier to "1" to enable the - * VLAN Hash Table filtering. This implies that a VLAN tag of - * 1 will always pass filtering. - */ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VL, 1); - - return 0; -} - -static int xgbe_disable_rx_vlan_filtering(struct xgbe_prv_data *pdata) -{ - /* Disable VLAN filtering */ - XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 0); - - return 0; -} - -#ifndef CRCPOLY_LE -#define CRCPOLY_LE 0xedb88320 -#endif -static u32 xgbe_vid_crc32_le(__le16 vid_le) -{ - u32 poly = CRCPOLY_LE; - u32 crc = ~0; - u32 temp = 0; - unsigned char *data = (unsigned char *)&vid_le; - unsigned char data_byte = 0; - int i, bits; - - bits = get_bitmask_order(VLAN_VID_MASK); - for (i = 0; i < bits; i++) { - if ((i % 8) == 0) - data_byte = data[i / 8]; - - temp = ((crc & 1) ^ data_byte) & 1; - crc >>= 1; - data_byte >>= 1; - - if (temp) - crc ^= poly; - } - - return crc; -} - -static int xgbe_update_vlan_hash_table(struct xgbe_prv_data *pdata) -{ - u32 crc; - u16 vid; - __le16 vid_le; - u16 vlan_hash_table = 0; - - /* Generate the VLAN Hash Table value */ - for_each_set_bit(vid, pdata->active_vlans, VLAN_N_VID) { - /* Get the CRC32 value of the VLAN ID */ - vid_le = cpu_to_le16(vid); - crc = bitrev32(~xgbe_vid_crc32_le(vid_le)) >> 28; - - vlan_hash_table |= (1 << crc); - } - - /* Set the VLAN Hash Table filtering register */ - XGMAC_IOWRITE_BITS(pdata, MAC_VLANHTR, VLHT, vlan_hash_table); - - return 0; -} - static void xgbe_tx_desc_reset(struct xgbe_ring_data *rdata) { struct xgbe_ring_desc *rdesc = rdata->rdesc; @@ -1288,11 +1327,42 @@ static int xgbe_config_tstamp(struct xgbe_prv_data *pdata, return 0; } +static void xgbe_config_tc(struct xgbe_prv_data *pdata) +{ + unsigned int offset, queue, prio; + u8 i; + + netdev_reset_tc(pdata->netdev); + if (!pdata->num_tcs) + return; + + netdev_set_num_tc(pdata->netdev, pdata->num_tcs); + + for (i = 0, queue = 0, offset = 0; i < pdata->num_tcs; i++) { + while ((queue < pdata->tx_q_count) && + (pdata->q2tc_map[queue] == i)) + queue++; + + netif_dbg(pdata, drv, pdata->netdev, "TC%u using TXq%u-%u\n", + i, offset, queue - 1); + netdev_set_tc_queue(pdata->netdev, i, queue - offset, offset); + offset = queue; + } + + if (!pdata->ets) + return; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + netdev_set_prio_tc_map(pdata->netdev, prio, + pdata->ets->prio_tc[prio]); +} + static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata) { struct ieee_ets *ets = pdata->ets; unsigned int total_weight, min_weight, weight; - unsigned int i; + unsigned int mask, reg, reg_val; + unsigned int i, prio; if (!ets) return; @@ -1309,6 +1379,25 @@ static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata) min_weight = 1; for (i = 0; i < pdata->hw_feat.tc_cnt; i++) { + /* Map the priorities to the traffic class */ + mask = 0; + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + if (ets->prio_tc[prio] == i) + mask |= (1 << prio); + } + mask &= 0xff; + + netif_dbg(pdata, drv, pdata->netdev, "TC%u PRIO mask=%#x\n", + i, mask); + reg = MTL_TCPM0R + (MTL_TCPM_INC * (i / MTL_TCPM_TC_PER_REG)); + reg_val = XGMAC_IOREAD(pdata, reg); + + reg_val &= ~(0xff << ((i % MTL_TCPM_TC_PER_REG) << 3)); + reg_val |= (mask << ((i % MTL_TCPM_TC_PER_REG) << 3)); + + XGMAC_IOWRITE(pdata, reg, reg_val); + + /* Set the traffic class algorithm */ switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_STRICT: netif_dbg(pdata, drv, pdata->netdev, @@ -1329,38 +1418,12 @@ static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata) break; } } + + xgbe_config_tc(pdata); } static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata) { - struct ieee_pfc *pfc = pdata->pfc; - struct ieee_ets *ets = pdata->ets; - unsigned int mask, reg, reg_val; - unsigned int tc, prio; - - if (!pfc || !ets) - return; - - for (tc = 0; tc < pdata->hw_feat.tc_cnt; tc++) { - mask = 0; - for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { - if ((pfc->pfc_en & (1 << prio)) && - (ets->prio_tc[prio] == tc)) - mask |= (1 << prio); - } - mask &= 0xff; - - netif_dbg(pdata, drv, pdata->netdev, "TC%u PFC mask=%#x\n", - tc, mask); - reg = MTL_TCPM0R + (MTL_TCPM_INC * (tc / MTL_TCPM_TC_PER_REG)); - reg_val = XGMAC_IOREAD(pdata, reg); - - reg_val &= ~(0xff << ((tc % MTL_TCPM_TC_PER_REG) << 3)); - reg_val |= (mask << ((tc % MTL_TCPM_TC_PER_REG) << 3)); - - XGMAC_IOWRITE(pdata, reg, reg_val); - } - xgbe_config_flow_control(pdata); } @@ -2595,6 +2658,32 @@ static void xgbe_disable_tx(struct xgbe_prv_data *pdata) } } +static void xgbe_prepare_rx_stop(struct xgbe_prv_data *pdata, + unsigned int queue) +{ + unsigned int rx_status; + unsigned long rx_timeout; + + /* The Rx engine cannot be stopped if it is actively processing + * packets. Wait for the Rx queue to empty the Rx fifo. Don't + * wait forever though... + */ + rx_timeout = jiffies + (XGBE_DMA_STOP_TIMEOUT * HZ); + while (time_before(jiffies, rx_timeout)) { + rx_status = XGMAC_MTL_IOREAD(pdata, queue, MTL_Q_RQDR); + if ((XGMAC_GET_BITS(rx_status, MTL_Q_RQDR, PRXQ) == 0) && + (XGMAC_GET_BITS(rx_status, MTL_Q_RQDR, RXQSTS) == 0)) + break; + + usleep_range(500, 1000); + } + + if (!time_before(jiffies, rx_timeout)) + netdev_info(pdata->netdev, + "timed out waiting for Rx queue %u to empty\n", + queue); +} + static void xgbe_enable_rx(struct xgbe_prv_data *pdata) { struct xgbe_channel *channel; @@ -2633,6 +2722,10 @@ static void xgbe_disable_rx(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MAC_RCR, ACS, 0); XGMAC_IOWRITE_BITS(pdata, MAC_RCR, RE, 0); + /* Prepare for Rx DMA channel stop */ + for (i = 0; i < pdata->rx_q_count; i++) + xgbe_prepare_rx_stop(pdata, i); + /* Disable each Rx queue */ XGMAC_IOWRITE(pdata, MAC_RQC0R, 0); @@ -2881,6 +2974,7 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->get_tx_tstamp = xgbe_get_tx_tstamp; /* For Data Center Bridging config */ + hw_if->config_tc = xgbe_config_tc; hw_if->config_dcb_tc = xgbe_config_dcb_tc; hw_if->config_dcb_pfc = xgbe_config_dcb_pfc; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 8a9b493566c9..33606840ae15 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -6,7 +6,7 @@ * * License 1: GPLv2 * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ * * License 2: Modified BSD * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -356,7 +356,7 @@ static irqreturn_t xgbe_isr(int irq, void *data) xgbe_disable_rx_tx_ints(pdata); /* Turn on polling */ - __napi_schedule(&pdata->napi); + __napi_schedule_irqoff(&pdata->napi); } } @@ -409,7 +409,7 @@ static irqreturn_t xgbe_dma_isr(int irq, void *data) disable_irq_nosync(channel->dma_irq); /* Turn on polling */ - __napi_schedule(&channel->napi); + __napi_schedule_irqoff(&channel->napi); } return IRQ_HANDLED; @@ -1626,30 +1626,22 @@ static void xgbe_poll_controller(struct net_device *netdev) } #endif /* End CONFIG_NET_POLL_CONTROLLER */ -static int xgbe_setup_tc(struct net_device *netdev, u8 tc) +static int xgbe_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc_to_netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); - unsigned int offset, queue; - u8 i; + u8 tc; - if (tc && (tc != pdata->hw_feat.tc_cnt)) + if (handle != TC_H_ROOT || tc_to_netdev->type != TC_SETUP_MQPRIO) return -EINVAL; - if (tc) { - netdev_set_num_tc(netdev, tc); - for (i = 0, queue = 0, offset = 0; i < tc; i++) { - while ((queue < pdata->tx_q_count) && - (pdata->q2tc_map[queue] == i)) - queue++; - - netif_dbg(pdata, drv, netdev, "TC%u using TXq%u-%u\n", - i, offset, queue - 1); - netdev_set_tc_queue(netdev, i, queue - offset, offset); - offset = queue; - } - } else { - netdev_reset_tc(netdev); - } + tc = tc_to_netdev->tc; + + if (tc > pdata->hw_feat.tc_cnt) + return -EINVAL; + + pdata->num_tcs = tc; + pdata->hw_if.config_tc(pdata); return 0; } @@ -2062,7 +2054,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget) /* If we processed everything, we are done */ if (processed < budget) { /* Turn off polling */ - napi_complete(napi); + napi_complete_done(napi, processed); /* Enable Tx and Rx interrupts */ enable_irq(channel->dma_irq); @@ -2104,7 +2096,7 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget) /* If we processed everything, we are done */ if (processed < budget) { /* Turn off polling */ - napi_complete(napi); + napi_complete_done(napi, processed); /* Enable Tx and Rx interrupts */ xgbe_enable_rx_tx_ints(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 6040293db9c1..11d9f0c5b78b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -6,7 +6,7 @@ * * License 1: GPLv2 * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ * * License 2: Modified BSD * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -318,8 +318,20 @@ static int xgbe_set_settings(struct net_device *netdev, if (cmd->autoneg == AUTONEG_DISABLE) { switch (speed) { case SPEED_10000: + break; case SPEED_2500: + if (pdata->speed_set != XGBE_SPEEDSET_2500_10000) { + netdev_err(netdev, "unsupported speed %u\n", + speed); + return -EINVAL; + } + break; case SPEED_1000: + if (pdata->speed_set != XGBE_SPEEDSET_1000_10000) { + netdev_err(netdev, "unsupported speed %u\n", + speed); + return -EINVAL; + } break; default: netdev_err(netdev, "unsupported speed %u\n", speed); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 618d952c2984..3eee3201b58f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -6,7 +6,7 @@ * * License 1: GPLv2 * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ * * License 2: Modified BSD * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -363,7 +363,7 @@ static int xgbe_probe(struct platform_device *pdev) platform_set_drvdata(pdev, netdev); spin_lock_init(&pdata->lock); - mutex_init(&pdata->xpcs_mutex); + spin_lock_init(&pdata->xpcs_lock); mutex_init(&pdata->rss_mutex); spin_lock_init(&pdata->tstamp_lock); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 446058081866..84c5d296d13e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -6,7 +6,7 @@ * * License 1: GPLv2 * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ * * License 2: Modified BSD * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -626,10 +626,22 @@ static irqreturn_t xgbe_an_isr(int irq, void *data) netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n"); - /* Interrupt reason must be read and cleared outside of IRQ context */ - disable_irq_nosync(pdata->an_irq); + /* Disable AN interrupts */ + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0); + + /* Save the interrupt(s) that fired */ + pdata->an_int = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_INT); - queue_work(pdata->an_workqueue, &pdata->an_irq_work); + if (pdata->an_int) { + /* Clear the interrupt(s) that fired and process them */ + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, ~pdata->an_int); + + queue_work(pdata->an_workqueue, &pdata->an_irq_work); + } else { + /* Enable AN interrupts */ + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, + XGBE_AN_INT_MASK); + } return IRQ_HANDLED; } @@ -673,34 +685,26 @@ static void xgbe_an_state_machine(struct work_struct *work) struct xgbe_prv_data, an_work); enum xgbe_an cur_state = pdata->an_state; - unsigned int int_reg, int_mask; mutex_lock(&pdata->an_mutex); - /* Read the interrupt */ - int_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_INT); - if (!int_reg) + if (!pdata->an_int) goto out; next_int: - if (int_reg & XGBE_AN_PG_RCV) { + if (pdata->an_int & XGBE_AN_PG_RCV) { pdata->an_state = XGBE_AN_PAGE_RECEIVED; - int_mask = XGBE_AN_PG_RCV; - } else if (int_reg & XGBE_AN_INC_LINK) { + pdata->an_int &= ~XGBE_AN_PG_RCV; + } else if (pdata->an_int & XGBE_AN_INC_LINK) { pdata->an_state = XGBE_AN_INCOMPAT_LINK; - int_mask = XGBE_AN_INC_LINK; - } else if (int_reg & XGBE_AN_INT_CMPLT) { + pdata->an_int &= ~XGBE_AN_INC_LINK; + } else if (pdata->an_int & XGBE_AN_INT_CMPLT) { pdata->an_state = XGBE_AN_COMPLETE; - int_mask = XGBE_AN_INT_CMPLT; + pdata->an_int &= ~XGBE_AN_INT_CMPLT; } else { pdata->an_state = XGBE_AN_ERROR; - int_mask = 0; } - /* Clear the interrupt to be processed */ - int_reg &= ~int_mask; - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, int_reg); - pdata->an_result = pdata->an_state; again: @@ -740,14 +744,14 @@ again: } if (pdata->an_state == XGBE_AN_NO_LINK) { - int_reg = 0; + pdata->an_int = 0; XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); } else if (pdata->an_state == XGBE_AN_ERROR) { netdev_err(pdata->netdev, "error during auto-negotiation, state=%u\n", cur_state); - int_reg = 0; + pdata->an_int = 0; XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); } @@ -765,11 +769,12 @@ again: if (cur_state != pdata->an_state) goto again; - if (int_reg) + if (pdata->an_int) goto next_int; out: - enable_irq(pdata->an_irq); + /* Enable AN interrupts on the way out */ + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_INT_MASK); mutex_unlock(&pdata->an_mutex); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index e234b9970318..98d9d63c4353 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -6,7 +6,7 @@ * * License 1: GPLv2 * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ * * License 2: Modified BSD * - * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -673,6 +673,7 @@ struct xgbe_hw_if { u64 (*get_tx_tstamp)(struct xgbe_prv_data *); /* For Data Center Bridging config */ + void (*config_tc)(struct xgbe_prv_data *); void (*config_dcb_tc)(struct xgbe_prv_data *); void (*config_dcb_pfc)(struct xgbe_prv_data *); @@ -773,8 +774,8 @@ struct xgbe_prv_data { /* Overall device lock */ spinlock_t lock; - /* XPCS indirect addressing mutex */ - struct mutex xpcs_mutex; + /* XPCS indirect addressing lock */ + spinlock_t xpcs_lock; /* RSS addressing mutex */ struct mutex rss_mutex; @@ -880,6 +881,7 @@ struct xgbe_prv_data { struct ieee_pfc *pfc; unsigned int q2tc_map[XGBE_MAX_QUEUES]; unsigned int prio2q_map[IEEE_8021QAZ_MAX_TCS]; + u8 num_tcs; /* Hardware features of the device */ struct xgbe_hw_features hw_feat; @@ -925,6 +927,7 @@ struct xgbe_prv_data { u32 serdes_dfe_tap_ena[XGBE_SPEEDS]; /* Auto-negotiation state machine support */ + unsigned int an_int; struct mutex an_mutex; enum xgbe_an an_result; enum xgbe_an an_state; diff --git a/drivers/net/ethernet/apm/xgene/Makefile b/drivers/net/ethernet/apm/xgene/Makefile index 700b5abe5de5..f46321f68315 100644 --- a/drivers/net/ethernet/apm/xgene/Makefile +++ b/drivers/net/ethernet/apm/xgene/Makefile @@ -3,5 +3,6 @@ # xgene-enet-objs := xgene_enet_hw.o xgene_enet_sgmac.o xgene_enet_xgmac.o \ - xgene_enet_main.o xgene_enet_ring2.o xgene_enet_ethtool.o + xgene_enet_main.o xgene_enet_ring2.o xgene_enet_ethtool.o \ + xgene_enet_cle.o obj-$(CONFIG_NET_XGENE) += xgene-enet.o diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c new file mode 100644 index 000000000000..b212488606da --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c @@ -0,0 +1,734 @@ +/* Applied Micro X-Gene SoC Ethernet Classifier structures + * + * Copyright (c) 2016, Applied Micro Circuits Corporation + * Authors: Khuong Dinh <kdinh@apm.com> + * Tanmay Inamdar <tinamdar@apm.com> + * Iyappan Subramanian <isubramanian@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "xgene_enet_main.h" + +/* interfaces to convert structures to HW recognized bit formats */ +static void xgene_cle_sband_to_hw(u8 frag, enum xgene_cle_prot_version ver, + enum xgene_cle_prot_type type, u32 len, + u32 *reg) +{ + *reg = SET_VAL(SB_IPFRAG, frag) | + SET_VAL(SB_IPPROT, type) | + SET_VAL(SB_IPVER, ver) | + SET_VAL(SB_HDRLEN, len); +} + +static void xgene_cle_idt_to_hw(u32 dstqid, u32 fpsel, + u32 nfpsel, u32 *idt_reg) +{ + *idt_reg = SET_VAL(IDT_DSTQID, dstqid) | + SET_VAL(IDT_FPSEL, fpsel) | + SET_VAL(IDT_NFPSEL, nfpsel); +} + +static void xgene_cle_dbptr_to_hw(struct xgene_enet_pdata *pdata, + struct xgene_cle_dbptr *dbptr, u32 *buf) +{ + buf[4] = SET_VAL(CLE_FPSEL, dbptr->fpsel) | + SET_VAL(CLE_DSTQIDL, dbptr->dstqid); + + buf[5] = SET_VAL(CLE_DSTQIDH, (u32)dbptr->dstqid >> CLE_DSTQIDL_LEN) | + SET_VAL(CLE_PRIORITY, dbptr->cle_priority); +} + +static void xgene_cle_kn_to_hw(struct xgene_cle_ptree_kn *kn, u32 *buf) +{ + u32 i, j = 0; + u32 data; + + buf[j++] = SET_VAL(CLE_TYPE, kn->node_type); + for (i = 0; i < kn->num_keys; i++) { + struct xgene_cle_ptree_key *key = &kn->key[i]; + + if (!(i % 2)) { + buf[j] = SET_VAL(CLE_KN_PRIO, key->priority) | + SET_VAL(CLE_KN_RPTR, key->result_pointer); + } else { + data = SET_VAL(CLE_KN_PRIO, key->priority) | + SET_VAL(CLE_KN_RPTR, key->result_pointer); + buf[j++] |= (data << 16); + } + } +} + +static void xgene_cle_dn_to_hw(struct xgene_cle_ptree_ewdn *dn, + u32 *buf, u32 jb) +{ + struct xgene_cle_ptree_branch *br; + u32 i, j = 0; + u32 npp; + + buf[j++] = SET_VAL(CLE_DN_TYPE, dn->node_type) | + SET_VAL(CLE_DN_LASTN, dn->last_node) | + SET_VAL(CLE_DN_HLS, dn->hdr_len_store) | + SET_VAL(CLE_DN_EXT, dn->hdr_extn) | + SET_VAL(CLE_DN_BSTOR, dn->byte_store) | + SET_VAL(CLE_DN_SBSTOR, dn->search_byte_store) | + SET_VAL(CLE_DN_RPTR, dn->result_pointer); + + for (i = 0; i < dn->num_branches; i++) { + br = &dn->branch[i]; + npp = br->next_packet_pointer; + + if ((br->jump_rel == JMP_ABS) && (npp < CLE_PKTRAM_SIZE)) + npp += jb; + + buf[j++] = SET_VAL(CLE_BR_VALID, br->valid) | + SET_VAL(CLE_BR_NPPTR, npp) | + SET_VAL(CLE_BR_JB, br->jump_bw) | + SET_VAL(CLE_BR_JR, br->jump_rel) | + SET_VAL(CLE_BR_OP, br->operation) | + SET_VAL(CLE_BR_NNODE, br->next_node) | + SET_VAL(CLE_BR_NBR, br->next_branch); + + buf[j++] = SET_VAL(CLE_BR_DATA, br->data) | + SET_VAL(CLE_BR_MASK, br->mask); + } +} + +static int xgene_cle_poll_cmd_done(void __iomem *base, + enum xgene_cle_cmd_type cmd) +{ + u32 status, loop = 10; + int ret = -EBUSY; + + while (loop--) { + status = ioread32(base + INDCMD_STATUS); + if (status & cmd) { + ret = 0; + break; + } + usleep_range(1000, 2000); + } + + return ret; +} + +static int xgene_cle_dram_wr(struct xgene_enet_cle *cle, u32 *data, u8 nregs, + u32 index, enum xgene_cle_dram_type type, + enum xgene_cle_cmd_type cmd) +{ + enum xgene_cle_parser parser = cle->active_parser; + void __iomem *base = cle->base; + u32 i, j, ind_addr; + u8 port, nparsers; + int ret = 0; + + /* PTREE_RAM onwards, DRAM regions are common for all parsers */ + nparsers = (type >= PTREE_RAM) ? 1 : cle->parsers; + + for (i = 0; i < nparsers; i++) { + port = i; + if ((type < PTREE_RAM) && (parser != PARSER_ALL)) + port = parser; + + ind_addr = XGENE_CLE_DRAM(type + (port * 4)) | index; + iowrite32(ind_addr, base + INDADDR); + for (j = 0; j < nregs; j++) + iowrite32(data[j], base + DATA_RAM0 + (j * 4)); + iowrite32(cmd, base + INDCMD); + + ret = xgene_cle_poll_cmd_done(base, cmd); + if (ret) + break; + } + + return ret; +} + +static void xgene_cle_enable_ptree(struct xgene_enet_pdata *pdata, + struct xgene_enet_cle *cle) +{ + struct xgene_cle_ptree *ptree = &cle->ptree; + void __iomem *addr, *base = cle->base; + u32 offset = CLE_PORT_OFFSET; + u32 i; + + /* 1G port has to advance 4 bytes and 10G has to advance 8 bytes */ + ptree->start_pkt += cle->jump_bytes; + for (i = 0; i < cle->parsers; i++) { + if (cle->active_parser != PARSER_ALL) + addr = base + cle->active_parser * offset; + else + addr = base + (i * offset); + + iowrite32(ptree->start_node & 0x3fff, addr + SNPTR0); + iowrite32(ptree->start_pkt & 0x1ff, addr + SPPTR0); + } +} + +static int xgene_cle_setup_dbptr(struct xgene_enet_pdata *pdata, + struct xgene_enet_cle *cle) +{ + struct xgene_cle_ptree *ptree = &cle->ptree; + u32 buf[CLE_DRAM_REGS]; + u32 i; + int ret; + + memset(buf, 0, sizeof(buf)); + for (i = 0; i < ptree->num_dbptr; i++) { + xgene_cle_dbptr_to_hw(pdata, &ptree->dbptr[i], buf); + ret = xgene_cle_dram_wr(cle, buf, 6, i + ptree->start_dbptr, + DB_RAM, CLE_CMD_WR); + if (ret) + return ret; + } + + return 0; +} + +static int xgene_cle_setup_node(struct xgene_enet_pdata *pdata, + struct xgene_enet_cle *cle) +{ + struct xgene_cle_ptree *ptree = &cle->ptree; + struct xgene_cle_ptree_ewdn *dn = ptree->dn; + struct xgene_cle_ptree_kn *kn = ptree->kn; + u32 buf[CLE_DRAM_REGS]; + int i, j, ret; + + memset(buf, 0, sizeof(buf)); + for (i = 0; i < ptree->num_dn; i++) { + xgene_cle_dn_to_hw(&dn[i], buf, cle->jump_bytes); + ret = xgene_cle_dram_wr(cle, buf, 17, i + ptree->start_node, + PTREE_RAM, CLE_CMD_WR); + if (ret) + return ret; + } + + /* continue node index for key node */ + memset(buf, 0, sizeof(buf)); + for (j = i; j < (ptree->num_kn + ptree->num_dn); j++) { + xgene_cle_kn_to_hw(&kn[j - ptree->num_dn], buf); + ret = xgene_cle_dram_wr(cle, buf, 17, j + ptree->start_node, + PTREE_RAM, CLE_CMD_WR); + if (ret) + return ret; + } + + return 0; +} + +static int xgene_cle_setup_ptree(struct xgene_enet_pdata *pdata, + struct xgene_enet_cle *cle) +{ + int ret; + + ret = xgene_cle_setup_node(pdata, cle); + if (ret) + return ret; + + ret = xgene_cle_setup_dbptr(pdata, cle); + if (ret) + return ret; + + xgene_cle_enable_ptree(pdata, cle); + + return 0; +} + +static void xgene_cle_setup_def_dbptr(struct xgene_enet_pdata *pdata, + struct xgene_enet_cle *enet_cle, + struct xgene_cle_dbptr *dbptr, + u32 index, u8 priority) +{ + void __iomem *base = enet_cle->base; + void __iomem *base_addr; + u32 buf[CLE_DRAM_REGS]; + u32 def_cls, offset; + u32 i, j; + + memset(buf, 0, sizeof(buf)); + xgene_cle_dbptr_to_hw(pdata, dbptr, buf); + + for (i = 0; i < enet_cle->parsers; i++) { + if (enet_cle->active_parser != PARSER_ALL) { + offset = enet_cle->active_parser * + CLE_PORT_OFFSET; + } else { + offset = i * CLE_PORT_OFFSET; + } + + base_addr = base + DFCLSRESDB00 + offset; + for (j = 0; j < 6; j++) + iowrite32(buf[j], base_addr + (j * 4)); + + def_cls = ((priority & 0x7) << 10) | (index & 0x3ff); + iowrite32(def_cls, base + DFCLSRESDBPTR0 + offset); + } +} + +static int xgene_cle_set_rss_sband(struct xgene_enet_cle *cle) +{ + u32 idx = CLE_PKTRAM_SIZE / sizeof(u32); + u32 mac_hdr_len = ETH_HLEN; + u32 sband, reg = 0; + u32 ipv4_ihl = 5; + u32 hdr_len; + int ret; + + /* Sideband: IPV4/TCP packets */ + hdr_len = (mac_hdr_len << 5) | ipv4_ihl; + xgene_cle_sband_to_hw(0, XGENE_CLE_IPV4, XGENE_CLE_TCP, hdr_len, ®); + sband = reg; + + /* Sideband: IPv4/UDP packets */ + hdr_len = (mac_hdr_len << 5) | ipv4_ihl; + xgene_cle_sband_to_hw(1, XGENE_CLE_IPV4, XGENE_CLE_UDP, hdr_len, ®); + sband |= (reg << 16); + + ret = xgene_cle_dram_wr(cle, &sband, 1, idx, PKT_RAM, CLE_CMD_WR); + if (ret) + return ret; + + /* Sideband: IPv4/RAW packets */ + hdr_len = (mac_hdr_len << 5) | ipv4_ihl; + xgene_cle_sband_to_hw(0, XGENE_CLE_IPV4, XGENE_CLE_OTHER, + hdr_len, ®); + sband = reg; + + /* Sideband: Ethernet II/RAW packets */ + hdr_len = (mac_hdr_len << 5); + xgene_cle_sband_to_hw(0, XGENE_CLE_IPV4, XGENE_CLE_OTHER, + hdr_len, ®); + sband |= (reg << 16); + + ret = xgene_cle_dram_wr(cle, &sband, 1, idx + 1, PKT_RAM, CLE_CMD_WR); + if (ret) + return ret; + + return 0; +} + +static int xgene_cle_set_rss_skeys(struct xgene_enet_cle *cle) +{ + u32 secret_key_ipv4[4]; /* 16 Bytes*/ + int ret = 0; + + get_random_bytes(secret_key_ipv4, 16); + ret = xgene_cle_dram_wr(cle, secret_key_ipv4, 4, 0, + RSS_IPV4_HASH_SKEY, CLE_CMD_WR); + return ret; +} + +static int xgene_cle_set_rss_idt(struct xgene_enet_pdata *pdata) +{ + u32 fpsel, dstqid, nfpsel, idt_reg, idx; + int i, ret = 0; + u16 pool_id; + + for (i = 0; i < XGENE_CLE_IDT_ENTRIES; i++) { + idx = i % pdata->rxq_cnt; + pool_id = pdata->rx_ring[idx]->buf_pool->id; + fpsel = xgene_enet_ring_bufnum(pool_id) - 0x20; + dstqid = xgene_enet_dst_ring_num(pdata->rx_ring[idx]); + nfpsel = 0; + idt_reg = 0; + + xgene_cle_idt_to_hw(dstqid, fpsel, nfpsel, &idt_reg); + ret = xgene_cle_dram_wr(&pdata->cle, &idt_reg, 1, i, + RSS_IDT, CLE_CMD_WR); + if (ret) + return ret; + } + + ret = xgene_cle_set_rss_skeys(&pdata->cle); + if (ret) + return ret; + + return 0; +} + +static int xgene_cle_setup_rss(struct xgene_enet_pdata *pdata) +{ + struct xgene_enet_cle *cle = &pdata->cle; + void __iomem *base = cle->base; + u32 offset, val = 0; + int i, ret = 0; + + offset = CLE_PORT_OFFSET; + for (i = 0; i < cle->parsers; i++) { + if (cle->active_parser != PARSER_ALL) + offset = cle->active_parser * CLE_PORT_OFFSET; + else + offset = i * CLE_PORT_OFFSET; + + /* enable RSS */ + val = (RSS_IPV4_12B << 1) | 0x1; + writel(val, base + RSS_CTRL0 + offset); + } + + /* setup sideband data */ + ret = xgene_cle_set_rss_sband(cle); + if (ret) + return ret; + + /* setup indirection table */ + ret = xgene_cle_set_rss_idt(pdata); + if (ret) + return ret; + + return 0; +} + +static int xgene_enet_cle_init(struct xgene_enet_pdata *pdata) +{ + struct xgene_enet_cle *enet_cle = &pdata->cle; + struct xgene_cle_dbptr dbptr[DB_MAX_PTRS]; + struct xgene_cle_ptree_branch *br; + u32 def_qid, def_fpsel, pool_id; + struct xgene_cle_ptree *ptree; + struct xgene_cle_ptree_kn kn; + int ret; + struct xgene_cle_ptree_ewdn ptree_dn[] = { + { + /* PKT_TYPE_NODE */ + .node_type = EWDN, + .last_node = 0, + .hdr_len_store = 1, + .hdr_extn = NO_BYTE, + .byte_store = NO_BYTE, + .search_byte_store = NO_BYTE, + .result_pointer = DB_RES_DROP, + .num_branches = 2, + .branch = { + { + /* IPV4 */ + .valid = 0, + .next_packet_pointer = 22, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = PKT_PROT_NODE, + .next_branch = 0, + .data = 0x8, + .mask = 0xffff + }, + { + .valid = 0, + .next_packet_pointer = 262, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = LAST_NODE, + .next_branch = 0, + .data = 0x0, + .mask = 0xffff + } + }, + }, + { + /* PKT_PROT_NODE */ + .node_type = EWDN, + .last_node = 0, + .hdr_len_store = 1, + .hdr_extn = NO_BYTE, + .byte_store = NO_BYTE, + .search_byte_store = NO_BYTE, + .result_pointer = DB_RES_DROP, + .num_branches = 3, + .branch = { + { + /* TCP */ + .valid = 1, + .next_packet_pointer = 26, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_TCP_NODE, + .next_branch = 0, + .data = 0x0600, + .mask = 0xffff + }, + { + /* UDP */ + .valid = 1, + .next_packet_pointer = 26, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_UDP_NODE, + .next_branch = 0, + .data = 0x1100, + .mask = 0xffff + }, + { + .valid = 0, + .next_packet_pointer = 260, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = LAST_NODE, + .next_branch = 0, + .data = 0x0, + .mask = 0xffff + } + } + }, + { + /* RSS_IPV4_TCP_NODE */ + .node_type = EWDN, + .last_node = 0, + .hdr_len_store = 1, + .hdr_extn = NO_BYTE, + .byte_store = NO_BYTE, + .search_byte_store = BOTH_BYTES, + .result_pointer = DB_RES_DROP, + .num_branches = 6, + .branch = { + { + /* SRC IPV4 B01 */ + .valid = 0, + .next_packet_pointer = 28, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_TCP_NODE, + .next_branch = 1, + .data = 0x0, + .mask = 0xffff + }, + { + /* SRC IPV4 B23 */ + .valid = 0, + .next_packet_pointer = 30, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_TCP_NODE, + .next_branch = 2, + .data = 0x0, + .mask = 0xffff + }, + { + /* DST IPV4 B01 */ + .valid = 0, + .next_packet_pointer = 32, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_TCP_NODE, + .next_branch = 3, + .data = 0x0, + .mask = 0xffff + }, + { + /* DST IPV4 B23 */ + .valid = 0, + .next_packet_pointer = 34, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_TCP_NODE, + .next_branch = 4, + .data = 0x0, + .mask = 0xffff + }, + { + /* TCP SRC Port */ + .valid = 0, + .next_packet_pointer = 36, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_TCP_NODE, + .next_branch = 5, + .data = 0x0, + .mask = 0xffff + }, + { + /* TCP DST Port */ + .valid = 0, + .next_packet_pointer = 256, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = LAST_NODE, + .next_branch = 0, + .data = 0x0, + .mask = 0xffff + } + } + }, + { + /* RSS_IPV4_UDP_NODE */ + .node_type = EWDN, + .last_node = 0, + .hdr_len_store = 1, + .hdr_extn = NO_BYTE, + .byte_store = NO_BYTE, + .search_byte_store = BOTH_BYTES, + .result_pointer = DB_RES_DROP, + .num_branches = 6, + .branch = { + { + /* SRC IPV4 B01 */ + .valid = 0, + .next_packet_pointer = 28, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_UDP_NODE, + .next_branch = 1, + .data = 0x0, + .mask = 0xffff + }, + { + /* SRC IPV4 B23 */ + .valid = 0, + .next_packet_pointer = 30, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_UDP_NODE, + .next_branch = 2, + .data = 0x0, + .mask = 0xffff + }, + { + /* DST IPV4 B01 */ + .valid = 0, + .next_packet_pointer = 32, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_UDP_NODE, + .next_branch = 3, + .data = 0x0, + .mask = 0xffff + }, + { + /* DST IPV4 B23 */ + .valid = 0, + .next_packet_pointer = 34, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_UDP_NODE, + .next_branch = 4, + .data = 0x0, + .mask = 0xffff + }, + { + /* TCP SRC Port */ + .valid = 0, + .next_packet_pointer = 36, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = RSS_IPV4_UDP_NODE, + .next_branch = 5, + .data = 0x0, + .mask = 0xffff + }, + { + /* TCP DST Port */ + .valid = 0, + .next_packet_pointer = 256, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = LAST_NODE, + .next_branch = 0, + .data = 0x0, + .mask = 0xffff + } + } + }, + { + /* LAST NODE */ + .node_type = EWDN, + .last_node = 1, + .hdr_len_store = 1, + .hdr_extn = NO_BYTE, + .byte_store = NO_BYTE, + .search_byte_store = NO_BYTE, + .result_pointer = DB_RES_DROP, + .num_branches = 1, + .branch = { + { + .valid = 0, + .next_packet_pointer = 0, + .jump_bw = JMP_FW, + .jump_rel = JMP_ABS, + .operation = EQT, + .next_node = MAX_NODES, + .next_branch = 0, + .data = 0, + .mask = 0xffff + } + } + } + }; + + ptree = &enet_cle->ptree; + ptree->start_pkt = 12; /* Ethertype */ + if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) { + ret = xgene_cle_setup_rss(pdata); + if (ret) { + netdev_err(pdata->ndev, "RSS initialization failed\n"); + return ret; + } + } else { + br = &ptree_dn[PKT_PROT_NODE].branch[0]; + br->valid = 0; + br->next_packet_pointer = 260; + br->next_node = LAST_NODE; + br->data = 0x0000; + br->mask = 0xffff; + } + + def_qid = xgene_enet_dst_ring_num(pdata->rx_ring[0]); + pool_id = pdata->rx_ring[0]->buf_pool->id; + def_fpsel = xgene_enet_ring_bufnum(pool_id) - 0x20; + + memset(dbptr, 0, sizeof(struct xgene_cle_dbptr) * DB_MAX_PTRS); + dbptr[DB_RES_ACCEPT].fpsel = def_fpsel; + dbptr[DB_RES_ACCEPT].dstqid = def_qid; + dbptr[DB_RES_ACCEPT].cle_priority = 1; + + dbptr[DB_RES_DEF].fpsel = def_fpsel; + dbptr[DB_RES_DEF].dstqid = def_qid; + dbptr[DB_RES_DEF].cle_priority = 7; + xgene_cle_setup_def_dbptr(pdata, enet_cle, &dbptr[DB_RES_DEF], + DB_RES_ACCEPT, 7); + + dbptr[DB_RES_DROP].drop = 1; + + memset(&kn, 0, sizeof(kn)); + kn.node_type = KN; + kn.num_keys = 1; + kn.key[0].priority = 0; + kn.key[0].result_pointer = DB_RES_ACCEPT; + + ptree->dn = ptree_dn; + ptree->kn = &kn; + ptree->dbptr = dbptr; + ptree->num_dn = MAX_NODES; + ptree->num_kn = 1; + ptree->num_dbptr = DB_MAX_PTRS; + + return xgene_cle_setup_ptree(pdata, enet_cle); +} + +struct xgene_cle_ops xgene_cle3in_ops = { + .cle_init = xgene_enet_cle_init, +}; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h new file mode 100644 index 000000000000..29a17abdd828 --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h @@ -0,0 +1,295 @@ +/* Applied Micro X-Gene SoC Ethernet Classifier structures + * + * Copyright (c) 2016, Applied Micro Circuits Corporation + * Authors: Khuong Dinh <kdinh@apm.com> + * Tanmay Inamdar <tinamdar@apm.com> + * Iyappan Subramanian <isubramanian@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __XGENE_ENET_CLE_H__ +#define __XGENE_ENET_CLE_H__ + +#include <linux/io.h> +#include <linux/random.h> + +/* Register offsets */ +#define INDADDR 0x04 +#define INDCMD 0x08 +#define INDCMD_STATUS 0x0c +#define DATA_RAM0 0x10 +#define SNPTR0 0x0100 +#define SPPTR0 0x0104 +#define DFCLSRESDBPTR0 0x0108 +#define DFCLSRESDB00 0x010c +#define RSS_CTRL0 0x0000013c + +#define CLE_CMD_TO 10 /* ms */ +#define CLE_PKTRAM_SIZE 256 /* bytes */ +#define CLE_PORT_OFFSET 0x200 +#define CLE_DRAM_REGS 17 + +#define CLE_DN_TYPE_LEN 2 +#define CLE_DN_TYPE_POS 0 +#define CLE_DN_LASTN_LEN 1 +#define CLE_DN_LASTN_POS 2 +#define CLE_DN_HLS_LEN 1 +#define CLE_DN_HLS_POS 3 +#define CLE_DN_EXT_LEN 2 +#define CLE_DN_EXT_POS 4 +#define CLE_DN_BSTOR_LEN 2 +#define CLE_DN_BSTOR_POS 6 +#define CLE_DN_SBSTOR_LEN 2 +#define CLE_DN_SBSTOR_POS 8 +#define CLE_DN_RPTR_LEN 12 +#define CLE_DN_RPTR_POS 12 + +#define CLE_BR_VALID_LEN 1 +#define CLE_BR_VALID_POS 0 +#define CLE_BR_NPPTR_LEN 9 +#define CLE_BR_NPPTR_POS 1 +#define CLE_BR_JB_LEN 1 +#define CLE_BR_JB_POS 10 +#define CLE_BR_JR_LEN 1 +#define CLE_BR_JR_POS 11 +#define CLE_BR_OP_LEN 3 +#define CLE_BR_OP_POS 12 +#define CLE_BR_NNODE_LEN 9 +#define CLE_BR_NNODE_POS 15 +#define CLE_BR_NBR_LEN 5 +#define CLE_BR_NBR_POS 24 + +#define CLE_BR_DATA_LEN 16 +#define CLE_BR_DATA_POS 0 +#define CLE_BR_MASK_LEN 16 +#define CLE_BR_MASK_POS 16 + +#define CLE_KN_PRIO_POS 0 +#define CLE_KN_PRIO_LEN 3 +#define CLE_KN_RPTR_POS 3 +#define CLE_KN_RPTR_LEN 10 +#define CLE_TYPE_POS 0 +#define CLE_TYPE_LEN 2 + +#define CLE_DSTQIDL_POS 25 +#define CLE_DSTQIDL_LEN 7 +#define CLE_DSTQIDH_POS 0 +#define CLE_DSTQIDH_LEN 5 +#define CLE_FPSEL_POS 21 +#define CLE_FPSEL_LEN 4 +#define CLE_PRIORITY_POS 5 +#define CLE_PRIORITY_LEN 3 + +#define JMP_ABS 0 +#define JMP_REL 1 +#define JMP_FW 0 +#define JMP_BW 1 + +enum xgene_cle_ptree_nodes { + PKT_TYPE_NODE, + PKT_PROT_NODE, + RSS_IPV4_TCP_NODE, + RSS_IPV4_UDP_NODE, + LAST_NODE, + MAX_NODES +}; + +enum xgene_cle_byte_store { + NO_BYTE, + FIRST_BYTE, + SECOND_BYTE, + BOTH_BYTES +}; + +/* Preclassification operation types */ +enum xgene_cle_node_type { + INV, + KN, + EWDN, + RES_NODE +}; + +/* Preclassification operation types */ +enum xgene_cle_op_type { + EQT, + NEQT, + LTEQT, + GTEQT, + AND, + NAND +}; + +enum xgene_cle_parser { + PARSER0, + PARSER1, + PARSER2, + PARSER_ALL +}; + +#define XGENE_CLE_DRAM(type) (((type) & 0xf) << 28) +enum xgene_cle_dram_type { + PKT_RAM, + RSS_IDT, + RSS_IPV4_HASH_SKEY, + PTREE_RAM = 0xc, + AVL_RAM, + DB_RAM +}; + +enum xgene_cle_cmd_type { + CLE_CMD_WR = 1, + CLE_CMD_RD = 2, + CLE_CMD_AVL_ADD = 8, + CLE_CMD_AVL_DEL = 16, + CLE_CMD_AVL_SRCH = 32 +}; + +enum xgene_cle_ipv4_rss_hashtype { + RSS_IPV4_8B, + RSS_IPV4_12B, +}; + +enum xgene_cle_prot_type { + XGENE_CLE_TCP, + XGENE_CLE_UDP, + XGENE_CLE_ESP, + XGENE_CLE_OTHER +}; + +enum xgene_cle_prot_version { + XGENE_CLE_IPV4, +}; + +enum xgene_cle_ptree_dbptrs { + DB_RES_DROP, + DB_RES_DEF, + DB_RES_ACCEPT, + DB_MAX_PTRS +}; + +/* RSS sideband signal info */ +#define SB_IPFRAG_POS 0 +#define SB_IPFRAG_LEN 1 +#define SB_IPPROT_POS 1 +#define SB_IPPROT_LEN 2 +#define SB_IPVER_POS 3 +#define SB_IPVER_LEN 1 +#define SB_HDRLEN_POS 4 +#define SB_HDRLEN_LEN 12 + +/* RSS indirection table */ +#define XGENE_CLE_IDT_ENTRIES 128 +#define IDT_DSTQID_POS 0 +#define IDT_DSTQID_LEN 12 +#define IDT_FPSEL_POS 12 +#define IDT_FPSEL_LEN 4 +#define IDT_NFPSEL_POS 16 +#define IDT_NFPSEL_LEN 4 + +struct xgene_cle_ptree_branch { + bool valid; + u16 next_packet_pointer; + bool jump_bw; + bool jump_rel; + u8 operation; + u16 next_node; + u8 next_branch; + u16 data; + u16 mask; +}; + +struct xgene_cle_ptree_ewdn { + u8 node_type; + bool last_node; + bool hdr_len_store; + u8 hdr_extn; + u8 byte_store; + u8 search_byte_store; + u16 result_pointer; + u8 num_branches; + struct xgene_cle_ptree_branch branch[6]; +}; + +struct xgene_cle_ptree_key { + u8 priority; + u16 result_pointer; +}; + +struct xgene_cle_ptree_kn { + u8 node_type; + u8 num_keys; + struct xgene_cle_ptree_key key[32]; +}; + +struct xgene_cle_dbptr { + u8 split_boundary; + u8 mirror_nxtfpsel; + u8 mirror_fpsel; + u16 mirror_dstqid; + u8 drop; + u8 mirror; + u8 hdr_data_split; + u64 hopinfomsbs; + u8 DR; + u8 HR; + u64 hopinfomlsbs; + u16 h0enq_num; + u8 h0fpsel; + u8 nxtfpsel; + u8 fpsel; + u16 dstqid; + u8 cle_priority; + u8 cle_flowgroup; + u8 cle_perflow; + u8 cle_insert_timestamp; + u8 stash; + u8 in; + u8 perprioen; + u8 perflowgroupen; + u8 perflowen; + u8 selhash; + u8 selhdrext; + u8 mirror_nxtfpsel_msb; + u8 mirror_fpsel_msb; + u8 hfpsel_msb; + u8 nxtfpsel_msb; + u8 fpsel_msb; +}; + +struct xgene_cle_ptree { + struct xgene_cle_ptree_ewdn *dn; + struct xgene_cle_ptree_kn *kn; + struct xgene_cle_dbptr *dbptr; + u32 num_dn; + u32 num_kn; + u32 num_dbptr; + u32 start_node; + u32 start_pkt; + u32 start_dbptr; +}; + +struct xgene_enet_cle { + void __iomem *base; + struct xgene_cle_ptree ptree; + enum xgene_cle_parser active_parser; + u32 parsers; + u32 max_nodes; + u32 max_dbptrs; + u32 jump_bytes; +}; + +extern struct xgene_cle_ops xgene_cle3in_ops; + +#endif /* __XGENE_ENET_CLE_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index db55c9f6e8e1..39e081a70f5b 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -204,6 +204,17 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) return num_msgs; } +static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring) +{ + u32 data = 0x7777; + + xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16); + xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40); + xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80); +} + void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, struct xgene_enet_pdata *pdata, enum xgene_enet_err_code status) @@ -892,4 +903,5 @@ struct xgene_ring_ops xgene_ring1_ops = { .clear = xgene_enet_clear_ring, .wr_cmd = xgene_enet_wr_cmd, .len = xgene_enet_ring_len, + .coalesce = xgene_enet_setup_coalescing, }; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 8a9091039ab4..ba7da98af2ef 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -54,6 +54,11 @@ enum xgene_enet_rm { #define IS_BUFFER_POOL BIT(20) #define PREFETCH_BUF_EN BIT(21) #define CSR_RING_ID_BUF 0x000c +#define CSR_PBM_COAL 0x0014 +#define CSR_PBM_CTICK1 0x001c +#define CSR_PBM_CTICK2 0x0020 +#define CSR_THRESHOLD0_SET1 0x0030 +#define CSR_THRESHOLD1_SET1 0x0034 #define CSR_RING_NE_INT_MODE 0x017c #define CSR_RING_CONFIG 0x006c #define CSR_RING_WR_BASE 0x0070 @@ -101,6 +106,7 @@ enum xgene_enet_rm { #define MAC_OFFSET 0x30 #define BLOCK_ETH_CSR_OFFSET 0x2000 +#define BLOCK_ETH_CLE_CSR_OFFSET 0x6000 #define BLOCK_ETH_RING_IF_OFFSET 0x9000 #define BLOCK_ETH_CLKRST_CSR_OFFSET 0xc000 #define BLOCK_ETH_DIAG_CSR_OFFSET 0xD000 diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 5eb9b20c0eea..8d4c1ad2fc60 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -93,13 +93,6 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, return 0; } -static u16 xgene_enet_dst_ring_num(struct xgene_enet_desc_ring *ring) -{ - struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); - - return ((u16)pdata->rm << 10) | ring->num; -} - static u8 xgene_enet_hdr_len(const void *data) { const struct ethhdr *eth = data; @@ -189,7 +182,6 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, static u64 xgene_enet_work_msg(struct sk_buff *skb) { struct net_device *ndev = skb->dev; - struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct iphdr *iph; u8 l3hlen = 0, l4hlen = 0; u8 ethhdr, proto = 0, csum_enable = 0; @@ -235,10 +227,6 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb) if (!mss || ((skb->len - hdr_len) <= mss)) goto out; - if (mss != pdata->mss) { - pdata->mss = mss; - pdata->mac_ops->set_mss(pdata); - } hopinfo |= SET_BIT(ET); } } else if (iph->protocol == IPPROTO_UDP) { @@ -420,7 +408,7 @@ out: raw_desc->m0 = cpu_to_le64(SET_VAL(LL, ll) | SET_VAL(NV, nv) | SET_VAL(USERINFO, tx_ring->tail)); tx_ring->cp_ring->cp_skb[tx_ring->tail] = skb; - pdata->tx_level += count; + pdata->tx_level[tx_ring->cp_ring->index] += count; tx_ring->tail = tail; return count; @@ -430,15 +418,17 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - struct xgene_enet_desc_ring *tx_ring = pdata->tx_ring; - u32 tx_level = pdata->tx_level; + struct xgene_enet_desc_ring *tx_ring; + int index = skb->queue_mapping; + u32 tx_level = pdata->tx_level[index]; int count; - if (tx_level < pdata->txc_level) - tx_level += ((typeof(pdata->tx_level))~0U); + tx_ring = pdata->tx_ring[index]; + if (tx_level < pdata->txc_level[index]) + tx_level += ((typeof(pdata->tx_level[index]))~0U); - if ((tx_level - pdata->txc_level) > pdata->tx_qcnt_hi) { - netif_stop_queue(ndev); + if ((tx_level - pdata->txc_level[index]) > pdata->tx_qcnt_hi) { + netif_stop_subqueue(ndev, index); return NETDEV_TX_BUSY; } @@ -536,7 +526,8 @@ static bool is_rx_desc(struct xgene_enet_raw_desc *raw_desc) static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring, int budget) { - struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); + struct net_device *ndev = ring->ndev; + struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct xgene_enet_raw_desc *raw_desc, *exp_desc; u16 head = ring->head; u16 slots = ring->slots - 1; @@ -580,7 +571,7 @@ static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring, desc_count++; processed++; if (is_completion) - pdata->txc_level += desc_count; + pdata->txc_level[ring->index] += desc_count; if (ret) break; @@ -590,8 +581,8 @@ static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring, pdata->ring_ops->wr_cmd(ring, -count); ring->head = head; - if (netif_queue_stopped(ring->ndev)) - netif_start_queue(ring->ndev); + if (__netif_subqueue_stopped(ndev, ring->index)) + netif_start_subqueue(ndev, ring->index); } return processed; @@ -616,8 +607,16 @@ static int xgene_enet_napi(struct napi_struct *napi, const int budget) static void xgene_enet_timeout(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct netdev_queue *txq; + int i; pdata->mac_ops->reset(pdata); + + for (i = 0; i < pdata->txq_cnt; i++) { + txq = netdev_get_tx_queue(ndev, i); + txq->trans_start = jiffies; + netif_tx_start_queue(txq); + } } static int xgene_enet_register_irq(struct net_device *ndev) @@ -625,17 +624,21 @@ static int xgene_enet_register_irq(struct net_device *ndev) struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct device *dev = ndev_to_dev(ndev); struct xgene_enet_desc_ring *ring; - int ret; + int ret = 0, i; - ring = pdata->rx_ring; - irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY); - ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq, - IRQF_SHARED, ring->irq_name, ring); - if (ret) - netdev_err(ndev, "Failed to request irq %s\n", ring->irq_name); + for (i = 0; i < pdata->rxq_cnt; i++) { + ring = pdata->rx_ring[i]; + irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY); + ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq, + IRQF_SHARED, ring->irq_name, ring); + if (ret) { + netdev_err(ndev, "Failed to request irq %s\n", + ring->irq_name); + } + } - if (pdata->cq_cnt) { - ring = pdata->tx_ring->cp_ring; + for (i = 0; i < pdata->cq_cnt; i++) { + ring = pdata->tx_ring[i]->cp_ring; irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY); ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq, IRQF_SHARED, ring->irq_name, ring); @@ -653,15 +656,19 @@ static void xgene_enet_free_irq(struct net_device *ndev) struct xgene_enet_pdata *pdata; struct xgene_enet_desc_ring *ring; struct device *dev; + int i; pdata = netdev_priv(ndev); dev = ndev_to_dev(ndev); - ring = pdata->rx_ring; - irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY); - devm_free_irq(dev, ring->irq, ring); - if (pdata->cq_cnt) { - ring = pdata->tx_ring->cp_ring; + for (i = 0; i < pdata->rxq_cnt; i++) { + ring = pdata->rx_ring[i]; + irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY); + devm_free_irq(dev, ring->irq, ring); + } + + for (i = 0; i < pdata->cq_cnt; i++) { + ring = pdata->tx_ring[i]->cp_ring; irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY); devm_free_irq(dev, ring->irq, ring); } @@ -670,12 +677,15 @@ static void xgene_enet_free_irq(struct net_device *ndev) static void xgene_enet_napi_enable(struct xgene_enet_pdata *pdata) { struct napi_struct *napi; + int i; - napi = &pdata->rx_ring->napi; - napi_enable(napi); + for (i = 0; i < pdata->rxq_cnt; i++) { + napi = &pdata->rx_ring[i]->napi; + napi_enable(napi); + } - if (pdata->cq_cnt) { - napi = &pdata->tx_ring->cp_ring->napi; + for (i = 0; i < pdata->cq_cnt; i++) { + napi = &pdata->tx_ring[i]->cp_ring->napi; napi_enable(napi); } } @@ -683,12 +693,15 @@ static void xgene_enet_napi_enable(struct xgene_enet_pdata *pdata) static void xgene_enet_napi_disable(struct xgene_enet_pdata *pdata) { struct napi_struct *napi; + int i; - napi = &pdata->rx_ring->napi; - napi_disable(napi); + for (i = 0; i < pdata->rxq_cnt; i++) { + napi = &pdata->rx_ring[i]->napi; + napi_disable(napi); + } - if (pdata->cq_cnt) { - napi = &pdata->tx_ring->cp_ring->napi; + for (i = 0; i < pdata->cq_cnt; i++) { + napi = &pdata->tx_ring[i]->cp_ring->napi; napi_disable(napi); } } @@ -699,6 +712,14 @@ static int xgene_enet_open(struct net_device *ndev) const struct xgene_mac_ops *mac_ops = pdata->mac_ops; int ret; + ret = netif_set_real_num_tx_queues(ndev, pdata->txq_cnt); + if (ret) + return ret; + + ret = netif_set_real_num_rx_queues(ndev, pdata->rxq_cnt); + if (ret) + return ret; + mac_ops->tx_enable(pdata); mac_ops->rx_enable(pdata); @@ -721,6 +742,7 @@ static int xgene_enet_close(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); const struct xgene_mac_ops *mac_ops = pdata->mac_ops; + int i; netif_stop_queue(ndev); @@ -734,7 +756,8 @@ static int xgene_enet_close(struct net_device *ndev) xgene_enet_free_irq(ndev); xgene_enet_napi_disable(pdata); - xgene_enet_process_ring(pdata->rx_ring, -1); + for (i = 0; i < pdata->rxq_cnt; i++) + xgene_enet_process_ring(pdata->rx_ring[i], -1); return 0; } @@ -754,18 +777,26 @@ static void xgene_enet_delete_ring(struct xgene_enet_desc_ring *ring) static void xgene_enet_delete_desc_rings(struct xgene_enet_pdata *pdata) { struct xgene_enet_desc_ring *buf_pool; + struct xgene_enet_desc_ring *ring; + int i; - if (pdata->tx_ring) { - xgene_enet_delete_ring(pdata->tx_ring); - pdata->tx_ring = NULL; + for (i = 0; i < pdata->txq_cnt; i++) { + ring = pdata->tx_ring[i]; + if (ring) { + xgene_enet_delete_ring(ring); + pdata->tx_ring[i] = NULL; + } } - if (pdata->rx_ring) { - buf_pool = pdata->rx_ring->buf_pool; - xgene_enet_delete_bufpool(buf_pool); - xgene_enet_delete_ring(buf_pool); - xgene_enet_delete_ring(pdata->rx_ring); - pdata->rx_ring = NULL; + for (i = 0; i < pdata->rxq_cnt; i++) { + ring = pdata->rx_ring[i]; + if (ring) { + buf_pool = ring->buf_pool; + xgene_enet_delete_bufpool(buf_pool); + xgene_enet_delete_ring(buf_pool); + xgene_enet_delete_ring(ring); + pdata->rx_ring[i] = NULL; + } } } @@ -820,24 +851,29 @@ static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata) { struct device *dev = &pdata->pdev->dev; struct xgene_enet_desc_ring *ring; + int i; + + for (i = 0; i < pdata->txq_cnt; i++) { + ring = pdata->tx_ring[i]; + if (ring) { + if (ring->cp_ring && ring->cp_ring->cp_skb) + devm_kfree(dev, ring->cp_ring->cp_skb); + if (ring->cp_ring && pdata->cq_cnt) + xgene_enet_free_desc_ring(ring->cp_ring); + xgene_enet_free_desc_ring(ring); + } + } - ring = pdata->tx_ring; - if (ring) { - if (ring->cp_ring && ring->cp_ring->cp_skb) - devm_kfree(dev, ring->cp_ring->cp_skb); - if (ring->cp_ring && pdata->cq_cnt) - xgene_enet_free_desc_ring(ring->cp_ring); - xgene_enet_free_desc_ring(ring); - } - - ring = pdata->rx_ring; - if (ring) { - if (ring->buf_pool) { - if (ring->buf_pool->rx_skb) - devm_kfree(dev, ring->buf_pool->rx_skb); - xgene_enet_free_desc_ring(ring->buf_pool); + for (i = 0; i < pdata->rxq_cnt; i++) { + ring = pdata->rx_ring[i]; + if (ring) { + if (ring->buf_pool) { + if (ring->buf_pool->rx_skb) + devm_kfree(dev, ring->buf_pool->rx_skb); + xgene_enet_free_desc_ring(ring->buf_pool); + } + xgene_enet_free_desc_ring(ring); } - xgene_enet_free_desc_ring(ring); } } @@ -950,104 +986,120 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) u8 bp_bufnum = pdata->bp_bufnum; u16 ring_num = pdata->ring_num; u16 ring_id; - int ret, size; - - /* allocate rx descriptor ring */ - owner = xgene_derive_ring_owner(pdata); - ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++); - rx_ring = xgene_enet_create_desc_ring(ndev, ring_num++, - RING_CFGSIZE_16KB, ring_id); - if (!rx_ring) { - ret = -ENOMEM; - goto err; - } + int i, ret, size; - /* allocate buffer pool for receiving packets */ - owner = xgene_derive_ring_owner(pdata); - ring_id = xgene_enet_get_ring_id(owner, bp_bufnum++); - buf_pool = xgene_enet_create_desc_ring(ndev, ring_num++, - RING_CFGSIZE_2KB, ring_id); - if (!buf_pool) { - ret = -ENOMEM; - goto err; - } - - rx_ring->nbufpool = NUM_BUFPOOL; - rx_ring->buf_pool = buf_pool; - rx_ring->irq = pdata->rx_irq; - if (!pdata->cq_cnt) { - snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx-txc", - ndev->name); - } else { - snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx", ndev->name); - } - buf_pool->rx_skb = devm_kcalloc(dev, buf_pool->slots, - sizeof(struct sk_buff *), GFP_KERNEL); - if (!buf_pool->rx_skb) { - ret = -ENOMEM; - goto err; - } - - buf_pool->dst_ring_num = xgene_enet_dst_ring_num(buf_pool); - rx_ring->buf_pool = buf_pool; - pdata->rx_ring = rx_ring; + for (i = 0; i < pdata->rxq_cnt; i++) { + /* allocate rx descriptor ring */ + owner = xgene_derive_ring_owner(pdata); + ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++); + rx_ring = xgene_enet_create_desc_ring(ndev, ring_num++, + RING_CFGSIZE_16KB, + ring_id); + if (!rx_ring) { + ret = -ENOMEM; + goto err; + } - /* allocate tx descriptor ring */ - owner = xgene_derive_ring_owner(pdata); - ring_id = xgene_enet_get_ring_id(owner, eth_bufnum++); - tx_ring = xgene_enet_create_desc_ring(ndev, ring_num++, - RING_CFGSIZE_16KB, ring_id); - if (!tx_ring) { - ret = -ENOMEM; - goto err; - } + /* allocate buffer pool for receiving packets */ + owner = xgene_derive_ring_owner(pdata); + ring_id = xgene_enet_get_ring_id(owner, bp_bufnum++); + buf_pool = xgene_enet_create_desc_ring(ndev, ring_num++, + RING_CFGSIZE_2KB, + ring_id); + if (!buf_pool) { + ret = -ENOMEM; + goto err; + } - size = (tx_ring->slots / 2) * sizeof(__le64) * MAX_EXP_BUFFS; - tx_ring->exp_bufs = dma_zalloc_coherent(dev, size, &dma_exp_bufs, + rx_ring->nbufpool = NUM_BUFPOOL; + rx_ring->buf_pool = buf_pool; + rx_ring->irq = pdata->irqs[i]; + if (!pdata->cq_cnt) { + snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx-txc", + ndev->name); + } else { + snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx%d", + ndev->name, i); + } + buf_pool->rx_skb = devm_kcalloc(dev, buf_pool->slots, + sizeof(struct sk_buff *), GFP_KERNEL); - if (!tx_ring->exp_bufs) { - ret = -ENOMEM; - goto err; - } + if (!buf_pool->rx_skb) { + ret = -ENOMEM; + goto err; + } - pdata->tx_ring = tx_ring; + buf_pool->dst_ring_num = xgene_enet_dst_ring_num(buf_pool); + rx_ring->buf_pool = buf_pool; + pdata->rx_ring[i] = rx_ring; + } - if (!pdata->cq_cnt) { - cp_ring = pdata->rx_ring; - } else { - /* allocate tx completion descriptor ring */ - ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++); - cp_ring = xgene_enet_create_desc_ring(ndev, ring_num++, + for (i = 0; i < pdata->txq_cnt; i++) { + /* allocate tx descriptor ring */ + owner = xgene_derive_ring_owner(pdata); + ring_id = xgene_enet_get_ring_id(owner, eth_bufnum++); + tx_ring = xgene_enet_create_desc_ring(ndev, ring_num++, RING_CFGSIZE_16KB, ring_id); - if (!cp_ring) { + if (!tx_ring) { ret = -ENOMEM; goto err; } - cp_ring->irq = pdata->txc_irq; - snprintf(cp_ring->irq_name, IRQ_ID_SIZE, "%s-txc", ndev->name); - } - cp_ring->cp_skb = devm_kcalloc(dev, tx_ring->slots, - sizeof(struct sk_buff *), GFP_KERNEL); - if (!cp_ring->cp_skb) { - ret = -ENOMEM; - goto err; - } + size = (tx_ring->slots / 2) * sizeof(__le64) * MAX_EXP_BUFFS; + tx_ring->exp_bufs = dma_zalloc_coherent(dev, size, + &dma_exp_bufs, + GFP_KERNEL); + if (!tx_ring->exp_bufs) { + ret = -ENOMEM; + goto err; + } - size = sizeof(dma_addr_t) * MAX_SKB_FRAGS; - cp_ring->frag_dma_addr = devm_kcalloc(dev, tx_ring->slots, - size, GFP_KERNEL); - if (!cp_ring->frag_dma_addr) { - devm_kfree(dev, cp_ring->cp_skb); - ret = -ENOMEM; - goto err; - } + pdata->tx_ring[i] = tx_ring; - pdata->tx_ring->cp_ring = cp_ring; - pdata->tx_ring->dst_ring_num = xgene_enet_dst_ring_num(cp_ring); + if (!pdata->cq_cnt) { + cp_ring = pdata->rx_ring[i]; + } else { + /* allocate tx completion descriptor ring */ + ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, + cpu_bufnum++); + cp_ring = xgene_enet_create_desc_ring(ndev, ring_num++, + RING_CFGSIZE_16KB, + ring_id); + if (!cp_ring) { + ret = -ENOMEM; + goto err; + } - pdata->tx_qcnt_hi = pdata->tx_ring->slots - 128; + cp_ring->irq = pdata->irqs[pdata->rxq_cnt + i]; + cp_ring->index = i; + snprintf(cp_ring->irq_name, IRQ_ID_SIZE, "%s-txc%d", + ndev->name, i); + } + + cp_ring->cp_skb = devm_kcalloc(dev, tx_ring->slots, + sizeof(struct sk_buff *), + GFP_KERNEL); + if (!cp_ring->cp_skb) { + ret = -ENOMEM; + goto err; + } + + size = sizeof(dma_addr_t) * MAX_SKB_FRAGS; + cp_ring->frag_dma_addr = devm_kcalloc(dev, tx_ring->slots, + size, GFP_KERNEL); + if (!cp_ring->frag_dma_addr) { + devm_kfree(dev, cp_ring->cp_skb); + ret = -ENOMEM; + goto err; + } + + tx_ring->cp_ring = cp_ring; + tx_ring->dst_ring_num = xgene_enet_dst_ring_num(cp_ring); + } + + pdata->ring_ops->coalesce(pdata->tx_ring[0]); + pdata->tx_qcnt_hi = pdata->tx_ring[0]->slots - 128; return 0; @@ -1166,6 +1218,32 @@ static int xgene_get_rx_delay(struct xgene_enet_pdata *pdata) return 0; } +static int xgene_enet_get_irqs(struct xgene_enet_pdata *pdata) +{ + struct platform_device *pdev = pdata->pdev; + struct device *dev = &pdev->dev; + int i, ret, max_irqs; + + if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) + max_irqs = 1; + else if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) + max_irqs = 2; + else + max_irqs = XGENE_MAX_ENET_IRQ; + + for (i = 0; i < max_irqs; i++) { + ret = platform_get_irq(pdev, i); + if (ret <= 0) { + dev_err(dev, "Unable to get ENET IRQ\n"); + ret = ret ? : -ENXIO; + return ret; + } + pdata->irqs[i] = ret; + } + + return 0; +} + static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) { struct platform_device *pdev; @@ -1247,25 +1325,9 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) if (ret) return ret; - ret = platform_get_irq(pdev, 0); - if (ret <= 0) { - dev_err(dev, "Unable to get ENET Rx IRQ\n"); - ret = ret ? : -ENXIO; + ret = xgene_enet_get_irqs(pdata); + if (ret) return ret; - } - pdata->rx_irq = ret; - - if (pdata->phy_mode != PHY_INTERFACE_MODE_RGMII) { - ret = platform_get_irq(pdev, 1); - if (ret <= 0) { - pdata->cq_cnt = 0; - dev_info(dev, "Unable to get Tx completion IRQ," - "using Rx IRQ instead\n"); - } else { - pdata->cq_cnt = XGENE_MAX_TXC_RINGS; - pdata->txc_irq = ret; - } - } pdata->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) { @@ -1278,6 +1340,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) else base_addr = pdata->base_addr; pdata->eth_csr_addr = base_addr + BLOCK_ETH_CSR_OFFSET; + pdata->cle.base = base_addr + BLOCK_ETH_CLE_CSR_OFFSET; pdata->eth_ring_if_addr = base_addr + BLOCK_ETH_RING_IF_OFFSET; pdata->eth_diag_csr_addr = base_addr + BLOCK_ETH_DIAG_CSR_OFFSET; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII || @@ -1298,10 +1361,11 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata) { + struct xgene_enet_cle *enet_cle = &pdata->cle; struct net_device *ndev = pdata->ndev; struct xgene_enet_desc_ring *buf_pool; u16 dst_ring_num; - int ret; + int i, ret; ret = pdata->port_ops->reset(pdata); if (ret) @@ -1314,16 +1378,36 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata) } /* setup buffer pool */ - buf_pool = pdata->rx_ring->buf_pool; - xgene_enet_init_bufpool(buf_pool); - ret = xgene_enet_refill_bufpool(buf_pool, pdata->rx_buff_cnt); - if (ret) { - xgene_enet_delete_desc_rings(pdata); - return ret; + for (i = 0; i < pdata->rxq_cnt; i++) { + buf_pool = pdata->rx_ring[i]->buf_pool; + xgene_enet_init_bufpool(buf_pool); + ret = xgene_enet_refill_bufpool(buf_pool, pdata->rx_buff_cnt); + if (ret) { + xgene_enet_delete_desc_rings(pdata); + return ret; + } + } + + dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring[0]); + buf_pool = pdata->rx_ring[0]->buf_pool; + if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) { + /* Initialize and Enable PreClassifier Tree */ + enet_cle->max_nodes = 512; + enet_cle->max_dbptrs = 1024; + enet_cle->parsers = 3; + enet_cle->active_parser = PARSER_ALL; + enet_cle->ptree.start_node = 0; + enet_cle->ptree.start_dbptr = 0; + enet_cle->jump_bytes = 8; + ret = pdata->cle_ops->cle_init(pdata); + if (ret) { + netdev_err(ndev, "Preclass Tree init error\n"); + return ret; + } + } else { + pdata->port_ops->cle_bypass(pdata, dst_ring_num, buf_pool->id); } - dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring); - pdata->port_ops->cle_bypass(pdata, dst_ring_num, buf_pool->id); pdata->mac_ops->init(pdata); return ret; @@ -1336,16 +1420,26 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) pdata->mac_ops = &xgene_gmac_ops; pdata->port_ops = &xgene_gport_ops; pdata->rm = RM3; + pdata->rxq_cnt = 1; + pdata->txq_cnt = 1; + pdata->cq_cnt = 0; break; case PHY_INTERFACE_MODE_SGMII: pdata->mac_ops = &xgene_sgmac_ops; pdata->port_ops = &xgene_sgport_ops; pdata->rm = RM1; + pdata->rxq_cnt = 1; + pdata->txq_cnt = 1; + pdata->cq_cnt = 1; break; default: pdata->mac_ops = &xgene_xgmac_ops; pdata->port_ops = &xgene_xgport_ops; + pdata->cle_ops = &xgene_cle3in_ops; pdata->rm = RM0; + pdata->rxq_cnt = XGENE_NUM_RX_RING; + pdata->txq_cnt = XGENE_NUM_TX_RING; + pdata->cq_cnt = XGENE_NUM_TXC_RING; break; } @@ -1399,12 +1493,16 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) static void xgene_enet_napi_add(struct xgene_enet_pdata *pdata) { struct napi_struct *napi; + int i; - napi = &pdata->rx_ring->napi; - netif_napi_add(pdata->ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT); + for (i = 0; i < pdata->rxq_cnt; i++) { + napi = &pdata->rx_ring[i]->napi; + netif_napi_add(pdata->ndev, napi, xgene_enet_napi, + NAPI_POLL_WEIGHT); + } - if (pdata->cq_cnt) { - napi = &pdata->tx_ring->cp_ring->napi; + for (i = 0; i < pdata->cq_cnt; i++) { + napi = &pdata->tx_ring[i]->cp_ring->napi; netif_napi_add(pdata->ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT); } @@ -1413,12 +1511,15 @@ static void xgene_enet_napi_add(struct xgene_enet_pdata *pdata) static void xgene_enet_napi_del(struct xgene_enet_pdata *pdata) { struct napi_struct *napi; + int i; - napi = &pdata->rx_ring->napi; - netif_napi_del(napi); + for (i = 0; i < pdata->rxq_cnt; i++) { + napi = &pdata->rx_ring[i]->napi; + netif_napi_del(napi); + } - if (pdata->cq_cnt) { - napi = &pdata->tx_ring->cp_ring->napi; + for (i = 0; i < pdata->cq_cnt; i++) { + napi = &pdata->tx_ring[i]->cp_ring->napi; netif_napi_del(napi); } } @@ -1432,7 +1533,8 @@ static int xgene_enet_probe(struct platform_device *pdev) const struct of_device_id *of_id; int ret; - ndev = alloc_etherdev(sizeof(struct xgene_enet_pdata)); + ndev = alloc_etherdev_mqs(sizeof(struct xgene_enet_pdata), + XGENE_NUM_RX_RING, XGENE_NUM_TX_RING); if (!ndev) return -ENOMEM; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 248dfc40a761..175d18890c7a 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -36,6 +36,7 @@ #include <linux/if_vlan.h> #include <linux/phy.h> #include "xgene_enet_hw.h" +#include "xgene_enet_cle.h" #include "xgene_enet_ring2.h" #define XGENE_DRV_VERSION "v1.0" @@ -48,6 +49,11 @@ #define XGENE_ENET_MSS 1448 #define XGENE_MIN_ENET_FRAME_SIZE 60 +#define XGENE_MAX_ENET_IRQ 8 +#define XGENE_NUM_RX_RING 4 +#define XGENE_NUM_TX_RING 4 +#define XGENE_NUM_TXC_RING 4 + #define START_CPU_BUFNUM_0 0 #define START_ETH_BUFNUM_0 2 #define START_BP_BUFNUM_0 0x22 @@ -72,7 +78,6 @@ #define X2_START_RING_NUM_1 256 #define IRQ_ID_SIZE 16 -#define XGENE_MAX_TXC_RINGS 1 #define PHY_POLL_LINK_ON (10 * HZ) #define PHY_POLL_LINK_OFF (PHY_POLL_LINK_ON / 5) @@ -102,6 +107,7 @@ struct xgene_enet_desc_ring { void *irq_mbox_addr; u16 dst_ring_num; u8 nbufpool; + u8 index; struct sk_buff *(*rx_skb); struct sk_buff *(*cp_skb); dma_addr_t *frag_dma_addr; @@ -143,6 +149,11 @@ struct xgene_ring_ops { void (*clear)(struct xgene_enet_desc_ring *); void (*wr_cmd)(struct xgene_enet_desc_ring *, int); u32 (*len)(struct xgene_enet_desc_ring *); + void (*coalesce)(struct xgene_enet_desc_ring *); +}; + +struct xgene_cle_ops { + int (*cle_init)(struct xgene_enet_pdata *pdata); }; /* ethernet private data */ @@ -154,15 +165,16 @@ struct xgene_enet_pdata { struct clk *clk; struct platform_device *pdev; enum xgene_enet_id enet_id; - struct xgene_enet_desc_ring *tx_ring; - struct xgene_enet_desc_ring *rx_ring; - u16 tx_level; - u16 txc_level; + struct xgene_enet_desc_ring *tx_ring[XGENE_NUM_TX_RING]; + struct xgene_enet_desc_ring *rx_ring[XGENE_NUM_RX_RING]; + u16 tx_level[XGENE_NUM_TX_RING]; + u16 txc_level[XGENE_NUM_TX_RING]; char *dev_name; u32 rx_buff_cnt; u32 tx_qcnt_hi; - u32 rx_irq; - u32 txc_irq; + u32 irqs[XGENE_MAX_ENET_IRQ]; + u8 rxq_cnt; + u8 txq_cnt; u8 cq_cnt; void __iomem *eth_csr_addr; void __iomem *eth_ring_if_addr; @@ -174,10 +186,12 @@ struct xgene_enet_pdata { void __iomem *ring_cmd_addr; int phy_mode; enum xgene_enet_rm rm; + struct xgene_enet_cle cle; struct rtnl_link_stats64 stats; const struct xgene_mac_ops *mac_ops; const struct xgene_port_ops *port_ops; struct xgene_ring_ops *ring_ops; + struct xgene_cle_ops *cle_ops; struct delayed_work link_work; u32 port_id; u8 cpu_bufnum; @@ -229,6 +243,13 @@ static inline struct device *ndev_to_dev(struct net_device *ndev) return ndev->dev.parent; } +static inline u16 xgene_enet_dst_ring_num(struct xgene_enet_desc_ring *ring) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); + + return ((u16)pdata->rm << 10) | ring->num; +} + void xgene_enet_set_ethtool_ops(struct net_device *netdev); #endif /* __XGENE_ENET_MAIN_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c index 0b6896bb351e..2b76732add5d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c @@ -190,6 +190,17 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) return num_msgs; } +static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring) +{ + u32 data = 0x7777; + + xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16); + xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40); + xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80); +} + struct xgene_ring_ops xgene_ring2_ops = { .num_ring_config = X2_NUM_RING_CONFIG, .num_ring_id_shift = 13, @@ -197,4 +208,5 @@ struct xgene_ring_ops xgene_ring2_ops = { .clear = xgene_enet_clear_ring, .wr_cmd = xgene_enet_wr_cmd, .len = xgene_enet_ring_len, + .coalesce = xgene_enet_setup_coalescing, }; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 06f6cffdfaf5..230f8e6209e5 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -26,6 +26,17 @@ static const struct bcma_device_id bgmac_bcma_tbl[] = { }; MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl); +static inline bool bgmac_is_bcm4707_family(struct bgmac *bgmac) +{ + switch (bgmac->core->bus->chipinfo.id) { + case BCMA_CHIP_ID_BCM4707: + case BCMA_CHIP_ID_BCM53018: + return true; + default: + return false; + } +} + static bool bgmac_wait_value(struct bcma_device *core, u16 reg, u32 mask, u32 value, int timeout) { @@ -987,11 +998,9 @@ static void bgmac_mac_speed(struct bgmac *bgmac) static void bgmac_miiconfig(struct bgmac *bgmac) { struct bcma_device *core = bgmac->core; - struct bcma_chipinfo *ci = &core->bus->chipinfo; u8 imode; - if (ci->id == BCMA_CHIP_ID_BCM4707 || - ci->id == BCMA_CHIP_ID_BCM53018) { + if (bgmac_is_bcm4707_family(bgmac)) { bcma_awrite32(core, BCMA_IOCTL, bcma_aread32(core, BCMA_IOCTL) | 0x40 | BGMAC_BCMA_IOCTL_SW_CLKEN); @@ -1055,9 +1064,7 @@ static void bgmac_chip_reset(struct bgmac *bgmac) } /* Request Misc PLL for corerev > 2 */ - if (core->id.rev > 2 && - ci->id != BCMA_CHIP_ID_BCM4707 && - ci->id != BCMA_CHIP_ID_BCM53018) { + if (core->id.rev > 2 && !bgmac_is_bcm4707_family(bgmac)) { bgmac_set(bgmac, BCMA_CLKCTLST, BGMAC_BCMA_CLKCTLST_MISC_PLL_REQ); bgmac_wait_value(bgmac->core, BCMA_CLKCTLST, @@ -1193,8 +1200,7 @@ static void bgmac_enable(struct bgmac *bgmac) break; } - if (ci->id != BCMA_CHIP_ID_BCM4707 && - ci->id != BCMA_CHIP_ID_BCM53018) { + if (!bgmac_is_bcm4707_family(bgmac)) { rxq_ctl = bgmac_read(bgmac, BGMAC_RXQ_CTL); rxq_ctl &= ~BGMAC_RXQ_CTL_MDP_MASK; bp_clk = bcma_pmu_get_bus_clock(&bgmac->core->bus->drv_cc) / @@ -1472,14 +1478,12 @@ static int bgmac_fixed_phy_register(struct bgmac *bgmac) static int bgmac_mii_register(struct bgmac *bgmac) { - struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo; struct mii_bus *mii_bus; struct phy_device *phy_dev; char bus_id[MII_BUS_ID_SIZE + 3]; int err = 0; - if (ci->id == BCMA_CHIP_ID_BCM4707 || - ci->id == BCMA_CHIP_ID_BCM53018) + if (bgmac_is_bcm4707_family(bgmac)) return bgmac_fixed_phy_register(bgmac); mii_bus = mdiobus_alloc(); @@ -1539,7 +1543,6 @@ static void bgmac_mii_unregister(struct bgmac *bgmac) /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */ static int bgmac_probe(struct bcma_device *core) { - struct bcma_chipinfo *ci = &core->bus->chipinfo; struct net_device *net_dev; struct bgmac *bgmac; struct ssb_sprom *sprom = &core->bus->sprom; @@ -1620,8 +1623,7 @@ static int bgmac_probe(struct bcma_device *core) bgmac_chip_reset(bgmac); /* For Northstar, we have to take all GMAC core out of reset */ - if (ci->id == BCMA_CHIP_ID_BCM4707 || - ci->id == BCMA_CHIP_ID_BCM53018) { + if (bgmac_is_bcm4707_family(bgmac)) { struct bcma_device *ns_core; int ns_gmac; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index cae0956186ce..7dd7490fdac1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1277,8 +1277,7 @@ enum sp_rtnl_flag { BNX2X_SP_RTNL_HYPERVISOR_VLAN, BNX2X_SP_RTNL_TX_STOP, BNX2X_SP_RTNL_GET_DRV_VERSION, - BNX2X_SP_RTNL_ADD_VXLAN_PORT, - BNX2X_SP_RTNL_DEL_VXLAN_PORT, + BNX2X_SP_RTNL_CHANGE_UDP_PORT, }; enum bnx2x_iov_flag { @@ -1327,6 +1326,17 @@ struct bnx2x_vlan_entry { bool hw; }; +enum bnx2x_udp_port_type { + BNX2X_UDP_PORT_VXLAN, + BNX2X_UDP_PORT_GENEVE, + BNX2X_UDP_PORT_MAX, +}; + +struct bnx2x_udp_tunnel { + u16 dst_port; + u8 count; +}; + struct bnx2x { /* Fields used in the tx and intr/napi performance paths * are grouped together in the beginning of the structure @@ -1830,9 +1840,10 @@ struct bnx2x { struct list_head vlan_reg; u16 vlan_cnt; u16 vlan_credit; - u16 vxlan_dst_port; - u8 vxlan_dst_port_count; bool accept_any_vlan; + + /* Vxlan/Geneve related information */ + struct bnx2x_udp_tunnel udp_tunnel_ports[BNX2X_UDP_PORT_MAX]; }; /* Tx queues may be less or equal to Rx queues */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 9695a4c4a434..45843d150868 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4272,6 +4272,14 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc) return 0; } +int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + return bnx2x_setup_tc(dev, tc->tc); +} + /* called with rtnl_lock */ int bnx2x_change_mac_addr(struct net_device *dev, void *p) { @@ -5086,4 +5094,3 @@ void bnx2x_schedule_sp_rtnl(struct bnx2x *bp, enum sp_rtnl_flag flag, flag); schedule_delayed_work(&bp->sp_rtnl_task, 0); } -EXPORT_SYMBOL(bnx2x_schedule_sp_rtnl); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 4cbb03f87b5a..0e68fadecfdb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -486,6 +486,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev); /* setup_tc callback */ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc); +int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); int bnx2x_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); @@ -923,6 +925,7 @@ static inline int bnx2x_func_start(struct bnx2x *bp) struct bnx2x_func_state_params func_params = {NULL}; struct bnx2x_func_start_params *start_params = &func_params.params.start; + u16 port; /* Prepare parameters for function state transitions */ __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); @@ -959,8 +962,14 @@ static inline int bnx2x_func_start(struct bnx2x *bp) start_params->network_cos_mode = STATIC_COS; else /* CHIP_IS_E1X */ start_params->network_cos_mode = FW_WRR; - - start_params->vxlan_dst_port = bp->vxlan_dst_port; + if (bp->udp_tunnel_ports[BNX2X_UDP_PORT_VXLAN].count) { + port = bp->udp_tunnel_ports[BNX2X_UDP_PORT_VXLAN].dst_port; + start_params->vxlan_dst_port = port; + } + if (bp->udp_tunnel_ports[BNX2X_UDP_PORT_GENEVE].count) { + port = bp->udp_tunnel_ports[BNX2X_UDP_PORT_GENEVE].dst_port; + start_params->geneve_dst_port = port; + } start_params->inner_rss = 1; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c index 7ccf6684e0a3..2c6ba046d2a8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c @@ -195,6 +195,7 @@ static void bnx2x_dcbx_get_ap_feature(struct bnx2x *bp, u32 error) { u8 index; u32 *ttp = bp->dcbx_port_params.app.traffic_type_priority; + u8 iscsi_pri_found = 0, fcoe_pri_found = 0; if (GET_FLAGS(error, DCBX_LOCAL_APP_ERROR)) DP(BNX2X_MSG_DCB, "DCBX_LOCAL_APP_ERROR\n"); @@ -210,29 +211,57 @@ static void bnx2x_dcbx_get_ap_feature(struct bnx2x *bp, bp->dcbx_port_params.app.enabled = true; + /* Use 0 as the default application priority for all. */ for (index = 0 ; index < LLFC_DRIVER_TRAFFIC_TYPE_MAX; index++) ttp[index] = 0; - if (app->default_pri < MAX_PFC_PRIORITIES) - ttp[LLFC_TRAFFIC_TYPE_NW] = app->default_pri; - for (index = 0 ; index < DCBX_MAX_APP_PROTOCOL; index++) { struct dcbx_app_priority_entry *entry = app->app_pri_tbl; + enum traffic_type type = MAX_TRAFFIC_TYPE; if (GET_FLAGS(entry[index].appBitfield, - DCBX_APP_SF_ETH_TYPE) && - ETH_TYPE_FCOE == entry[index].app_id) - bnx2x_dcbx_get_ap_priority(bp, - entry[index].pri_bitmap, - LLFC_TRAFFIC_TYPE_FCOE); + DCBX_APP_SF_DEFAULT) && + GET_FLAGS(entry[index].appBitfield, + DCBX_APP_SF_ETH_TYPE)) { + type = LLFC_TRAFFIC_TYPE_NW; + } else if (GET_FLAGS(entry[index].appBitfield, + DCBX_APP_SF_PORT) && + TCP_PORT_ISCSI == entry[index].app_id) { + type = LLFC_TRAFFIC_TYPE_ISCSI; + iscsi_pri_found = 1; + } else if (GET_FLAGS(entry[index].appBitfield, + DCBX_APP_SF_ETH_TYPE) && + ETH_TYPE_FCOE == entry[index].app_id) { + type = LLFC_TRAFFIC_TYPE_FCOE; + fcoe_pri_found = 1; + } - if (GET_FLAGS(entry[index].appBitfield, - DCBX_APP_SF_PORT) && - TCP_PORT_ISCSI == entry[index].app_id) - bnx2x_dcbx_get_ap_priority(bp, - entry[index].pri_bitmap, - LLFC_TRAFFIC_TYPE_ISCSI); + if (type == MAX_TRAFFIC_TYPE) + continue; + + bnx2x_dcbx_get_ap_priority(bp, + entry[index].pri_bitmap, + type); + } + + /* If we have received a non-zero default application + * priority, then use that for applications which are + * not configured with any priority. + */ + if (ttp[LLFC_TRAFFIC_TYPE_NW] != 0) { + if (!iscsi_pri_found) { + ttp[LLFC_TRAFFIC_TYPE_ISCSI] = + ttp[LLFC_TRAFFIC_TYPE_NW]; + DP(BNX2X_MSG_DCB, + "ISCSI is using default priority.\n"); + } + if (!fcoe_pri_found) { + ttp[LLFC_TRAFFIC_TYPE_FCOE] = + ttp[LLFC_TRAFFIC_TYPE_NW]; + DP(BNX2X_MSG_DCB, + "FCoE is using default priority.\n"); + } } } else { DP(BNX2X_MSG_DCB, "DCBX_LOCAL_APP_DISABLED\n"); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 820b7e04bb5f..85a7800bfc12 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -981,6 +981,11 @@ static void bnx2x_get_regs(struct net_device *dev, memcpy(p, &dump_hdr, sizeof(struct dump_header)); p += dump_hdr.header_size + 1; + /* This isn't really an error, but since attention handling is going + * to print the GRC timeouts using this macro, we use the same. + */ + BNX2X_ERR("Generating register dump. Might trigger harmless GRC timeouts\n"); + /* Actually read the registers */ __bnx2x_get_regs(bp, p); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h index 27aa0802d87d..04523d41b873 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h @@ -1824,17 +1824,22 @@ struct dcbx_app_priority_entry { u8 pri_bitmap; u8 appBitfield; #define DCBX_APP_ENTRY_VALID 0x01 - #define DCBX_APP_ENTRY_SF_MASK 0x30 + #define DCBX_APP_ENTRY_SF_MASK 0xF0 #define DCBX_APP_ENTRY_SF_SHIFT 4 #define DCBX_APP_SF_ETH_TYPE 0x10 #define DCBX_APP_SF_PORT 0x20 + #define DCBX_APP_SF_UDP 0x40 + #define DCBX_APP_SF_DEFAULT 0x80 #elif defined(__LITTLE_ENDIAN) u8 appBitfield; #define DCBX_APP_ENTRY_VALID 0x01 - #define DCBX_APP_ENTRY_SF_MASK 0x30 + #define DCBX_APP_ENTRY_SF_MASK 0xF0 #define DCBX_APP_ENTRY_SF_SHIFT 4 + #define DCBX_APP_ENTRY_VALID 0x01 #define DCBX_APP_SF_ETH_TYPE 0x10 #define DCBX_APP_SF_PORT 0x20 + #define DCBX_APP_SF_UDP 0x40 + #define DCBX_APP_SF_DEFAULT 0x80 u8 pri_bitmap; u16 app_id; #endif diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 6c4e3a69976f..81fc51c4ec2b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -59,7 +59,9 @@ #include <linux/semaphore.h> #include <linux/stringify.h> #include <linux/vmalloc.h> - +#if IS_ENABLED(CONFIG_GENEVE) +#include <net/geneve.h> +#endif #include "bnx2x.h" #include "bnx2x_init.h" #include "bnx2x_init_ops.h" @@ -10076,11 +10078,13 @@ static void bnx2x_parity_recover(struct bnx2x *bp) } } -#ifdef CONFIG_BNX2X_VXLAN -static int bnx2x_vxlan_port_update(struct bnx2x *bp, u16 port) +#if defined(CONFIG_BNX2X_VXLAN) || IS_ENABLED(CONFIG_GENEVE) +static int bnx2x_udp_port_update(struct bnx2x *bp) { struct bnx2x_func_switch_update_params *switch_update_params; struct bnx2x_func_state_params func_params = {NULL}; + struct bnx2x_udp_tunnel *udp_tunnel; + u16 vxlan_port = 0, geneve_port = 0; int rc; switch_update_params = &func_params.params.switch_update; @@ -10095,69 +10099,125 @@ static int bnx2x_vxlan_port_update(struct bnx2x *bp, u16 port) /* Function parameters */ __set_bit(BNX2X_F_UPDATE_TUNNEL_CFG_CHNG, &switch_update_params->changes); - switch_update_params->vxlan_dst_port = port; + + if (bp->udp_tunnel_ports[BNX2X_UDP_PORT_GENEVE].count) { + udp_tunnel = &bp->udp_tunnel_ports[BNX2X_UDP_PORT_GENEVE]; + geneve_port = udp_tunnel->dst_port; + switch_update_params->geneve_dst_port = geneve_port; + } + + if (bp->udp_tunnel_ports[BNX2X_UDP_PORT_VXLAN].count) { + udp_tunnel = &bp->udp_tunnel_ports[BNX2X_UDP_PORT_VXLAN]; + vxlan_port = udp_tunnel->dst_port; + switch_update_params->vxlan_dst_port = vxlan_port; + } + + /* Re-enable inner-rss for the offloaded UDP tunnels */ + __set_bit(BNX2X_F_UPDATE_TUNNEL_INNER_RSS, + &switch_update_params->changes); + rc = bnx2x_func_state_change(bp, &func_params); if (rc) - BNX2X_ERR("failed to change vxlan dst port to %d (rc = 0x%x)\n", - port, rc); + BNX2X_ERR("failed to set UDP dst port to %04x %04x (rc = 0x%x)\n", + vxlan_port, geneve_port, rc); + else + DP(BNX2X_MSG_SP, + "Configured UDP ports: Vxlan [%04x] Geneve [%04x]\n", + vxlan_port, geneve_port); + return rc; } -static void __bnx2x_add_vxlan_port(struct bnx2x *bp, u16 port) +static void __bnx2x_add_udp_port(struct bnx2x *bp, u16 port, + enum bnx2x_udp_port_type type) { - if (!netif_running(bp->dev)) + struct bnx2x_udp_tunnel *udp_port = &bp->udp_tunnel_ports[type]; + + if (!netif_running(bp->dev) || !IS_PF(bp)) + return; + + if (udp_port->count && udp_port->dst_port == port) { + udp_port->count++; return; + } - if (bp->vxlan_dst_port_count && bp->vxlan_dst_port == port) { - bp->vxlan_dst_port_count++; + if (udp_port->count) { + DP(BNX2X_MSG_SP, + "UDP tunnel [%d] - destination port limit reached\n", + type); return; } - if (bp->vxlan_dst_port_count || !IS_PF(bp)) { - DP(BNX2X_MSG_SP, "Vxlan destination port limit reached\n"); + udp_port->dst_port = port; + udp_port->count = 1; + bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_CHANGE_UDP_PORT, 0); +} + +static void __bnx2x_del_udp_port(struct bnx2x *bp, u16 port, + enum bnx2x_udp_port_type type) +{ + struct bnx2x_udp_tunnel *udp_port = &bp->udp_tunnel_ports[type]; + + if (!IS_PF(bp)) + return; + + if (!udp_port->count || udp_port->dst_port != port) { + DP(BNX2X_MSG_SP, "Invalid UDP tunnel [%d] port\n", + type); return; } - bp->vxlan_dst_port = port; - bp->vxlan_dst_port_count = 1; - bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_ADD_VXLAN_PORT, 0); + /* Remove reference, and make certain it's no longer in use */ + udp_port->count--; + if (udp_port->count) + return; + udp_port->dst_port = 0; + + if (netif_running(bp->dev)) + bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_CHANGE_UDP_PORT, 0); + else + DP(BNX2X_MSG_SP, "Deleted UDP tunnel [%d] port %d\n", + type, port); } +#endif +#ifdef CONFIG_BNX2X_VXLAN static void bnx2x_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family, __be16 port) { struct bnx2x *bp = netdev_priv(netdev); u16 t_port = ntohs(port); - __bnx2x_add_vxlan_port(bp, t_port); + __bnx2x_add_udp_port(bp, t_port, BNX2X_UDP_PORT_VXLAN); } -static void __bnx2x_del_vxlan_port(struct bnx2x *bp, u16 port) +static void bnx2x_del_vxlan_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) { - if (!bp->vxlan_dst_port_count || bp->vxlan_dst_port != port || - !IS_PF(bp)) { - DP(BNX2X_MSG_SP, "Invalid vxlan port\n"); - return; - } - bp->vxlan_dst_port_count--; - if (bp->vxlan_dst_port_count) - return; + struct bnx2x *bp = netdev_priv(netdev); + u16 t_port = ntohs(port); - if (netif_running(bp->dev)) { - bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_DEL_VXLAN_PORT, 0); - } else { - bp->vxlan_dst_port = 0; - netdev_info(bp->dev, "Deleted vxlan dest port %d", port); - } + __bnx2x_del_udp_port(bp, t_port, BNX2X_UDP_PORT_VXLAN); +} +#endif + +#if IS_ENABLED(CONFIG_GENEVE) +static void bnx2x_add_geneve_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ + struct bnx2x *bp = netdev_priv(netdev); + u16 t_port = ntohs(port); + + __bnx2x_add_udp_port(bp, t_port, BNX2X_UDP_PORT_GENEVE); } -static void bnx2x_del_vxlan_port(struct net_device *netdev, - sa_family_t sa_family, __be16 port) +static void bnx2x_del_geneve_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) { struct bnx2x *bp = netdev_priv(netdev); u16 t_port = ntohs(port); - __bnx2x_del_vxlan_port(bp, t_port); + __bnx2x_del_udp_port(bp, t_port, BNX2X_UDP_PORT_GENEVE); } #endif @@ -10169,9 +10229,6 @@ static int bnx2x_close(struct net_device *dev); static void bnx2x_sp_rtnl_task(struct work_struct *work) { struct bnx2x *bp = container_of(work, struct bnx2x, sp_rtnl_task.work); -#ifdef CONFIG_BNX2X_VXLAN - u16 port; -#endif rtnl_lock(); @@ -10270,23 +10327,27 @@ sp_rtnl_not_reset: &bp->sp_rtnl_state)) bnx2x_update_mng_version(bp); -#ifdef CONFIG_BNX2X_VXLAN - port = bp->vxlan_dst_port; - if (test_and_clear_bit(BNX2X_SP_RTNL_ADD_VXLAN_PORT, - &bp->sp_rtnl_state)) { - if (!bnx2x_vxlan_port_update(bp, port)) - netdev_info(bp->dev, "Added vxlan dest port %d", port); - else - bp->vxlan_dst_port = 0; - } - - if (test_and_clear_bit(BNX2X_SP_RTNL_DEL_VXLAN_PORT, +#if defined(CONFIG_BNX2X_VXLAN) || IS_ENABLED(CONFIG_GENEVE) + if (test_and_clear_bit(BNX2X_SP_RTNL_CHANGE_UDP_PORT, &bp->sp_rtnl_state)) { - if (!bnx2x_vxlan_port_update(bp, 0)) { - netdev_info(bp->dev, - "Deleted vxlan dest port %d", port); - bp->vxlan_dst_port = 0; - vxlan_get_rx_port(bp->dev); + if (bnx2x_udp_port_update(bp)) { + /* On error, forget configuration */ + memset(bp->udp_tunnel_ports, 0, + sizeof(struct bnx2x_udp_tunnel) * + BNX2X_UDP_PORT_MAX); + } else { + /* Since we don't store additional port information, + * if no port is configured for any feature ask for + * information about currently configured ports. + */ +#ifdef CONFIG_BNX2X_VXLAN + if (!bp->udp_tunnel_ports[BNX2X_UDP_PORT_VXLAN].count) + vxlan_get_rx_port(bp->dev); +#endif +#if IS_ENABLED(CONFIG_GENEVE) + if (!bp->udp_tunnel_ports[BNX2X_UDP_PORT_GENEVE].count) + geneve_get_rx_port(bp->dev); +#endif } } #endif @@ -12368,8 +12429,10 @@ static int bnx2x_init_bp(struct bnx2x *bp) if (SHMEM2_HAS(bp, dcbx_lldp_params_offset) && SHMEM2_HAS(bp, dcbx_lldp_dcbx_stat_offset) && + SHMEM2_HAS(bp, dcbx_en) && SHMEM2_RD(bp, dcbx_lldp_params_offset) && - SHMEM2_RD(bp, dcbx_lldp_dcbx_stat_offset)) { + SHMEM2_RD(bp, dcbx_lldp_dcbx_stat_offset) && + SHMEM2_RD(bp, dcbx_en[BP_PORT(bp)])) { bnx2x_dcbx_set_state(bp, true, BNX2X_DCBX_ENABLED_ON_NEG_ON); bnx2x_dcbx_init_params(bp); } else { @@ -12494,6 +12557,10 @@ static int bnx2x_open(struct net_device *dev) if (IS_PF(bp)) vxlan_get_rx_port(dev); #endif +#if IS_ENABLED(CONFIG_GENEVE) + if (IS_PF(bp)) + geneve_get_rx_port(dev); +#endif return 0; } @@ -12994,7 +13061,7 @@ static const struct net_device_ops bnx2x_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = poll_bnx2x, #endif - .ndo_setup_tc = bnx2x_setup_tc, + .ndo_setup_tc = __bnx2x_setup_tc, #ifdef CONFIG_BNX2X_SRIOV .ndo_set_vf_mac = bnx2x_set_vf_mac, .ndo_set_vf_vlan = bnx2x_set_vf_vlan, @@ -13011,6 +13078,10 @@ static const struct net_device_ops bnx2x_netdev_ops = { .ndo_add_vxlan_port = bnx2x_add_vxlan_port, .ndo_del_vxlan_port = bnx2x_del_vxlan_port, #endif +#if IS_ENABLED(CONFIG_GENEVE) + .ndo_add_geneve_port = bnx2x_add_geneve_port, + .ndo_del_geneve_port = bnx2x_del_geneve_port, +#endif }; static int bnx2x_set_coherency_mask(struct bnx2x *bp) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8ab000dd52d9..ff1507f3e226 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5377,9 +5377,16 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int bnxt_setup_tc(struct net_device *dev, u8 tc) +static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *ntc) { struct bnxt *bp = netdev_priv(dev); + u8 tc; + + if (handle != TC_H_ROOT || ntc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + tc = ntc->tc; if (tc > bp->max_tc) { netdev_err(dev, "too many traffic classes requested: %d Max supported is %d\n", diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 50c94104f19c..7ccf2298a5fa 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -58,6 +58,9 @@ #define GEM_MTU_MIN_SIZE 68 +#define MACB_WOL_HAS_MAGIC_PACKET (0x1 << 0) +#define MACB_WOL_ENABLED (0x1 << 1) + /* * Graceful stop timeouts in us. We should allow up to * 1 frame time (10 Mbits/s, full-duplex, ignoring collisions) @@ -2124,6 +2127,39 @@ static void macb_get_regs(struct net_device *dev, struct ethtool_regs *regs, } } +static void macb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct macb *bp = netdev_priv(netdev); + + wol->supported = 0; + wol->wolopts = 0; + + if (bp->wol & MACB_WOL_HAS_MAGIC_PACKET) { + wol->supported = WAKE_MAGIC; + + if (bp->wol & MACB_WOL_ENABLED) + wol->wolopts |= WAKE_MAGIC; + } +} + +static int macb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct macb *bp = netdev_priv(netdev); + + if (!(bp->wol & MACB_WOL_HAS_MAGIC_PACKET) || + (wol->wolopts & ~WAKE_MAGIC)) + return -EOPNOTSUPP; + + if (wol->wolopts & WAKE_MAGIC) + bp->wol |= MACB_WOL_ENABLED; + else + bp->wol &= ~MACB_WOL_ENABLED; + + device_set_wakeup_enable(&bp->pdev->dev, bp->wol & MACB_WOL_ENABLED); + + return 0; +} + static const struct ethtool_ops macb_ethtool_ops = { .get_settings = macb_get_settings, .set_settings = macb_set_settings, @@ -2131,6 +2167,8 @@ static const struct ethtool_ops macb_ethtool_ops = { .get_regs = macb_get_regs, .get_link = ethtool_op_get_link, .get_ts_info = ethtool_op_get_ts_info, + .get_wol = macb_get_wol, + .set_wol = macb_set_wol, }; static const struct ethtool_ops gem_ethtool_ops = { @@ -2890,6 +2928,11 @@ static int macb_probe(struct platform_device *pdev) if (macb_config) bp->jumbo_max_len = macb_config->jumbo_max_len; + bp->wol = 0; + if (of_get_property(np, "magic-packet", NULL)) + bp->wol |= MACB_WOL_HAS_MAGIC_PACKET; + device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET); + spin_lock_init(&bp->lock); /* setup capabilities */ @@ -3006,9 +3049,15 @@ static int __maybe_unused macb_suspend(struct device *dev) netif_carrier_off(netdev); netif_device_detach(netdev); - clk_disable_unprepare(bp->tx_clk); - clk_disable_unprepare(bp->hclk); - clk_disable_unprepare(bp->pclk); + if (bp->wol & MACB_WOL_ENABLED) { + macb_writel(bp, IER, MACB_BIT(WOL)); + macb_writel(bp, WOL, MACB_BIT(MAG)); + enable_irq_wake(bp->queues[0].irq); + } else { + clk_disable_unprepare(bp->tx_clk); + clk_disable_unprepare(bp->hclk); + clk_disable_unprepare(bp->pclk); + } return 0; } @@ -3019,9 +3068,15 @@ static int __maybe_unused macb_resume(struct device *dev) struct net_device *netdev = platform_get_drvdata(pdev); struct macb *bp = netdev_priv(netdev); - clk_prepare_enable(bp->pclk); - clk_prepare_enable(bp->hclk); - clk_prepare_enable(bp->tx_clk); + if (bp->wol & MACB_WOL_ENABLED) { + macb_writel(bp, IDR, MACB_BIT(WOL)); + macb_writel(bp, WOL, 0); + disable_irq_wake(bp->queues[0].irq); + } else { + clk_prepare_enable(bp->pclk); + clk_prepare_enable(bp->hclk); + clk_prepare_enable(bp->tx_clk); + } netif_device_attach(netdev); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 0d4ecfcd60b7..9ba416d5afff 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -312,6 +312,8 @@ #define MACB_PFR_SIZE 1 #define MACB_PTZ_OFFSET 13 /* Enable pause time zero interrupt */ #define MACB_PTZ_SIZE 1 +#define MACB_WOL_OFFSET 14 /* Enable wake-on-lan interrupt */ +#define MACB_WOL_SIZE 1 /* Bitfields in MAN */ #define MACB_DATA_OFFSET 0 /* data */ @@ -842,6 +844,8 @@ struct macb { unsigned int rx_frm_len_mask; unsigned int jumbo_max_len; + + u32 wol; }; static inline bool macb_is_gem(struct macb *bp) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 688828865c48..00cc9156abbb 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -248,10 +248,13 @@ struct nicvf_drv_stats { u64 rx_frames_jumbo; u64 rx_drops; + u64 rcv_buffer_alloc_failures; + /* Tx */ u64 tx_frames_ok; u64 tx_drops; u64 tx_tso; + u64 tx_timeout; u64 txq_stop; u64 txq_wake; }; @@ -306,6 +309,7 @@ struct nicvf { struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS]; char irq_name[NIC_VF_MSIX_VECTORS][20]; bool irq_allocated[NIC_VF_MSIX_VECTORS]; + cpumask_var_t affinity_mask[NIC_VF_MSIX_VECTORS]; /* VF <-> PF mailbox communication */ bool pf_acked; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index a12b2e38cf61..d2d8ef270142 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -89,9 +89,11 @@ static const struct nicvf_stat nicvf_drv_stats[] = { NICVF_DRV_STAT(rx_frames_1518), NICVF_DRV_STAT(rx_frames_jumbo), NICVF_DRV_STAT(rx_drops), + NICVF_DRV_STAT(rcv_buffer_alloc_failures), NICVF_DRV_STAT(tx_frames_ok), NICVF_DRV_STAT(tx_tso), NICVF_DRV_STAT(tx_drops), + NICVF_DRV_STAT(tx_timeout), NICVF_DRV_STAT(txq_stop), NICVF_DRV_STAT(txq_wake), }; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index a009bc30dc4d..bfee298fc02a 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -826,7 +826,7 @@ static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq) nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); /* Schedule NAPI */ - napi_schedule(&cq_poll->napi); + napi_schedule_irqoff(&cq_poll->napi); /* Clear interrupt */ nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); @@ -897,6 +897,31 @@ static void nicvf_disable_msix(struct nicvf *nic) } } +static void nicvf_set_irq_affinity(struct nicvf *nic) +{ + int vec, cpu; + int irqnum; + + for (vec = 0; vec < nic->num_vec; vec++) { + if (!nic->irq_allocated[vec]) + continue; + + if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL)) + return; + /* CQ interrupts */ + if (vec < NICVF_INTR_ID_SQ) + /* Leave CPU0 for RBDR and other interrupts */ + cpu = nicvf_netdev_qidx(nic, vec) + 1; + else + cpu = 0; + + cpumask_set_cpu(cpumask_local_spread(cpu, nic->node), + nic->affinity_mask[vec]); + irqnum = nic->msix_entries[vec].vector; + irq_set_affinity_hint(irqnum, nic->affinity_mask[vec]); + } +} + static int nicvf_register_interrupts(struct nicvf *nic) { int irq, ret = 0; @@ -942,8 +967,13 @@ static int nicvf_register_interrupts(struct nicvf *nic) ret = request_irq(nic->msix_entries[irq].vector, nicvf_qs_err_intr_handler, 0, nic->irq_name[irq], nic); - if (!ret) - nic->irq_allocated[irq] = true; + if (ret) + goto err; + + nic->irq_allocated[irq] = true; + + /* Set IRQ affinities */ + nicvf_set_irq_affinity(nic); err: if (ret) @@ -961,6 +991,9 @@ static void nicvf_unregister_interrupts(struct nicvf *nic) if (!nic->irq_allocated[irq]) continue; + irq_set_affinity_hint(nic->msix_entries[irq].vector, NULL); + free_cpumask_var(nic->affinity_mask[irq]); + if (irq < NICVF_INTR_ID_SQ) free_irq(nic->msix_entries[irq].vector, nic->napi[irq]); else @@ -1394,6 +1427,7 @@ static void nicvf_tx_timeout(struct net_device *dev) netdev_warn(dev, "%s: Transmit timed out, resetting\n", dev->name); + nic->drv_stats.tx_timeout++; schedule_work(&nic->reset_task); } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 767347b1f631..0dd1abf86079 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -78,7 +78,7 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, u32 buf_len, u64 **rbuf) { - int order = get_order(buf_len); + int order = (PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0; /* Check if request can be accomodated in previous allocated page */ if (nic->rb_page) { @@ -96,8 +96,7 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, order); if (!nic->rb_page) { - netdev_err(nic->netdev, - "Failed to allocate new rcv buffer\n"); + nic->drv_stats.rcv_buffer_alloc_failures++; return -ENOMEM; } nic->rb_page_offset = 0; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 9df26c2263bc..f8abdffdd851 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -886,7 +886,8 @@ static void bgx_get_qlm_mode(struct bgx *bgx) #ifdef CONFIG_ACPI -static int acpi_get_mac_address(struct acpi_device *adev, u8 *dst) +static int acpi_get_mac_address(struct device *dev, struct acpi_device *adev, + u8 *dst) { u8 mac[ETH_ALEN]; int ret; @@ -897,10 +898,13 @@ static int acpi_get_mac_address(struct acpi_device *adev, u8 *dst) goto out; if (!is_valid_ether_addr(mac)) { + dev_err(dev, "MAC address invalid: %pM\n", mac); ret = -EINVAL; goto out; } + dev_info(dev, "MAC address set to: %pM\n", mac); + memcpy(dst, mac, ETH_ALEN); out: return ret; @@ -911,14 +915,15 @@ static acpi_status bgx_acpi_register_phy(acpi_handle handle, u32 lvl, void *context, void **rv) { struct bgx *bgx = context; + struct device *dev = &bgx->pdev->dev; struct acpi_device *adev; if (acpi_bus_get_device(handle, &adev)) goto out; - acpi_get_mac_address(adev, bgx->lmac[bgx->lmac_count].mac); + acpi_get_mac_address(dev, adev, bgx->lmac[bgx->lmac_count].mac); - SET_NETDEV_DEV(&bgx->lmac[bgx->lmac_count].netdev, &bgx->pdev->dev); + SET_NETDEV_DEV(&bgx->lmac[bgx->lmac_count].netdev, dev); bgx->lmac[bgx->lmac_count].lmacid = bgx->lmac_count; out: @@ -968,26 +973,27 @@ static int bgx_init_acpi_phy(struct bgx *bgx) static int bgx_init_of_phy(struct bgx *bgx) { - struct device_node *np; - struct device_node *np_child; + struct fwnode_handle *fwn; u8 lmac = 0; - char bgx_sel[5]; const char *mac; - /* Get BGX node from DT */ - snprintf(bgx_sel, 5, "bgx%d", bgx->bgx_id); - np = of_find_node_by_name(NULL, bgx_sel); - if (!np) - return -ENODEV; + device_for_each_child_node(&bgx->pdev->dev, fwn) { + struct device_node *phy_np; + struct device_node *node = to_of_node(fwn); - for_each_child_of_node(np, np_child) { - struct device_node *phy_np = of_parse_phandle(np_child, - "phy-handle", 0); + /* If it is not an OF node we cannot handle it yet, so + * exit the loop. + */ + if (!node) + break; + + phy_np = of_parse_phandle(node, "phy-handle", 0); if (!phy_np) continue; + bgx->lmac[lmac].phydev = of_phy_find_device(phy_np); - mac = of_get_mac_address(np_child); + mac = of_get_mac_address(node); if (mac) ether_addr_copy(bgx->lmac[lmac].mac, mac); @@ -995,7 +1001,7 @@ static int bgx_init_of_phy(struct bgx *bgx) bgx->lmac[lmac].lmacid = lmac; lmac++; if (lmac == MAX_LMAC_PER_BGX) { - of_node_put(np_child); + of_node_put(node); break; } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index ec6e849676c1..1dac6c6111bf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -702,6 +702,11 @@ struct doorbell_stats { u32 db_full; }; +struct hash_mac_addr { + struct list_head list; + u8 addr[ETH_ALEN]; +}; + struct adapter { void __iomem *regs; void __iomem *bar2; @@ -740,6 +745,7 @@ struct adapter { void *uld_handle[CXGB4_ULD_MAX]; struct list_head list_node; struct list_head rcu_node; + struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */ struct tid_info tids; void **tid_release_head; @@ -1207,6 +1213,24 @@ static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd, return t4_wr_mbox_meat(adap, mbox, cmd, size, rpl, false); } +/** + * hash_mac_addr - return the hash value of a MAC address + * @addr: the 48-bit Ethernet MAC address + * + * Hashes a MAC address according to the hash function used by HW inexact + * (hash) address matching. + */ +static inline int hash_mac_addr(const u8 *addr) +{ + u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2]; + u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5]; + + a ^= b; + a ^= (a >> 12); + a ^= (a >> 6); + return a & 0x3f; +} + void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, const u32 *vals, unsigned int nregs, unsigned int start_idx); @@ -1389,6 +1413,9 @@ int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid, int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox, unsigned int viid, bool free, unsigned int naddr, const u8 **addr, u16 *idx, u64 *hash, bool sleep_ok); +int t4_free_mac_filt(struct adapter *adap, unsigned int mbox, + unsigned int viid, unsigned int naddr, + const u8 **addr, bool sleep_ok); int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid, int idx, const u8 *addr, bool persist, bool add_smt); int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index b8a5fb0c32d4..adad73f7c8cd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -338,84 +338,108 @@ void t4_os_portmod_changed(const struct adapter *adap, int port_id) netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]); } +int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */ +module_param(dbfifo_int_thresh, int, 0644); +MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold"); + /* - * Configure the exact and hash address filters to handle a port's multicast - * and secondary unicast MAC addresses. + * usecs to sleep while draining the dbfifo */ -static int set_addr_filters(const struct net_device *dev, bool sleep) +static int dbfifo_drain_delay = 1000; +module_param(dbfifo_drain_delay, int, 0644); +MODULE_PARM_DESC(dbfifo_drain_delay, + "usecs to sleep while draining the dbfifo"); + +static inline int cxgb4_set_addr_hash(struct port_info *pi) { + struct adapter *adap = pi->adapter; + u64 vec = 0; + bool ucast = false; + struct hash_mac_addr *entry; + + /* Calculate the hash vector for the updated list and program it */ + list_for_each_entry(entry, &adap->mac_hlist, list) { + ucast |= is_unicast_ether_addr(entry->addr); + vec |= (1ULL << hash_mac_addr(entry->addr)); + } + return t4_set_addr_hash(adap, adap->mbox, pi->viid, ucast, + vec, false); +} + +static int cxgb4_mac_sync(struct net_device *netdev, const u8 *mac_addr) +{ + struct port_info *pi = netdev_priv(netdev); + struct adapter *adap = pi->adapter; + int ret; u64 mhash = 0; u64 uhash = 0; - bool free = true; - u16 filt_idx[7]; - const u8 *addr[7]; - int ret, naddr = 0; - const struct netdev_hw_addr *ha; - int uc_cnt = netdev_uc_count(dev); - int mc_cnt = netdev_mc_count(dev); - const struct port_info *pi = netdev_priv(dev); - unsigned int mb = pi->adapter->pf; + bool free = false; + bool ucast = is_unicast_ether_addr(mac_addr); + const u8 *maclist[1] = {mac_addr}; + struct hash_mac_addr *new_entry; - /* first do the secondary unicast addresses */ - netdev_for_each_uc_addr(ha, dev) { - addr[naddr++] = ha->addr; - if (--uc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) { - ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free, - naddr, addr, filt_idx, &uhash, sleep); - if (ret < 0) - return ret; - - free = false; - naddr = 0; - } + ret = t4_alloc_mac_filt(adap, adap->mbox, pi->viid, free, 1, maclist, + NULL, ucast ? &uhash : &mhash, false); + if (ret < 0) + goto out; + /* if hash != 0, then add the addr to hash addr list + * so on the end we will calculate the hash for the + * list and program it + */ + if (uhash || mhash) { + new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC); + if (!new_entry) + return -ENOMEM; + ether_addr_copy(new_entry->addr, mac_addr); + list_add_tail(&new_entry->list, &adap->mac_hlist); + ret = cxgb4_set_addr_hash(pi); } +out: + return ret < 0 ? ret : 0; +} - /* next set up the multicast addresses */ - netdev_for_each_mc_addr(ha, dev) { - addr[naddr++] = ha->addr; - if (--mc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) { - ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free, - naddr, addr, filt_idx, &mhash, sleep); - if (ret < 0) - return ret; +static int cxgb4_mac_unsync(struct net_device *netdev, const u8 *mac_addr) +{ + struct port_info *pi = netdev_priv(netdev); + struct adapter *adap = pi->adapter; + int ret; + const u8 *maclist[1] = {mac_addr}; + struct hash_mac_addr *entry, *tmp; - free = false; - naddr = 0; + /* If the MAC address to be removed is in the hash addr + * list, delete it from the list and update hash vector + */ + list_for_each_entry_safe(entry, tmp, &adap->mac_hlist, list) { + if (ether_addr_equal(entry->addr, mac_addr)) { + list_del(&entry->list); + kfree(entry); + return cxgb4_set_addr_hash(pi); } } - return t4_set_addr_hash(pi->adapter, mb, pi->viid, uhash != 0, - uhash | mhash, sleep); + ret = t4_free_mac_filt(adap, adap->mbox, pi->viid, 1, maclist, false); + return ret < 0 ? -EINVAL : 0; } -int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */ -module_param(dbfifo_int_thresh, int, 0644); -MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold"); - -/* - * usecs to sleep while draining the dbfifo - */ -static int dbfifo_drain_delay = 1000; -module_param(dbfifo_drain_delay, int, 0644); -MODULE_PARM_DESC(dbfifo_drain_delay, - "usecs to sleep while draining the dbfifo"); - /* * Set Rx properties of a port, such as promiscruity, address filters, and MTU. * If @mtu is -1 it is left unchanged. */ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) { - int ret; struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; - ret = set_addr_filters(dev, sleep_ok); - if (ret == 0) - ret = t4_set_rxmode(pi->adapter, pi->adapter->pf, pi->viid, mtu, - (dev->flags & IFF_PROMISC) ? 1 : 0, - (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1, - sleep_ok); - return ret; + if (!(dev->flags & IFF_PROMISC)) { + __dev_uc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync); + if (!(dev->flags & IFF_ALLMULTI)) + __dev_mc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync); + } + + return t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu, + (dev->flags & IFF_PROMISC) ? 1 : 0, + (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1, + sleep_ok); } /** @@ -2677,6 +2701,8 @@ static int cxgb_up(struct adapter *adap) #if IS_ENABLED(CONFIG_IPV6) update_clip(adap); #endif + /* Initialize hash mac addr list*/ + INIT_LIST_HEAD(&adap->mac_hlist); out: return err; irq_err: diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 636b4691f252..cc1736bece0f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -4433,23 +4433,6 @@ void t4_intr_disable(struct adapter *adapter) } /** - * hash_mac_addr - return the hash value of a MAC address - * @addr: the 48-bit Ethernet MAC address - * - * Hashes a MAC address according to the hash function used by HW inexact - * (hash) address matching. - */ -static int hash_mac_addr(const u8 *addr) -{ - u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2]; - u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5]; - a ^= b; - a ^= (a >> 12); - a ^= (a >> 6); - return a & 0x3f; -} - -/** * t4_config_rss_range - configure a portion of the RSS mapping table * @adapter: the adapter * @mbox: mbox to use for the FW command @@ -6738,6 +6721,81 @@ int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox, } /** + * t4_free_mac_filt - frees exact-match filters of given MAC addresses + * @adap: the adapter + * @mbox: mailbox to use for the FW command + * @viid: the VI id + * @naddr: the number of MAC addresses to allocate filters for (up to 7) + * @addr: the MAC address(es) + * @sleep_ok: call is allowed to sleep + * + * Frees the exact-match filter for each of the supplied addresses + * + * Returns a negative error number or the number of filters freed. + */ +int t4_free_mac_filt(struct adapter *adap, unsigned int mbox, + unsigned int viid, unsigned int naddr, + const u8 **addr, bool sleep_ok) +{ + int offset, ret = 0; + struct fw_vi_mac_cmd c; + unsigned int nfilters = 0; + unsigned int max_naddr = is_t4(adap->params.chip) ? + NUM_MPS_CLS_SRAM_L_INSTANCES : + NUM_MPS_T5_CLS_SRAM_L_INSTANCES; + unsigned int rem = naddr; + + if (naddr > max_naddr) + return -EINVAL; + + for (offset = 0; offset < (int)naddr ; /**/) { + unsigned int fw_naddr = (rem < ARRAY_SIZE(c.u.exact) + ? rem + : ARRAY_SIZE(c.u.exact)); + size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd, + u.exact[fw_naddr]), 16); + struct fw_vi_mac_exact *p; + int i; + + memset(&c, 0, sizeof(c)); + c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | + FW_CMD_EXEC_V(0) | + FW_VI_MAC_CMD_VIID_V(viid)); + c.freemacs_to_len16 = + cpu_to_be32(FW_VI_MAC_CMD_FREEMACS_V(0) | + FW_CMD_LEN16_V(len16)); + + for (i = 0, p = c.u.exact; i < (int)fw_naddr; i++, p++) { + p->valid_to_idx = cpu_to_be16( + FW_VI_MAC_CMD_VALID_F | + FW_VI_MAC_CMD_IDX_V(FW_VI_MAC_MAC_BASED_FREE)); + memcpy(p->macaddr, addr[offset+i], sizeof(p->macaddr)); + } + + ret = t4_wr_mbox_meat(adap, mbox, &c, sizeof(c), &c, sleep_ok); + if (ret) + break; + + for (i = 0, p = c.u.exact; i < fw_naddr; i++, p++) { + u16 index = FW_VI_MAC_CMD_IDX_G( + be16_to_cpu(p->valid_to_idx)); + + if (index < max_naddr) + nfilters++; + } + + offset += fw_naddr; + rem -= fw_naddr; + } + + if (ret == 0) + ret = nfilters; + return ret; +} + +/** * t4_change_mac - modifies the exact-match filter for a MAC address * @adap: the adapter * @mbox: mailbox to use for the FW command diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index a072d341e205..1d2d1da40c80 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -1021,6 +1021,8 @@ struct cpl_l2t_write_req { #define L2T_W_NOREPLY_V(x) ((x) << L2T_W_NOREPLY_S) #define L2T_W_NOREPLY_F L2T_W_NOREPLY_V(1U) +#define CPL_L2T_VLAN_NONE 0xfff + struct cpl_l2t_write_rpl { union opcode_tid ot; u8 status; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index a32de30ea663..c8661c77b4e3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -561,6 +561,7 @@ enum fw_flowc_mnem { FW_FLOWC_MNEM_SNDBUF, FW_FLOWC_MNEM_MSS, FW_FLOWC_MNEM_TXDATAPLEN_MAX, + FW_FLOWC_MNEM_SCHEDCLASS = 11, }; struct fw_flowc_mnemval { diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h index 6049f70e110c..4a707c32d76f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h @@ -348,6 +348,11 @@ struct sge { #define for_each_ethrxq(sge, iter) \ for (iter = 0; iter < (sge)->ethqsets; iter++) +struct hash_mac_addr { + struct list_head list; + u8 addr[ETH_ALEN]; +}; + /* * Per-"adapter" (Virtual Function) information. */ @@ -381,6 +386,9 @@ struct adapter { /* various locks */ spinlock_t stats_lock; + + /* list of MAC addresses in MPS Hash */ + struct list_head mac_hlist; }; enum { /* adapter flags */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 0cfa5d72cafd..8337514ababb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -741,6 +741,9 @@ static int adapter_up(struct adapter *adapter) */ enable_rx(adapter); t4vf_sge_start(adapter); + + /* Initialize hash mac addr list*/ + INIT_LIST_HEAD(&adapter->mac_hlist); return 0; } @@ -905,51 +908,74 @@ static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device return naddr; } -/* - * Configure the exact and hash address filters to handle a port's multicast - * and secondary unicast MAC addresses. - */ -static int set_addr_filters(const struct net_device *dev, bool sleep) +static inline int cxgb4vf_set_addr_hash(struct port_info *pi) { - u64 mhash = 0; - u64 uhash = 0; - bool free = true; - unsigned int offset, naddr; - const u8 *addr[7]; - int ret; - const struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; + u64 vec = 0; + bool ucast = false; + struct hash_mac_addr *entry; - /* first do the secondary unicast addresses */ - for (offset = 0; ; offset += naddr) { - naddr = collect_netdev_uc_list_addrs(dev, addr, offset, - ARRAY_SIZE(addr)); - if (naddr == 0) - break; + /* Calculate the hash vector for the updated list and program it */ + list_for_each_entry(entry, &adapter->mac_hlist, list) { + ucast |= is_unicast_ether_addr(entry->addr); + vec |= (1ULL << hash_mac_addr(entry->addr)); + } + return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false); +} - ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free, - naddr, addr, NULL, &uhash, sleep); - if (ret < 0) - return ret; +static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr) +{ + struct port_info *pi = netdev_priv(netdev); + struct adapter *adapter = pi->adapter; + int ret; + u64 mhash = 0; + u64 uhash = 0; + bool free = false; + bool ucast = is_unicast_ether_addr(mac_addr); + const u8 *maclist[1] = {mac_addr}; + struct hash_mac_addr *new_entry; - free = false; + ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist, + NULL, ucast ? &uhash : &mhash, false); + if (ret < 0) + goto out; + /* if hash != 0, then add the addr to hash addr list + * so on the end we will calculate the hash for the + * list and program it + */ + if (uhash || mhash) { + new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC); + if (!new_entry) + return -ENOMEM; + ether_addr_copy(new_entry->addr, mac_addr); + list_add_tail(&new_entry->list, &adapter->mac_hlist); + ret = cxgb4vf_set_addr_hash(pi); } +out: + return ret < 0 ? ret : 0; +} - /* next set up the multicast addresses */ - for (offset = 0; ; offset += naddr) { - naddr = collect_netdev_mc_list_addrs(dev, addr, offset, - ARRAY_SIZE(addr)); - if (naddr == 0) - break; +static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr) +{ + struct port_info *pi = netdev_priv(netdev); + struct adapter *adapter = pi->adapter; + int ret; + const u8 *maclist[1] = {mac_addr}; + struct hash_mac_addr *entry, *tmp; - ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free, - naddr, addr, NULL, &mhash, sleep); - if (ret < 0) - return ret; - free = false; + /* If the MAC address to be removed is in the hash addr + * list, delete it from the list and update hash vector + */ + list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) { + if (ether_addr_equal(entry->addr, mac_addr)) { + list_del(&entry->list); + kfree(entry); + return cxgb4vf_set_addr_hash(pi); + } } - return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0, - uhash | mhash, sleep); + ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false); + return ret < 0 ? -EINVAL : 0; } /* @@ -958,16 +984,18 @@ static int set_addr_filters(const struct net_device *dev, bool sleep) */ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) { - int ret; struct port_info *pi = netdev_priv(dev); - ret = set_addr_filters(dev, sleep_ok); - if (ret == 0) - ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1, - (dev->flags & IFF_PROMISC) != 0, - (dev->flags & IFF_ALLMULTI) != 0, - 1, -1, sleep_ok); - return ret; + if (!(dev->flags & IFF_PROMISC)) { + __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync); + if (!(dev->flags & IFF_ALLMULTI)) + __dev_mc_sync(dev, cxgb4vf_mac_sync, + cxgb4vf_mac_unsync); + } + return t4vf_set_rxmode(pi->adapter, pi->viid, -1, + (dev->flags & IFF_PROMISC) != 0, + (dev->flags & IFF_ALLMULTI) != 0, + 1, -1, sleep_ok); } /* diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 88b8981b4751..6ce302fe1a61 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -285,6 +285,24 @@ static inline int is_t4(enum chip_type chip) return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4; } +/** + * hash_mac_addr - return the hash value of a MAC address + * @addr: the 48-bit Ethernet MAC address + * + * Hashes a MAC address according to the hash function used by hardware + * inexact (hash) address matching. + */ +static inline int hash_mac_addr(const u8 *addr) +{ + u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2]; + u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5]; + + a ^= b; + a ^= (a >> 12); + a ^= (a >> 6); + return a & 0x3f; +} + int t4vf_wait_dev_ready(struct adapter *); int t4vf_port_init(struct adapter *, int); @@ -320,6 +338,8 @@ int t4vf_set_rxmode(struct adapter *, unsigned int, int, int, int, int, int, bool); int t4vf_alloc_mac_filt(struct adapter *, unsigned int, bool, unsigned int, const u8 **, u16 *, u64 *, bool); +int t4vf_free_mac_filt(struct adapter *, unsigned int, unsigned int naddr, + const u8 **, bool); int t4vf_change_mac(struct adapter *, unsigned int, int, const u8 *, bool); int t4vf_set_addr_hash(struct adapter *, unsigned int, bool, u64, bool); int t4vf_get_port_stats(struct adapter *, int, struct t4vf_port_stats *); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index b6fa74aafe47..54220117dcba 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -236,23 +236,6 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size, return -ETIMEDOUT; } -/** - * hash_mac_addr - return the hash value of a MAC address - * @addr: the 48-bit Ethernet MAC address - * - * Hashes a MAC address according to the hash function used by hardware - * inexact (hash) address matching. - */ -static int hash_mac_addr(const u8 *addr) -{ - u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2]; - u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5]; - a ^= b; - a ^= (a >> 12); - a ^= (a >> 6); - return a & 0x3f; -} - #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \ FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG) @@ -1266,6 +1249,77 @@ int t4vf_alloc_mac_filt(struct adapter *adapter, unsigned int viid, bool free, } /** + * t4vf_free_mac_filt - frees exact-match filters of given MAC addresses + * @adapter: the adapter + * @viid: the VI id + * @naddr: the number of MAC addresses to allocate filters for (up to 7) + * @addr: the MAC address(es) + * @sleep_ok: call is allowed to sleep + * + * Frees the exact-match filter for each of the supplied addresses + * + * Returns a negative error number or the number of filters freed. + */ +int t4vf_free_mac_filt(struct adapter *adapter, unsigned int viid, + unsigned int naddr, const u8 **addr, bool sleep_ok) +{ + int offset, ret = 0; + struct fw_vi_mac_cmd cmd; + unsigned int nfilters = 0; + unsigned int max_naddr = adapter->params.arch.mps_tcam_size; + unsigned int rem = naddr; + + if (naddr > max_naddr) + return -EINVAL; + + for (offset = 0; offset < (int)naddr ; /**/) { + unsigned int fw_naddr = (rem < ARRAY_SIZE(cmd.u.exact) ? + rem : ARRAY_SIZE(cmd.u.exact)); + size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd, + u.exact[fw_naddr]), 16); + struct fw_vi_mac_exact *p; + int i; + + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | + FW_CMD_EXEC_V(0) | + FW_VI_MAC_CMD_VIID_V(viid)); + cmd.freemacs_to_len16 = + cpu_to_be32(FW_VI_MAC_CMD_FREEMACS_V(0) | + FW_CMD_LEN16_V(len16)); + + for (i = 0, p = cmd.u.exact; i < (int)fw_naddr; i++, p++) { + p->valid_to_idx = cpu_to_be16( + FW_VI_MAC_CMD_VALID_F | + FW_VI_MAC_CMD_IDX_V(FW_VI_MAC_MAC_BASED_FREE)); + memcpy(p->macaddr, addr[offset+i], sizeof(p->macaddr)); + } + + ret = t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), &cmd, + sleep_ok); + if (ret) + break; + + for (i = 0, p = cmd.u.exact; i < fw_naddr; i++, p++) { + u16 index = FW_VI_MAC_CMD_IDX_G( + be16_to_cpu(p->valid_to_idx)); + + if (index < max_naddr) + nfilters++; + } + + offset += fw_naddr; + rem -= fw_naddr; + } + + if (ret == 0) + ret = nfilters; + return ret; +} + +/** * t4vf_change_mac - modifies the exact-match filter for a MAC address * @adapter: the adapter * @viid: the Virtual Interface ID diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index cf837831304b..ab24f84060c6 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -89,6 +89,10 @@ #define BE3_MAX_TX_QS 16 #define BE3_MAX_EVT_QS 16 #define BE3_SRIOV_MAX_EVT_QS 8 +#define SH_VF_MAX_NIC_EQS 3 /* Skyhawk VFs can have a max of 4 EQs + * and at least 1 is granted to either + * SURF/DPDK + */ #define MAX_RSS_IFACES 15 #define MAX_RX_QS 32 @@ -111,6 +115,8 @@ #define RSS_INDIR_TABLE_LEN 128 #define RSS_HASH_KEY_LEN 40 +#define BE_UNKNOWN_PHY_STATE 0xFF + struct be_dma_mem { void *va; dma_addr_t dma; @@ -386,13 +392,17 @@ enum vf_state { #define BE_FLAGS_QNQ_ASYNC_EVT_RCVD BIT(7) #define BE_FLAGS_VXLAN_OFFLOADS BIT(8) #define BE_FLAGS_SETUP_DONE BIT(9) -#define BE_FLAGS_EVT_INCOMPATIBLE_SFP BIT(10) +#define BE_FLAGS_PHY_MISCONFIGURED BIT(10) #define BE_FLAGS_ERR_DETECTION_SCHEDULED BIT(11) #define BE_FLAGS_OS2BMC BIT(12) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 +#define MAX_ERR_RECOVERY_RETRY_COUNT 3 +#define ERR_DETECTION_DELAY 1000 +#define ERR_RECOVERY_RETRY_DELAY 30000 + /* Ethtool set_dump flags */ #define LANCER_INITIATE_FW_DUMP 0x1 #define LANCER_DELETE_FW_DUMP 0x2 @@ -530,6 +540,7 @@ struct be_adapter { u16 work_counter; struct delayed_work be_err_detection_work; + u8 recovery_retries; u8 err_flags; u32 flags; u32 cmd_privileges; @@ -594,6 +605,7 @@ struct be_adapter { u32 bmc_filt_mask; u32 fat_dump_len; u16 serial_num[CNTL_SERIAL_NUM_WORDS]; + u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */ }; #define be_physfn(adapter) (!adapter->virtfn) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index b63d8ad2e115..66fa21426fe2 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -19,19 +19,25 @@ #include "be.h" #include "be_cmds.h" -static char *be_port_misconfig_evt_desc[] = { - "A valid SFP module detected", - "Optics faulted/ incorrectly installed/ not installed.", - "Optics of two types installed.", - "Incompatible optics.", - "Unknown port SFP status" +char *be_misconfig_evt_port_state[] = { + "Physical Link is functional", + "Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.", + "Optics of two types installed – Remove one optic or install matching pair of optics.", + "Incompatible optics – Replace with compatible optics for card to function.", + "Unqualified optics – Replace with Avago optics for Warranty and Technical Support.", + "Uncertified optics – Replace with Avago-certified optics to enable link operation." }; -static char *be_port_misconfig_remedy_desc[] = { - "", - "Reseat optics. If issue not resolved, replace", - "Remove one optic or install matching pair of optics", - "Replace with compatible optics for card to function", +static char *be_port_misconfig_evt_severity[] = { + "KERN_WARN", + "KERN_INFO", + "KERN_ERR", + "KERN_WARN" +}; + +static char *phy_state_oper_desc[] = { + "Link is non-operational", + "Link is operational", "" }; @@ -65,7 +71,22 @@ static struct be_cmd_priv_map cmd_priv_map[] = { CMD_SUBSYSTEM_COMMON, BE_PRIV_LNKMGMT | BE_PRIV_VHADM | BE_PRIV_DEVCFG | BE_PRIV_DEVSEC - } + }, + { + OPCODE_LOWLEVEL_HOST_DDR_DMA, + CMD_SUBSYSTEM_LOWLEVEL, + BE_PRIV_DEVCFG | BE_PRIV_DEVSEC + }, + { + OPCODE_LOWLEVEL_LOOPBACK_TEST, + CMD_SUBSYSTEM_LOWLEVEL, + BE_PRIV_DEVCFG | BE_PRIV_DEVSEC + }, + { + OPCODE_LOWLEVEL_SET_LOOPBACK_MODE, + CMD_SUBSYSTEM_LOWLEVEL, + BE_PRIV_DEVCFG | BE_PRIV_DEVSEC + }, }; static bool be_cmd_allowed(struct be_adapter *adapter, u8 opcode, u8 subsystem) @@ -236,7 +257,8 @@ static int be_mcc_compl_process(struct be_adapter *adapter, if (base_status != MCC_STATUS_SUCCESS && !be_skip_err_log(opcode, base_status, addl_status)) { - if (base_status == MCC_STATUS_UNAUTHORIZED_REQUEST) { + if (base_status == MCC_STATUS_UNAUTHORIZED_REQUEST || + addl_status == MCC_ADDL_STATUS_INSUFFICIENT_PRIVILEGES) { dev_warn(&adapter->pdev->dev, "VF is not privileged to issue opcode %d-%d\n", opcode, subsystem); @@ -281,22 +303,56 @@ static void be_async_port_misconfig_event_process(struct be_adapter *adapter, { struct be_async_event_misconfig_port *evt = (struct be_async_event_misconfig_port *)compl; - u32 sfp_mismatch_evt = le32_to_cpu(evt->event_data_word1); + u32 sfp_misconfig_evt_word1 = le32_to_cpu(evt->event_data_word1); + u32 sfp_misconfig_evt_word2 = le32_to_cpu(evt->event_data_word2); + u8 phy_oper_state = PHY_STATE_OPER_MSG_NONE; struct device *dev = &adapter->pdev->dev; - u8 port_misconfig_evt; + u8 msg_severity = DEFAULT_MSG_SEVERITY; + u8 phy_state_info; + u8 new_phy_state; + + new_phy_state = + (sfp_misconfig_evt_word1 >> (adapter->hba_port_num * 8)) & 0xff; + + if (new_phy_state == adapter->phy_state) + return; + + adapter->phy_state = new_phy_state; + + /* for older fw that doesn't populate link effect data */ + if (!sfp_misconfig_evt_word2) + goto log_message; - port_misconfig_evt = - ((sfp_mismatch_evt >> (adapter->hba_port_num * 8)) & 0xff); + phy_state_info = + (sfp_misconfig_evt_word2 >> (adapter->hba_port_num * 8)) & 0xff; + if (phy_state_info & PHY_STATE_INFO_VALID) { + msg_severity = (phy_state_info & PHY_STATE_MSG_SEVERITY) >> 1; + + if (be_phy_unqualified(new_phy_state)) + phy_oper_state = (phy_state_info & PHY_STATE_OPER); + } + +log_message: /* Log an error message that would allow a user to determine * whether the SFPs have an issue */ - dev_info(dev, "Port %c: %s %s", adapter->port_name, - be_port_misconfig_evt_desc[port_misconfig_evt], - be_port_misconfig_remedy_desc[port_misconfig_evt]); + if (be_phy_state_unknown(new_phy_state)) + dev_printk(be_port_misconfig_evt_severity[msg_severity], dev, + "Port %c: Unrecognized Optics state: 0x%x. %s", + adapter->port_name, + new_phy_state, + phy_state_oper_desc[phy_oper_state]); + else + dev_printk(be_port_misconfig_evt_severity[msg_severity], dev, + "Port %c: %s %s", + adapter->port_name, + be_misconfig_evt_port_state[new_phy_state], + phy_state_oper_desc[phy_oper_state]); - if (port_misconfig_evt == INCOMPATIBLE_SFP) - adapter->flags |= BE_FLAGS_EVT_INCOMPATIBLE_SFP; + /* Log Vendor name and part no. if a misconfigured SFP is detected */ + if (be_phy_misconfigured(new_phy_state)) + adapter->flags |= BE_FLAGS_PHY_MISCONFIGURED; } /* Grp5 CoS Priority evt */ @@ -1497,34 +1553,25 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags, return status; } -/* Uses MCCQ */ +/* Uses MCCQ if available else MBOX */ int be_cmd_if_destroy(struct be_adapter *adapter, int interface_id, u32 domain) { - struct be_mcc_wrb *wrb; + struct be_mcc_wrb wrb = {0}; struct be_cmd_req_if_destroy *req; int status; if (interface_id == -1) return 0; - spin_lock_bh(&adapter->mcc_lock); - - wrb = wrb_from_mccq(adapter); - if (!wrb) { - status = -EBUSY; - goto err; - } - req = embedded_payload(wrb); + req = embedded_payload(&wrb); be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, OPCODE_COMMON_NTWK_INTERFACE_DESTROY, - sizeof(*req), wrb, NULL); + sizeof(*req), &wrb, NULL); req->hdr.domain = domain; req->interface_id = cpu_to_le32(interface_id); - status = be_mcc_notify_wait(adapter); -err: - spin_unlock_bh(&adapter->mcc_lock); + status = be_cmd_notify_wait(adapter, &wrb); return status; } @@ -3168,6 +3215,10 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, struct be_cmd_req_set_lmode *req; int status; + if (!be_cmd_allowed(adapter, OPCODE_LOWLEVEL_SET_LOOPBACK_MODE, + CMD_SUBSYSTEM_LOWLEVEL)) + return -EPERM; + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); @@ -3213,6 +3264,10 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, struct be_cmd_resp_loopback_test *resp; int status; + if (!be_cmd_allowed(adapter, OPCODE_LOWLEVEL_LOOPBACK_TEST, + CMD_SUBSYSTEM_LOWLEVEL)) + return -EPERM; + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); @@ -3259,6 +3314,10 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, int status; int i, j = 0; + if (!be_cmd_allowed(adapter, OPCODE_LOWLEVEL_HOST_DDR_DMA, + CMD_SUBSYSTEM_LOWLEVEL)) + return -EPERM; + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 241819b36ca7..0b9f741f31af 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -68,7 +68,8 @@ enum mcc_addl_status { MCC_ADDL_STATUS_TOO_MANY_INTERFACES = 0x4a, MCC_ADDL_STATUS_INSUFFICIENT_VLANS = 0xab, MCC_ADDL_STATUS_INVALID_SIGNATURE = 0x56, - MCC_ADDL_STATUS_MISSING_SIGNATURE = 0x57 + MCC_ADDL_STATUS_MISSING_SIGNATURE = 0x57, + MCC_ADDL_STATUS_INSUFFICIENT_PRIVILEGES = 0x60 }; #define CQE_BASE_STATUS_MASK 0xFFFF @@ -175,10 +176,53 @@ struct be_async_event_qnq { u32 flags; } __packed; -#define INCOMPATIBLE_SFP 0x3 +enum { + BE_PHY_FUNCTIONAL = 0, + BE_PHY_NOT_PRESENT = 1, + BE_PHY_DIFF_MEDIA = 2, + BE_PHY_INCOMPATIBLE = 3, + BE_PHY_UNQUALIFIED = 4, + BE_PHY_UNCERTIFIED = 5 +}; + +#define PHY_STATE_MSG_SEVERITY 0x6 +#define PHY_STATE_OPER 0x1 +#define PHY_STATE_INFO_VALID 0x80 +#define PHY_STATE_OPER_MSG_NONE 0x2 +#define DEFAULT_MSG_SEVERITY 0x1 + +#define be_phy_state_unknown(phy_state) (phy_state > BE_PHY_UNCERTIFIED) +#define be_phy_unqualified(phy_state) \ + (phy_state == BE_PHY_UNQUALIFIED || \ + phy_state == BE_PHY_UNCERTIFIED) +#define be_phy_misconfigured(phy_state) \ + (phy_state == BE_PHY_INCOMPATIBLE || \ + phy_state == BE_PHY_UNQUALIFIED || \ + phy_state == BE_PHY_UNCERTIFIED) + +extern char *be_misconfig_evt_port_state[]; + /* async event indicating misconfigured port */ struct be_async_event_misconfig_port { + /* DATA_WORD1: + * phy state of port 0: bits 7 - 0 + * phy state of port 1: bits 15 - 8 + * phy state of port 2: bits 23 - 16 + * phy state of port 3: bits 31 - 24 + */ u32 event_data_word1; + /* DATA_WORD2: + * phy state info of port 0: bits 7 - 0 + * phy state info of port 1: bits 15 - 8 + * phy state info of port 2: bits 23 - 16 + * phy state info of port 3: bits 31 - 24 + * + * PHY STATE INFO: + * Link operability :bit 0 + * Message severity :bit 2 - 1 + * Rsvd :bits 6 - 3 + * phy state info valid :bit 7 + */ u32 event_data_word2; u32 rsvd0; u32 flags; diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index a19ac441336f..2ff691636dac 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -720,29 +720,32 @@ static int be_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { struct be_adapter *adapter = netdev_priv(netdev); + int status = 0; switch (state) { case ETHTOOL_ID_ACTIVE: - be_cmd_get_beacon_state(adapter, adapter->hba_port_num, - &adapter->beacon_state); - return 1; /* cycle on/off once per second */ + status = be_cmd_get_beacon_state(adapter, adapter->hba_port_num, + &adapter->beacon_state); + if (status) + return be_cmd_status(status); + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: - be_cmd_set_beacon_state(adapter, adapter->hba_port_num, 0, 0, - BEACON_STATE_ENABLED); + status = be_cmd_set_beacon_state(adapter, adapter->hba_port_num, + 0, 0, BEACON_STATE_ENABLED); break; case ETHTOOL_ID_OFF: - be_cmd_set_beacon_state(adapter, adapter->hba_port_num, 0, 0, - BEACON_STATE_DISABLED); + status = be_cmd_set_beacon_state(adapter, adapter->hba_port_num, + 0, 0, BEACON_STATE_DISABLED); break; case ETHTOOL_ID_INACTIVE: - be_cmd_set_beacon_state(adapter, adapter->hba_port_num, 0, 0, - adapter->beacon_state); + status = be_cmd_set_beacon_state(adapter, adapter->hba_port_num, + 0, 0, adapter->beacon_state); } - return 0; + return be_cmd_status(status); } static int be_set_dump(struct net_device *netdev, struct ethtool_dump *dump) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index f99de3657ce3..46248467e206 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1463,6 +1463,9 @@ static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid) if (lancer_chip(adapter) && vid == 0) return 0; + if (!test_bit(vid, adapter->vids)) + return 0; + clear_bit(vid, adapter->vids); adapter->vlans_added--; @@ -1914,8 +1917,7 @@ static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo) if (!aic->enable) return 0; - if (time_before_eq(now, aic->jiffies) || - jiffies_to_msecs(now - aic->jiffies) < 1) + if (jiffies_to_msecs(now - aic->jiffies) < 1) eqd = aic->prev_eqd; else eqd = be_get_new_eqd(eqo); @@ -3363,6 +3365,7 @@ done: static void be_rx_qs_destroy(struct be_adapter *adapter) { + struct rss_info *rss = &adapter->rss_info; struct be_queue_info *q; struct be_rx_obj *rxo; int i; @@ -3389,6 +3392,12 @@ static void be_rx_qs_destroy(struct be_adapter *adapter) } be_queue_free(adapter, q); } + + if (rss->rss_flags) { + rss->rss_flags = RSS_ENABLE_NONE; + be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags, + 128, rss->rss_hkey); + } } static void be_disable_if_filters(struct be_adapter *adapter) @@ -3509,20 +3518,21 @@ static int be_rx_qs_create(struct be_adapter *adapter) if (!BEx_chip(adapter)) rss->rss_flags |= RSS_ENABLE_UDP_IPV4 | RSS_ENABLE_UDP_IPV6; + + netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN); + rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags, + RSS_INDIR_TABLE_LEN, rss_key); + if (rc) { + rss->rss_flags = RSS_ENABLE_NONE; + return rc; + } + + memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN); } else { /* Disable RSS, if only default RX Q is created */ rss->rss_flags = RSS_ENABLE_NONE; } - netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN); - rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags, - RSS_INDIR_TABLE_LEN, rss_key); - if (rc) { - rss->rss_flags = RSS_ENABLE_NONE; - return rc; - } - - memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN); /* Post 1 less than RXQ-len to avoid head being equal to tail, * which is a queue empty condition @@ -3789,18 +3799,15 @@ static u16 be_calculate_vf_qs(struct be_adapter *adapter, u16 num_vfs) struct be_resources res = adapter->pool_res; u16 num_vf_qs = 1; - /* Distribute the queue resources equally among the PF and it's VFs + /* Distribute the queue resources among the PF and it's VFs * Do not distribute queue resources in multi-channel configuration. */ if (num_vfs && !be_is_mc(adapter)) { - /* If number of VFs requested is 8 less than max supported, - * assign 8 queue pairs to the PF and divide the remaining - * resources evenly among the VFs - */ - if (num_vfs < (be_max_vfs(adapter) - 8)) - num_vf_qs = (res.max_rss_qs - 8) / num_vfs; - else - num_vf_qs = res.max_rss_qs / num_vfs; + /* Divide the qpairs evenly among the VFs and the PF, capped + * at VF-EQ-count. Any remainder qpairs belong to the PF. + */ + num_vf_qs = min(SH_VF_MAX_NIC_EQS, + res.max_rss_qs / (num_vfs + 1)); /* Skyhawk-R chip supports only MAX_RSS_IFACES RSS capable * interfaces per port. Provide RSS on VFs, only if number @@ -4082,6 +4089,7 @@ static void be_setup_init(struct be_adapter *adapter) adapter->if_handle = -1; adapter->be3_native = false; adapter->if_flags = 0; + adapter->phy_state = BE_UNKNOWN_PHY_STATE; if (be_physfn(adapter)) adapter->cmd_privileges = MAX_PRIVILEGES; else @@ -4265,10 +4273,10 @@ static void be_schedule_worker(struct be_adapter *adapter) adapter->flags |= BE_FLAGS_WORKER_SCHEDULED; } -static void be_schedule_err_detection(struct be_adapter *adapter) +static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay) { schedule_delayed_work(&adapter->be_err_detection_work, - msecs_to_jiffies(1000)); + msecs_to_jiffies(delay)); adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED; } @@ -4307,6 +4315,23 @@ err: return status; } +static int be_if_create(struct be_adapter *adapter) +{ + u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS; + u32 cap_flags = be_if_cap_flags(adapter); + int status; + + if (adapter->cfg_num_qs == 1) + cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS); + + en_flags &= cap_flags; + /* will enable all the needed filter flags in be_open() */ + status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags, + &adapter->if_handle, 0); + + return status; +} + int be_update_queues(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -4324,6 +4349,9 @@ int be_update_queues(struct be_adapter *adapter) be_msix_disable(adapter); be_clear_queues(adapter); + status = be_cmd_if_destroy(adapter, adapter->if_handle, 0); + if (status) + return status; if (!msix_enabled(adapter)) { status = be_msix_enable(adapter); @@ -4331,6 +4359,10 @@ int be_update_queues(struct be_adapter *adapter) return status; } + status = be_if_create(adapter); + if (status) + return status; + status = be_setup_queues(adapter); if (status) return status; @@ -4395,7 +4427,6 @@ static int be_func_init(struct be_adapter *adapter) static int be_setup(struct be_adapter *adapter) { struct device *dev = &adapter->pdev->dev; - u32 en_flags; int status; status = be_func_init(adapter); @@ -4428,10 +4459,7 @@ static int be_setup(struct be_adapter *adapter) goto err; /* will enable all the needed filter flags in be_open() */ - en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS; - en_flags = en_flags & be_if_cap_flags(adapter); - status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags, - &adapter->if_handle, 0); + status = be_if_create(adapter); if (status) goto err; @@ -4589,6 +4617,9 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, /* BE and Lancer chips support VEB mode only */ if (BEx_chip(adapter) || lancer_chip(adapter)) { + /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */ + if (!pci_sriov_get_totalvfs(adapter->pdev)) + return 0; hsw_mode = PORT_FWD_TYPE_VEB; } else { status = be_cmd_get_hsw_config(adapter, NULL, 0, @@ -4804,7 +4835,7 @@ static void be_netdev_init(struct net_device *netdev) netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX; - if (be_multi_rxq(adapter)) + if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) netdev->hw_features |= NETIF_F_RXHASH; netdev->features |= netdev->hw_features | @@ -4859,21 +4890,27 @@ static int be_resume(struct be_adapter *adapter) static int be_err_recover(struct be_adapter *adapter) { - struct device *dev = &adapter->pdev->dev; int status; + /* Error recovery is supported only Lancer as of now */ + if (!lancer_chip(adapter)) + return -EIO; + + /* Wait for adapter to reach quiescent state before + * destroying queues + */ + status = be_fw_wait_ready(adapter); + if (status) + goto err; + + be_cleanup(adapter); + status = be_resume(adapter); if (status) goto err; - dev_info(dev, "Adapter recovery successful\n"); return 0; err: - if (be_physfn(adapter)) - dev_err(dev, "Adapter recovery failed\n"); - else - dev_err(dev, "Re-trying adapter recovery\n"); - return status; } @@ -4882,21 +4919,43 @@ static void be_err_detection_task(struct work_struct *work) struct be_adapter *adapter = container_of(work, struct be_adapter, be_err_detection_work.work); - int status = 0; + struct device *dev = &adapter->pdev->dev; + int recovery_status; + int delay = ERR_DETECTION_DELAY; be_detect_error(adapter); - if (be_check_error(adapter, BE_ERROR_HW)) { - be_cleanup(adapter); - - /* As of now error recovery support is in Lancer only */ - if (lancer_chip(adapter)) - status = be_err_recover(adapter); + if (be_check_error(adapter, BE_ERROR_HW)) + recovery_status = be_err_recover(adapter); + else + goto reschedule_task; + + if (!recovery_status) { + adapter->recovery_retries = 0; + dev_info(dev, "Adapter recovery successful\n"); + goto reschedule_task; + } else if (be_virtfn(adapter)) { + /* For VFs, check if PF have allocated resources + * every second. + */ + dev_err(dev, "Re-trying adapter recovery\n"); + goto reschedule_task; + } else if (adapter->recovery_retries++ < + MAX_ERR_RECOVERY_RETRY_COUNT) { + /* In case of another error during recovery, it takes 30 sec + * for adapter to come out of error. Retry error recovery after + * this time interval. + */ + dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n"); + delay = ERR_RECOVERY_RETRY_DELAY; + goto reschedule_task; + } else { + dev_err(dev, "Adapter recovery failed\n"); } - /* Always attempt recovery on VFs */ - if (!status || be_virtfn(adapter)) - be_schedule_err_detection(adapter); + return; +reschedule_task: + be_schedule_err_detection(adapter, delay); } static void be_log_sfp_info(struct be_adapter *adapter) @@ -4906,11 +4965,13 @@ static void be_log_sfp_info(struct be_adapter *adapter) status = be_cmd_query_sfp_info(adapter); if (!status) { dev_err(&adapter->pdev->dev, - "Unqualified SFP+ detected on %c from %s part no: %s", - adapter->port_name, adapter->phy.vendor_name, + "Port %c: %s Vendor: %s part no: %s", + adapter->port_name, + be_misconfig_evt_port_state[adapter->phy_state], + adapter->phy.vendor_name, adapter->phy.vendor_pn); } - adapter->flags &= ~BE_FLAGS_EVT_INCOMPATIBLE_SFP; + adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED; } static void be_worker(struct work_struct *work) @@ -4954,7 +5015,7 @@ static void be_worker(struct work_struct *work) if (!skyhawk_chip(adapter)) be_eqd_update(adapter, false); - if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP) + if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED) be_log_sfp_info(adapter); reschedule: @@ -5292,7 +5353,7 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) be_roce_dev_add(adapter); - be_schedule_err_detection(adapter); + be_schedule_err_detection(adapter, ERR_DETECTION_DELAY); /* On Die temperature not supported for VF. */ if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) { @@ -5359,7 +5420,7 @@ static int be_pci_resume(struct pci_dev *pdev) if (status) return status; - be_schedule_err_detection(adapter); + be_schedule_err_detection(adapter, ERR_DETECTION_DELAY); if (adapter->wol_en) be_setup_wol(adapter, false); @@ -5395,6 +5456,8 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev, dev_err(&adapter->pdev->dev, "EEH error detected\n"); + be_roce_dev_remove(adapter); + if (!be_check_error(adapter, BE_ERROR_EEH)) { be_set_error(adapter, BE_ERROR_EEH); @@ -5459,7 +5522,9 @@ static void be_eeh_resume(struct pci_dev *pdev) if (status) goto err; - be_schedule_err_detection(adapter); + be_roce_dev_add(adapter); + + be_schedule_err_detection(adapter, ERR_DETECTION_DELAY); return; err: dev_err(&adapter->pdev->dev, "EEH resume failed\n"); diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 2106d72c91dc..195122e11f10 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -64,6 +64,7 @@ #define FEC_R_FIFO_RSEM 0x194 /* Receive FIFO section empty threshold */ #define FEC_R_FIFO_RAEM 0x198 /* Receive FIFO almost empty threshold */ #define FEC_R_FIFO_RAFL 0x19c /* Receive FIFO almost full threshold */ +#define FEC_FTRL 0x1b0 /* Frame truncation receive length*/ #define FEC_RACC 0x1c4 /* Receive Accelerator function */ #define FEC_RCMR_1 0x1c8 /* Receive classification match ring 1 */ #define FEC_RCMR_2 0x1cc /* Receive classification match ring 2 */ @@ -309,12 +310,6 @@ struct bufdesc_ex { #define FEC_R_BUFF_SIZE(X) (((X) == 1) ? FEC_R_BUFF_SIZE_1 : \ (((X) == 2) ? \ FEC_R_BUFF_SIZE_2 : FEC_R_BUFF_SIZE_0)) -#define FEC_R_DES_ACTIVE(X) (((X) == 1) ? FEC_R_DES_ACTIVE_1 : \ - (((X) == 2) ? \ - FEC_R_DES_ACTIVE_2 : FEC_R_DES_ACTIVE_0)) -#define FEC_X_DES_ACTIVE(X) (((X) == 1) ? FEC_X_DES_ACTIVE_1 : \ - (((X) == 2) ? \ - FEC_X_DES_ACTIVE_2 : FEC_X_DES_ACTIVE_0)) #define FEC_DMA_CFG(X) (((X) == 2) ? FEC_DMA_CFG_2 : FEC_DMA_CFG_1) @@ -380,6 +375,7 @@ struct bufdesc_ex { #define FEC_ENET_TS_TIMER ((uint)0x00008000) #define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII | FEC_ENET_TS_TIMER) +#define FEC_NAPI_IMASK (FEC_ENET_MII | FEC_ENET_TS_TIMER) #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF)) /* ENET interrupt coalescing macro define */ @@ -447,33 +443,35 @@ struct bufdesc_ex { /* Controller supports RACC register */ #define FEC_QUIRK_HAS_RACC (1 << 12) +struct bufdesc_prop { + int qid; + /* Address of Rx and Tx buffers */ + struct bufdesc *base; + struct bufdesc *last; + struct bufdesc *cur; + void __iomem *reg_desc_active; + dma_addr_t dma; + unsigned short ring_size; + unsigned char dsize; + unsigned char dsize_log2; +}; + struct fec_enet_priv_tx_q { - int index; + struct bufdesc_prop bd; unsigned char *tx_bounce[TX_RING_SIZE]; struct sk_buff *tx_skbuff[TX_RING_SIZE]; - dma_addr_t bd_dma; - struct bufdesc *tx_bd_base; - uint tx_ring_size; - unsigned short tx_stop_threshold; unsigned short tx_wake_threshold; - struct bufdesc *cur_tx; struct bufdesc *dirty_tx; char *tso_hdrs; dma_addr_t tso_hdrs_dma; }; struct fec_enet_priv_rx_q { - int index; + struct bufdesc_prop bd; struct sk_buff *rx_skbuff[RX_RING_SIZE]; - - dma_addr_t bd_dma; - struct bufdesc *rx_bd_base; - uint rx_ring_size; - - struct bufdesc *cur_rx; }; /* The FEC buffer descriptors track the ring buffers. The rx_bd_base and @@ -513,8 +511,6 @@ struct fec_enet_private { unsigned long work_ts; unsigned long work_mdio; - unsigned short bufdesc_size; - struct platform_device *pdev; int dev_id; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 41c81f6ec630..bad0ba29a94a 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -217,86 +217,38 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #define IS_TSO_HEADER(txq, addr) \ ((addr >= txq->tso_hdrs_dma) && \ - (addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE)) + (addr < txq->tso_hdrs_dma + txq->bd.ring_size * TSO_HEADER_SIZE)) static int mii_cnt; -static inline -struct bufdesc *fec_enet_get_nextdesc(struct bufdesc *bdp, - struct fec_enet_private *fep, - int queue_id) -{ - struct bufdesc *new_bd = bdp + 1; - struct bufdesc_ex *ex_new_bd = (struct bufdesc_ex *)bdp + 1; - struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue_id]; - struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue_id]; - struct bufdesc_ex *ex_base; - struct bufdesc *base; - int ring_size; - - if (bdp >= txq->tx_bd_base) { - base = txq->tx_bd_base; - ring_size = txq->tx_ring_size; - ex_base = (struct bufdesc_ex *)txq->tx_bd_base; - } else { - base = rxq->rx_bd_base; - ring_size = rxq->rx_ring_size; - ex_base = (struct bufdesc_ex *)rxq->rx_bd_base; - } - - if (fep->bufdesc_ex) - return (struct bufdesc *)((ex_new_bd >= (ex_base + ring_size)) ? - ex_base : ex_new_bd); - else - return (new_bd >= (base + ring_size)) ? - base : new_bd; -} - -static inline -struct bufdesc *fec_enet_get_prevdesc(struct bufdesc *bdp, - struct fec_enet_private *fep, - int queue_id) -{ - struct bufdesc *new_bd = bdp - 1; - struct bufdesc_ex *ex_new_bd = (struct bufdesc_ex *)bdp - 1; - struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue_id]; - struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue_id]; - struct bufdesc_ex *ex_base; - struct bufdesc *base; - int ring_size; - - if (bdp >= txq->tx_bd_base) { - base = txq->tx_bd_base; - ring_size = txq->tx_ring_size; - ex_base = (struct bufdesc_ex *)txq->tx_bd_base; - } else { - base = rxq->rx_bd_base; - ring_size = rxq->rx_ring_size; - ex_base = (struct bufdesc_ex *)rxq->rx_bd_base; - } +static struct bufdesc *fec_enet_get_nextdesc(struct bufdesc *bdp, + struct bufdesc_prop *bd) +{ + return (bdp >= bd->last) ? bd->base + : (struct bufdesc *)(((unsigned)bdp) + bd->dsize); +} - if (fep->bufdesc_ex) - return (struct bufdesc *)((ex_new_bd < ex_base) ? - (ex_new_bd + ring_size) : ex_new_bd); - else - return (new_bd < base) ? (new_bd + ring_size) : new_bd; +static struct bufdesc *fec_enet_get_prevdesc(struct bufdesc *bdp, + struct bufdesc_prop *bd) +{ + return (bdp <= bd->base) ? bd->last + : (struct bufdesc *)(((unsigned)bdp) - bd->dsize); } -static int fec_enet_get_bd_index(struct bufdesc *base, struct bufdesc *bdp, - struct fec_enet_private *fep) +static int fec_enet_get_bd_index(struct bufdesc *bdp, + struct bufdesc_prop *bd) { - return ((const char *)bdp - (const char *)base) / fep->bufdesc_size; + return ((const char *)bdp - (const char *)bd->base) >> bd->dsize_log2; } -static int fec_enet_get_free_txdesc_num(struct fec_enet_private *fep, - struct fec_enet_priv_tx_q *txq) +static int fec_enet_get_free_txdesc_num(struct fec_enet_priv_tx_q *txq) { int entries; - entries = ((const char *)txq->dirty_tx - - (const char *)txq->cur_tx) / fep->bufdesc_size - 1; + entries = (((const char *)txq->dirty_tx - + (const char *)txq->bd.cur) >> txq->bd.dsize_log2) - 1; - return entries > 0 ? entries : entries + txq->tx_ring_size; + return entries >= 0 ? entries : entries + txq->bd.ring_size; } static void swap_buffer(void *bufaddr, int len) @@ -329,20 +281,20 @@ static void fec_dump(struct net_device *ndev) pr_info("Nr SC addr len SKB\n"); txq = fep->tx_queue[0]; - bdp = txq->tx_bd_base; + bdp = txq->bd.base; do { pr_info("%3u %c%c 0x%04x 0x%08x %4u %p\n", index, - bdp == txq->cur_tx ? 'S' : ' ', + bdp == txq->bd.cur ? 'S' : ' ', bdp == txq->dirty_tx ? 'H' : ' ', fec16_to_cpu(bdp->cbd_sc), fec32_to_cpu(bdp->cbd_bufaddr), fec16_to_cpu(bdp->cbd_datlen), txq->tx_skbuff[index]); - bdp = fec_enet_get_nextdesc(bdp, fep, 0); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); index++; - } while (bdp != txq->tx_bd_base); + } while (bdp != txq->bd.base); } static inline bool is_ipv4_pkt(struct sk_buff *skb) @@ -373,10 +325,9 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq, struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - struct bufdesc *bdp = txq->cur_tx; + struct bufdesc *bdp = txq->bd.cur; struct bufdesc_ex *ebdp; int nr_frags = skb_shinfo(skb)->nr_frags; - unsigned short queue = skb_get_queue_mapping(skb); int frag, frag_len; unsigned short status; unsigned int estatus = 0; @@ -388,7 +339,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq, for (frag = 0; frag < nr_frags; frag++) { this_frag = &skb_shinfo(skb)->frags[frag]; - bdp = fec_enet_get_nextdesc(bdp, fep, queue); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); ebdp = (struct bufdesc_ex *)bdp; status = fec16_to_cpu(bdp->cbd_sc); @@ -409,7 +360,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq, if (fep->bufdesc_ex) { if (fep->quirks & FEC_QUIRK_HAS_AVB) - estatus |= FEC_TX_BD_FTYPE(queue); + estatus |= FEC_TX_BD_FTYPE(txq->bd.qid); if (skb->ip_summed == CHECKSUM_PARTIAL) estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS; ebdp->cbd_bdu = 0; @@ -418,7 +369,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq, bufaddr = page_address(this_frag->page.p) + this_frag->page_offset; - index = fec_enet_get_bd_index(txq->tx_bd_base, bdp, fep); + index = fec_enet_get_bd_index(bdp, &txq->bd); if (((unsigned long) bufaddr) & fep->tx_align || fep->quirks & FEC_QUIRK_SWAP_FRAME) { memcpy(txq->tx_bounce[index], bufaddr, frag_len); @@ -431,7 +382,6 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq, addr = dma_map_single(&fep->pdev->dev, bufaddr, frag_len, DMA_TO_DEVICE); if (dma_mapping_error(&fep->pdev->dev, addr)) { - dev_kfree_skb_any(skb); if (net_ratelimit()) netdev_err(ndev, "Tx DMA memory map failed\n"); goto dma_mapping_error; @@ -439,14 +389,18 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq, bdp->cbd_bufaddr = cpu_to_fec32(addr); bdp->cbd_datlen = cpu_to_fec16(frag_len); + /* Make sure the updates to rest of the descriptor are + * performed before transferring ownership. + */ + wmb(); bdp->cbd_sc = cpu_to_fec16(status); } return bdp; dma_mapping_error: - bdp = txq->cur_tx; + bdp = txq->bd.cur; for (i = 0; i < frag; i++) { - bdp = fec_enet_get_nextdesc(bdp, fep, queue); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); dma_unmap_single(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr), fec16_to_cpu(bdp->cbd_datlen), DMA_TO_DEVICE); } @@ -463,12 +417,11 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, dma_addr_t addr; unsigned short status; unsigned short buflen; - unsigned short queue; unsigned int estatus = 0; unsigned int index; int entries_free; - entries_free = fec_enet_get_free_txdesc_num(fep, txq); + entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free < MAX_SKB_FRAGS + 1) { dev_kfree_skb_any(skb); if (net_ratelimit()) @@ -483,7 +436,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, } /* Fill in a Tx ring entry */ - bdp = txq->cur_tx; + bdp = txq->bd.cur; last_bdp = bdp; status = fec16_to_cpu(bdp->cbd_sc); status &= ~BD_ENET_TX_STATS; @@ -492,8 +445,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, bufaddr = skb->data; buflen = skb_headlen(skb); - queue = skb_get_queue_mapping(skb); - index = fec_enet_get_bd_index(txq->tx_bd_base, bdp, fep); + index = fec_enet_get_bd_index(bdp, &txq->bd); if (((unsigned long) bufaddr) & fep->tx_align || fep->quirks & FEC_QUIRK_SWAP_FRAME) { memcpy(txq->tx_bounce[index], skb->data, buflen); @@ -514,8 +466,12 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, if (nr_frags) { last_bdp = fec_enet_txq_submit_frag_skb(txq, skb, ndev); - if (IS_ERR(last_bdp)) + if (IS_ERR(last_bdp)) { + dma_unmap_single(&fep->pdev->dev, addr, + buflen, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; + } } else { status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST); if (fep->bufdesc_ex) { @@ -525,6 +481,8 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, estatus |= BD_ENET_TX_TS; } } + bdp->cbd_bufaddr = cpu_to_fec32(addr); + bdp->cbd_datlen = cpu_to_fec16(buflen); if (fep->bufdesc_ex) { @@ -535,7 +493,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; if (fep->quirks & FEC_QUIRK_HAS_AVB) - estatus |= FEC_TX_BD_FTYPE(queue); + estatus |= FEC_TX_BD_FTYPE(txq->bd.qid); if (skb->ip_summed == CHECKSUM_PARTIAL) estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS; @@ -544,12 +502,14 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, ebdp->cbd_esc = cpu_to_fec32(estatus); } - index = fec_enet_get_bd_index(txq->tx_bd_base, last_bdp, fep); + index = fec_enet_get_bd_index(last_bdp, &txq->bd); /* Save skb pointer */ txq->tx_skbuff[index] = skb; - bdp->cbd_datlen = cpu_to_fec16(buflen); - bdp->cbd_bufaddr = cpu_to_fec32(addr); + /* Make sure the updates to rest of the descriptor are performed before + * transferring ownership. + */ + wmb(); /* Send it on its way. Tell FEC it's ready, interrupt when done, * it's the last BD of the frame, and to put the CRC on the end. @@ -558,18 +518,18 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, bdp->cbd_sc = cpu_to_fec16(status); /* If this was the last BD in the ring, start at the beginning again. */ - bdp = fec_enet_get_nextdesc(last_bdp, fep, queue); + bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd); skb_tx_timestamp(skb); /* Make sure the update to bdp and tx_skbuff are performed before - * cur_tx. + * txq->bd.cur. */ wmb(); - txq->cur_tx = bdp; + txq->bd.cur = bdp; /* Trigger transmission start */ - writel(0, fep->hwp + FEC_X_DES_ACTIVE(queue)); + writel(0, txq->bd.reg_desc_active); return 0; } @@ -582,7 +542,6 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb, { struct fec_enet_private *fep = netdev_priv(ndev); struct bufdesc_ex *ebdp = container_of(bdp, struct bufdesc_ex, desc); - unsigned short queue = skb_get_queue_mapping(skb); unsigned short status; unsigned int estatus = 0; dma_addr_t addr; @@ -614,7 +573,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb, if (fep->bufdesc_ex) { if (fep->quirks & FEC_QUIRK_HAS_AVB) - estatus |= FEC_TX_BD_FTYPE(queue); + estatus |= FEC_TX_BD_FTYPE(txq->bd.qid); if (skb->ip_summed == CHECKSUM_PARTIAL) estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS; ebdp->cbd_bdu = 0; @@ -643,7 +602,6 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq, struct fec_enet_private *fep = netdev_priv(ndev); int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); struct bufdesc_ex *ebdp = container_of(bdp, struct bufdesc_ex, desc); - unsigned short queue = skb_get_queue_mapping(skb); void *bufaddr; unsigned long dmabuf; unsigned short status; @@ -678,7 +636,7 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq, if (fep->bufdesc_ex) { if (fep->quirks & FEC_QUIRK_HAS_AVB) - estatus |= FEC_TX_BD_FTYPE(queue); + estatus |= FEC_TX_BD_FTYPE(txq->bd.qid); if (skb->ip_summed == CHECKSUM_PARTIAL) estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS; ebdp->cbd_bdu = 0; @@ -697,13 +655,12 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, struct fec_enet_private *fep = netdev_priv(ndev); int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); int total_len, data_left; - struct bufdesc *bdp = txq->cur_tx; - unsigned short queue = skb_get_queue_mapping(skb); + struct bufdesc *bdp = txq->bd.cur; struct tso_t tso; unsigned int index = 0; int ret; - if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep, txq)) { + if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(txq)) { dev_kfree_skb_any(skb); if (net_ratelimit()) netdev_err(ndev, "NOT enough BD for TSO!\n"); @@ -723,7 +680,7 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, while (total_len > 0) { char *hdr; - index = fec_enet_get_bd_index(txq->tx_bd_base, bdp, fep); + index = fec_enet_get_bd_index(bdp, &txq->bd); data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); total_len -= data_left; @@ -738,9 +695,8 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, int size; size = min_t(int, tso.size, data_left); - bdp = fec_enet_get_nextdesc(bdp, fep, queue); - index = fec_enet_get_bd_index(txq->tx_bd_base, - bdp, fep); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); + index = fec_enet_get_bd_index(bdp, &txq->bd); ret = fec_enet_txq_put_data_tso(txq, skb, ndev, bdp, index, tso.data, size, @@ -753,22 +709,22 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, tso_build_data(skb, &tso, size); } - bdp = fec_enet_get_nextdesc(bdp, fep, queue); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); } /* Save skb pointer */ txq->tx_skbuff[index] = skb; skb_tx_timestamp(skb); - txq->cur_tx = bdp; + txq->bd.cur = bdp; /* Trigger transmission start */ if (!(fep->quirks & FEC_QUIRK_ERR007885) || - !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || - !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || - !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || - !readl(fep->hwp + FEC_X_DES_ACTIVE(queue))) - writel(0, fep->hwp + FEC_X_DES_ACTIVE(queue)); + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active)) + writel(0, txq->bd.reg_desc_active); return 0; @@ -798,7 +754,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (ret) return ret; - entries_free = fec_enet_get_free_txdesc_num(fep, txq); + entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free <= txq->tx_stop_threshold) netif_tx_stop_queue(nq); @@ -819,32 +775,32 @@ static void fec_enet_bd_init(struct net_device *dev) for (q = 0; q < fep->num_rx_queues; q++) { /* Initialize the receive buffer descriptors. */ rxq = fep->rx_queue[q]; - bdp = rxq->rx_bd_base; + bdp = rxq->bd.base; - for (i = 0; i < rxq->rx_ring_size; i++) { + for (i = 0; i < rxq->bd.ring_size; i++) { /* Initialize the BD for every fragment in the page. */ if (bdp->cbd_bufaddr) bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY); else bdp->cbd_sc = cpu_to_fec16(0); - bdp = fec_enet_get_nextdesc(bdp, fep, q); + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); } /* Set the last buffer to wrap */ - bdp = fec_enet_get_prevdesc(bdp, fep, q); + bdp = fec_enet_get_prevdesc(bdp, &rxq->bd); bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP); - rxq->cur_rx = rxq->rx_bd_base; + rxq->bd.cur = rxq->bd.base; } for (q = 0; q < fep->num_tx_queues; q++) { /* ...and the same for transmit */ txq = fep->tx_queue[q]; - bdp = txq->tx_bd_base; - txq->cur_tx = bdp; + bdp = txq->bd.base; + txq->bd.cur = bdp; - for (i = 0; i < txq->tx_ring_size; i++) { + for (i = 0; i < txq->bd.ring_size; i++) { /* Initialize the BD for every fragment in the page. */ bdp->cbd_sc = cpu_to_fec16(0); if (txq->tx_skbuff[i]) { @@ -852,11 +808,11 @@ static void fec_enet_bd_init(struct net_device *dev) txq->tx_skbuff[i] = NULL; } bdp->cbd_bufaddr = cpu_to_fec32(0); - bdp = fec_enet_get_nextdesc(bdp, fep, q); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); } /* Set the last buffer to wrap */ - bdp = fec_enet_get_prevdesc(bdp, fep, q); + bdp = fec_enet_get_prevdesc(bdp, &txq->bd); bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP); txq->dirty_tx = bdp; } @@ -868,7 +824,7 @@ static void fec_enet_active_rxring(struct net_device *ndev) int i; for (i = 0; i < fep->num_rx_queues; i++) - writel(0, fep->hwp + FEC_R_DES_ACTIVE(i)); + writel(0, fep->rx_queue[i]->bd.reg_desc_active); } static void fec_enet_enable_ring(struct net_device *ndev) @@ -880,7 +836,7 @@ static void fec_enet_enable_ring(struct net_device *ndev) for (i = 0; i < fep->num_rx_queues; i++) { rxq = fep->rx_queue[i]; - writel(rxq->bd_dma, fep->hwp + FEC_R_DES_START(i)); + writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i)); writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); /* enable DMA1/2 */ @@ -891,7 +847,7 @@ static void fec_enet_enable_ring(struct net_device *ndev) for (i = 0; i < fep->num_tx_queues; i++) { txq = fep->tx_queue[i]; - writel(txq->bd_dma, fep->hwp + FEC_X_DES_START(i)); + writel(txq->bd.dma, fep->hwp + FEC_X_DES_START(i)); /* enable DMA1/2 */ if (i) @@ -909,7 +865,7 @@ static void fec_enet_reset_skb(struct net_device *ndev) for (i = 0; i < fep->num_tx_queues; i++) { txq = fep->tx_queue[i]; - for (j = 0; j < txq->tx_ring_size; j++) { + for (j = 0; j < txq->bd.ring_size; j++) { if (txq->tx_skbuff[j]) { dev_kfree_skb_any(txq->tx_skbuff[j]); txq->tx_skbuff[j] = NULL; @@ -988,6 +944,7 @@ fec_restart(struct net_device *ndev) val &= ~FEC_RACC_OPTIONS; writel(val, fep->hwp + FEC_RACC); } + writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_FTRL); #endif /* @@ -1221,16 +1178,16 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) bdp = txq->dirty_tx; /* get next bdp of dirty_tx */ - bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); - while (bdp != READ_ONCE(txq->cur_tx)) { - /* Order the load of cur_tx and cbd_sc */ + while (bdp != READ_ONCE(txq->bd.cur)) { + /* Order the load of bd.cur and cbd_sc */ rmb(); status = fec16_to_cpu(READ_ONCE(bdp->cbd_sc)); if (status & BD_ENET_TX_READY) break; - index = fec_enet_get_bd_index(txq->tx_bd_base, bdp, fep); + index = fec_enet_get_bd_index(bdp, &txq->bd); skb = txq->tx_skbuff[index]; txq->tx_skbuff[index] = NULL; @@ -1241,7 +1198,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) DMA_TO_DEVICE); bdp->cbd_bufaddr = cpu_to_fec32(0); if (!skb) { - bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); continue; } @@ -1290,21 +1247,21 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) txq->dirty_tx = bdp; /* Update pointer to next buffer descriptor to be transmitted */ - bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); /* Since we have freed up a buffer, the ring is no longer full */ if (netif_queue_stopped(ndev)) { - entries_free = fec_enet_get_free_txdesc_num(fep, txq); + entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free >= txq->tx_wake_threshold) netif_tx_wake_queue(nq); } } /* ERR006538: Keep the transmitter going */ - if (bdp != txq->cur_tx && - readl(fep->hwp + FEC_X_DES_ACTIVE(queue_id)) == 0) - writel(0, fep->hwp + FEC_X_DES_ACTIVE(queue_id)); + if (bdp != txq->bd.cur && + readl(txq->bd.reg_desc_active) == 0) + writel(0, txq->bd.reg_desc_active); } static void @@ -1366,7 +1323,7 @@ static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb, return true; } -/* During a receive, the cur_rx points to the current incoming buffer. +/* During a receive, the bd_rx.cur points to the current incoming buffer. * When we update through the ring, if the next incoming buffer has * not been given to the system, we just set the empty indicator, * effectively tossing the packet. @@ -1399,7 +1356,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) /* First, grab all of the stats for the incoming packet. * These get messed up if we get called due to a busy condition. */ - bdp = rxq->cur_rx; + bdp = rxq->bd.cur; while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) { @@ -1407,37 +1364,31 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) break; pkt_received++; - /* Since we have allocated space to hold a complete frame, - * the last indicator should be set. - */ - if ((status & BD_ENET_RX_LAST) == 0) - netdev_err(ndev, "rcv is not +last\n"); - writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT); /* Check for errors. */ + status ^= BD_ENET_RX_LAST; if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | - BD_ENET_RX_CR | BD_ENET_RX_OV)) { + BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_LAST | + BD_ENET_RX_CL)) { ndev->stats.rx_errors++; - if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH)) { + if (status & BD_ENET_RX_OV) { + /* FIFO overrun */ + ndev->stats.rx_fifo_errors++; + goto rx_processing_done; + } + if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH + | BD_ENET_RX_LAST)) { /* Frame too long or too short. */ ndev->stats.rx_length_errors++; + if (status & BD_ENET_RX_LAST) + netdev_err(ndev, "rcv is not +last\n"); } - if (status & BD_ENET_RX_NO) /* Frame alignment */ - ndev->stats.rx_frame_errors++; if (status & BD_ENET_RX_CR) /* CRC Error */ ndev->stats.rx_crc_errors++; - if (status & BD_ENET_RX_OV) /* FIFO overrun */ - ndev->stats.rx_fifo_errors++; - } - - /* Report late collisions as a frame error. - * On this error, the BD is closed, but we don't know what we - * have in the buffer. So, just drop this frame on the floor. - */ - if (status & BD_ENET_RX_CL) { - ndev->stats.rx_errors++; - ndev->stats.rx_frame_errors++; + /* Report late collisions as a frame error. */ + if (status & (BD_ENET_RX_NO | BD_ENET_RX_CL)) + ndev->stats.rx_frame_errors++; goto rx_processing_done; } @@ -1446,7 +1397,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) pkt_len = fec16_to_cpu(bdp->cbd_datlen); ndev->stats.rx_bytes += pkt_len; - index = fec_enet_get_bd_index(rxq->rx_bd_base, bdp, fep); + index = fec_enet_get_bd_index(bdp, &rxq->bd); skb = rxq->rx_skbuff[index]; /* The packet length includes FCS, but we don't want to @@ -1535,7 +1486,6 @@ rx_processing_done: /* Mark the buffer empty */ status |= BD_ENET_RX_EMPTY; - bdp->cbd_sc = cpu_to_fec16(status); if (fep->bufdesc_ex) { struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; @@ -1544,17 +1494,22 @@ rx_processing_done: ebdp->cbd_prot = 0; ebdp->cbd_bdu = 0; } + /* Make sure the updates to rest of the descriptor are + * performed before transferring ownership. + */ + wmb(); + bdp->cbd_sc = cpu_to_fec16(status); /* Update BD pointer to next entry */ - bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); /* Doing this here will keep the FEC running while we process * incoming frames. On a heavily loaded network, we should be * able to keep up at the expense of system resources. */ - writel(0, fep->hwp + FEC_R_DES_ACTIVE(queue_id)); + writel(0, rxq->bd.reg_desc_active); } - rxq->cur_rx = bdp; + rxq->bd.cur = bdp; return pkt_received; } @@ -1613,7 +1568,7 @@ fec_enet_interrupt(int irq, void *dev_id) if (napi_schedule_prep(&fep->napi)) { /* Disable the NAPI interrupts */ - writel(FEC_ENET_MII, fep->hwp + FEC_IMASK); + writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK); __napi_schedule(&fep->napi); } } @@ -2663,8 +2618,8 @@ static void fec_enet_free_buffers(struct net_device *ndev) for (q = 0; q < fep->num_rx_queues; q++) { rxq = fep->rx_queue[q]; - bdp = rxq->rx_bd_base; - for (i = 0; i < rxq->rx_ring_size; i++) { + bdp = rxq->bd.base; + for (i = 0; i < rxq->bd.ring_size; i++) { skb = rxq->rx_skbuff[i]; rxq->rx_skbuff[i] = NULL; if (skb) { @@ -2674,14 +2629,14 @@ static void fec_enet_free_buffers(struct net_device *ndev) DMA_FROM_DEVICE); dev_kfree_skb(skb); } - bdp = fec_enet_get_nextdesc(bdp, fep, q); + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); } } for (q = 0; q < fep->num_tx_queues; q++) { txq = fep->tx_queue[q]; - bdp = txq->tx_bd_base; - for (i = 0; i < txq->tx_ring_size; i++) { + bdp = txq->bd.base; + for (i = 0; i < txq->bd.ring_size; i++) { kfree(txq->tx_bounce[i]); txq->tx_bounce[i] = NULL; skb = txq->tx_skbuff[i]; @@ -2701,7 +2656,7 @@ static void fec_enet_free_queue(struct net_device *ndev) if (fep->tx_queue[i] && fep->tx_queue[i]->tso_hdrs) { txq = fep->tx_queue[i]; dma_free_coherent(NULL, - txq->tx_ring_size * TSO_HEADER_SIZE, + txq->bd.ring_size * TSO_HEADER_SIZE, txq->tso_hdrs, txq->tso_hdrs_dma); } @@ -2727,15 +2682,15 @@ static int fec_enet_alloc_queue(struct net_device *ndev) } fep->tx_queue[i] = txq; - txq->tx_ring_size = TX_RING_SIZE; - fep->total_tx_ring_size += fep->tx_queue[i]->tx_ring_size; + txq->bd.ring_size = TX_RING_SIZE; + fep->total_tx_ring_size += fep->tx_queue[i]->bd.ring_size; txq->tx_stop_threshold = FEC_MAX_SKB_DESCS; txq->tx_wake_threshold = - (txq->tx_ring_size - txq->tx_stop_threshold) / 2; + (txq->bd.ring_size - txq->tx_stop_threshold) / 2; txq->tso_hdrs = dma_alloc_coherent(NULL, - txq->tx_ring_size * TSO_HEADER_SIZE, + txq->bd.ring_size * TSO_HEADER_SIZE, &txq->tso_hdrs_dma, GFP_KERNEL); if (!txq->tso_hdrs) { @@ -2752,8 +2707,8 @@ static int fec_enet_alloc_queue(struct net_device *ndev) goto alloc_failed; } - fep->rx_queue[i]->rx_ring_size = RX_RING_SIZE; - fep->total_rx_ring_size += fep->rx_queue[i]->rx_ring_size; + fep->rx_queue[i]->bd.ring_size = RX_RING_SIZE; + fep->total_rx_ring_size += fep->rx_queue[i]->bd.ring_size; } return ret; @@ -2772,8 +2727,8 @@ fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue) struct fec_enet_priv_rx_q *rxq; rxq = fep->rx_queue[queue]; - bdp = rxq->rx_bd_base; - for (i = 0; i < rxq->rx_ring_size; i++) { + bdp = rxq->bd.base; + for (i = 0; i < rxq->bd.ring_size; i++) { skb = netdev_alloc_skb(ndev, FEC_ENET_RX_FRSIZE); if (!skb) goto err_alloc; @@ -2791,11 +2746,11 @@ fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue) ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); } - bdp = fec_enet_get_nextdesc(bdp, fep, queue); + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); } /* Set the last buffer to wrap. */ - bdp = fec_enet_get_prevdesc(bdp, fep, queue); + bdp = fec_enet_get_prevdesc(bdp, &rxq->bd); bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP); return 0; @@ -2813,8 +2768,8 @@ fec_enet_alloc_txq_buffers(struct net_device *ndev, unsigned int queue) struct fec_enet_priv_tx_q *txq; txq = fep->tx_queue[queue]; - bdp = txq->tx_bd_base; - for (i = 0; i < txq->tx_ring_size; i++) { + bdp = txq->bd.base; + for (i = 0; i < txq->bd.ring_size; i++) { txq->tx_bounce[i] = kmalloc(FEC_ENET_TX_FRSIZE, GFP_KERNEL); if (!txq->tx_bounce[i]) goto err_alloc; @@ -2827,11 +2782,11 @@ fec_enet_alloc_txq_buffers(struct net_device *ndev, unsigned int queue) ebdp->cbd_esc = cpu_to_fec32(BD_ENET_TX_INT); } - bdp = fec_enet_get_nextdesc(bdp, fep, queue); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); } /* Set the last buffer to wrap. */ - bdp = fec_enet_get_prevdesc(bdp, fep, queue); + bdp = fec_enet_get_prevdesc(bdp, &txq->bd); bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP); return 0; @@ -3115,6 +3070,14 @@ static const struct net_device_ops fec_netdev_ops = { .ndo_set_features = fec_set_features, }; +static const unsigned short offset_des_active_rxq[] = { + FEC_R_DES_ACTIVE_0, FEC_R_DES_ACTIVE_1, FEC_R_DES_ACTIVE_2 +}; + +static const unsigned short offset_des_active_txq[] = { + FEC_X_DES_ACTIVE_0, FEC_X_DES_ACTIVE_1, FEC_X_DES_ACTIVE_2 +}; + /* * XXX: We need to clean up on failure exits here. * @@ -3122,13 +3085,15 @@ static const struct net_device_ops fec_netdev_ops = { static int fec_enet_init(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - struct fec_enet_priv_tx_q *txq; - struct fec_enet_priv_rx_q *rxq; struct bufdesc *cbd_base; dma_addr_t bd_dma; int bd_size; unsigned int i; + unsigned dsize = fep->bufdesc_ex ? sizeof(struct bufdesc_ex) : + sizeof(struct bufdesc); + unsigned dsize_log2 = __fls(dsize); + WARN_ON(dsize != (1 << dsize_log2)); #if defined(CONFIG_ARM) fep->rx_align = 0xf; fep->tx_align = 0xf; @@ -3139,12 +3104,7 @@ static int fec_enet_init(struct net_device *ndev) fec_enet_alloc_queue(ndev); - if (fep->bufdesc_ex) - fep->bufdesc_size = sizeof(struct bufdesc_ex); - else - fep->bufdesc_size = sizeof(struct bufdesc); - bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * - fep->bufdesc_size; + bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize; /* Allocate memory for buffer descriptors. */ cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma, @@ -3162,33 +3122,35 @@ static int fec_enet_init(struct net_device *ndev) /* Set receive and transmit descriptor base. */ for (i = 0; i < fep->num_rx_queues; i++) { - rxq = fep->rx_queue[i]; - rxq->index = i; - rxq->rx_bd_base = (struct bufdesc *)cbd_base; - rxq->bd_dma = bd_dma; - if (fep->bufdesc_ex) { - bd_dma += sizeof(struct bufdesc_ex) * rxq->rx_ring_size; - cbd_base = (struct bufdesc *) - (((struct bufdesc_ex *)cbd_base) + rxq->rx_ring_size); - } else { - bd_dma += sizeof(struct bufdesc) * rxq->rx_ring_size; - cbd_base += rxq->rx_ring_size; - } + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[i]; + unsigned size = dsize * rxq->bd.ring_size; + + rxq->bd.qid = i; + rxq->bd.base = cbd_base; + rxq->bd.cur = cbd_base; + rxq->bd.dma = bd_dma; + rxq->bd.dsize = dsize; + rxq->bd.dsize_log2 = dsize_log2; + rxq->bd.reg_desc_active = fep->hwp + offset_des_active_rxq[i]; + bd_dma += size; + cbd_base = (struct bufdesc *)(((void *)cbd_base) + size); + rxq->bd.last = (struct bufdesc *)(((void *)cbd_base) - dsize); } for (i = 0; i < fep->num_tx_queues; i++) { - txq = fep->tx_queue[i]; - txq->index = i; - txq->tx_bd_base = (struct bufdesc *)cbd_base; - txq->bd_dma = bd_dma; - if (fep->bufdesc_ex) { - bd_dma += sizeof(struct bufdesc_ex) * txq->tx_ring_size; - cbd_base = (struct bufdesc *) - (((struct bufdesc_ex *)cbd_base) + txq->tx_ring_size); - } else { - bd_dma += sizeof(struct bufdesc) * txq->tx_ring_size; - cbd_base += txq->tx_ring_size; - } + struct fec_enet_priv_tx_q *txq = fep->tx_queue[i]; + unsigned size = dsize * txq->bd.ring_size; + + txq->bd.qid = i; + txq->bd.base = cbd_base; + txq->bd.cur = cbd_base; + txq->bd.dma = bd_dma; + txq->bd.dsize = dsize; + txq->bd.dsize_log2 = dsize_log2; + txq->bd.reg_desc_active = fep->hwp + offset_des_active_txq[i]; + bd_dma += size; + cbd_base = (struct bufdesc *)(((void *)cbd_base) + size); + txq->bd.last = (struct bufdesc *)(((void *)cbd_base) - dsize); } @@ -3229,6 +3191,7 @@ static int fec_enet_init(struct net_device *ndev) static void fec_reset_phy(struct platform_device *pdev) { int err, phy_reset; + bool active_low = false; int msec = 1; struct device_node *np = pdev->dev.of_node; @@ -3244,14 +3207,17 @@ static void fec_reset_phy(struct platform_device *pdev) if (!gpio_is_valid(phy_reset)) return; + active_low = of_property_read_bool(np, "phy-reset-active-low"); + err = devm_gpio_request_one(&pdev->dev, phy_reset, - GPIOF_OUT_INIT_LOW, "phy-reset"); + active_low ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, + "phy-reset"); if (err) { dev_err(&pdev->dev, "failed to get phy-reset-gpios: %d\n", err); return; } msleep(msec); - gpio_set_value_cansleep(phy_reset, 1); + gpio_set_value_cansleep(phy_reset, !active_low); } #else /* CONFIG_OF */ static void fec_reset_phy(struct platform_device *pdev) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index b243c3cbe68f..38f558e0bb62 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1937,8 +1937,10 @@ static void fm10k_init_reta(struct fm10k_intfc *interface) u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices; u32 reta, base; - /* If the netdev is initialized we have to maintain table if possible */ - if (interface->netdev->reg_state != NETREG_UNINITIALIZED) { + /* If the Rx flow indirection table has been configured manually, we + * need to maintain it when possible. + */ + if (netif_is_rxfh_configured(interface->netdev)) { for (i = FM10K_RETA_SIZE; i--;) { reta = interface->reta[i]; if ((((reta << 24) >> 24) < rss_i) && @@ -1946,6 +1948,10 @@ static void fm10k_init_reta(struct fm10k_intfc *interface) (((reta << 8) >> 24) < rss_i) && (((reta) >> 24) < rss_i)) continue; + + /* this should never happen */ + dev_err(&interface->pdev->dev, + "RSS indirection table assigned flows out of queue bounds. Reconfiguring.\n"); goto repopulate_reta; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 662569d5b7c0..dc1a82148ff0 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1204,6 +1204,15 @@ err_queueing_scheme: return err; } +static int __fm10k_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + return fm10k_setup_tc(dev, tc->tc); +} + static int fm10k_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { switch (cmd) { @@ -1386,7 +1395,7 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_vlan_rx_kill_vid = fm10k_vlan_rx_kill_vid, .ndo_set_rx_mode = fm10k_set_rx_mode, .ndo_get_stats64 = fm10k_get_stats64, - .ndo_setup_tc = fm10k_setup_tc, + .ndo_setup_tc = __fm10k_setup_tc, .ndo_set_vf_mac = fm10k_ndo_set_vf_mac, .ndo_set_vf_vlan = fm10k_ndo_set_vf_vlan, .ndo_set_vf_rate = fm10k_ndo_set_vf_bw, diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 68f2204ec6f3..2f6210ae8ba0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -64,9 +64,6 @@ #include "i40e_dcb.h" /* Useful i40e defaults */ -#define I40E_BASE_PF_SEID 16 -#define I40E_BASE_VSI_SEID 512 -#define I40E_BASE_VEB_SEID 288 #define I40E_MAX_VEB 16 #define I40E_MAX_NUM_DESCRIPTORS 4096 @@ -104,6 +101,7 @@ #define I40E_PRIV_FLAGS_FD_ATR BIT(2) #define I40E_PRIV_FLAGS_VEB_STATS BIT(3) #define I40E_PRIV_FLAGS_PS BIT(4) +#define I40E_PRIV_FLAGS_HW_ATR_EVICT BIT(5) #define I40E_NVM_VERSION_LO_SHIFT 0 #define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) @@ -113,6 +111,7 @@ #define I40E_OEM_VER_PATCH_MASK 0xff #define I40E_OEM_VER_BUILD_SHIFT 8 #define I40E_OEM_VER_SHIFT 24 +#define I40E_PHY_DEBUG_PORT BIT(4) /* The values in here are decimal coded as hex as is the case in the NVM map*/ #define I40E_CURRENT_NVM_VERSION_HI 0x2 @@ -137,6 +136,19 @@ /* default to trying for four seconds */ #define I40E_TRY_LINK_TIMEOUT (4 * HZ) +/** + * i40e_is_mac_710 - Return true if MAC is X710/XL710 + * @hw: ptr to the hardware info + **/ +static inline bool i40e_is_mac_710(struct i40e_hw *hw) +{ + if ((hw->mac.type == I40E_MAC_X710) || + (hw->mac.type == I40E_MAC_XL710)) + return true; + + return false; +} + /* driver state flags */ enum i40e_state_t { __I40E_TESTING, @@ -339,6 +351,12 @@ struct i40e_pf { #define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40) #define I40E_FLAG_GENEVE_OFFLOAD_CAPABLE BIT_ULL(41) #define I40E_FLAG_NO_PCI_LINK_CHECK BIT_ULL(42) +#define I40E_FLAG_100M_SGMII_CAPABLE BIT_ULL(43) +#define I40E_FLAG_RESTART_AUTONEG BIT_ULL(44) +#define I40E_FLAG_NO_DCB_SUPPORT BIT_ULL(45) +#define I40E_FLAG_USE_SET_LLDP_MIB BIT_ULL(46) +#define I40E_FLAG_STOP_FW_LLDP BIT_ULL(47) +#define I40E_FLAG_HAVE_10GBASET_PHY BIT_ULL(48) #define I40E_FLAG_PF_MAC BIT_ULL(50) /* tracks features that get auto disabled by errors */ @@ -391,6 +409,7 @@ struct i40e_pf { struct i40e_vf *vf; int num_alloc_vfs; /* actual number of VFs allocated */ u32 vf_aq_requests; + u32 arq_overflows; /* Not fatal, possibly indicative of problems */ /* DCBx/DCBNL capability for PF that indicates * whether DCBx is managed by firmware or host @@ -423,6 +442,7 @@ struct i40e_pf { u32 ioremap_len; u32 fd_inv; + u16 phy_led_val; }; struct i40e_mac_filter { @@ -492,6 +512,7 @@ struct i40e_vsi { u32 tx_busy; u64 tx_linearize; u64 tx_force_wb; + u64 tx_lost_interrupt; u32 rx_buf_failed; u32 rx_page_failed; @@ -500,13 +521,6 @@ struct i40e_vsi { struct i40e_ring **tx_rings; u16 work_limit; - /* high bit set means dynamic, use accessor routines to read/write. - * hardware only supports 2us resolution for the ITR registers. - * these values always store the USER setting, and must be converted - * before programming to a register. - */ - u16 rx_itr_setting; - u16 tx_itr_setting; u16 int_rate_limit; /* value in usecs */ u16 rss_table_size; /* HW RSS table size */ @@ -747,6 +761,9 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) struct i40e_hw *hw = &pf->hw; u32 val; + /* definitely clear the PBA here, as this function is meant to + * clean out all previous interrupts AND enable the interrupt + */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); @@ -754,9 +771,8 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) /* skip the flush */ } -void i40e_irq_dynamic_disable(struct i40e_vsi *vsi, int vector); void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf); -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf); +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba); #ifdef I40E_FCOE struct rtnl_link_stats64 *i40e_get_netdev_stats_struct( struct net_device *netdev, @@ -786,7 +802,8 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, bool is_vf, bool is_netdev); #ifdef I40E_FCOE int i40e_close(struct net_device *netdev); -int i40e_setup_tc(struct net_device *netdev, u8 tc); +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); void i40e_netpoll(struct net_device *netdev); int i40e_fcoe_enable(struct net_device *netdev); int i40e_fcoe_disable(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 1fd5ea82a9bc..df8e2fd6a649 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -953,6 +953,9 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, u16 flags; u16 ntu; + /* pre-clean the event info */ + memset(&e->desc, 0, sizeof(e->desc)); + /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); @@ -1020,14 +1023,6 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, hw->aq.arq.next_to_clean = ntc; hw->aq.arq.next_to_use = ntu; -clean_arq_element_out: - /* Set pending if needed, unlock and return */ - if (pending != NULL) - *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); - -clean_arq_element_err: - mutex_unlock(&hw->aq.arq_mutex); - if (i40e_is_nvm_update_op(&e->desc)) { if (hw->aq.nvm_release_on_done) { i40e_release_nvm(hw); @@ -1048,6 +1043,13 @@ clean_arq_element_err: } } +clean_arq_element_out: + /* Set pending if needed, unlock and return */ + if (pending) + *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); +clean_arq_element_err: + mutex_unlock(&hw->aq.arq_mutex); + return ret_code; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index b22012a446a6..8d5c65ab6267 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -34,7 +34,7 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0004 +#define I40E_FW_API_VERSION_MINOR 0x0005 struct i40e_aq_desc { __le16 flags; @@ -145,6 +145,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_remove_statistics = 0x0202, i40e_aqc_opc_set_port_parameters = 0x0203, i40e_aqc_opc_get_switch_resource_alloc = 0x0204, + i40e_aqc_opc_set_switch_config = 0x0205, + i40e_aqc_opc_rx_ctl_reg_read = 0x0206, + i40e_aqc_opc_rx_ctl_reg_write = 0x0207, i40e_aqc_opc_add_vsi = 0x0210, i40e_aqc_opc_update_vsi_parameters = 0x0211, @@ -220,6 +223,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_get_phy_wol_caps = 0x0621, i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, + i40e_aqc_opc_run_phy_activity = 0x0626, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -228,6 +232,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_nvm_config_read = 0x0704, i40e_aqc_opc_nvm_config_write = 0x0705, i40e_aqc_opc_oem_post_update = 0x0720, + i40e_aqc_opc_thermal_sensor = 0x0721, /* virtualization commands */ i40e_aqc_opc_send_msg_to_pf = 0x0801, @@ -402,6 +407,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_OS2BMC_CAP 0x0004 #define I40E_AQ_CAP_ID_FUNCTIONS_VALID 0x0005 #define I40E_AQ_CAP_ID_ALTERNATE_RAM 0x0006 +#define I40E_AQ_CAP_ID_WOL_AND_PROXY 0x0008 #define I40E_AQ_CAP_ID_SRIOV 0x0012 #define I40E_AQ_CAP_ID_VF 0x0013 #define I40E_AQ_CAP_ID_VMDQ 0x0014 @@ -422,6 +428,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_LED 0x0061 #define I40E_AQ_CAP_ID_SDP 0x0062 #define I40E_AQ_CAP_ID_MDIO 0x0063 +#define I40E_AQ_CAP_ID_WSR_PROT 0x0064 #define I40E_AQ_CAP_ID_FLEX10 0x00F1 #define I40E_AQ_CAP_ID_CEM 0x00F2 @@ -680,6 +687,31 @@ struct i40e_aqc_switch_resource_alloc_element_resp { I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp); +/* Set Switch Configuration (direct 0x0205) */ +struct i40e_aqc_set_switch_config { + __le16 flags; +#define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 +#define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 + __le16 valid_flags; + u8 reserved[12]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); + +/* Read Receive control registers (direct 0x0206) + * Write Receive control registers (direct 0x0207) + * used for accessing Rx control registers that can be + * slow and need special handling when under high Rx load + */ +struct i40e_aqc_rx_ctl_reg_read_write { + __le32 reserved1; + __le32 address; + __le32 reserved2; + __le32 value; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_rx_ctl_reg_read_write); + /* Add VSI (indirect 0x0210) * this indirect command uses struct i40e_aqc_vsi_properties_data * as the indirect buffer (128 bytes) @@ -906,7 +938,8 @@ struct i40e_aqc_add_veb { I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT) #define I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT 0x2 #define I40E_AQC_ADD_VEB_PORT_TYPE_DATA 0x4 -#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 +#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 /* deprecated */ +#define I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS 0x10 u8 enable_tcs; u8 reserved[9]; }; @@ -973,6 +1006,7 @@ struct i40e_aqc_add_macvlan_element_data { #define I40E_AQC_MACVLAN_ADD_HASH_MATCH 0x0002 #define I40E_AQC_MACVLAN_ADD_IGNORE_VLAN 0x0004 #define I40E_AQC_MACVLAN_ADD_TO_QUEUE 0x0008 +#define I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC 0x0010 __le16 queue_number; #define I40E_AQC_MACVLAN_CMD_QUEUE_SHIFT 0 #define I40E_AQC_MACVLAN_CMD_QUEUE_MASK (0x7FF << \ @@ -1069,6 +1103,7 @@ struct i40e_aqc_set_vsi_promiscuous_modes { #define I40E_AQC_SET_VSI_PROMISC_BROADCAST 0x04 #define I40E_AQC_SET_VSI_DEFAULT 0x08 #define I40E_AQC_SET_VSI_PROMISC_VLAN 0x10 +#define I40E_AQC_SET_VSI_PROMISC_TX 0x8000 __le16 seid; #define I40E_AQC_VSI_PROM_CMD_SEID_MASK 0x3FF __le16 vlan_tag; @@ -1257,10 +1292,16 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT 9 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK 0x1E00 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_XVLAN 0 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN 0 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_NVGRE_OMAC 1 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NGE 2 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE 2 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_IP 3 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_RESERVED 4 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN_GPE 5 + +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_MAC 0x2000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_INNER_MAC 0x4000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_IP 0x8000 __le32 tenant_id; u8 reserved[4]; @@ -1755,7 +1796,12 @@ struct i40e_aqc_get_link_status { u8 config; #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 reserved[5]; + u8 external_power_ability; +#define I40E_AQ_LINK_POWER_CLASS_1 0x00 +#define I40E_AQ_LINK_POWER_CLASS_2 0x01 +#define I40E_AQ_LINK_POWER_CLASS_3 0x02 +#define I40E_AQ_LINK_POWER_CLASS_4 0x03 + u8 reserved[4]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); @@ -1823,6 +1869,18 @@ enum i40e_aq_phy_reg_type { I40E_AQC_PHY_REG_EXERNAL_MODULE = 0x3 }; +/* Run PHY Activity (0x0626) */ +struct i40e_aqc_run_phy_activity { + __le16 activity_id; + u8 flags; + u8 reserved1; + __le32 control; + __le32 data; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -1912,6 +1970,22 @@ struct i40e_aqc_nvm_oem_post_update_buffer { I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer); +/* Thermal Sensor (indirect 0x0721) + * read or set thermal sensor configs and values + * takes a sensor and command specific data buffer, not detailed here + */ +struct i40e_aqc_thermal_sensor { + u8 sensor_action; +#define I40E_AQ_THERMAL_SENSOR_READ_CONFIG 0 +#define I40E_AQ_THERMAL_SENSOR_SET_CONFIG 1 +#define I40E_AQ_THERMAL_SENSOR_READ_TEMP 2 + u8 reserved[7]; + __le32 addr_high; + __le32 addr_low; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_thermal_sensor); + /* Send to PF command (indirect 0x0801) id is only used by PF * Send to VF command (indirect 0x0802) id is only used by PF * Send to Peer PF command (indirect 0x0803) @@ -2191,6 +2265,7 @@ struct i40e_aqc_add_udp_tunnel { #define I40E_AQC_TUNNEL_TYPE_VXLAN 0x00 #define I40E_AQC_TUNNEL_TYPE_NGE 0x01 #define I40E_AQC_TUNNEL_TYPE_TEREDO 0x10 +#define I40E_AQC_TUNNEL_TYPE_VXLAN_GPE 0x11 u8 reserved1[10]; }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 6a034ddac36a..4596294c2ab1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -55,19 +55,13 @@ static i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_20G_KR2_A: hw->mac.type = I40E_MAC_XL710; break; + case I40E_DEV_ID_KX_X722: + case I40E_DEV_ID_QSFP_X722: case I40E_DEV_ID_SFP_X722: case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: hw->mac.type = I40E_MAC_X722; break; - case I40E_DEV_ID_X722_VF: - case I40E_DEV_ID_X722_VF_HV: - hw->mac.type = I40E_MAC_X722_VF; - break; - case I40E_DEV_ID_VF: - case I40E_DEV_ID_VF_HV: - hw->mac.type = I40E_MAC_VF; - break; default: hw->mac.type = I40E_MAC_GENERIC; break; @@ -1245,7 +1239,13 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw) grst_del = (rd32(hw, I40E_GLGEN_RSTCTL) & I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >> I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT; - for (cnt = 0; cnt < grst_del + 10; cnt++) { + + /* It can take upto 15 secs for GRST steady state. + * Bump it to 16 secs max to be safe. + */ + grst_del = grst_del * 20; + + for (cnt = 0; cnt < grst_del; cnt++) { reg = rd32(hw, I40E_GLGEN_RSTAT); if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK)) break; @@ -1894,6 +1894,32 @@ i40e_status i40e_aq_set_phy_int_mask(struct i40e_hw *hw, } /** + * i40e_aq_set_phy_debug + * @hw: pointer to the hw struct + * @cmd_flags: debug command flags + * @cmd_details: pointer to command details structure or NULL + * + * Reset the external PHY. + **/ +enum i40e_status_code i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_phy_debug *cmd = + (struct i40e_aqc_set_phy_debug *)&desc.params.raw; + enum i40e_status_code status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_phy_debug); + + cmd->command_flags = cmd_flags; + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** * i40e_aq_add_vsi * @hw: pointer to the hw struct * @vsi_ctx: pointer to a vsi context struct @@ -1958,12 +1984,19 @@ i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_vsi_promiscuous_modes); - if (set) + if (set) { flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST; + if (((hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver >= 5)) || + (hw->aq.api_maj_ver > 1)) + flags |= I40E_AQC_SET_VSI_PROMISC_TX; + } cmd->promiscuous_flags = cpu_to_le16(flags); cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST); + if (((hw->aq.api_maj_ver >= 1) && (hw->aq.api_min_ver >= 5)) || + (hw->aq.api_maj_ver > 1)) + cmd->valid_flags |= cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_TX); cmd->seid = cpu_to_le16(seid); status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); @@ -2039,6 +2072,37 @@ i40e_status i40e_aq_set_vsi_broadcast(struct i40e_hw *hw, } /** + * i40e_aq_set_vsi_vlan_promisc - control the VLAN promiscuous setting + * @hw: pointer to the hw struct + * @seid: vsi number + * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN + * @cmd_details: pointer to command details structure or NULL + **/ +i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, + u16 seid, bool enable, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw; + i40e_status status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + if (enable) + flags |= I40E_AQC_SET_VSI_PROMISC_VLAN; + + cmd->promiscuous_flags = cpu_to_le16(flags); + cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_VLAN); + cmd->seid = cpu_to_le16(seid); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** * i40e_get_vsi_params - get VSI configuration info * @hw: pointer to the hw struct * @vsi_ctx: pointer to a vsi context struct @@ -2283,8 +2347,8 @@ i40e_status i40e_update_link_info(struct i40e_hw *hw) * @downlink_seid: the VSI SEID * @enabled_tc: bitmap of TCs to be enabled * @default_port: true for default port VSI, false for control port - * @enable_l2_filtering: true to add L2 filter table rules to regular forwarding rules for cloud support * @veb_seid: pointer to where to put the resulting VEB SEID + * @enable_stats: true to turn on VEB stats * @cmd_details: pointer to command details structure or NULL * * This asks the FW to add a VEB between the uplink and downlink @@ -2292,8 +2356,8 @@ i40e_status i40e_update_link_info(struct i40e_hw *hw) **/ i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid, u16 downlink_seid, u8 enabled_tc, - bool default_port, bool enable_l2_filtering, - u16 *veb_seid, + bool default_port, u16 *veb_seid, + bool enable_stats, struct i40e_asq_cmd_details *cmd_details) { struct i40e_aq_desc desc; @@ -2320,8 +2384,9 @@ i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid, else veb_flags |= I40E_AQC_ADD_VEB_PORT_TYPE_DATA; - if (enable_l2_filtering) - veb_flags |= I40E_AQC_ADD_VEB_ENABLE_L2_FILTER; + /* reverse logic here: set the bitflag to disable the stats */ + if (!enable_stats) + veb_flags |= I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS; cmd->veb_flags = cpu_to_le16(veb_flags); @@ -2410,6 +2475,7 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, (struct i40e_aqc_macvlan *)&desc.params.raw; i40e_status status; u16 buf_size; + int i; if (count == 0 || !mv_list || !hw) return I40E_ERR_PARAM; @@ -2423,12 +2489,17 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, cmd->seid[1] = 0; cmd->seid[2] = 0; + for (i = 0; i < count; i++) + if (is_multicast_ether_addr(mv_list[i].mac_addr)) + mv_list[i].flags |= + cpu_to_le16(I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); if (buf_size > I40E_AQ_LARGE_BUF) desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + cmd_details); return status; } @@ -2476,6 +2547,137 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid, } /** + * i40e_mirrorrule_op - Internal helper function to add/delete mirror rule + * @hw: pointer to the hw struct + * @opcode: AQ opcode for add or delete mirror rule + * @sw_seid: Switch SEID (to which rule refers) + * @rule_type: Rule Type (ingress/egress/VLAN) + * @id: Destination VSI SEID or Rule ID + * @count: length of the list + * @mr_list: list of mirrored VSI SEIDs or VLAN IDs + * @cmd_details: pointer to command details structure or NULL + * @rule_id: Rule ID returned from FW + * @rule_used: Number of rules used in internal switch + * @rule_free: Number of rules free in internal switch + * + * Add/Delete a mirror rule to a specific switch. Mirror rules are supported for + * VEBs/VEPA elements only + **/ +static i40e_status i40e_mirrorrule_op(struct i40e_hw *hw, + u16 opcode, u16 sw_seid, u16 rule_type, u16 id, + u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rule_id, u16 *rules_used, u16 *rules_free) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_delete_mirror_rule *cmd = + (struct i40e_aqc_add_delete_mirror_rule *)&desc.params.raw; + struct i40e_aqc_add_delete_mirror_rule_completion *resp = + (struct i40e_aqc_add_delete_mirror_rule_completion *)&desc.params.raw; + i40e_status status; + u16 buf_size; + + buf_size = count * sizeof(*mr_list); + + /* prep the rest of the request */ + i40e_fill_default_direct_cmd_desc(&desc, opcode); + cmd->seid = cpu_to_le16(sw_seid); + cmd->rule_type = cpu_to_le16(rule_type & + I40E_AQC_MIRROR_RULE_TYPE_MASK); + cmd->num_entries = cpu_to_le16(count); + /* Dest VSI for add, rule_id for delete */ + cmd->destination = cpu_to_le16(id); + if (mr_list) { + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | + I40E_AQ_FLAG_RD)); + if (buf_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + } + + status = i40e_asq_send_command(hw, &desc, mr_list, buf_size, + cmd_details); + if (!status || + hw->aq.asq_last_status == I40E_AQ_RC_ENOSPC) { + if (rule_id) + *rule_id = le16_to_cpu(resp->rule_id); + if (rules_used) + *rules_used = le16_to_cpu(resp->mirror_rules_used); + if (rules_free) + *rules_free = le16_to_cpu(resp->mirror_rules_free); + } + return status; +} + +/** + * i40e_aq_add_mirrorrule - add a mirror rule + * @hw: pointer to the hw struct + * @sw_seid: Switch SEID (to which rule refers) + * @rule_type: Rule Type (ingress/egress/VLAN) + * @dest_vsi: SEID of VSI to which packets will be mirrored + * @count: length of the list + * @mr_list: list of mirrored VSI SEIDs or VLAN IDs + * @cmd_details: pointer to command details structure or NULL + * @rule_id: Rule ID returned from FW + * @rule_used: Number of rules used in internal switch + * @rule_free: Number of rules free in internal switch + * + * Add mirror rule. Mirror rules are supported for VEBs or VEPA elements only + **/ +i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rule_id, u16 *rules_used, u16 *rules_free) +{ + if (!(rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS || + rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS)) { + if (count == 0 || !mr_list) + return I40E_ERR_PARAM; + } + + return i40e_mirrorrule_op(hw, i40e_aqc_opc_add_mirror_rule, sw_seid, + rule_type, dest_vsi, count, mr_list, + cmd_details, rule_id, rules_used, rules_free); +} + +/** + * i40e_aq_delete_mirrorrule - delete a mirror rule + * @hw: pointer to the hw struct + * @sw_seid: Switch SEID (to which rule refers) + * @rule_type: Rule Type (ingress/egress/VLAN) + * @count: length of the list + * @rule_id: Rule ID that is returned in the receive desc as part of + * add_mirrorrule. + * @mr_list: list of mirrored VLAN IDs to be removed + * @cmd_details: pointer to command details structure or NULL + * @rule_used: Number of rules used in internal switch + * @rule_free: Number of rules free in internal switch + * + * Delete a mirror rule. Mirror rules are supported for VEBs/VEPA elements only + **/ +i40e_status i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rules_used, u16 *rules_free) +{ + /* Rule ID has to be valid except rule_type: INGRESS VLAN mirroring */ + if (rule_type != I40E_AQC_MIRROR_RULE_TYPE_VLAN) { + if (!rule_id) + return I40E_ERR_PARAM; + } else { + /* count and mr_list shall be valid for rule_type INGRESS VLAN + * mirroring. For other rule_type, count and rule_type should + * not matter. + */ + if (count == 0 || !mr_list) + return I40E_ERR_PARAM; + } + + return i40e_mirrorrule_op(hw, i40e_aqc_opc_delete_mirror_rule, sw_seid, + rule_type, rule_id, count, mr_list, + cmd_details, NULL, rules_used, rules_free); +} + +/** * i40e_aq_send_msg_to_vf * @hw: pointer to the hardware structure * @vfid: VF id to send msg @@ -2765,35 +2967,6 @@ i40e_aq_erase_nvm_exit: return status; } -#define I40E_DEV_FUNC_CAP_SWITCH_MODE 0x01 -#define I40E_DEV_FUNC_CAP_MGMT_MODE 0x02 -#define I40E_DEV_FUNC_CAP_NPAR 0x03 -#define I40E_DEV_FUNC_CAP_OS2BMC 0x04 -#define I40E_DEV_FUNC_CAP_VALID_FUNC 0x05 -#define I40E_DEV_FUNC_CAP_SRIOV_1_1 0x12 -#define I40E_DEV_FUNC_CAP_VF 0x13 -#define I40E_DEV_FUNC_CAP_VMDQ 0x14 -#define I40E_DEV_FUNC_CAP_802_1_QBG 0x15 -#define I40E_DEV_FUNC_CAP_802_1_QBH 0x16 -#define I40E_DEV_FUNC_CAP_VSI 0x17 -#define I40E_DEV_FUNC_CAP_DCB 0x18 -#define I40E_DEV_FUNC_CAP_FCOE 0x21 -#define I40E_DEV_FUNC_CAP_ISCSI 0x22 -#define I40E_DEV_FUNC_CAP_RSS 0x40 -#define I40E_DEV_FUNC_CAP_RX_QUEUES 0x41 -#define I40E_DEV_FUNC_CAP_TX_QUEUES 0x42 -#define I40E_DEV_FUNC_CAP_MSIX 0x43 -#define I40E_DEV_FUNC_CAP_MSIX_VF 0x44 -#define I40E_DEV_FUNC_CAP_FLOW_DIRECTOR 0x45 -#define I40E_DEV_FUNC_CAP_IEEE_1588 0x46 -#define I40E_DEV_FUNC_CAP_FLEX10 0xF1 -#define I40E_DEV_FUNC_CAP_CEM 0xF2 -#define I40E_DEV_FUNC_CAP_IWARP 0x51 -#define I40E_DEV_FUNC_CAP_LED 0x61 -#define I40E_DEV_FUNC_CAP_SDP 0x62 -#define I40E_DEV_FUNC_CAP_MDIO 0x63 -#define I40E_DEV_FUNC_CAP_WR_CSR_PROT 0x64 - /** * i40e_parse_discover_capabilities * @hw: pointer to the hw struct @@ -2832,79 +3005,79 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, major_rev = cap->major_rev; switch (id) { - case I40E_DEV_FUNC_CAP_SWITCH_MODE: + case I40E_AQ_CAP_ID_SWITCH_MODE: p->switch_mode = number; break; - case I40E_DEV_FUNC_CAP_MGMT_MODE: + case I40E_AQ_CAP_ID_MNG_MODE: p->management_mode = number; break; - case I40E_DEV_FUNC_CAP_NPAR: + case I40E_AQ_CAP_ID_NPAR_ACTIVE: p->npar_enable = number; break; - case I40E_DEV_FUNC_CAP_OS2BMC: + case I40E_AQ_CAP_ID_OS2BMC_CAP: p->os2bmc = number; break; - case I40E_DEV_FUNC_CAP_VALID_FUNC: + case I40E_AQ_CAP_ID_FUNCTIONS_VALID: p->valid_functions = number; break; - case I40E_DEV_FUNC_CAP_SRIOV_1_1: + case I40E_AQ_CAP_ID_SRIOV: if (number == 1) p->sr_iov_1_1 = true; break; - case I40E_DEV_FUNC_CAP_VF: + case I40E_AQ_CAP_ID_VF: p->num_vfs = number; p->vf_base_id = logical_id; break; - case I40E_DEV_FUNC_CAP_VMDQ: + case I40E_AQ_CAP_ID_VMDQ: if (number == 1) p->vmdq = true; break; - case I40E_DEV_FUNC_CAP_802_1_QBG: + case I40E_AQ_CAP_ID_8021QBG: if (number == 1) p->evb_802_1_qbg = true; break; - case I40E_DEV_FUNC_CAP_802_1_QBH: + case I40E_AQ_CAP_ID_8021QBR: if (number == 1) p->evb_802_1_qbh = true; break; - case I40E_DEV_FUNC_CAP_VSI: + case I40E_AQ_CAP_ID_VSI: p->num_vsis = number; break; - case I40E_DEV_FUNC_CAP_DCB: + case I40E_AQ_CAP_ID_DCB: if (number == 1) { p->dcb = true; p->enabled_tcmap = logical_id; p->maxtc = phys_id; } break; - case I40E_DEV_FUNC_CAP_FCOE: + case I40E_AQ_CAP_ID_FCOE: if (number == 1) p->fcoe = true; break; - case I40E_DEV_FUNC_CAP_ISCSI: + case I40E_AQ_CAP_ID_ISCSI: if (number == 1) p->iscsi = true; break; - case I40E_DEV_FUNC_CAP_RSS: + case I40E_AQ_CAP_ID_RSS: p->rss = true; p->rss_table_size = number; p->rss_table_entry_width = logical_id; break; - case I40E_DEV_FUNC_CAP_RX_QUEUES: + case I40E_AQ_CAP_ID_RXQ: p->num_rx_qp = number; p->base_queue = phys_id; break; - case I40E_DEV_FUNC_CAP_TX_QUEUES: + case I40E_AQ_CAP_ID_TXQ: p->num_tx_qp = number; p->base_queue = phys_id; break; - case I40E_DEV_FUNC_CAP_MSIX: + case I40E_AQ_CAP_ID_MSIX: p->num_msix_vectors = number; break; - case I40E_DEV_FUNC_CAP_MSIX_VF: + case I40E_AQ_CAP_ID_VF_MSIX: p->num_msix_vectors_vf = number; break; - case I40E_DEV_FUNC_CAP_FLEX10: + case I40E_AQ_CAP_ID_FLEX10: if (major_rev == 1) { if (number == 1) { p->flex10_enable = true; @@ -2920,38 +3093,38 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, p->flex10_mode = logical_id; p->flex10_status = phys_id; break; - case I40E_DEV_FUNC_CAP_CEM: + case I40E_AQ_CAP_ID_CEM: if (number == 1) p->mgmt_cem = true; break; - case I40E_DEV_FUNC_CAP_IWARP: + case I40E_AQ_CAP_ID_IWARP: if (number == 1) p->iwarp = true; break; - case I40E_DEV_FUNC_CAP_LED: + case I40E_AQ_CAP_ID_LED: if (phys_id < I40E_HW_CAP_MAX_GPIO) p->led[phys_id] = true; break; - case I40E_DEV_FUNC_CAP_SDP: + case I40E_AQ_CAP_ID_SDP: if (phys_id < I40E_HW_CAP_MAX_GPIO) p->sdp[phys_id] = true; break; - case I40E_DEV_FUNC_CAP_MDIO: + case I40E_AQ_CAP_ID_MDIO: if (number == 1) { p->mdio_port_num = phys_id; p->mdio_port_mode = logical_id; } break; - case I40E_DEV_FUNC_CAP_IEEE_1588: + case I40E_AQ_CAP_ID_1588: if (number == 1) p->ieee_1588 = true; break; - case I40E_DEV_FUNC_CAP_FLOW_DIRECTOR: + case I40E_AQ_CAP_ID_FLOW_DIRECTOR: p->fd = true; p->fd_filters_guaranteed = number; p->fd_filters_best_effort = logical_id; break; - case I40E_DEV_FUNC_CAP_WR_CSR_PROT: + case I40E_AQ_CAP_ID_WSR_PROT: p->wr_csr_prot = (u64)number; p->wr_csr_prot |= (u64)logical_id << 32; break; @@ -3709,7 +3882,7 @@ i40e_status i40e_set_filter_control(struct i40e_hw *hw, return ret; /* Read the PF Queue Filter control register */ - val = rd32(hw, I40E_PFQF_CTL_0); + val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0); /* Program required PE hash buckets for the PF */ val &= ~I40E_PFQF_CTL_0_PEHSIZE_MASK; @@ -3746,7 +3919,7 @@ i40e_status i40e_set_filter_control(struct i40e_hw *hw, if (settings->enable_macvlan) val |= I40E_PFQF_CTL_0_MACVLAN_ENA_MASK; - wr32(hw, I40E_PFQF_CTL_0, val); + i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, val); return 0; } @@ -4073,3 +4246,454 @@ i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw, return status; } + +/** + * i40e_read_phy_register + * @hw: pointer to the HW structure + * @page: registers page number + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Reads specified PHY register value + **/ +i40e_status i40e_read_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, + u16 *value) +{ + i40e_status status = I40E_ERR_TIMEOUT; + u32 command = 0; + u16 retry = 1000; + u8 port_num = hw->func_caps.mdio_port_num; + + command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) | + (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_ADDRESS) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + + if (status) { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't write command to external PHY.\n"); + goto phy_read_end; + } + + command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_READ) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + status = I40E_ERR_TIMEOUT; + retry = 1000; + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + + if (!status) { + command = rd32(hw, I40E_GLGEN_MSRWD(port_num)); + *value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >> + I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT; + } else { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't read register value from external PHY.\n"); + } + +phy_read_end: + return status; +} + +/** + * i40e_write_phy_register + * @hw: pointer to the HW structure + * @page: registers page number + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Writes value to specified PHY register + **/ +i40e_status i40e_write_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, + u16 value) +{ + i40e_status status = I40E_ERR_TIMEOUT; + u32 command = 0; + u16 retry = 1000; + u8 port_num = hw->func_caps.mdio_port_num; + + command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) | + (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_ADDRESS) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + if (status) { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't write command to external PHY.\n"); + goto phy_write_end; + } + + command = value << I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT; + wr32(hw, I40E_GLGEN_MSRWD(port_num), command); + + command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_WRITE) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + status = I40E_ERR_TIMEOUT; + retry = 1000; + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + +phy_write_end: + return status; +} + +/** + * i40e_get_phy_address + * @hw: pointer to the HW structure + * @dev_num: PHY port num that address we want + * @phy_addr: Returned PHY address + * + * Gets PHY address for current port + **/ +u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num) +{ + u8 port_num = hw->func_caps.mdio_port_num; + u32 reg_val = rd32(hw, I40E_GLGEN_MDIO_I2C_SEL(port_num)); + + return (u8)(reg_val >> ((dev_num + 1) * 5)) & 0x1f; +} + +/** + * i40e_blink_phy_led + * @hw: pointer to the HW structure + * @time: time how long led will blinks in secs + * @interval: gap between LED on and off in msecs + * + * Blinks PHY link LED + **/ +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval) +{ + i40e_status status = 0; + u32 i; + u16 led_ctl; + u16 gpio_led_port; + u16 led_reg; + u16 led_addr = I40E_PHY_LED_PROV_REG_1; + u8 phy_addr = 0; + u8 port_num; + + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + + for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, + led_addr++) { + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); + if (status) + goto phy_blinking_end; + led_ctl = led_reg; + if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { + led_reg = 0; + status = i40e_write_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + led_reg); + if (status) + goto phy_blinking_end; + break; + } + } + + if (time > 0 && interval > 0) { + for (i = 0; i < time * 1000; i += interval) { + status = i40e_read_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + &led_reg); + if (status) + goto restore_config; + if (led_reg & I40E_PHY_LED_MANUAL_ON) + led_reg = 0; + else + led_reg = I40E_PHY_LED_MANUAL_ON; + status = i40e_write_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + led_reg); + if (status) + goto restore_config; + msleep(interval); + } + } + +restore_config: + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, + phy_addr, led_ctl); + +phy_blinking_end: + return status; +} + +/** + * i40e_led_get_phy - return current on/off mode + * @hw: pointer to the hw struct + * @led_addr: address of led register to use + * @val: original value of register to use + * + **/ +i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, + u16 *val) +{ + i40e_status status = 0; + u16 gpio_led_port; + u8 phy_addr = 0; + u16 reg_val; + u16 temp_addr; + u8 port_num; + u32 i; + + temp_addr = I40E_PHY_LED_PROV_REG_1; + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + + for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, + temp_addr++) { + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, + temp_addr, phy_addr, ®_val); + if (status) + return status; + *val = reg_val; + if (reg_val & I40E_PHY_LED_LINK_MODE_MASK) { + *led_addr = temp_addr; + break; + } + } + return status; +} + +/** + * i40e_led_set_phy + * @hw: pointer to the HW structure + * @on: true or false + * @mode: original val plus bit for set or ignore + * Set led's on or off when controlled by the PHY + * + **/ +i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on, + u16 led_addr, u32 mode) +{ + i40e_status status = 0; + u16 led_ctl = 0; + u16 led_reg = 0; + u8 phy_addr = 0; + u8 port_num; + u32 i; + + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, + phy_addr, &led_reg); + if (status) + return status; + led_ctl = led_reg; + if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { + led_reg = 0; + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_reg); + if (status) + return status; + } + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); + if (status) + goto restore_config; + if (on) + led_reg = I40E_PHY_LED_MANUAL_ON; + else + led_reg = 0; + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_reg); + if (status) + goto restore_config; + if (mode & I40E_PHY_LED_MODE_ORIG) { + led_ctl = (mode & I40E_PHY_LED_MODE_MASK); + status = i40e_write_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_ctl); + } + return status; +restore_config: + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, + phy_addr, led_ctl); + return status; +} + +/** + * i40e_aq_rx_ctl_read_register - use FW to read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: ptr to register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to read the Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40e_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + if (!reg_val) + return I40E_ERR_PARAM; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_read); + + cmd_resp->address = cpu_to_le32(reg_addr); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + if (status == 0) + *reg_val = le32_to_cpu(cmd_resp->value); + + return status; +} + +/** + * i40e_read_rx_ctl - read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + **/ +u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + u32 val = 0; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40e_aq_rx_ctl_read_register(hw, reg_addr, &val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + val = rd32(hw, reg_addr); + + return val; +} + +/** + * i40e_aq_rx_ctl_write_register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to write to an Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_write); + + cmd->address = cpu_to_le32(reg_addr); + cmd->value = cpu_to_le32(reg_val); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40e_write_rx_ctl - write to an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + **/ +void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40e_aq_rx_ctl_write_register(hw, reg_addr, + reg_val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + wr32(hw, reg_addr, reg_val); +} diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 2691277c0055..0fab3a9b51d9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -380,17 +380,20 @@ static void i40e_parse_cee_app_tlv(struct i40e_cee_feat_tlv *tlv, { u16 length, typelength, offset = 0; struct i40e_cee_app_prio *app; - u8 i, up, selector; + u8 i; typelength = ntohs(tlv->hdr.typelen); length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >> I40E_LLDP_TLV_LEN_SHIFT); dcbcfg->numapps = length / sizeof(*app); + if (!dcbcfg->numapps) return; for (i = 0; i < dcbcfg->numapps; i++) { + u8 up, selector; + app = (struct i40e_cee_app_prio *)(tlv->tlvinfo + offset); for (up = 0; up < I40E_MAX_USER_PRIORITY; up++) { if (app->prio_map & BIT(up)) @@ -400,13 +403,17 @@ static void i40e_parse_cee_app_tlv(struct i40e_cee_feat_tlv *tlv, /* Get Selector from lower 2 bits, and convert to IEEE */ selector = (app->upper_oui_sel & I40E_CEE_APP_SELECTOR_MASK); - if (selector == I40E_CEE_APP_SEL_ETHTYPE) + switch (selector) { + case I40E_CEE_APP_SEL_ETHTYPE: dcbcfg->app[i].selector = I40E_APP_SEL_ETHTYPE; - else if (selector == I40E_CEE_APP_SEL_TCPIP) + break; + case I40E_CEE_APP_SEL_TCPIP: dcbcfg->app[i].selector = I40E_APP_SEL_TCPIP; - else + break; + default: /* Keep selector as it is for unknown types */ dcbcfg->app[i].selector = selector; + } dcbcfg->app[i].protocolid = ntohs(app->protocol); /* Move to next app */ @@ -814,13 +821,15 @@ i40e_status i40e_get_dcb_config(struct i40e_hw *hw) struct i40e_aqc_get_cee_dcb_cfg_resp cee_cfg; struct i40e_aqc_get_cee_dcb_cfg_v1_resp cee_v1_cfg; - /* If Firmware version < v4.33 IEEE only */ - if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || - (hw->aq.fw_maj_ver < 4)) + /* If Firmware version < v4.33 on X710/XL710, IEEE only */ + if ((hw->mac.type == I40E_MAC_XL710) && + (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || + (hw->aq.fw_maj_ver < 4))) return i40e_get_ieee_dcb_config(hw); - /* If Firmware version == v4.33 use old CEE struct */ - if ((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver == 33)) { + /* If Firmware version == v4.33 on X710/XL710, use old CEE struct */ + if ((hw->mac.type == I40E_MAC_XL710) && + ((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver == 33))) { ret = i40e_aq_get_cee_dcb_config(hw, &cee_v1_cfg, sizeof(cee_v1_cfg), NULL); if (!ret) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 10744a698d6f..0c97733d253c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -61,257 +61,13 @@ static struct i40e_veb *i40e_dbg_find_veb(struct i40e_pf *pf, int seid) { int i; - if ((seid < I40E_BASE_VEB_SEID) || - (seid > (I40E_BASE_VEB_SEID + I40E_MAX_VEB))) - dev_info(&pf->pdev->dev, "%d: bad seid\n", seid); - else - for (i = 0; i < I40E_MAX_VEB; i++) - if (pf->veb[i] && pf->veb[i]->seid == seid) - return pf->veb[i]; + for (i = 0; i < I40E_MAX_VEB; i++) + if (pf->veb[i] && pf->veb[i]->seid == seid) + return pf->veb[i]; return NULL; } /************************************************************** - * dump - * The dump entry in debugfs is for getting a data snapshow of - * the driver's current configuration and runtime details. - * When the filesystem entry is written, a snapshot is taken. - * When the entry is read, the most recent snapshot data is dumped. - **************************************************************/ -static char *i40e_dbg_dump_buf; -static ssize_t i40e_dbg_dump_data_len; -static ssize_t i40e_dbg_dump_buffer_len; - -/** - * i40e_dbg_dump_read - read the dump data - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - **/ -static ssize_t i40e_dbg_dump_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - int bytes_not_copied; - int len; - - /* is *ppos bigger than the available data? */ - if (*ppos >= i40e_dbg_dump_data_len || !i40e_dbg_dump_buf) - return 0; - - /* be sure to not read beyond the end of available data */ - len = min_t(int, count, (i40e_dbg_dump_data_len - *ppos)); - - bytes_not_copied = copy_to_user(buffer, &i40e_dbg_dump_buf[*ppos], len); - if (bytes_not_copied) - return -EFAULT; - - *ppos += len; - return len; -} - -/** - * i40e_dbg_prep_dump_buf - * @pf: the PF we're working with - * @buflen: the desired buffer length - * - * Return positive if success, 0 if failed - **/ -static int i40e_dbg_prep_dump_buf(struct i40e_pf *pf, int buflen) -{ - /* if not already big enough, prep for re alloc */ - if (i40e_dbg_dump_buffer_len && i40e_dbg_dump_buffer_len < buflen) { - kfree(i40e_dbg_dump_buf); - i40e_dbg_dump_buffer_len = 0; - i40e_dbg_dump_buf = NULL; - } - - /* get a new buffer if needed */ - if (!i40e_dbg_dump_buf) { - i40e_dbg_dump_buf = kzalloc(buflen, GFP_KERNEL); - if (i40e_dbg_dump_buf != NULL) - i40e_dbg_dump_buffer_len = buflen; - } - - return i40e_dbg_dump_buffer_len; -} - -/** - * i40e_dbg_dump_write - trigger a datadump snapshot - * @filp: the opened file - * @buffer: where to find the user's data - * @count: the length of the user's data - * @ppos: file position offset - * - * Any write clears the stats - **/ -static ssize_t i40e_dbg_dump_write(struct file *filp, - const char __user *buffer, - size_t count, loff_t *ppos) -{ - struct i40e_pf *pf = filp->private_data; - bool seid_found = false; - long seid = -1; - int buflen = 0; - int i, ret; - int len; - u8 *p; - - /* don't allow partial writes */ - if (*ppos != 0) - return 0; - - /* decode the SEID given to be dumped */ - ret = kstrtol_from_user(buffer, count, 0, &seid); - - if (ret) { - dev_info(&pf->pdev->dev, "bad seid value\n"); - } else if (seid == 0) { - seid_found = true; - - kfree(i40e_dbg_dump_buf); - i40e_dbg_dump_buffer_len = 0; - i40e_dbg_dump_data_len = 0; - i40e_dbg_dump_buf = NULL; - dev_info(&pf->pdev->dev, "debug buffer freed\n"); - - } else if (seid == pf->pf_seid || seid == 1) { - seid_found = true; - - buflen = sizeof(struct i40e_pf); - buflen += (sizeof(struct i40e_aq_desc) - * (pf->hw.aq.num_arq_entries + pf->hw.aq.num_asq_entries)); - - if (i40e_dbg_prep_dump_buf(pf, buflen)) { - p = i40e_dbg_dump_buf; - - len = sizeof(struct i40e_pf); - memcpy(p, pf, len); - p += len; - - len = (sizeof(struct i40e_aq_desc) - * pf->hw.aq.num_asq_entries); - memcpy(p, pf->hw.aq.asq.desc_buf.va, len); - p += len; - - len = (sizeof(struct i40e_aq_desc) - * pf->hw.aq.num_arq_entries); - memcpy(p, pf->hw.aq.arq.desc_buf.va, len); - p += len; - - i40e_dbg_dump_data_len = buflen; - dev_info(&pf->pdev->dev, - "PF seid %ld dumped %d bytes\n", - seid, (int)i40e_dbg_dump_data_len); - } - } else if (seid >= I40E_BASE_VSI_SEID) { - struct i40e_vsi *vsi = NULL; - struct i40e_mac_filter *f; - int filter_count = 0; - - mutex_lock(&pf->switch_mutex); - vsi = i40e_dbg_find_vsi(pf, seid); - if (!vsi) { - mutex_unlock(&pf->switch_mutex); - goto write_exit; - } - - buflen = sizeof(struct i40e_vsi); - buflen += sizeof(struct i40e_q_vector) * vsi->num_q_vectors; - buflen += sizeof(struct i40e_ring) * 2 * vsi->num_queue_pairs; - buflen += sizeof(struct i40e_tx_buffer) * vsi->num_queue_pairs; - buflen += sizeof(struct i40e_rx_buffer) * vsi->num_queue_pairs; - list_for_each_entry(f, &vsi->mac_filter_list, list) - filter_count++; - buflen += sizeof(struct i40e_mac_filter) * filter_count; - - if (i40e_dbg_prep_dump_buf(pf, buflen)) { - p = i40e_dbg_dump_buf; - seid_found = true; - - len = sizeof(struct i40e_vsi); - memcpy(p, vsi, len); - p += len; - - if (vsi->num_q_vectors) { - len = (sizeof(struct i40e_q_vector) - * vsi->num_q_vectors); - memcpy(p, vsi->q_vectors, len); - p += len; - } - - if (vsi->num_queue_pairs) { - len = (sizeof(struct i40e_ring) * - vsi->num_queue_pairs); - memcpy(p, vsi->tx_rings, len); - p += len; - memcpy(p, vsi->rx_rings, len); - p += len; - } - - if (vsi->tx_rings[0]) { - len = sizeof(struct i40e_tx_buffer); - for (i = 0; i < vsi->num_queue_pairs; i++) { - memcpy(p, vsi->tx_rings[i]->tx_bi, len); - p += len; - } - len = sizeof(struct i40e_rx_buffer); - for (i = 0; i < vsi->num_queue_pairs; i++) { - memcpy(p, vsi->rx_rings[i]->rx_bi, len); - p += len; - } - } - - /* macvlan filter list */ - len = sizeof(struct i40e_mac_filter); - list_for_each_entry(f, &vsi->mac_filter_list, list) { - memcpy(p, f, len); - p += len; - } - - i40e_dbg_dump_data_len = buflen; - dev_info(&pf->pdev->dev, - "VSI seid %ld dumped %d bytes\n", - seid, (int)i40e_dbg_dump_data_len); - } - mutex_unlock(&pf->switch_mutex); - } else if (seid >= I40E_BASE_VEB_SEID) { - struct i40e_veb *veb = NULL; - - mutex_lock(&pf->switch_mutex); - veb = i40e_dbg_find_veb(pf, seid); - if (!veb) { - mutex_unlock(&pf->switch_mutex); - goto write_exit; - } - - buflen = sizeof(struct i40e_veb); - if (i40e_dbg_prep_dump_buf(pf, buflen)) { - seid_found = true; - memcpy(i40e_dbg_dump_buf, veb, buflen); - i40e_dbg_dump_data_len = buflen; - dev_info(&pf->pdev->dev, - "VEB seid %ld dumped %d bytes\n", - seid, (int)i40e_dbg_dump_data_len); - } - mutex_unlock(&pf->switch_mutex); - } - -write_exit: - if (!seid_found) - dev_info(&pf->pdev->dev, "unknown seid %ld\n", seid); - - return count; -} - -static const struct file_operations i40e_dbg_dump_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = i40e_dbg_dump_read, - .write = i40e_dbg_dump_write, -}; - -/************************************************************** * command * The command entry in debugfs is for giving the driver commands * to be executed - these may be for changing the internal switch @@ -379,19 +135,27 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) return; } dev_info(&pf->pdev->dev, "vsi seid %d\n", seid); - if (vsi->netdev) - dev_info(&pf->pdev->dev, - " netdev: name = %s\n", - vsi->netdev->name); + if (vsi->netdev) { + struct net_device *nd = vsi->netdev; + + dev_info(&pf->pdev->dev, " netdev: name = %s, state = %lu, flags = 0x%08x\n", + nd->name, nd->state, nd->flags); + dev_info(&pf->pdev->dev, " features = 0x%08lx\n", + (unsigned long int)nd->features); + dev_info(&pf->pdev->dev, " hw_features = 0x%08lx\n", + (unsigned long int)nd->hw_features); + dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n", + (unsigned long int)nd->vlan_features); + } if (vsi->active_vlans) dev_info(&pf->pdev->dev, " vlgrp: & = %p\n", vsi->active_vlans); dev_info(&pf->pdev->dev, - " netdev_registered = %i, current_netdev_flags = 0x%04x, state = %li flags = 0x%08lx\n", - vsi->netdev_registered, - vsi->current_netdev_flags, vsi->state, vsi->flags); + " state = %li flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", + vsi->state, vsi->flags, + vsi->netdev_registered, vsi->current_netdev_flags); if (vsi == pf->vsi[pf->lan_vsi]) - dev_info(&pf->pdev->dev, "MAC address: %pM SAN MAC: %pM Port MAC: %pM\n", + dev_info(&pf->pdev->dev, " MAC address: %pM SAN MAC: %pM Port MAC: %pM\n", pf->hw.mac.addr, pf->hw.mac.san_addr, pf->hw.mac.port_addr); @@ -511,7 +275,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->dtype); dev_info(&pf->pdev->dev, " rx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n", - i, rx_ring->hsplit, + i, ring_is_ps_enabled(rx_ring), rx_ring->next_to_use, rx_ring->next_to_clean, rx_ring->ring_active); @@ -526,6 +290,11 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->rx_stats.alloc_page_failed, rx_ring->rx_stats.alloc_buff_failed); dev_info(&pf->pdev->dev, + " rx_rings[%i]: rx_stats: realloc_count = %lld, page_reuse_count = %lld\n", + i, + rx_ring->rx_stats.realloc_count, + rx_ring->rx_stats.page_reuse_count); + dev_info(&pf->pdev->dev, " rx_rings[%i]: size = %i, dma = 0x%08lx\n", i, rx_ring->size, (unsigned long int)rx_ring->dma); @@ -533,6 +302,10 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " rx_rings[%i]: vsi = %p, q_vector = %p\n", i, rx_ring->vsi, rx_ring->q_vector); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: rx_itr_setting = %d (%s)\n", + i, rx_ring->rx_itr_setting, + ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed"); } for (i = 0; i < vsi->num_queue_pairs; i++) { struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); @@ -557,8 +330,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " tx_rings[%i]: dtype = %d\n", i, tx_ring->dtype); dev_info(&pf->pdev->dev, - " tx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n", - i, tx_ring->hsplit, + " tx_rings[%i]: next_to_use = %d, next_to_clean = %d, ring_active = %i\n", + i, tx_ring->next_to_use, tx_ring->next_to_clean, tx_ring->ring_active); @@ -583,14 +356,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " tx_rings[%i]: DCB tc = %d\n", i, tx_ring->dcb_tc); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: tx_itr_setting = %d (%s)\n", + i, tx_ring->tx_itr_setting, + ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed"); } rcu_read_unlock(); dev_info(&pf->pdev->dev, - " work_limit = %d, rx_itr_setting = %d (%s), tx_itr_setting = %d (%s)\n", - vsi->work_limit, vsi->rx_itr_setting, - ITR_IS_DYNAMIC(vsi->rx_itr_setting) ? "dynamic" : "fixed", - vsi->tx_itr_setting, - ITR_IS_DYNAMIC(vsi->tx_itr_setting) ? "dynamic" : "fixed"); + " work_limit = %d\n", + vsi->work_limit); dev_info(&pf->pdev->dev, " max_frame = %d, rx_hdr_len = %d, rx_buf_len = %d dtype = %d\n", vsi->max_frame, vsi->rx_hdr_len, vsi->rx_buf_len, vsi->dtype); @@ -815,20 +589,20 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, if (!is_rx_ring) { txd = I40E_TX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03i] = 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx\n", i, txd->buffer_addr, txd->cmd_type_offset_bsz); } else if (sizeof(union i40e_rx_desc) == sizeof(union i40e_16byte_rx_desc)) { rxd = I40E_RX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03i] = 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx\n", i, rxd->read.pkt_addr, rxd->read.hdr_addr); } else { rxd = I40E_RX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03i] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", i, rxd->read.pkt_addr, rxd->read.hdr_addr, rxd->read.rsvd1, rxd->read.rsvd2); @@ -843,20 +617,20 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, if (!is_rx_ring) { txd = I40E_TX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i tx ring = %02i d[%03i] = 0x%016llx 0x%016llx\n", + "vsi = %02i tx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, txd->buffer_addr, txd->cmd_type_offset_bsz); } else if (sizeof(union i40e_rx_desc) == sizeof(union i40e_16byte_rx_desc)) { rxd = I40E_RX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i rx ring = %02i d[%03i] = 0x%016llx 0x%016llx\n", + "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, rxd->read.pkt_addr, rxd->read.hdr_addr); } else { rxd = I40E_RX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i rx ring = %02i d[%03i] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, rxd->read.pkt_addr, rxd->read.hdr_addr, rxd->read.rsvd1, rxd->read.rsvd2); @@ -918,12 +692,6 @@ static void i40e_dbg_dump_veb_seid(struct i40e_pf *pf, int seid) { struct i40e_veb *veb; - if ((seid < I40E_BASE_VEB_SEID) || - (seid >= (I40E_MAX_VEB + I40E_BASE_VEB_SEID))) { - dev_info(&pf->pdev->dev, "%d: bad seid\n", seid); - return; - } - veb = i40e_dbg_find_veb(pf, seid); if (!veb) { dev_info(&pf->pdev->dev, "can't find veb %d\n", seid); @@ -2202,11 +1970,6 @@ void i40e_dbg_pf_init(struct i40e_pf *pf) if (!pfile) goto create_failed; - pfile = debugfs_create_file("dump", 0600, pf->i40e_dbg_pf, pf, - &i40e_dbg_dump_fops); - if (!pfile) - goto create_failed; - pfile = debugfs_create_file("netdev_ops", 0600, pf->i40e_dbg_pf, pf, &i40e_dbg_netdev_ops_fops); if (!pfile) @@ -2227,9 +1990,6 @@ void i40e_dbg_pf_exit(struct i40e_pf *pf) { debugfs_remove_recursive(pf->i40e_dbg_pf); pf->i40e_dbg_pf = NULL; - - kfree(i40e_dbg_dump_buf); - i40e_dbg_dump_buf = NULL; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h index 448ef4c17efb..99257fcd1ef4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h @@ -39,13 +39,11 @@ #define I40E_DEV_ID_20G_KR2 0x1587 #define I40E_DEV_ID_20G_KR2_A 0x1588 #define I40E_DEV_ID_10G_BASE_T4 0x1589 -#define I40E_DEV_ID_VF 0x154C -#define I40E_DEV_ID_VF_HV 0x1571 +#define I40E_DEV_ID_KX_X722 0x37CE +#define I40E_DEV_ID_QSFP_X722 0x37CF #define I40E_DEV_ID_SFP_X722 0x37D0 #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 -#define I40E_DEV_ID_X722_VF 0x37CD -#define I40E_DEV_ID_X722_VF_HV 0x37D9 #define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \ (d) == I40E_DEV_ID_QSFP_B || \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 29d5833e24a3..784b1659457a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -89,6 +89,9 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), I40E_VSI_STAT("tx_linearize", tx_linearize), I40E_VSI_STAT("tx_force_wb", tx_force_wb), + I40E_VSI_STAT("tx_lost_interrupt", tx_lost_interrupt), + I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed), + I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, @@ -143,6 +146,7 @@ static struct i40e_stats i40e_gstrings_stats[] = { I40E_PF_STAT("rx_oversize", stats.rx_oversize), I40E_PF_STAT("rx_jabber", stats.rx_jabber), I40E_PF_STAT("VF_admin_queue_requests", vf_aq_requests), + I40E_PF_STAT("arq_overflows", arq_overflows), I40E_PF_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), I40E_PF_STAT("fdir_flush_cnt", fd_flush_cnt), I40E_PF_STAT("fdir_atr_match", stats.fd_atr_match), @@ -232,6 +236,7 @@ static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = { "flow-director-atr", "veb-stats", "packet-split", + "hw-atr-eviction", }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings) @@ -340,7 +345,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, SUPPORTED_1000baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) ecmd->advertising |= ADVERTISED_1000baseT_Full; - if (pf->hw.mac.type == I40E_MAC_X722) { + if (pf->flags & I40E_FLAG_100M_SGMII_CAPABLE) { ecmd->supported |= SUPPORTED_100baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) @@ -411,6 +416,10 @@ static void i40e_get_settings_link_down(struct i40e_hw *hw, if (pf->hw.mac.type == I40E_MAC_X722) { ecmd->supported |= SUPPORTED_100baseT_Full; ecmd->advertising |= ADVERTISED_100baseT_Full; + if (pf->flags & I40E_FLAG_100M_SGMII_CAPABLE) { + ecmd->supported |= SUPPORTED_100baseT_Full; + ecmd->advertising |= ADVERTISED_100baseT_Full; + } } } if (phy_types & I40E_CAP_PHY_TYPE_XAUI || @@ -996,16 +1005,19 @@ static int i40e_get_eeprom(struct net_device *netdev, /* check for NVMUpdate access method */ magic = hw->vendor_id | (hw->device_id << 16); if (eeprom->magic && eeprom->magic != magic) { - struct i40e_nvm_access *cmd; - int errno; + struct i40e_nvm_access *cmd = (struct i40e_nvm_access *)eeprom; + int errno = 0; /* make sure it is the right magic for NVMUpdate */ if ((eeprom->magic >> 16) != hw->device_id) - return -EINVAL; + errno = -EINVAL; + else if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) + errno = -EBUSY; + else + ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - cmd = (struct i40e_nvm_access *)eeprom; - ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - if (ret_val && (hw->debug_mask & I40E_DEBUG_NVM)) + if ((errno || ret_val) && (hw->debug_mask & I40E_DEBUG_NVM)) dev_info(&pf->pdev->dev, "NVMUpdate read failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n", ret_val, hw->aq.asq_last_status, errno, @@ -1089,27 +1101,25 @@ static int i40e_set_eeprom(struct net_device *netdev, struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_hw *hw = &np->vsi->back->hw; struct i40e_pf *pf = np->vsi->back; - struct i40e_nvm_access *cmd; + struct i40e_nvm_access *cmd = (struct i40e_nvm_access *)eeprom; int ret_val = 0; - int errno; + int errno = 0; u32 magic; /* normal ethtool set_eeprom is not supported */ magic = hw->vendor_id | (hw->device_id << 16); if (eeprom->magic == magic) - return -EOPNOTSUPP; - + errno = -EOPNOTSUPP; /* check for NVMUpdate access method */ - if (!eeprom->magic || (eeprom->magic >> 16) != hw->device_id) - return -EINVAL; - - if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || - test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) - return -EBUSY; + else if (!eeprom->magic || (eeprom->magic >> 16) != hw->device_id) + errno = -EINVAL; + else if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) + errno = -EBUSY; + else + ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - cmd = (struct i40e_nvm_access *)eeprom; - ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - if (ret_val && (hw->debug_mask & I40E_DEBUG_NVM)) + if ((errno || ret_val) && (hw->debug_mask & I40E_DEBUG_NVM)) dev_info(&pf->pdev->dev, "NVMUpdate write failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n", ret_val, hw->aq.asq_last_status, errno, @@ -1816,28 +1826,52 @@ static int i40e_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { struct i40e_netdev_priv *np = netdev_priv(netdev); + i40e_status ret = 0; struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; int blink_freq = 2; + u16 temp_status; switch (state) { case ETHTOOL_ID_ACTIVE: - pf->led_status = i40e_led_get(hw); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) { + pf->led_status = i40e_led_get(hw); + } else { + i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_PORT, NULL); + ret = i40e_led_get_phy(hw, &temp_status, + &pf->phy_led_val); + pf->led_status = temp_status; + } return blink_freq; case ETHTOOL_ID_ON: - i40e_led_set(hw, 0xF, false); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) + i40e_led_set(hw, 0xf, false); + else + ret = i40e_led_set_phy(hw, true, pf->led_status, 0); break; case ETHTOOL_ID_OFF: - i40e_led_set(hw, 0x0, false); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) + i40e_led_set(hw, 0x0, false); + else + ret = i40e_led_set_phy(hw, false, pf->led_status, 0); break; case ETHTOOL_ID_INACTIVE: - i40e_led_set(hw, pf->led_status, false); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) { + i40e_led_set(hw, false, pf->led_status); + } else { + ret = i40e_led_set_phy(hw, false, pf->led_status, + (pf->phy_led_val | + I40E_PHY_LED_MODE_ORIG)); + i40e_aq_set_phy_debug(hw, 0, NULL); + } break; default: break; } - - return 0; + if (ret) + return -ENOENT; + else + return 0; } /* NOTE: i40e hardware uses a conversion factor of 2 for Interrupt @@ -1845,8 +1879,9 @@ static int i40e_set_phys_id(struct net_device *netdev, * 125us (8000 interrupts per second) == ITR(62) */ -static int i40e_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) +static int __i40e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; @@ -1854,14 +1889,24 @@ static int i40e_get_coalesce(struct net_device *netdev, ec->tx_max_coalesced_frames_irq = vsi->work_limit; ec->rx_max_coalesced_frames_irq = vsi->work_limit; - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) + /* rx and tx usecs has per queue value. If user doesn't specify the queue, + * return queue 0's value to represent. + */ + if (queue < 0) { + queue = 0; + } else if (queue >= vsi->num_queue_pairs) { + return -EINVAL; + } + + if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) + if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite * fits the original intent of the ethtool variable, @@ -1874,15 +1919,63 @@ static int i40e_get_coalesce(struct net_device *netdev, return 0; } -static int i40e_set_coalesce(struct net_device *netdev, +static int i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { - struct i40e_netdev_priv *np = netdev_priv(netdev); + return __i40e_get_coalesce(netdev, ec, -1); +} + +static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40e_get_coalesce(netdev, ec, queue); +} + +static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; struct i40e_q_vector *q_vector; + u16 vector, intrl; + + intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit); + + vsi->rx_rings[queue]->rx_itr_setting = ec->rx_coalesce_usecs; + vsi->tx_rings[queue]->tx_itr_setting = ec->tx_coalesce_usecs; + + if (ec->use_adaptive_rx_coalesce) + vsi->rx_rings[queue]->rx_itr_setting |= I40E_ITR_DYNAMIC; + else + vsi->rx_rings[queue]->rx_itr_setting &= ~I40E_ITR_DYNAMIC; + + if (ec->use_adaptive_tx_coalesce) + vsi->tx_rings[queue]->tx_itr_setting |= I40E_ITR_DYNAMIC; + else + vsi->tx_rings[queue]->tx_itr_setting &= ~I40E_ITR_DYNAMIC; + + q_vector = vsi->rx_rings[queue]->q_vector; + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[queue]->rx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + + q_vector = vsi->tx_rings[queue]->q_vector; + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[queue]->tx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + + wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); + i40e_flush(hw); +} + +static int __i40e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u16 vector; int i; if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) @@ -1899,57 +1992,53 @@ static int i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - vector = vsi->base_vector; - if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) { - vsi->rx_itr_setting = ec->rx_coalesce_usecs; - } else if (ec->rx_coalesce_usecs == 0) { - vsi->rx_itr_setting = ec->rx_coalesce_usecs; + if (ec->rx_coalesce_usecs == 0) { if (ec->use_adaptive_rx_coalesce) netif_info(pf, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else { - netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); - return -EINVAL; + } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); + return -EINVAL; } vsi->int_rate_limit = ec->rx_coalesce_usecs_high; - if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) { - vsi->tx_itr_setting = ec->tx_coalesce_usecs; - } else if (ec->tx_coalesce_usecs == 0) { - vsi->tx_itr_setting = ec->tx_coalesce_usecs; + if (ec->tx_coalesce_usecs == 0) { if (ec->use_adaptive_tx_coalesce) netif_info(pf, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); + } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); + return -EINVAL; + } + + /* rx and tx usecs has per queue value. If user doesn't specify the queue, + * apply to all queues. + */ + if (queue < 0) { + for (i = 0; i < vsi->num_queue_pairs; i++) + i40e_set_itr_per_queue(vsi, ec, i); + } else if (queue < vsi->num_queue_pairs) { + i40e_set_itr_per_queue(vsi, ec, queue); } else { - netif_info(pf, drv, netdev, - "Invalid value, tx-usecs range is 0-8160\n"); + netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", + vsi->num_queue_pairs - 1); return -EINVAL; } - if (ec->use_adaptive_rx_coalesce) - vsi->rx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC; - - if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; - - for (i = 0; i < vsi->num_q_vectors; i++, vector++) { - u16 intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit); + return 0; +} - q_vector = vsi->q_vectors[i]; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); - wr32(hw, I40E_PFINT_ITRN(0, vector - 1), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); - wr32(hw, I40E_PFINT_ITRN(1, vector - 1), q_vector->tx.itr); - wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); - i40e_flush(hw); - } +static int i40e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + return __i40e_set_coalesce(netdev, ec, -1); +} - return 0; +static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40e_set_coalesce(netdev, ec, queue); } /** @@ -2147,8 +2236,8 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) { struct i40e_hw *hw = &pf->hw; - u64 hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) | - ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32); + u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | + ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); /* RSS does not support anything other than hashing * to queues on src and dst IPs and ports @@ -2166,9 +2255,12 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case TCP_V4_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); break; default: @@ -2178,9 +2270,12 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case TCP_V6_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); break; default: @@ -2190,10 +2285,13 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case UDP_V4_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); break; @@ -2204,10 +2302,13 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case UDP_V6_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); break; @@ -2245,8 +2346,8 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) return -EINVAL; } - wr32(hw, I40E_PFQF_HENA(0), (u32)hena); - wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); i40e_flush(hw); /* Save setting for future output/update */ @@ -2712,6 +2813,8 @@ static u32 i40e_get_priv_flags(struct net_device *dev) I40E_PRIV_FLAGS_VEB_STATS : 0; ret_flags |= pf->flags & I40E_FLAG_RX_PS_ENABLED ? I40E_PRIV_FLAGS_PS : 0; + ret_flags |= pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE ? + 0 : I40E_PRIV_FLAGS_HW_ATR_EVICT; return ret_flags; } @@ -2763,10 +2866,21 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; } - if (flags & I40E_PRIV_FLAGS_VEB_STATS) + if ((flags & I40E_PRIV_FLAGS_VEB_STATS) && + !(pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) { pf->flags |= I40E_FLAG_VEB_STATS_ENABLED; - else + reset_required = true; + } else if (!(flags & I40E_PRIV_FLAGS_VEB_STATS) && + (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) { pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED; + reset_required = true; + } + + if ((flags & I40E_PRIV_FLAGS_HW_ATR_EVICT) && + (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)) + pf->auto_disable_flags &= ~I40E_FLAG_HW_ATR_EVICT_CAPABLE; + else + pf->auto_disable_flags |= I40E_FLAG_HW_ATR_EVICT_CAPABLE; /* if needed, issue reset to cause things to take effect */ if (reset_required) @@ -2812,6 +2926,8 @@ static const struct ethtool_ops i40e_ethtool_ops = { .get_ts_info = i40e_get_ts_info, .get_priv_flags = i40e_get_priv_flags, .set_priv_flags = i40e_set_priv_flags, + .get_per_queue_coalesce = i40e_get_per_queue_coalesce, + .set_per_queue_coalesce = i40e_set_per_queue_coalesce, }; void i40e_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index 579a46ca82df..8ad162c16f61 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -295,11 +295,11 @@ void i40e_init_pf_fcoe(struct i40e_pf *pf) } /* enable FCoE hash filter */ - val = rd32(hw, I40E_PFQF_HENA(1)); + val = i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)); val |= BIT(I40E_FILTER_PCTYPE_FCOE_OX - 32); val |= BIT(I40E_FILTER_PCTYPE_FCOE_RX - 32); val &= I40E_PFQF_HENA_PTYPE_ENA_MASK; - wr32(hw, I40E_PFQF_HENA(1), val); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), val); /* enable flag */ pf->flags |= I40E_FLAG_FCOE_ENABLED; @@ -317,11 +317,11 @@ void i40e_init_pf_fcoe(struct i40e_pf *pf) pf->filter_settings.fcoe_cntx_num = I40E_DMA_CNTX_SIZE_4K; /* Setup max frame with FCoE_MTU plus L2 overheads */ - val = rd32(hw, I40E_GLFCOE_RCTL); + val = i40e_read_rx_ctl(hw, I40E_GLFCOE_RCTL); val &= ~I40E_GLFCOE_RCTL_MAX_SIZE_MASK; val |= ((FCOE_MTU + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) << I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT); - wr32(hw, I40E_GLFCOE_RCTL, val); + i40e_write_rx_ctl(hw, I40E_GLFCOE_RCTL, val); dev_info(&pf->pdev->dev, "FCoE is supported.\n"); } @@ -1359,16 +1359,32 @@ static netdev_tx_t i40e_fcoe_xmit_frame(struct sk_buff *skb, struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping]; struct i40e_tx_buffer *first; u32 tx_flags = 0; + int fso, count; u8 hdr_len = 0; u8 sof = 0; u8 eof = 0; - int fso; if (i40e_fcoe_set_skb_header(skb)) goto out_drop; - if (!i40e_xmit_descriptor_count(skb, tx_ring)) + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = TXD_USE_COUNT(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* prepare the xmit flags */ if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) @@ -1457,7 +1473,7 @@ static const struct net_device_ops i40e_fcoe_netdev_ops = { .ndo_tx_timeout = i40e_tx_timeout, .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, - .ndo_setup_tc = i40e_setup_tc, + .ndo_setup_tc = __i40e_setup_tc, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8f3b53e0dc46..70d9605a0d9e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -28,11 +28,6 @@ #include <linux/of_net.h> #include <linux/pci.h> -#ifdef CONFIG_SPARC -#include <asm/idprom.h> -#include <asm/prom.h> -#endif - /* Local includes */ #include "i40e.h" #include "i40e_diag.h" @@ -51,7 +46,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 4 -#define DRV_VERSION_BUILD 8 +#define DRV_VERSION_BUILD 25 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -90,6 +85,8 @@ static const struct pci_device_id i40e_pci_tbl[] = { {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0}, @@ -110,6 +107,8 @@ MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static struct workqueue_struct *i40e_wq; + /** * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure @@ -295,7 +294,7 @@ static void i40e_service_event_schedule(struct i40e_pf *pf) if (!test_bit(__I40E_DOWN, &pf->state) && !test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) && !test_and_set_bit(__I40E_SERVICE_SCHED, &pf->state)) - schedule_work(&pf->service_task); + queue_work(i40e_wq, &pf->service_task); } /** @@ -769,7 +768,7 @@ static void i40e_update_fcoe_stats(struct i40e_vsi *vsi) if (vsi->type != I40E_VSI_FCOE) return; - idx = (pf->pf_seid - I40E_BASE_PF_SEID) + I40E_FCOE_PF_STAT_OFFSET; + idx = hw->pf_id + I40E_FCOE_PF_STAT_OFFSET; fs = &vsi->fcoe_stats; ofs = &vsi->fcoe_stats_offsets; @@ -820,6 +819,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ u32 tx_restart, tx_busy; + u64 tx_lost_interrupt; struct i40e_ring *p; u32 rx_page, rx_buf; u64 bytes, packets; @@ -845,6 +845,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rx_b = rx_p = 0; tx_b = tx_p = 0; tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; + tx_lost_interrupt = 0; rx_page = 0; rx_buf = 0; rcu_read_lock(); @@ -863,6 +864,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; tx_force_wb += p->tx_stats.tx_force_wb; + tx_lost_interrupt += p->tx_stats.tx_lost_interrupt; /* Rx queue is part of the same block as Tx queue */ p = &p[1]; @@ -881,6 +883,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_busy = tx_busy; vsi->tx_linearize = tx_linearize; vsi->tx_force_wb = tx_force_wb; + vsi->tx_lost_interrupt = tx_lost_interrupt; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; @@ -1368,7 +1371,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, f->changed = true; INIT_LIST_HEAD(&f->list); - list_add(&f->list, &vsi->mac_filter_list); + list_add_tail(&f->list, &vsi->mac_filter_list); } /* increment counter and add a new flag if needed */ @@ -1538,7 +1541,11 @@ static int i40e_set_mac(struct net_device *netdev, void *p) ether_addr_copy(netdev->dev_addr, addr->sa_data); - return i40e_sync_vsi_filters(vsi); + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); + return 0; } /** @@ -1762,6 +1769,11 @@ bottom_of_search_loop: vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; vsi->back->flags |= I40E_FLAG_FILTER_SYNC; } + + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); } /** @@ -1933,7 +1945,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) sizeof(struct i40e_aqc_remove_macvlan_element_data); del_list_size = filter_list_len * sizeof(struct i40e_aqc_remove_macvlan_element_data); - del_list = kzalloc(del_list_size, GFP_KERNEL); + del_list = kzalloc(del_list_size, GFP_ATOMIC); if (!del_list) { i40e_cleanup_add_list(&tmp_add_list); @@ -2011,7 +2023,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) sizeof(struct i40e_aqc_add_macvlan_element_data), add_list_size = filter_list_len * sizeof(struct i40e_aqc_add_macvlan_element_data); - add_list = kzalloc(add_list_size, GFP_KERNEL); + add_list = kzalloc(add_list_size, GFP_ATOMIC); if (!add_list) { /* Purge element from temporary lists */ i40e_cleanup_add_list(&tmp_add_list); @@ -2110,7 +2122,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)); - if (vsi->type == I40E_VSI_MAIN && pf->lan_veb != I40E_NO_VEB) { + if ((vsi->type == I40E_VSI_MAIN) && + (pf->lan_veb != I40E_NO_VEB) && + !(pf->flags & I40E_FLAG_MFP_ENABLED)) { /* set defport ON for Main VSI instead of true promisc * this way we will get all unicast/multicast and VLAN * promisc behavior but will not get VF or VMDq traffic @@ -2160,6 +2174,10 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } } out: + /* if something went wrong then set the changed flag so we try again */ + if (retval) + vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; + clear_bit(__I40E_CONFIG_BUSY, &vsi->state); return retval; } @@ -3106,11 +3124,11 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) struct i40e_q_vector *q_vector = vsi->q_vectors[i]; q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting); q_vector->tx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); @@ -3202,10 +3220,10 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) /* set the ITR configuration */ q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting); q_vector->tx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr); @@ -3245,14 +3263,15 @@ void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf) /** * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0 * @pf: board private structure + * @clearpba: true when all pending interrupt events should be cleared **/ -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf) +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba) { struct i40e_hw *hw = &pf->hw; u32 val; val = I40E_PFINT_DYN_CTL0_INTENA_MASK | - I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | + (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) | (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, val); @@ -3260,22 +3279,6 @@ void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf) } /** - * i40e_irq_dynamic_disable - Disable default interrupt generation settings - * @vsi: pointer to a vsi - * @vector: disable a particular Hw Interrupt vector - **/ -void i40e_irq_dynamic_disable(struct i40e_vsi *vsi, int vector) -{ - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u32 val; - - val = I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; - wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val); - i40e_flush(hw); -} - -/** * i40e_msix_clean_rings - MSIX mode Interrupt Handler * @irq: interrupt number * @data: pointer to a q_vector @@ -3400,7 +3403,7 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++) i40e_irq_dynamic_enable(vsi, i); } else { - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, true); } i40e_flush(&pf->hw); @@ -3459,16 +3462,12 @@ static irqreturn_t i40e_intr(int irq, void *data) struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_q_vector *q_vector = vsi->q_vectors[0]; - /* temporarily disable queue cause for NAPI processing */ - u32 qval = rd32(hw, I40E_QINT_RQCTL(0)); - - qval &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_RQCTL(0), qval); - - qval = rd32(hw, I40E_QINT_TQCTL(0)); - qval &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_TQCTL(0), qval); - + /* We do not have a way to disarm Queue causes while leaving + * interrupt enabled for all other causes, ideally + * interrupt should be disabled while we are in NAPI but + * this is not a performance path and napi_schedule() + * can deal with rescheduling. + */ if (!test_bit(__I40E_DOWN, &pf->state)) napi_schedule_irqoff(&q_vector->napi); } @@ -3476,6 +3475,7 @@ static irqreturn_t i40e_intr(int irq, void *data) if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { ena_mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK; set_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state); + i40e_debug(&pf->hw, I40E_DEBUG_NVM, "AdminQ event\n"); } if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) { @@ -3546,7 +3546,7 @@ enable_intr: wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask); if (!test_bit(__I40E_DOWN, &pf->state)) { i40e_service_event_schedule(pf); - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, false); } return ret; @@ -3750,7 +3750,7 @@ static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename) #ifdef CONFIG_NET_POLL_CONTROLLER /** - * i40e_netpoll - A Polling 'interrupt'handler + * i40e_netpoll - A Polling 'interrupt' handler * @netdev: network interface device structure * * This is used by netconsole to send skbs without having to re-enable @@ -3929,6 +3929,9 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) else rx_reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(pf_q), rx_reg); + /* No waiting for the Tx queue to disable */ + if (!enable && test_bit(__I40E_PORT_TX_SUSPENDED, &pf->state)) + continue; /* wait for the change to finish */ ret = i40e_pf_rxq_wait(pf, pf_q, enable); @@ -4287,12 +4290,12 @@ static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf) #ifdef CONFIG_I40E_DCB /** - * i40e_vsi_wait_txq_disabled - Wait for VSI's queues to be disabled + * i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled * @vsi: the VSI being configured * - * This function waits for the given VSI's Tx queues to be disabled. + * This function waits for the given VSI's queues to be disabled. **/ -static int i40e_vsi_wait_txq_disabled(struct i40e_vsi *vsi) +static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret; @@ -4309,24 +4312,36 @@ static int i40e_vsi_wait_txq_disabled(struct i40e_vsi *vsi) } } + pf_q = vsi->base_queue; + for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { + /* Check and wait for the disable status of the queue */ + ret = i40e_pf_rxq_wait(pf, pf_q, false); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d Rx ring %d disable timeout\n", + vsi->seid, pf_q); + return ret; + } + } + return 0; } /** - * i40e_pf_wait_txq_disabled - Wait for all queues of PF VSIs to be disabled + * i40e_pf_wait_queues_disabled - Wait for all queues of PF VSIs to be disabled * @pf: the PF * - * This function waits for the Tx queues to be in disabled state for all the + * This function waits for the queues to be in disabled state for all the * VSIs that are managed by this PF. **/ -static int i40e_pf_wait_txq_disabled(struct i40e_pf *pf) +static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) { int v, ret = 0; for (v = 0; v < pf->hw.func_caps.num_vsis; v++) { /* No need to wait for FCoE VSI queues */ if (pf->vsi[v] && pf->vsi[v]->type != I40E_VSI_FCOE) { - ret = i40e_vsi_wait_txq_disabled(pf->vsi[v]); + ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]); if (ret) break; } @@ -4352,7 +4367,7 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) { struct i40e_ring *tx_ring = NULL; struct i40e_pf *pf; - u32 head, val, tx_pending; + u32 head, val, tx_pending_hw; int i; pf = vsi->back; @@ -4378,16 +4393,9 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) else val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); - /* Bail out if interrupts are disabled because napi_poll - * execution in-progress or will get scheduled soon. - * napi_poll cleans TX and RX queues and updates 'next_to_clean'. - */ - if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)) - return; - head = i40e_get_head(tx_ring); - tx_pending = i40e_get_tx_pending(tx_ring); + tx_pending_hw = i40e_get_tx_pending(tx_ring, false); /* HW is done executing descriptors, updated HEAD write back, * but SW hasn't processed those descriptors. If interrupt is @@ -4395,12 +4403,12 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) * dev_watchdog detecting timeout on those netdev_queue, * hence proactively trigger SW interrupt. */ - if (tx_pending) { + if (tx_pending_hw && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) { /* NAPI Poll didn't run and clear since it was set */ if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &tx_ring->q_vector->hung_detected)) { - netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", - vsi->seid, q_idx, tx_pending, + netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending_hw: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", + vsi->seid, q_idx, tx_pending_hw, tx_ring->next_to_clean, head, tx_ring->next_to_use, readl(tx_ring->tail)); @@ -4413,6 +4421,17 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) &tx_ring->q_vector->hung_detected); } } + + /* This is the case where we have interrupts missing, + * so the tx_pending in HW will most likely be 0, but we + * will have tx_pending in SW since the WB happened but the + * interrupt got lost. + */ + if ((!tx_pending_hw) && i40e_get_tx_pending(tx_ring, true) && + (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) { + if (napi_reschedule(&tx_ring->q_vector->napi)) + tx_ring->tx_stats.tx_lost_interrupt++; + } } /** @@ -5016,8 +5035,7 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) int err = 0; /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */ - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4)) + if (pf->flags & I40E_FLAG_NO_DCB_SUPPORT) goto out; /* Get the initial DCB configuration */ @@ -5249,11 +5267,7 @@ void i40e_down(struct i40e_vsi *vsi) * @netdev: net device to configure * @tc: number of traffic classes to enable **/ -#ifdef I40E_FCOE -int i40e_setup_tc(struct net_device *netdev, u8 tc) -#else static int i40e_setup_tc(struct net_device *netdev, u8 tc) -#endif { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; @@ -5306,6 +5320,19 @@ exit: return ret; } +#ifdef I40E_FCOE +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +#else +static int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +#endif +{ + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + return i40e_setup_tc(netdev, tc->tc); +} + /** * i40e_open - Called when a network interface is made active * @netdev: network interface device structure @@ -5348,7 +5375,8 @@ int i40e_open(struct net_device *netdev) vxlan_get_rx_port(netdev); #endif #ifdef CONFIG_I40E_GENEVE - geneve_get_rx_port(netdev); + if (pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE) + geneve_get_rx_port(netdev); #endif return 0; @@ -5713,8 +5741,8 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, if (ret) goto exit; - /* Wait for the PF's Tx queues to be disabled */ - ret = i40e_pf_wait_txq_disabled(pf); + /* Wait for the PF's queues to be disabled */ + ret = i40e_pf_wait_queues_disabled(pf); if (ret) { /* Schedule PF reset to recover */ set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); @@ -6244,6 +6272,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) if (hw->debug_mask & I40E_DEBUG_AQ) dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n"); val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK; + pf->arq_overflows++; } if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) { if (hw->debug_mask & I40E_DEBUG_AQ) @@ -6319,7 +6348,9 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) case i40e_aqc_opc_nvm_erase: case i40e_aqc_opc_nvm_update: case i40e_aqc_opc_oem_post_update: - i40e_debug(&pf->hw, I40E_DEBUG_NVM, "ARQ NVM operation completed\n"); + i40e_debug(&pf->hw, I40E_DEBUG_NVM, + "ARQ NVM operation 0x%04x completed\n", + opcode); break; default: dev_info(&pf->pdev->dev, @@ -6803,12 +6834,12 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) if (ret) goto end_core_reset; - /* driver is only interested in link up/down and module qualification - * reports from firmware + /* The driver only wants link up/down and module qualification + * reports from firmware. Note the negative logic. */ ret = i40e_aq_set_phy_int_mask(&pf->hw, - I40E_AQ_EVENT_LINK_UPDOWN | - I40E_AQ_EVENT_MODULE_QUAL_FAIL, NULL); + ~(I40E_AQ_EVENT_LINK_UPDOWN | + I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (ret) dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n", i40e_stat_str(&pf->hw, ret), @@ -6889,8 +6920,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) wr32(hw, I40E_REG_MSS, val); } - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4)) { + if (pf->flags & I40E_FLAG_RESTART_AUTONEG) { msleep(75); ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (ret) @@ -7079,12 +7109,13 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) ret = i40e_aq_del_udp_tunnel(hw, i, NULL); if (ret) { - dev_info(&pf->pdev->dev, - "%s vxlan port %d, index %d failed, err %s aq_err %s\n", - port ? "add" : "delete", - ntohs(port), i, - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, + dev_dbg(&pf->pdev->dev, + "%s %s port %d, index %d failed, err %s aq_err %s\n", + pf->udp_ports[i].type ? "vxlan" : "geneve", + port ? "add" : "delete", + ntohs(port), i, + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); pf->udp_ports[i].index = 0; } @@ -7111,6 +7142,7 @@ static void i40e_service_task(struct work_struct *work) } i40e_detect_recover_hung(pf); + i40e_sync_filters_subtask(pf); i40e_reset_subtask(pf); i40e_handle_mdd_event(pf); i40e_vc_process_vflr_event(pf); @@ -7290,8 +7322,6 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) set_bit(__I40E_DOWN, &vsi->state); vsi->flags = 0; vsi->idx = vsi_idx; - vsi->rx_itr_setting = pf->rx_itr_default; - vsi->tx_itr_setting = pf->tx_itr_default; vsi->int_rate_limit = 0; vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ? pf->rss_table_size : 64; @@ -7458,8 +7488,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) tx_ring->dcb_tc = 0; if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; - if (vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) - tx_ring->flags |= I40E_TXR_FLAGS_OUTER_UDP_CSUM; + tx_ring->tx_itr_setting = pf->tx_itr_default; vsi->tx_rings[i] = tx_ring; rx_ring = &tx_ring[1]; @@ -7476,6 +7505,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) set_ring_16byte_desc_enabled(rx_ring); else clear_ring_16byte_desc_enabled(rx_ring); + rx_ring->rx_itr_setting = pf->rx_itr_default; vsi->rx_rings[i] = rx_ring; } @@ -7852,7 +7882,7 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) i40e_flush(hw); - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, true); return err; } @@ -7936,6 +7966,52 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi) } /** + * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands + * @vsi: Pointer to vsi structure + * @seed: Buffter to store the hash keys + * @lut: Buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries + * + * Return 0 on success, negative on failure + */ +static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int ret = 0; + + if (seed) { + ret = i40e_aq_get_rss_key(hw, vsi->id, + (struct i40e_aqc_get_set_rss_key_data *)seed); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot get RSS key, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return ret; + } + } + + if (lut) { + bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; + + ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot get RSS lut, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return ret; + } + } + + return ret; +} + +/** * i40e_config_rss_reg - Configure RSS keys and lut by writing registers * @vsi: Pointer to vsi structure * @seed: RSS hash seed @@ -7956,7 +8032,7 @@ static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, u32 *seed_dw = (u32 *)seed; for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), seed_dw[i]); } if (lut) { @@ -7993,7 +8069,7 @@ static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed, u32 *seed_dw = (u32 *)seed; for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - seed_dw[i] = rd32(hw, I40E_PFQF_HKEY(i)); + seed_dw[i] = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i)); } if (lut) { u32 *lut_dw = (u32 *)lut; @@ -8037,7 +8113,12 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) */ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { - return i40e_get_rss_reg(vsi, seed, lut, lut_size); + struct i40e_pf *pf = vsi->back; + + if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) + return i40e_get_rss_aq(vsi, seed, lut, lut_size); + else + return i40e_get_rss_reg(vsi, seed, lut, lut_size); } /** @@ -8071,19 +8152,19 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) int ret; /* By default we enable TCP/UDP with IPv4/IPv6 ptypes */ - hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) | - ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32); + hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | + ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); hena |= i40e_pf_get_default_rss_hena(pf); - wr32(hw, I40E_PFQF_HENA(0), (u32)hena); - wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); /* Determine the RSS table size based on the hardware capabilities */ - reg_val = rd32(hw, I40E_PFQF_CTL_0); + reg_val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0); reg_val = (pf->rss_table_size == 512) ? (reg_val | I40E_PFQF_CTL_0_HASHLUTSIZE_512) : (reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512); - wr32(hw, I40E_PFQF_CTL_0, reg_val); + i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val); /* Determine the RSS size of the VSI */ if (!vsi->rss_size) @@ -8367,6 +8448,26 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->hw.func_caps.fd_filters_best_effort; } + if (i40e_is_mac_710(&pf->hw) && + (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || + (pf->hw.aq.fw_maj_ver < 4))) { + pf->flags |= I40E_FLAG_RESTART_AUTONEG; + /* No DCB support for FW < v4.33 */ + pf->flags |= I40E_FLAG_NO_DCB_SUPPORT; + } + + /* Disable FW LLDP if FW < v4.3 */ + if (i40e_is_mac_710(&pf->hw) && + (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || + (pf->hw.aq.fw_maj_ver < 4))) + pf->flags |= I40E_FLAG_STOP_FW_LLDP; + + /* Use the FW Set LLDP MIB API if FW > v4.40 */ + if (i40e_is_mac_710(&pf->hw) && + (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) || + (pf->hw.aq.fw_maj_ver >= 5))) + pf->flags |= I40E_FLAG_USE_SET_LLDP_MIB; + if (pf->hw.func_caps.vmdq) { pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI; pf->flags |= I40E_FLAG_VMDQ_ENABLED; @@ -8393,8 +8494,19 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_OUTER_UDP_CSUM_CAPABLE | I40E_FLAG_WB_ON_ITR_CAPABLE | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE | + I40E_FLAG_100M_SGMII_CAPABLE | + I40E_FLAG_USE_SET_LLDP_MIB | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; + } else if ((pf->hw.aq.api_maj_ver > 1) || + ((pf->hw.aq.api_maj_ver == 1) && + (pf->hw.aq.api_min_ver > 4))) { + /* Supported in FW API version higher than 1.4 */ + pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; + pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; + } else { + pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; } + pf->eeprom_version = 0xDEAD; pf->lan_veb = I40E_NO_VEB; pf->lan_vsi = I40E_NO_VSI; @@ -8530,9 +8642,6 @@ static void i40e_add_vxlan_port(struct net_device *netdev, u8 next_idx; u8 idx; - if (sa_family == AF_INET6) - return; - idx = i40e_get_udp_port_idx(pf, port); /* Check if port already exists */ @@ -8572,9 +8681,6 @@ static void i40e_del_vxlan_port(struct net_device *netdev, struct i40e_pf *pf = vsi->back; u8 idx; - if (sa_family == AF_INET6) - return; - idx = i40e_get_udp_port_idx(pf, port); /* Check if port already exists */ @@ -8608,7 +8714,7 @@ static void i40e_add_geneve_port(struct net_device *netdev, u8 next_idx; u8 idx; - if (sa_family == AF_INET6) + if (!(pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE)) return; idx = i40e_get_udp_port_idx(pf, port); @@ -8652,7 +8758,7 @@ static void i40e_del_geneve_port(struct net_device *netdev, struct i40e_pf *pf = vsi->back; u8 idx; - if (sa_family == AF_INET6) + if (!(pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE)) return; idx = i40e_get_udp_port_idx(pf, port); @@ -8890,7 +8996,7 @@ static const struct net_device_ops i40e_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, #endif - .ndo_setup_tc = i40e_setup_tc, + .ndo_setup_tc = __i40e_setup_tc, #ifdef I40E_FCOE .ndo_fcoe_enable = i40e_fcoe_enable, .ndo_fcoe_disable = i40e_fcoe_disable, @@ -8942,11 +9048,15 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) np = netdev_priv(netdev); np->vsi = vsi; - netdev->hw_enc_features |= NETIF_F_IP_CSUM | - NETIF_F_RXCSUM | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_GRE | - NETIF_F_TSO; + netdev->hw_enc_features |= NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + 0; netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | @@ -8967,6 +9077,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) netdev->features |= NETIF_F_NTUPLE; + if (pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) + netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; @@ -9471,10 +9583,15 @@ vector_setup_out: **/ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) { - struct i40e_pf *pf = vsi->back; + struct i40e_pf *pf; u8 enabled_tc; int ret; + if (!vsi) + return NULL; + + pf = vsi->back; + i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_vsi_clear_rings(vsi); @@ -9975,13 +10092,13 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi) { struct i40e_pf *pf = veb->pf; bool is_default = veb->pf->cur_promisc; - bool is_cloud = false; + bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED); int ret; /* get a VEB from the hardware */ ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid, veb->enabled_tc, is_default, - is_cloud, &veb->seid, NULL); + &veb->seid, enable_stats, NULL); if (ret) { dev_info(&pf->pdev->dev, "couldn't add VEB, err %s aq_err %s\n", @@ -10538,21 +10655,9 @@ static void i40e_print_features(struct i40e_pf *pf) **/ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf) { - struct device_node *dp = pci_device_to_OF_node(pdev); - const unsigned char *addr; - u8 *mac_addr = pf->hw.mac.addr; - pf->flags &= ~I40E_FLAG_PF_MAC; - addr = of_get_mac_address(dp); - if (addr) { - ether_addr_copy(mac_addr, addr); + if (!eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr)) pf->flags |= I40E_FLAG_PF_MAC; -#ifdef CONFIG_SPARC - } else { - ether_addr_copy(mac_addr, idprom->id_ethaddr); - pf->flags |= I40E_FLAG_PF_MAC; -#endif /* CONFIG_SPARC */ - } } /** @@ -10575,7 +10680,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 wol_nvm_bits; u16 link_status; int err; - u32 len; u32 val; u32 i; u8 set_fc_aq_fail; @@ -10758,8 +10862,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * Ignore error return codes because if it was already disabled via * hardware settings this will fail */ - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || - (pf->hw.aq.fw_maj_ver < 4)) { + if (pf->flags & I40E_FLAG_STOP_FW_LLDP) { dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n"); i40e_aq_stop_lldp(hw, true, NULL); } @@ -10834,8 +10937,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; /* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */ - len = sizeof(struct i40e_vsi *) * pf->num_alloc_vsi; - pf->vsi = kzalloc(len, GFP_KERNEL); + pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *), + GFP_KERNEL); if (!pf->vsi) { err = -ENOMEM; goto err_switch_setup; @@ -10882,12 +10985,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } - /* driver is only interested in link up/down and module qualification - * reports from firmware + /* The driver only wants link up/down and module qualification + * reports from firmware. Note the negative logic. */ err = i40e_aq_set_phy_int_mask(&pf->hw, - I40E_AQ_EVENT_LINK_UPDOWN | - I40E_AQ_EVENT_MODULE_QUAL_FAIL, NULL); + ~(I40E_AQ_EVENT_LINK_UPDOWN | + I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (err) dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n", i40e_stat_str(&pf->hw, err), @@ -10904,8 +11007,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) wr32(hw, I40E_REG_MSS, val); } - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4)) { + if (pf->flags & I40E_FLAG_RESTART_AUTONEG) { msleep(75); err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (err) @@ -10939,8 +11041,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && (pf->flags & I40E_FLAG_MSIX_ENABLED) && !test_bit(__I40E_BAD_EEPROM, &pf->state)) { - u32 val; - /* disable link interrupts for VFs */ val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM); val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK; @@ -11051,6 +11151,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw, pf->main_vsi_seid); + if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) || + (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4)) + pf->flags |= I40E_FLAG_HAVE_10GBASET_PHY; + /* print a string summarizing features */ i40e_print_features(pf); @@ -11107,10 +11211,11 @@ static void i40e_remove(struct pci_dev *pdev) i40e_ptp_stop(pf); /* Disable RSS in hw */ - wr32(hw, I40E_PFQF_HENA(0), 0); - wr32(hw, I40E_PFQF_HENA(1), 0); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0); /* no more scheduling of any task */ + set_bit(__I40E_SUSPENDED, &pf->state); set_bit(__I40E_DOWN, &pf->state); del_timer_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); @@ -11141,8 +11246,8 @@ static void i40e_remove(struct pci_dev *pdev) i40e_vsi_release(pf->vsi[pf->lan_vsi]); /* shutdown and destroy the HMC */ - if (pf->hw.hmc.hmc_obj) { - ret_code = i40e_shutdown_lan_hmc(&pf->hw); + if (hw->hmc.hmc_obj) { + ret_code = i40e_shutdown_lan_hmc(hw); if (ret_code) dev_warn(&pdev->dev, "Failed to destroy the HMC resources: %d\n", @@ -11150,7 +11255,7 @@ static void i40e_remove(struct pci_dev *pdev) } /* shutdown the adminq */ - ret_code = i40e_shutdown_adminq(&pf->hw); + ret_code = i40e_shutdown_adminq(hw); if (ret_code) dev_warn(&pdev->dev, "Failed to destroy the Admin Queue resources: %d\n", @@ -11178,7 +11283,7 @@ static void i40e_remove(struct pci_dev *pdev) kfree(pf->qp_pile); kfree(pf->vsi); - iounmap(pf->hw.hw_addr); + iounmap(hw->hw_addr); kfree(pf); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); @@ -11413,6 +11518,16 @@ static int __init i40e_init_module(void) i40e_driver_string, i40e_driver_version_str); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); + /* we will see if single thread per module is enough for now, + * it can't be any worse than using the system workqueue which + * was already single threaded + */ + i40e_wq = create_singlethread_workqueue(i40e_driver_name); + if (!i40e_wq) { + pr_err("%s: Failed to create workqueue\n", i40e_driver_name); + return -ENOMEM; + } + i40e_dbg_init(); return pci_register_driver(&i40e_driver); } @@ -11427,6 +11542,7 @@ module_init(i40e_init_module); static void __exit i40e_exit_module(void) { pci_unregister_driver(&i40e_driver); + destroy_workqueue(i40e_wq); i40e_dbg_exit(); } module_exit(i40e_exit_module); diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 6100cdd9ad13..5730f8091e1b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -693,10 +693,11 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, /* early check for status command and debug msgs */ upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); - i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d\n", + i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", i40e_nvm_update_state_str[upd_cmd], hw->nvmupd_state, - hw->aq.nvm_release_on_done); + hw->aq.nvm_release_on_done, + cmd->command, cmd->config, cmd->offset, cmd->data_size); if (upd_cmd == I40E_NVMUPD_INVALID) { *perrno = -EFAULT; diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index bb9d583e5416..d51eee5bf79a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -74,6 +74,12 @@ i40e_status i40e_aq_set_rss_key(struct i40e_hw *hw, u32 i40e_led_get(struct i40e_hw *hw); void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink); +i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on, + u16 led_addr, u32 mode); +i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, + u16 *val); +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval); /* admin send queue commands */ @@ -127,6 +133,9 @@ i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, + u16 seid, bool enable, + struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_get_vsi_params(struct i40e_hw *hw, struct i40e_vsi_context *vsi_ctx, struct i40e_asq_cmd_details *cmd_details); @@ -135,8 +144,8 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid, u16 downlink_seid, u8 enabled_tc, - bool default_port, bool enable_l2_filtering, - u16 *pveb_seid, + bool default_port, u16 *pveb_seid, + bool enable_stats, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw, u16 veb_seid, u16 *switch_id, bool *floating, @@ -149,6 +158,15 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id, i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_remove_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rule_id, u16 *rules_used, u16 *rules_free); +i40e_status i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rules_used, u16 *rules_free); + i40e_status i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen, struct i40e_asq_cmd_details *cmd_details); @@ -324,4 +342,19 @@ i40e_status i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id, struct i40e_asq_cmd_details *cmd_details); void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw, u16 vsi_seid); +i40e_status i40e_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); +u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr); +i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 *value); +i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 value); +u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num); +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval); #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index dc0402fe3370..86ca27f72f02 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -2045,6 +2045,14 @@ #define I40E_PRTPM_TLPIC 0x001E43C0 /* Reset: GLOBR */ #define I40E_PRTPM_TLPIC_ETLPIC_SHIFT 0 #define I40E_PRTPM_TLPIC_ETLPIC_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTPM_TLPIC_ETLPIC_SHIFT) +#define I40E_GL_PRS_FVBM(_i) (0x00269760 + ((_i) * 4)) /* _i=0...3 */ /* Reset: CORER */ +#define I40E_GL_PRS_FVBM_MAX_INDEX 3 +#define I40E_GL_PRS_FVBM_FV_BYTE_INDX_SHIFT 0 +#define I40E_GL_PRS_FVBM_FV_BYTE_INDX_MASK I40E_MASK(0x7F, I40E_GL_PRS_FVBM_FV_BYTE_INDX_SHIFT) +#define I40E_GL_PRS_FVBM_RULE_BUS_INDX_SHIFT 8 +#define I40E_GL_PRS_FVBM_RULE_BUS_INDX_MASK I40E_MASK(0x3F, I40E_GL_PRS_FVBM_RULE_BUS_INDX_SHIFT) +#define I40E_GL_PRS_FVBM_MSK_ENA_SHIFT 31 +#define I40E_GL_PRS_FVBM_MSK_ENA_MASK I40E_MASK(0x1, I40E_GL_PRS_FVBM_MSK_ENA_SHIFT) #define I40E_GLRPB_DPSS 0x000AC828 /* Reset: CORER */ #define I40E_GLRPB_DPSS_DPS_TCN_SHIFT 0 #define I40E_GLRPB_DPSS_DPS_TCN_MASK I40E_MASK(0xFFFFF, I40E_GLRPB_DPSS_DPS_TCN_SHIFT) @@ -2216,6 +2224,14 @@ #define I40E_PRTQF_FD_FLXINSET_MAX_INDEX 63 #define I40E_PRTQF_FD_FLXINSET_INSET_SHIFT 0 #define I40E_PRTQF_FD_FLXINSET_INSET_MASK I40E_MASK(0xFF, I40E_PRTQF_FD_FLXINSET_INSET_SHIFT) +#define I40E_PRTQF_FD_INSET(_i, _j) (0x00250000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */ +#define I40E_PRTQF_FD_INSET_MAX_INDEX 63 +#define I40E_PRTQF_FD_INSET_INSET_SHIFT 0 +#define I40E_PRTQF_FD_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTQF_FD_INSET_INSET_SHIFT) +#define I40E_PRTQF_FD_INSET(_i, _j) (0x00250000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */ +#define I40E_PRTQF_FD_INSET_MAX_INDEX 63 +#define I40E_PRTQF_FD_INSET_INSET_SHIFT 0 +#define I40E_PRTQF_FD_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTQF_FD_INSET_INSET_SHIFT) #define I40E_PRTQF_FD_MSK(_i, _j) (0x00252000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */ #define I40E_PRTQF_FD_MSK_MAX_INDEX 63 #define I40E_PRTQF_FD_MSK_MASK_SHIFT 0 @@ -5155,6 +5171,38 @@ #define I40E_GLQF_FD_PCTYPES_MAX_INDEX 63 #define I40E_GLQF_FD_PCTYPES_FD_PCTYPE_SHIFT 0 #define I40E_GLQF_FD_PCTYPES_FD_PCTYPE_MASK I40E_MASK(0x3F, I40E_GLQF_FD_PCTYPES_FD_PCTYPE_SHIFT) +#define I40E_GLQF_FD_MSK(_i, _j) (0x00267200 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_FD_MSK_MAX_INDEX 1 +#define I40E_GLQF_FD_MSK_MASK_SHIFT 0 +#define I40E_GLQF_FD_MSK_MASK_MASK I40E_MASK(0xFFFF, I40E_GLQF_FD_MSK_MASK_SHIFT) +#define I40E_GLQF_FD_MSK_OFFSET_SHIFT 16 +#define I40E_GLQF_FD_MSK_OFFSET_MASK I40E_MASK(0x3F, I40E_GLQF_FD_MSK_OFFSET_SHIFT) +#define I40E_GLQF_HASH_INSET(_i, _j) (0x00267600 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_HASH_INSET_MAX_INDEX 1 +#define I40E_GLQF_HASH_INSET_INSET_SHIFT 0 +#define I40E_GLQF_HASH_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_GLQF_HASH_INSET_INSET_SHIFT) +#define I40E_GLQF_HASH_MSK(_i, _j) (0x00267A00 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_HASH_MSK_MAX_INDEX 1 +#define I40E_GLQF_HASH_MSK_MASK_SHIFT 0 +#define I40E_GLQF_HASH_MSK_MASK_MASK I40E_MASK(0xFFFF, I40E_GLQF_HASH_MSK_MASK_SHIFT) +#define I40E_GLQF_HASH_MSK_OFFSET_SHIFT 16 +#define I40E_GLQF_HASH_MSK_OFFSET_MASK I40E_MASK(0x3F, I40E_GLQF_HASH_MSK_OFFSET_SHIFT) +#define I40E_GLQF_ORT(_i) (0x00268900 + ((_i) * 4)) /* _i=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_ORT_MAX_INDEX 63 +#define I40E_GLQF_ORT_PIT_INDX_SHIFT 0 +#define I40E_GLQF_ORT_PIT_INDX_MASK I40E_MASK(0x1F, I40E_GLQF_ORT_PIT_INDX_SHIFT) +#define I40E_GLQF_ORT_FIELD_CNT_SHIFT 5 +#define I40E_GLQF_ORT_FIELD_CNT_MASK I40E_MASK(0x3, I40E_GLQF_ORT_FIELD_CNT_SHIFT) +#define I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT 7 +#define I40E_GLQF_ORT_FLX_PAYLOAD_MASK I40E_MASK(0x1, I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT) +#define I40E_GLQF_PIT(_i) (0x00268C80 + ((_i) * 4)) /* _i=0...23 */ /* Reset: CORER */ +#define I40E_GLQF_PIT_MAX_INDEX 23 +#define I40E_GLQF_PIT_SOURCE_OFF_SHIFT 0 +#define I40E_GLQF_PIT_SOURCE_OFF_MASK I40E_MASK(0x1F, I40E_GLQF_PIT_SOURCE_OFF_SHIFT) +#define I40E_GLQF_PIT_FSIZE_SHIFT 5 +#define I40E_GLQF_PIT_FSIZE_MASK I40E_MASK(0x1F, I40E_GLQF_PIT_FSIZE_SHIFT) +#define I40E_GLQF_PIT_DEST_OFF_SHIFT 10 +#define I40E_GLQF_PIT_DEST_OFF_MASK I40E_MASK(0x3F, I40E_GLQF_PIT_DEST_OFF_SHIFT) #define I40E_GLQF_FDEVICTENA(_i) (0x00270384 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */ #define I40E_GLQF_FDEVICTENA_MAX_INDEX 1 #define I40E_GLQF_FDEVICTENA_GLQF_FDEVICTENA_SHIFT 0 diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 47bd8b3145a7..084d0ab316b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -610,15 +610,19 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) /** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors + * @in_sw: is tx_pending being checked in SW or HW * * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 i40e_get_tx_pending(struct i40e_ring *ring) +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) { u32 head, tail; - head = i40e_get_head(ring); + if (!in_sw) + head = i40e_get_head(ring); + else + head = ring->next_to_clean; tail = readl(ring->tail); if (head != tail) @@ -741,7 +745,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) * them to be written back in case we stay in NAPI. * In this mode on X722 we do not enable Interrupt. */ - j = i40e_get_tx_pending(tx_ring); + j = i40e_get_tx_pending(tx_ring, false); if (budget && ((j / (WB_STRIDE + 1)) == 0) && (j != 0) && @@ -774,29 +778,48 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) } /** - * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors + * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled * @vsi: the VSI we care about - * @q_vector: the vector on which to force writeback + * @q_vector: the vector on which to enable writeback * **/ -void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, + struct i40e_q_vector *q_vector) { u16 flags = q_vector->tx.ring[0].flags; + u32 val; - if (flags & I40E_TXR_FLAGS_WB_ON_ITR) { - u32 val; + if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR)) + return; - if (q_vector->arm_wb_state) - return; + if (q_vector->arm_wb_state) + return; - val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK; + if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK | + I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */ wr32(&vsi->back->hw, - I40E_PFINT_DYN_CTLN(q_vector->v_idx + - vsi->base_vector - 1), + I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1), val); - q_vector->arm_wb_state = true; - } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + } else { + val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK | + I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */ + + wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val); + } + q_vector->arm_wb_state = true; +} + +/** + * i40e_force_wb - Issue SW Interrupt so HW does a wb + * @vsi: the VSI we care about + * @q_vector: the vector on which to force writeback + * + **/ +void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +{ + if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */ I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | @@ -1041,7 +1064,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (rx_bi->page_dma) { dma_unmap_page(dev, rx_bi->page_dma, - PAGE_SIZE / 2, + PAGE_SIZE, DMA_FROM_DEVICE); rx_bi->page_dma = 0; } @@ -1176,16 +1199,19 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; + const int current_node = numa_node_id(); /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -1193,56 +1219,79 @@ void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) if (bi->skb) /* desc is in use */ goto no_buffers; + + /* If we've been moved to a different NUMA node, release the + * page so we can get a new one on the current node. + */ + if (bi->page && page_to_nid(bi->page) != current_node) { + dma_unmap_page(rx_ring->dev, + bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(bi->page); + bi->page = NULL; + bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else if (bi->page) { + rx_ring->rx_stats.page_reuse_count++; + } + if (!bi->page) { bi->page = alloc_page(GFP_ATOMIC); if (!bi->page) { rx_ring->rx_stats.alloc_page_failed++; goto no_buffers; } - } - - if (!bi->page_dma) { - /* use a half page if we're re-using */ - bi->page_offset ^= PAGE_SIZE / 2; bi->page_dma = dma_map_page(rx_ring->dev, bi->page, - bi->page_offset, - PAGE_SIZE / 2, + 0, + PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(rx_ring->dev, - bi->page_dma)) { + if (dma_mapping_error(rx_ring->dev, bi->page_dma)) { rx_ring->rx_stats.alloc_page_failed++; + __free_page(bi->page); + bi->page = NULL; bi->page_dma = 0; + bi->page_offset = 0; goto no_buffers; } + bi->page_offset = 0; } - dma_sync_single_range_for_device(rx_ring->dev, - bi->dma, - 0, - rx_ring->rx_hdr_len, - DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); + rx_desc->read.pkt_addr = + cpu_to_le64(bi->page_dma + bi->page_offset); rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); i++; if (i == rx_ring->count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; @@ -1251,7 +1300,7 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -1259,8 +1308,10 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) skb = bi->skb; if (!skb) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_buf_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; goto no_buffers; @@ -1278,6 +1329,8 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) if (dma_mapping_error(rx_ring->dev, bi->dma)) { rx_ring->rx_stats.alloc_buff_failed++; bi->dma = 0; + dev_kfree_skb(bi->skb); + bi->skb = NULL; goto no_buffers; } } @@ -1289,9 +1342,19 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** @@ -1326,16 +1389,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, u16 rx_ptype) { struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); - bool ipv4 = false, ipv6 = false; - bool ipv4_tunnel, ipv6_tunnel; - __wsum rx_udp_csum; - struct iphdr *iph; - __sum16 csum; - - ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); - ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); + bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel; skb->ip_summed = CHECKSUM_NONE; @@ -1351,12 +1405,10 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (!(decoded.known && decoded.outer_ip)) return; - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) - ipv4 = true; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) - ipv6 = true; + ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -1380,37 +1432,17 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* If VXLAN/GENEVE traffic has an outer UDPv4 checksum we need to check - * it in the driver, hardware does not do it for us. - * Since L3L4P bit was set we assume a valid IHL value (>=5) - * so the total length of IPv4 header is IHL*4 bytes - * The UDP_0 bit *may* bet set if the *inner* header is UDP + /* The hardware supported by this driver does not validate outer + * checksums for tunneled VXLAN or GENEVE frames. I don't agree + * with it but the specification states that you "MAY validate", it + * doesn't make it a hard requirement so if we have validated the + * inner checksum report CHECKSUM_UNNECESSARY. */ - if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) && - (ipv4_tunnel)) { - skb->transport_header = skb->mac_header + - sizeof(struct ethhdr) + - (ip_hdr(skb)->ihl * 4); - - /* Add 4 bytes for VLAN tagged packets */ - skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) - ? VLAN_HLEN : 0; - - if ((ip_hdr(skb)->protocol == IPPROTO_UDP) && - (udp_hdr(skb)->check != 0)) { - rx_udp_csum = udp_csum(skb); - iph = ip_hdr(skb); - csum = csum_tcpudp_magic( - iph->saddr, iph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, rx_udp_csum); - - if (udp_hdr(skb)->check != csum) - goto checksum_fail; - - } /* else its GRE and so no outer UDP header */ - } + + ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); + ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = ipv4_tunnel || ipv6_tunnel; @@ -1475,18 +1507,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, * * Returns true if there's any budget left (e.g. the clean is finished) **/ -static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) +static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - const int current_node = numa_mem_id(); struct i40e_vsi *vsi = rx_ring->vsi; u16 i = rx_ring->next_to_clean; union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; + bool failure = false; u8 rx_ptype; u64 qword; + u32 copysize; if (budget <= 0) return 0; @@ -1497,7 +1530,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count); + failure = failure || + i40e_alloc_rx_buffers_ps(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1515,6 +1550,12 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) * DD bit is set. */ dma_rmb(); + /* sync header buffer for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, + rx_ring->rx_hdr_len, + DMA_FROM_DEVICE); if (i40e_rx_is_programming_status(qword)) { i40e_clean_programming_status(rx_ring, rx_desc); I40E_RX_INCREMENT(rx_ring, i); @@ -1523,10 +1564,13 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_bi = &rx_ring->rx_bi[i]; skb = rx_bi->skb; if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_hdr_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_hdr_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; + failure = true; break; } @@ -1534,8 +1578,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) skb_record_rx_queue(skb, rx_ring->queue_index); /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - 0, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, rx_ring->rx_hdr_len, DMA_FROM_DEVICE); } @@ -1553,9 +1597,16 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; - prefetch(rx_bi->page); + /* sync half-page for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_bi->page_dma, + rx_bi->page_offset, + PAGE_SIZE / 2, + DMA_FROM_DEVICE); + prefetch(page_address(rx_bi->page) + rx_bi->page_offset); rx_bi->skb = NULL; cleaned_count++; + copysize = 0; if (rx_hbo || rx_sph) { int len; @@ -1566,38 +1617,50 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len); } else if (skb->len == 0) { int len; + unsigned char *va = page_address(rx_bi->page) + + rx_bi->page_offset; - len = (rx_packet_len > skb_headlen(skb) ? - skb_headlen(skb) : rx_packet_len); - memcpy(__skb_put(skb, len), - rx_bi->page + rx_bi->page_offset, - len); - rx_bi->page_offset += len; + len = min(rx_packet_len, rx_ring->rx_hdr_len); + memcpy(__skb_put(skb, len), va, len); + copysize = len; rx_packet_len -= len; } - /* Get the rest of the data if this was a header split */ if (rx_packet_len) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_bi->page, - rx_bi->page_offset, - rx_packet_len); - - skb->len += rx_packet_len; - skb->data_len += rx_packet_len; - skb->truesize += rx_packet_len; - - if ((page_count(rx_bi->page) == 1) && - (page_to_nid(rx_bi->page) == current_node)) - get_page(rx_bi->page); - else + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + rx_bi->page, + rx_bi->page_offset + copysize, + rx_packet_len, I40E_RXBUFFER_2048); + + /* If the page count is more than 2, then both halves + * of the page are used and we need to free it. Do it + * here instead of in the alloc code. Otherwise one + * of the half-pages might be released between now and + * then, and we wouldn't know which one to use. + * Don't call get_page and free_page since those are + * both expensive atomic operations that just change + * the refcount in opposite directions. Just give the + * page to the stack; he can have our refcount. + */ + if (page_count(rx_bi->page) > 2) { + dma_unmap_page(rx_ring->dev, + rx_bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); rx_bi->page = NULL; + rx_bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else { + get_page(rx_bi->page); + /* switch to the other half-page here; the + * allocation code programs the right addr + * into HW. If we haven't used this half-page, + * the address won't be changed, and HW can + * just use it next time through. + */ + rx_bi->page_offset ^= PAGE_SIZE / 2; + } - dma_unmap_page(rx_ring->dev, - rx_bi->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - rx_bi->page_dma = 0; } I40E_RX_INCREMENT(rx_ring, i); @@ -1656,7 +1719,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } /** @@ -1674,6 +1737,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; u16 rx_packet_len; + bool failure = false; u8 rx_ptype; u64 qword; u16 i; @@ -1684,7 +1748,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count); + failure = failure || + i40e_alloc_rx_buffers_1buf(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1783,7 +1849,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } static u32 i40e_buildreg_itr(const int type, const u16 itr) @@ -1791,7 +1857,9 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | + /* Don't clear PBA because that can cause lost interrupts that + * came in while we were cleaning/polling + */ (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); @@ -1814,6 +1882,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, bool rx = false, tx = false; u32 rxval, txval; int vector; + int idx = q_vector->v_idx; vector = (q_vector->v_idx + vsi->base_vector); @@ -1823,17 +1892,17 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) && + !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } @@ -1906,7 +1975,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { - clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); + clean_complete = clean_complete && + i40e_clean_tx_irq(ring, vsi->work_limit); arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1930,7 +2000,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) work_done += cleaned; /* if we didn't clean as many as budgeted, we must be done */ - clean_complete &= (budget_per_ring != cleaned); + clean_complete = clean_complete && (budget_per_ring > cleaned); } /* If work not completed, return budget and polling will return */ @@ -1938,7 +2008,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) tx_only: if (arm_wb) { q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40e_force_wb(vsi, q_vector); + i40e_enable_wb_on_itr(vsi, q_vector); } return budget; } @@ -1951,20 +2021,7 @@ tx_only: if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { i40e_update_enable_itr(vsi, q_vector); } else { /* Legacy mode */ - struct i40e_hw *hw = &vsi->back->hw; - /* We re-enable the queue 0 cause, but - * don't worry about dynamic_enable - * because we left it on for the other - * possible interrupts during napi - */ - u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) | - I40E_QINT_RQCTL_CAUSE_ENA_MASK; - - wr32(hw, I40E_QINT_RQCTL(0), qval); - qval = rd32(hw, I40E_QINT_TQCTL(0)) | - I40E_QINT_TQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_TQCTL(0), qval); - i40e_irq_dynamic_enable_icr0(vsi->back); + i40e_irq_dynamic_enable_icr0(vsi->back, false); } return 0; } @@ -1974,10 +2031,9 @@ tx_only: * @tx_ring: ring to add programming descriptor to * @skb: send buffer * @tx_flags: send tx flags - * @protocol: wire protocol **/ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, - u32 tx_flags, __be16 protocol) + u32 tx_flags) { struct i40e_filter_program_desc *fdir_desc; struct i40e_pf *pf = tx_ring->vsi->back; @@ -1989,6 +2045,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, struct tcphdr *th; unsigned int hlen; u32 flex_ptype, dtype_cmd; + int l4_proto; u16 i; /* make sure ATR is enabled */ @@ -2002,36 +2059,28 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, if (!tx_ring->atr_sample_rate) return; + /* Currently only IPv4/IPv6 with TCP is supported */ if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6))) return; - if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) { - /* snag network header to get L4 type and address */ - hdr.network = skb_network_header(skb); + /* snag network header to get L4 type and address */ + hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ? + skb_inner_network_header(skb) : skb_network_header(skb); - /* Currently only IPv4/IPv6 with TCP is supported - * access ihl as u8 to avoid unaligned access on ia64 - */ - if (tx_flags & I40E_TX_FLAGS_IPV4) - hlen = (hdr.network[0] & 0x0F) << 2; - else if (protocol == htons(ETH_P_IPV6)) - hlen = sizeof(struct ipv6hdr); - else - return; + /* Note: tx_flags gets modified to reflect inner protocols in + * tx_enable_csum function if encap is enabled. + */ + if (tx_flags & I40E_TX_FLAGS_IPV4) { + /* access ihl as u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + l4_proto = hdr.ipv4->protocol; } else { - hdr.network = skb_inner_network_header(skb); - hlen = skb_inner_network_header_len(skb); + hlen = hdr.network - skb->data; + l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL); + hlen -= hdr.network - skb->data; } - /* Currently only IPv4/IPv6 with TCP is supported - * Note: tx_flags gets modified to reflect inner protocols in - * tx_enable_csum function if encap is enabled. - */ - if ((tx_flags & I40E_TX_FLAGS_IPV4) && - (hdr.ipv4->protocol != IPPROTO_TCP)) - return; - else if ((tx_flags & I40E_TX_FLAGS_IPV6) && - (hdr.ipv6->nexthdr != IPPROTO_TCP)) + if (l4_proto != IPPROTO_TCP) return; th = (struct tcphdr *)(hdr.network + hlen); @@ -2039,7 +2088,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, /* Due to lack of space, no more new filters can be programmed */ if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) return; - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) { + if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && + (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) { /* HW ATR eviction will take care of removing filters on FIN * and RST packets. */ @@ -2067,7 +2117,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & I40E_TXD_FLTR_QW0_QINDEX_MASK; - flex_ptype |= (protocol == htons(ETH_P_IP)) ? + flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ? (I40E_FILTER_PCTYPE_NONF_IPV4_TCP << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) : (I40E_FILTER_PCTYPE_NONF_IPV6_TCP << @@ -2101,7 +2151,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) + if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && + (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK; fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); @@ -2206,13 +2257,23 @@ out: static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { - u32 cd_cmd, cd_tso_len, cd_mss; - struct ipv6hdr *ipv6h; - struct tcphdr *tcph; - struct iphdr *iph; - u32 l4len; + u64 cd_cmd, cd_tso_len, cd_mss; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; int err; + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + if (!skb_is_gso(skb)) return 0; @@ -2220,35 +2281,60 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, if (err < 0) return err; - iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); - ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); - - if (iph->version == 4) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - iph->tot_len = 0; - iph->check = 0; - tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - 0, IPPROTO_TCP, 0); - } else if (ipv6h->version == 6) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - ipv6h->payload_len = 0; - tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - 0, IPPROTO_TCP, 0); + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; } - l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); - *hdr_len = (skb->encapsulation - ? (skb_inner_transport_header(skb) - skb->data) - : skb_transport_offset(skb)) + l4len; + if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL_CSUM)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + /* determine offset of outer transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from outer checksum */ + paylen = (__force u16)l4.udp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.udp->check = ~csum_fold((__force __wsum)paylen); + } + + /* reset pointers to inner headers */ + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + } + + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = (__force u16)l4.tcp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.tcp->check = ~csum_fold((__force __wsum)paylen); + + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; /* find the field values */ cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; cd_mss = skb_shinfo(skb)->gso_size; - *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | - ((u64)cd_tso_len << - I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | - ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | + (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); return 1; } @@ -2303,129 +2389,154 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_ring: Tx descriptor ring * @cd_tunneling: ptr to context desc bits **/ -static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, - u32 *td_cmd, u32 *td_offset, - struct i40e_ring *tx_ring, - u32 *cd_tunneling) +static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, + u32 *td_cmd, u32 *td_offset, + struct i40e_ring *tx_ring, + u32 *cd_tunneling) { - struct ipv6hdr *this_ipv6_hdr; - unsigned int this_tcp_hdrlen; - struct iphdr *this_ip_hdr; - u32 network_hdr_len; - u8 l4_hdr = 0; - struct udphdr *oudph = NULL; - struct iphdr *oiph = NULL; - u32 l4_tunnel = 0; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + unsigned char *exthdr; + u32 offset, cmd = 0, tunnel = 0; + __be16 frag_off; + u8 l4_proto = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; if (skb->encapsulation) { - switch (ip_hdr(skb)->protocol) { + /* define outer network header type */ + if (*tx_flags & I40E_TX_FLAGS_IPV4) { + tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_CTX_EXT_IP_IPV4 : + I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { + tunnel |= I40E_TX_CTX_EXT_IP_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + } + + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* define outer transport */ + switch (l4_proto) { case IPPROTO_UDP: - oudph = udp_hdr(skb); - oiph = ip_hdr(skb); - l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; + tunnel |= I40E_TXD_CTX_UDP_TUNNELING; *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; case IPPROTO_GRE: - l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING; + tunnel |= I40E_TXD_CTX_GRE_TUNNELING; + *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; default: - return; - } - network_hdr_len = skb_inner_network_header_len(skb); - this_ip_hdr = inner_ip_hdr(skb); - this_ipv6_hdr = inner_ipv6_hdr(skb); - this_tcp_hdrlen = inner_tcp_hdrlen(skb); - - if (*tx_flags & I40E_TX_FLAGS_IPV4) { - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; - ip_hdr(skb)->check = 0; - } else { - *cd_tunneling |= - I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - } - } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; if (*tx_flags & I40E_TX_FLAGS_TSO) - ip_hdr(skb)->check = 0; + return -1; + + skb_checksum_help(skb); + return 0; } - /* Now set the ctx descriptor fields */ - *cd_tunneling |= (skb_network_header_len(skb) >> 2) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | - l4_tunnel | - ((skb_inner_network_offset(skb) - - skb_transport_offset(skb)) >> 1) << - I40E_TXD_CTX_QW0_NATLEN_SHIFT; - if (this_ip_hdr->version == 6) { - *tx_flags &= ~I40E_TX_FLAGS_IPV4; + /* compute tunnel header size */ + tunnel |= ((ip.hdr - l4.hdr) / 2) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; + + /* indicate if we need to offload outer UDP header */ + if ((*tx_flags & I40E_TX_FLAGS_TSO) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; + + /* record tunnel offload values */ + *cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + *tx_flags |= I40E_TX_FLAGS_IPV4; + if (ip.v6->version == 6) *tx_flags |= I40E_TX_FLAGS_IPV6; - } - if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && - (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && - (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { - oudph->check = ~csum_tcpudp_magic(oiph->saddr, - oiph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, 0); - *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK; - } - } else { - network_hdr_len = skb_network_header_len(skb); - this_ip_hdr = ip_hdr(skb); - this_ipv6_hdr = ipv6_hdr(skb); - this_tcp_hdrlen = tcp_hdrlen(skb); } /* Enable IP checksum offloads */ if (*tx_flags & I40E_TX_FLAGS_IPV4) { - l4_hdr = this_ip_hdr->protocol; + l4_proto = ip.v4->protocol; /* the stack computes the IP header already, the only time we * need the hardware to recompute it is in the case of TSO. */ - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; - this_ip_hdr->check = 0; - } else { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; - } - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_DESC_CMD_IIPT_IPV4_CSUM : + I40E_TX_DESC_CMD_IIPT_IPV4; } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - l4_hdr = this_ipv6_hdr->nexthdr; - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); } - /* words in MACLEN + dwords in IPLEN + dwords in L4Len */ - *td_offset |= (skb_network_offset(skb) >> 1) << - I40E_TX_DESC_LENGTH_MACLEN_SHIFT; + + /* compute inner L3 header size */ + offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; /* Enable L4 checksum offloads */ - switch (l4_hdr) { + switch (l4_proto) { case IPPROTO_TCP: /* enable checksum offloads */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; - *td_offset |= (this_tcp_hdrlen >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; + offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_SCTP: /* enable SCTP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; - *td_offset |= (sizeof(struct sctphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; + offset |= (sizeof(struct sctphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_UDP: /* enable UDP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; - *td_offset |= (sizeof(struct udphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; + offset |= (sizeof(struct udphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; default: - break; + if (*tx_flags & I40E_TX_FLAGS_TSO) + return -1; + skb_checksum_help(skb); + return 0; } + + *td_cmd |= cmd; + *td_offset |= offset; + + return 1; } /** @@ -2466,7 +2577,7 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, * * Returns -EBUSY if a stop is needed, else 0 **/ -static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); /* Memory barrier before checking head and tail */ @@ -2483,77 +2594,71 @@ static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) } /** - * i40e_maybe_stop_tx - 1st level check for tx stop conditions - * @tx_ring: the ring to be checked - * @size: the size buffer we want to assure is available - * - * Returns 0 if stop is not needed - **/ -#ifdef I40E_FCOE -inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -#else -static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -#endif -{ - if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) - return 0; - return __i40e_maybe_stop_tx(tx_ring, size); -} - -/** - * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * __i40e_chk_linearize - Check if there are more than 8 fragments per packet * @skb: send buffer - * @tx_flags: collected send information * * Note: Our HW can't scatter-gather more than 8 fragments to build * a packet on the wire and so we need to figure out the cases where we * need to linearize the skb. **/ -static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) +bool __i40e_chk_linearize(struct sk_buff *skb) { - struct skb_frag_struct *frag; - bool linearize = false; - unsigned int size = 0; - u16 num_frags; - u16 gso_segs; + const struct skb_frag_struct *frag, *stale; + int gso_size, nr_frags, sum; - num_frags = skb_shinfo(skb)->nr_frags; - gso_segs = skb_shinfo(skb)->gso_segs; + /* check to see if TSO is enabled, if so we may get a repreive */ + gso_size = skb_shinfo(skb)->gso_size; + if (unlikely(!gso_size)) + return true; - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { - u16 j = 0; + /* no need to check if number of frags is less than 8 */ + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags < I40E_MAX_BUFFER_TXD) + return false; - if (num_frags < (I40E_MAX_BUFFER_TXD)) - goto linearize_chk_done; - /* try the simple math, if we have too many frags per segment */ - if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > - I40E_MAX_BUFFER_TXD) { - linearize = true; - goto linearize_chk_done; - } - frag = &skb_shinfo(skb)->frags[0]; - /* we might still have more fragments per segment */ - do { - size += skb_frag_size(frag); - frag++; j++; - if ((size >= skb_shinfo(skb)->gso_size) && - (j < I40E_MAX_BUFFER_TXD)) { - size = (size % skb_shinfo(skb)->gso_size); - j = (size) ? 1 : 0; - } - if (j == I40E_MAX_BUFFER_TXD) { - linearize = true; - break; - } - num_frags--; - } while (num_frags); - } else { - if (num_frags >= I40E_MAX_BUFFER_TXD) - linearize = true; + /* We need to walk through the list and validate that each group + * of 6 fragments totals at least gso_size. However we don't need + * to perform such validation on the first or last 6 since the first + * 6 cannot inherit any data from a descriptor before them, and the + * last 6 cannot inherit any data from a descriptor after them. + */ + nr_frags -= I40E_MAX_BUFFER_TXD - 1; + frag = &skb_shinfo(skb)->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We + * use this as the worst case scenerio in which the frag ahead + * of us only provides one byte which is why we are limited to 6 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - gso_size; + + /* Add size of frags 1 through 5 to create our initial sum */ + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + stale = &skb_shinfo(skb)->frags[0]; + for (;;) { + sum += skb_frag_size(++frag); + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + /* use pre-decrement to avoid processing last fragment */ + if (!--nr_frags) + break; + + sum -= skb_frag_size(++stale); } -linearize_chk_done: - return linearize; + return false; } /** @@ -2760,43 +2865,6 @@ dma_error: } /** - * i40e_xmit_descriptor_count - calculate number of tx descriptors needed - * @skb: send buffer - * @tx_ring: ring to send buffer on - * - * Returns number of data descriptors needed for this skb. Returns 0 to indicate - * there is not enough descriptors available in this ring since we need at least - * one descriptor. - **/ -#ifdef I40E_FCOE -inline int i40e_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -#else -static inline int i40e_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -#endif -{ - unsigned int f; - int count = 0; - - /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, - * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 4 desc gap to avoid the cache line where head is, - * + 1 desc for context descriptor, - * otherwise try next time - */ - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); - - count += TXD_USE_COUNT(skb_headlen(skb)); - if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; - return 0; - } - return count; -} - -/** * i40e_xmit_frame_ring - Sends buffer on Tx ring * @skb: send buffer * @tx_ring: ring to send buffer on @@ -2814,14 +2882,30 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, __be16 protocol; u32 td_cmd = 0; u8 hdr_len = 0; + int tso, count; int tsyn; - int tso; /* prefetch the data, we'll need it later */ prefetch(skb->data); - if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = TXD_USE_COUNT(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* prepare the xmit flags */ if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) @@ -2846,29 +2930,22 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (tso) tx_flags |= I40E_TX_FLAGS_TSO; + /* Always offload the checksum, since it's in the data descriptor */ + tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, + tx_ring, &cd_tunneling); + if (tso < 0) + goto out_drop; + tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss); if (tsyn) tx_flags |= I40E_TX_FLAGS_TSYN; - if (i40e_chk_linearize(skb, tx_flags)) { - if (skb_linearize(skb)) - goto out_drop; - tx_ring->tx_stats.tx_linearize++; - } skb_tx_timestamp(skb); /* always enable CRC insertion offload */ td_cmd |= I40E_TX_DESC_CMD_ICRC; - /* Always offload the checksum, since it's in the data descriptor */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - tx_flags |= I40E_TX_FLAGS_CSUM; - - i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, - tx_ring, &cd_tunneling); - } - i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, cd_tunneling, cd_l2tag2); @@ -2876,7 +2953,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, * * NOTE: this must always be directly before the data descriptor. */ - i40e_atr(tx_ring, skb, tx_flags, protocol); + i40e_atr(tx_ring, skb, tx_flags); i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, td_cmd, td_offset); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 3f081e25e097..cdd5dc00aec5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -153,7 +153,6 @@ enum i40e_dyn_idx_t { #define DESC_NEEDED (MAX_SKB_FRAGS + 4) #define I40E_MIN_DESC_PENDING 4 -#define I40E_TX_FLAGS_CSUM BIT(0) #define I40E_TX_FLAGS_HW_VLAN BIT(1) #define I40E_TX_FLAGS_SW_VLAN BIT(2) #define I40E_TX_FLAGS_TSO BIT(3) @@ -203,12 +202,15 @@ struct i40e_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; + u64 tx_lost_interrupt; }; struct i40e_rx_queue_stats { u64 non_eop_descs; u64 alloc_page_failed; u64 alloc_buff_failed; + u64 page_reuse_count; + u64 realloc_count; }; enum i40e_ring_state_t { @@ -246,6 +248,14 @@ struct i40e_ring { u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + /* high bit set means dynamic, use accessor routines to read/write. + * hardware only supports 2us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. + */ + u16 rx_itr_setting; + u16 tx_itr_setting; + u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ u16 rx_hdr_len; @@ -254,7 +264,6 @@ struct i40e_ring { #define I40E_RX_DTYPE_NO_SPLIT 0 #define I40E_RX_DTYPE_HEADER_SPLIT 1 #define I40E_RX_DTYPE_SPLIT_ALWAYS 2 - u8 hsplit; #define I40E_RX_SPLIT_L2 0x1 #define I40E_RX_SPLIT_IP 0x2 #define I40E_RX_SPLIT_TCP_UDP 0x4 @@ -275,7 +284,6 @@ struct i40e_ring { u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) -#define I40E_TXR_FLAGS_OUTER_UDP_CSUM BIT(1) #define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2) /* stats structs */ @@ -316,8 +324,8 @@ struct i40e_ring_container { #define i40e_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) -void i40e_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); -void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); +bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); +bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); void i40e_alloc_rx_headers(struct i40e_ring *rxr); netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void i40e_clean_tx_ring(struct i40e_ring *tx_ring); @@ -331,13 +339,13 @@ int i40e_napi_poll(struct napi_struct *napi, int budget); void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, struct i40e_tx_buffer *first, u32 tx_flags, const u8 hdr_len, u32 td_cmd, u32 td_offset); -int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); -int i40e_xmit_descriptor_count(struct sk_buff *skb, struct i40e_ring *tx_ring); int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, struct i40e_ring *tx_ring, u32 *flags); #endif void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); -u32 i40e_get_tx_pending(struct i40e_ring *ring); +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); +int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); +bool __i40e_chk_linearize(struct sk_buff *skb); /** * i40e_get_head - Retrieve head from head writeback @@ -352,4 +360,63 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring) return le32_to_cpu(*(volatile __le32 *)head); } + +/** + * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed + * @skb: send buffer + * @tx_ring: ring to send buffer on + * + * Returns number of data descriptors needed for this skb. Returns 0 to indicate + * there is not enough descriptors available in this ring since we need at least + * one descriptor. + **/ +static inline int i40e_xmit_descriptor_count(struct sk_buff *skb) +{ + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + int count = 0, size = skb_headlen(skb); + + for (;;) { + count += TXD_USE_COUNT(size); + + if (!nr_frags--) + break; + + size = skb_frag_size(frag++); + } + + return count; +} + +/** + * i40e_maybe_stop_tx - 1st level check for Tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns 0 if stop is not needed + **/ +static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +{ + if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) + return 0; + return __i40e_maybe_stop_tx(tx_ring, size); +} + +/** + * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @count: number of buffers used + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + **/ +static inline bool i40e_chk_linearize(struct sk_buff *skb, int count) +{ + /* we can only support up to 8 data buffers for a single send */ + if (likely(count <= I40E_MAX_BUFFER_TXD)) + return false; + + return __i40e_chk_linearize(skb); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index dd2da356d9a1..0a0baf71041b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -90,6 +90,22 @@ enum i40e_debug_mask { I40E_DEBUG_ALL = 0xFFFFFFFF }; +#define I40E_MDIO_STCODE 0 +#define I40E_MDIO_OPCODE_ADDRESS 0 +#define I40E_MDIO_OPCODE_WRITE I40E_MASK(1, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_OPCODE_READ_INC_ADDR I40E_MASK(2, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_OPCODE_READ I40E_MASK(3, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) + +#define I40E_PHY_COM_REG_PAGE 0x1E +#define I40E_PHY_LED_LINK_MODE_MASK 0xF0 +#define I40E_PHY_LED_MANUAL_ON 0x100 +#define I40E_PHY_LED_PROV_REG_1 0xC430 +#define I40E_PHY_LED_MODE_MASK 0xFFFF +#define I40E_PHY_LED_MODE_ORIG 0x80000000 + /* These are structs for managing the hardware information and the operations. * The structures of function pointers are filled out at init time when we * know for sure exactly which hardware we're working with. This gives us the @@ -1098,6 +1114,10 @@ enum i40e_filter_program_desc_pcmd { I40E_TXD_FLTR_QW1_CMD_SHIFT) #define I40E_TXD_FLTR_QW1_ATR_MASK BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT) +#define I40E_TXD_FLTR_QW1_ATR_SHIFT (0xEULL + \ + I40E_TXD_FLTR_QW1_CMD_SHIFT) +#define I40E_TXD_FLTR_QW1_ATR_MASK BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT) + #define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20 #define I40E_TXD_FLTR_QW1_CNTINDEX_MASK (0x1FFUL << \ I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 63e62f9aec6e..acd2693a4e97 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -461,7 +461,7 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, rx_ctx.hbuff = info->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT; /* set splitalways mode 10b */ - rx_ctx.dtype = 0x2; + rx_ctx.dtype = I40E_RX_DTYPE_HEADER_SPLIT; } /* databuffer length validation */ @@ -602,8 +602,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) * that VF queues be mapped using this method, even when they are * contiguous in real life */ - wr32(hw, I40E_VSILAN_QBASE(vf->lan_vsi_id), - I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK); + i40e_write_rx_ctl(hw, I40E_VSILAN_QBASE(vf->lan_vsi_id), + I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK); /* enable VF vplan_qtable mappings */ reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK; @@ -630,7 +630,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) (j * 2) + 1); reg |= qid << 16; } - wr32(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), reg); + i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), + reg); } i40e_flush(hw); @@ -980,7 +981,7 @@ err_alloc: i40e_free_vfs(pf); err_iov: /* Re-enable interrupt 0. */ - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, false); return ret; } @@ -1213,9 +1214,21 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG; } + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + vfres->vf_offload_flags |= + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2; + } + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING) vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING; + if (pf->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) { + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) + vfres->vf_offload_flags |= + I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR; + } + vfres->num_vsis = num_vsis; vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; @@ -2025,7 +2038,11 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) if (!test_bit(__I40E_VFLR_EVENT_PENDING, &pf->state)) return 0; - /* re-enable vflr interrupt cause */ + /* Re-enable the VFLR interrupt cause here, before looking for which + * VF got reset. Otherwise, if another VF gets a reset while the + * first one is being processed, that interrupt will be lost, and + * that VF will be stuck in reset forever. + */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_VFLR_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); @@ -2186,6 +2203,8 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, * and then reloading the VF driver. */ i40e_vc_disable_vf(pf, vf); + /* During reset the VF got a new VSI, so refresh the pointer. */ + vsi = pf->vsi[vf->lan_vsi_idx]; } /* Check for condition where there was already a port VLAN ID @@ -2294,6 +2313,9 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, case I40E_LINK_SPEED_40GB: speed = 40000; break; + case I40E_LINK_SPEED_20GB: + speed = 20000; + break; case I40E_LINK_SPEED_10GB: speed = 10000; break; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index da44995def42..e74642a0c42e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -91,8 +91,8 @@ struct i40e_vf { * When assigned, these will be non-zero, because VSI 0 is always * the main LAN VSI for the PF. */ - u8 lan_vsi_idx; /* index into PF struct */ - u8 lan_vsi_id; /* ID as used by firmware */ + u16 lan_vsi_idx; /* index into PF struct */ + u16 lan_vsi_id; /* ID as used by firmware */ u8 num_queue_pairs; /* num of qps assigned to VF vsis */ u64 num_mdd_events; /* num of mdd events detected */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index 3f65e39b3fe4..44f7ed7583dd 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -887,6 +887,9 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw, u16 flags; u16 ntu; + /* pre-clean the event info */ + memset(&e->desc, 0, sizeof(e->desc)); + /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index f5b2b369dc7c..aad8d6277110 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -34,7 +34,7 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0004 +#define I40E_FW_API_VERSION_MINOR 0x0005 struct i40e_aq_desc { __le16 flags; @@ -145,6 +145,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_remove_statistics = 0x0202, i40e_aqc_opc_set_port_parameters = 0x0203, i40e_aqc_opc_get_switch_resource_alloc = 0x0204, + i40e_aqc_opc_set_switch_config = 0x0205, + i40e_aqc_opc_rx_ctl_reg_read = 0x0206, + i40e_aqc_opc_rx_ctl_reg_write = 0x0207, i40e_aqc_opc_add_vsi = 0x0210, i40e_aqc_opc_update_vsi_parameters = 0x0211, @@ -220,6 +223,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_get_phy_wol_caps = 0x0621, i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, + i40e_aqc_opc_run_phy_activity = 0x0626, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -228,6 +232,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_nvm_config_read = 0x0704, i40e_aqc_opc_nvm_config_write = 0x0705, i40e_aqc_opc_oem_post_update = 0x0720, + i40e_aqc_opc_thermal_sensor = 0x0721, /* virtualization commands */ i40e_aqc_opc_send_msg_to_pf = 0x0801, @@ -399,6 +404,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_OS2BMC_CAP 0x0004 #define I40E_AQ_CAP_ID_FUNCTIONS_VALID 0x0005 #define I40E_AQ_CAP_ID_ALTERNATE_RAM 0x0006 +#define I40E_AQ_CAP_ID_WOL_AND_PROXY 0x0008 #define I40E_AQ_CAP_ID_SRIOV 0x0012 #define I40E_AQ_CAP_ID_VF 0x0013 #define I40E_AQ_CAP_ID_VMDQ 0x0014 @@ -419,6 +425,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_LED 0x0061 #define I40E_AQ_CAP_ID_SDP 0x0062 #define I40E_AQ_CAP_ID_MDIO 0x0063 +#define I40E_AQ_CAP_ID_WSR_PROT 0x0064 #define I40E_AQ_CAP_ID_FLEX10 0x00F1 #define I40E_AQ_CAP_ID_CEM 0x00F2 @@ -677,6 +684,31 @@ struct i40e_aqc_switch_resource_alloc_element_resp { I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp); +/* Set Switch Configuration (direct 0x0205) */ +struct i40e_aqc_set_switch_config { + __le16 flags; +#define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 +#define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 + __le16 valid_flags; + u8 reserved[12]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); + +/* Read Receive control registers (direct 0x0206) + * Write Receive control registers (direct 0x0207) + * used for accessing Rx control registers that can be + * slow and need special handling when under high Rx load + */ +struct i40e_aqc_rx_ctl_reg_read_write { + __le32 reserved1; + __le32 address; + __le32 reserved2; + __le32 value; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_rx_ctl_reg_read_write); + /* Add VSI (indirect 0x0210) * this indirect command uses struct i40e_aqc_vsi_properties_data * as the indirect buffer (128 bytes) @@ -903,7 +935,8 @@ struct i40e_aqc_add_veb { I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT) #define I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT 0x2 #define I40E_AQC_ADD_VEB_PORT_TYPE_DATA 0x4 -#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 +#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 /* deprecated */ +#define I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS 0x10 u8 enable_tcs; u8 reserved[9]; }; @@ -970,6 +1003,7 @@ struct i40e_aqc_add_macvlan_element_data { #define I40E_AQC_MACVLAN_ADD_HASH_MATCH 0x0002 #define I40E_AQC_MACVLAN_ADD_IGNORE_VLAN 0x0004 #define I40E_AQC_MACVLAN_ADD_TO_QUEUE 0x0008 +#define I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC 0x0010 __le16 queue_number; #define I40E_AQC_MACVLAN_CMD_QUEUE_SHIFT 0 #define I40E_AQC_MACVLAN_CMD_QUEUE_MASK (0x7FF << \ @@ -1066,6 +1100,7 @@ struct i40e_aqc_set_vsi_promiscuous_modes { #define I40E_AQC_SET_VSI_PROMISC_BROADCAST 0x04 #define I40E_AQC_SET_VSI_DEFAULT 0x08 #define I40E_AQC_SET_VSI_PROMISC_VLAN 0x10 +#define I40E_AQC_SET_VSI_PROMISC_TX 0x8000 __le16 seid; #define I40E_AQC_VSI_PROM_CMD_SEID_MASK 0x3FF __le16 vlan_tag; @@ -1254,10 +1289,16 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT 9 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK 0x1E00 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_XVLAN 0 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN 0 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_NVGRE_OMAC 1 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NGE 2 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE 2 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_IP 3 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_RESERVED 4 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN_GPE 5 + +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_MAC 0x2000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_INNER_MAC 0x4000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_IP 0x8000 __le32 tenant_id; u8 reserved[4]; @@ -1752,7 +1793,12 @@ struct i40e_aqc_get_link_status { u8 config; #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 reserved[5]; + u8 external_power_ability; +#define I40E_AQ_LINK_POWER_CLASS_1 0x00 +#define I40E_AQ_LINK_POWER_CLASS_2 0x01 +#define I40E_AQ_LINK_POWER_CLASS_3 0x02 +#define I40E_AQ_LINK_POWER_CLASS_4 0x03 + u8 reserved[4]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); @@ -1820,6 +1866,18 @@ enum i40e_aq_phy_reg_type { I40E_AQC_PHY_REG_EXERNAL_MODULE = 0x3 }; +/* Run PHY Activity (0x0626) */ +struct i40e_aqc_run_phy_activity { + __le16 activity_id; + u8 flags; + u8 reserved1; + __le32 control; + __le32 data; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -1909,6 +1967,22 @@ struct i40e_aqc_nvm_oem_post_update_buffer { I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer); +/* Thermal Sensor (indirect 0x0721) + * read or set thermal sensor configs and values + * takes a sensor and command specific data buffer, not detailed here + */ +struct i40e_aqc_thermal_sensor { + u8 sensor_action; +#define I40E_AQ_THERMAL_SENSOR_READ_CONFIG 0 +#define I40E_AQ_THERMAL_SENSOR_SET_CONFIG 1 +#define I40E_AQ_THERMAL_SENSOR_READ_TEMP 2 + u8 reserved[7]; + __le32 addr_high; + __le32 addr_low; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_thermal_sensor); + /* Send to PF command (indirect 0x0801) id is only used by PF * Send to VF command (indirect 0x0802) id is only used by PF * Send to Peer PF command (indirect 0x0803) @@ -2083,6 +2157,7 @@ struct i40e_aqc_add_udp_tunnel { #define I40E_AQC_TUNNEL_TYPE_VXLAN 0x00 #define I40E_AQC_TUNNEL_TYPE_NGE 0x01 #define I40E_AQC_TUNNEL_TYPE_TEREDO 0x10 +#define I40E_AQC_TUNNEL_TYPE_VXLAN_GPE 0x11 u8 reserved1[10]; }; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 938783e0baac..771ac6ad8cda 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -904,6 +904,131 @@ struct i40e_rx_ptype_decoded i40evf_ptype_lookup[] = { }; /** + * i40evf_aq_rx_ctl_read_register - use FW to read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: ptr to register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to read the Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40evf_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + if (!reg_val) + return I40E_ERR_PARAM; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_rx_ctl_reg_read); + + cmd_resp->address = cpu_to_le32(reg_addr); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + if (status == 0) + *reg_val = le32_to_cpu(cmd_resp->value); + + return status; +} + +/** + * i40evf_read_rx_ctl - read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + **/ +u32 i40evf_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + u32 val = 0; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40evf_aq_rx_ctl_read_register(hw, reg_addr, + &val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + val = rd32(hw, reg_addr); + + return val; +} + +/** + * i40evf_aq_rx_ctl_write_register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to write to an Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_rx_ctl_reg_write); + + cmd->address = cpu_to_le32(reg_addr); + cmd->value = cpu_to_le32(reg_val); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40evf_write_rx_ctl - write to an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + **/ +void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40evf_aq_rx_ctl_write_register(hw, reg_addr, + reg_val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + wr32(hw, reg_addr, reg_val); +} + +/** * i40e_aq_send_msg_to_pf * @hw: pointer to the hardware structure * @v_opcode: opcodes for VF-PF communication diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h index cbd9a1b078ab..d89d52109efa 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h @@ -103,4 +103,19 @@ i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw, u16 vsi_seid); +i40e_status i40evf_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); +u32 i40evf_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr); +i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 *value); +i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 value); +u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num); +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval); #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 7a00657dacda..ebcc25c05796 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -129,15 +129,19 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) /** * i40evf_get_tx_pending - how many Tx descriptors not processed * @tx_ring: the ring of descriptors + * @in_sw: is tx_pending being checked in SW or HW * * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 i40evf_get_tx_pending(struct i40e_ring *ring) +u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw) { u32 head, tail; - head = i40e_get_head(ring); + if (!in_sw) + head = i40e_get_head(ring); + else + head = ring->next_to_clean; tail = readl(ring->tail); if (head != tail) @@ -252,6 +256,22 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; + if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) { + unsigned int j = 0; + /* check to see if there are < 4 descriptors + * waiting to be written back, then kick the hardware to force + * them to be written back in case we stay in NAPI. + * In this mode on X722 we do not enable Interrupt. + */ + j = i40evf_get_tx_pending(tx_ring, false); + + if (budget && + ((j / (WB_STRIDE + 1)) == 0) && (j > 0) && + !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && + (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) + tx_ring->arm_wb = true; + } + netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), total_packets, total_bytes); @@ -276,39 +296,49 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) } /** - * i40evf_force_wb -Arm hardware to do a wb on noncache aligned descriptors + * i40evf_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled * @vsi: the VSI we care about - * @q_vector: the vector on which to force writeback + * @q_vector: the vector on which to enable writeback * **/ -static void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, + struct i40e_q_vector *q_vector) { u16 flags = q_vector->tx.ring[0].flags; + u32 val; - if (flags & I40E_TXR_FLAGS_WB_ON_ITR) { - u32 val; + if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR)) + return; - if (q_vector->arm_wb_state) - return; + if (q_vector->arm_wb_state) + return; - val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK; + val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK | + I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; /* set noitr */ - wr32(&vsi->back->hw, - I40E_VFINT_DYN_CTLN1(q_vector->v_idx + - vsi->base_vector - 1), - val); - q_vector->arm_wb_state = true; - } else { - u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ - I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | - I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK; - /* allow 00 to be written to the index */ - - wr32(&vsi->back->hw, - I40E_VFINT_DYN_CTLN1(q_vector->v_idx + - vsi->base_vector - 1), val); - } + wr32(&vsi->back->hw, + I40E_VFINT_DYN_CTLN1(q_vector->v_idx + + vsi->base_vector - 1), val); + q_vector->arm_wb_state = true; +} + +/** + * i40evf_force_wb - Issue SW Interrupt so HW does a wb + * @vsi: the VSI we care about + * @q_vector: the vector on which to force writeback + * + **/ +void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +{ + u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | + I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ + I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | + I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK + /* allow 00 to be written to the index */; + + wr32(&vsi->back->hw, + I40E_VFINT_DYN_CTLN1(q_vector->v_idx + vsi->base_vector - 1), + val); } /** @@ -506,7 +536,7 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) if (rx_bi->page_dma) { dma_unmap_page(dev, rx_bi->page_dma, - PAGE_SIZE / 2, + PAGE_SIZE, DMA_FROM_DEVICE); rx_bi->page_dma = 0; } @@ -641,16 +671,19 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) * i40evf_alloc_rx_buffers_ps - Replace used receive buffers; packet split * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; + const int current_node = numa_node_id(); /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -658,56 +691,79 @@ void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) if (bi->skb) /* desc is in use */ goto no_buffers; + + /* If we've been moved to a different NUMA node, release the + * page so we can get a new one on the current node. + */ + if (bi->page && page_to_nid(bi->page) != current_node) { + dma_unmap_page(rx_ring->dev, + bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(bi->page); + bi->page = NULL; + bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else if (bi->page) { + rx_ring->rx_stats.page_reuse_count++; + } + if (!bi->page) { bi->page = alloc_page(GFP_ATOMIC); if (!bi->page) { rx_ring->rx_stats.alloc_page_failed++; goto no_buffers; } - } - - if (!bi->page_dma) { - /* use a half page if we're re-using */ - bi->page_offset ^= PAGE_SIZE / 2; bi->page_dma = dma_map_page(rx_ring->dev, bi->page, - bi->page_offset, - PAGE_SIZE / 2, + 0, + PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(rx_ring->dev, - bi->page_dma)) { + if (dma_mapping_error(rx_ring->dev, bi->page_dma)) { rx_ring->rx_stats.alloc_page_failed++; + __free_page(bi->page); + bi->page = NULL; bi->page_dma = 0; + bi->page_offset = 0; goto no_buffers; } + bi->page_offset = 0; } - dma_sync_single_range_for_device(rx_ring->dev, - bi->dma, - 0, - rx_ring->rx_hdr_len, - DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); + rx_desc->read.pkt_addr = + cpu_to_le64(bi->page_dma + bi->page_offset); rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); i++; if (i == rx_ring->count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** * i40evf_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; @@ -716,7 +772,7 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -724,8 +780,10 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) skb = bi->skb; if (!skb) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_buf_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; goto no_buffers; @@ -743,6 +801,8 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) if (dma_mapping_error(rx_ring->dev, bi->dma)) { rx_ring->rx_stats.alloc_buff_failed++; bi->dma = 0; + dev_kfree_skb(bi->skb); + bi->skb = NULL; goto no_buffers; } } @@ -754,9 +814,19 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** @@ -791,16 +861,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, u16 rx_ptype) { struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); - bool ipv4 = false, ipv6 = false; - bool ipv4_tunnel, ipv6_tunnel; - __wsum rx_udp_csum; - struct iphdr *iph; - __sum16 csum; - - ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); - ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); + bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel; skb->ip_summed = CHECKSUM_NONE; @@ -816,12 +877,10 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (!(decoded.known && decoded.outer_ip)) return; - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) - ipv4 = true; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) - ipv6 = true; + ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -845,36 +904,17 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* If VXLAN traffic has an outer UDPv4 checksum we need to check - * it in the driver, hardware does not do it for us. - * Since L3L4P bit was set we assume a valid IHL value (>=5) - * so the total length of IPv4 header is IHL*4 bytes - * The UDP_0 bit *may* bet set if the *inner* header is UDP + /* The hardware supported by this driver does not validate outer + * checksums for tunneled VXLAN or GENEVE frames. I don't agree + * with it but the specification states that you "MAY validate", it + * doesn't make it a hard requirement so if we have validated the + * inner checksum report CHECKSUM_UNNECESSARY. */ - if (ipv4_tunnel) { - skb->transport_header = skb->mac_header + - sizeof(struct ethhdr) + - (ip_hdr(skb)->ihl * 4); - - /* Add 4 bytes for VLAN tagged packets */ - skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) - ? VLAN_HLEN : 0; - - if ((ip_hdr(skb)->protocol == IPPROTO_UDP) && - (udp_hdr(skb)->check != 0)) { - rx_udp_csum = udp_csum(skb); - iph = ip_hdr(skb); - csum = csum_tcpudp_magic(iph->saddr, iph->daddr, - (skb->len - - skb_transport_offset(skb)), - IPPROTO_UDP, rx_udp_csum); - - if (udp_hdr(skb)->check != csum) - goto checksum_fail; - - } /* else its GRE and so no outer UDP header */ - } + + ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); + ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = ipv4_tunnel || ipv6_tunnel; @@ -939,18 +979,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, * * Returns true if there's any budget left (e.g. the clean is finished) **/ -static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) +static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - const int current_node = numa_mem_id(); struct i40e_vsi *vsi = rx_ring->vsi; u16 i = rx_ring->next_to_clean; union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; + bool failure = false; u8 rx_ptype; u64 qword; + u32 copysize; do { struct i40e_rx_buffer *rx_bi; @@ -958,7 +999,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40evf_alloc_rx_buffers_ps(rx_ring, cleaned_count); + failure = failure || + i40evf_alloc_rx_buffers_ps(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -976,13 +1019,22 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) * DD bit is set. */ dma_rmb(); + /* sync header buffer for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, + rx_ring->rx_hdr_len, + DMA_FROM_DEVICE); rx_bi = &rx_ring->rx_bi[i]; skb = rx_bi->skb; if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_hdr_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_hdr_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; + failure = true; break; } @@ -990,8 +1042,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) skb_record_rx_queue(skb, rx_ring->queue_index); /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - 0, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, rx_ring->rx_hdr_len, DMA_FROM_DEVICE); } @@ -1009,9 +1061,16 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; - prefetch(rx_bi->page); + /* sync half-page for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_bi->page_dma, + rx_bi->page_offset, + PAGE_SIZE / 2, + DMA_FROM_DEVICE); + prefetch(page_address(rx_bi->page) + rx_bi->page_offset); rx_bi->skb = NULL; cleaned_count++; + copysize = 0; if (rx_hbo || rx_sph) { int len; @@ -1022,38 +1081,50 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len); } else if (skb->len == 0) { int len; + unsigned char *va = page_address(rx_bi->page) + + rx_bi->page_offset; - len = (rx_packet_len > skb_headlen(skb) ? - skb_headlen(skb) : rx_packet_len); - memcpy(__skb_put(skb, len), - rx_bi->page + rx_bi->page_offset, - len); - rx_bi->page_offset += len; + len = min(rx_packet_len, rx_ring->rx_hdr_len); + memcpy(__skb_put(skb, len), va, len); + copysize = len; rx_packet_len -= len; } - /* Get the rest of the data if this was a header split */ if (rx_packet_len) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_bi->page, - rx_bi->page_offset, - rx_packet_len); - - skb->len += rx_packet_len; - skb->data_len += rx_packet_len; - skb->truesize += rx_packet_len; - - if ((page_count(rx_bi->page) == 1) && - (page_to_nid(rx_bi->page) == current_node)) - get_page(rx_bi->page); - else + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + rx_bi->page, + rx_bi->page_offset + copysize, + rx_packet_len, I40E_RXBUFFER_2048); + + /* If the page count is more than 2, then both halves + * of the page are used and we need to free it. Do it + * here instead of in the alloc code. Otherwise one + * of the half-pages might be released between now and + * then, and we wouldn't know which one to use. + * Don't call get_page and free_page since those are + * both expensive atomic operations that just change + * the refcount in opposite directions. Just give the + * page to the stack; he can have our refcount. + */ + if (page_count(rx_bi->page) > 2) { + dma_unmap_page(rx_ring->dev, + rx_bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); rx_bi->page = NULL; + rx_bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else { + get_page(rx_bi->page); + /* switch to the other half-page here; the + * allocation code programs the right addr + * into HW. If we haven't used this half-page, + * the address won't be changed, and HW can + * just use it next time through. + */ + rx_bi->page_offset ^= PAGE_SIZE / 2; + } - dma_unmap_page(rx_ring->dev, - rx_bi->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - rx_bi->page_dma = 0; } I40E_RX_INCREMENT(rx_ring, i); @@ -1105,7 +1176,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } /** @@ -1123,6 +1194,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; u16 rx_packet_len; + bool failure = false; u8 rx_ptype; u64 qword; u16 i; @@ -1133,7 +1205,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40evf_alloc_rx_buffers_1buf(rx_ring, cleaned_count); + failure = failure || + i40evf_alloc_rx_buffers_1buf(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1214,7 +1288,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } static u32 i40e_buildreg_itr(const int type, const u16 itr) @@ -1222,7 +1296,9 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | + /* Don't clear PBA because that can cause lost interrupts that + * came in while we were cleaning/polling + */ (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); @@ -1335,7 +1411,8 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { - clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); + clean_complete = clean_complete && + i40e_clean_tx_irq(ring, vsi->work_limit); arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1359,7 +1436,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) work_done += cleaned; /* if we didn't clean as many as budgeted, we must be done */ - clean_complete &= (budget_per_ring != cleaned); + clean_complete = clean_complete && (budget_per_ring > cleaned); } /* If work not completed, return budget and polling will return */ @@ -1367,7 +1444,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) tx_only: if (arm_wb) { q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40evf_force_wb(vsi, q_vector); + i40e_enable_wb_on_itr(vsi, q_vector); } return budget; } @@ -1447,13 +1524,23 @@ out: static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { - u32 cd_cmd, cd_tso_len, cd_mss; - struct ipv6hdr *ipv6h; - struct tcphdr *tcph; - struct iphdr *iph; - u32 l4len; + u64 cd_cmd, cd_tso_len, cd_mss; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; int err; + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + if (!skb_is_gso(skb)) return 0; @@ -1461,35 +1548,60 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, if (err < 0) return err; - iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); - ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); - - if (iph->version == 4) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - iph->tot_len = 0; - iph->check = 0; - tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - 0, IPPROTO_TCP, 0); - } else if (ipv6h->version == 6) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - ipv6h->payload_len = 0; - tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - 0, IPPROTO_TCP, 0); + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + + if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL_CSUM)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + /* determine offset of outer transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from outer checksum */ + paylen = (__force u16)l4.udp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.udp->check = ~csum_fold((__force __wsum)paylen); + } + + /* reset pointers to inner headers */ + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } } - l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); - *hdr_len = (skb->encapsulation - ? (skb_inner_transport_header(skb) - skb->data) - : skb_transport_offset(skb)) + l4len; + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = (__force u16)l4.tcp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.tcp->check = ~csum_fold((__force __wsum)paylen); + + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; /* find the field values */ cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; cd_mss = skb_shinfo(skb)->gso_size; - *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | - ((u64)cd_tso_len << - I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | - ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | + (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); return 1; } @@ -1499,129 +1611,157 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_flags: pointer to Tx flags currently set * @td_cmd: Tx descriptor command bits to set * @td_offset: Tx descriptor header offsets to set + * @tx_ring: Tx descriptor ring * @cd_tunneling: ptr to context desc bits **/ -static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, - u32 *td_cmd, u32 *td_offset, - struct i40e_ring *tx_ring, - u32 *cd_tunneling) +static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, + u32 *td_cmd, u32 *td_offset, + struct i40e_ring *tx_ring, + u32 *cd_tunneling) { - struct ipv6hdr *this_ipv6_hdr; - unsigned int this_tcp_hdrlen; - struct iphdr *this_ip_hdr; - u32 network_hdr_len; - u8 l4_hdr = 0; - struct udphdr *oudph; - struct iphdr *oiph; - u32 l4_tunnel = 0; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + unsigned char *exthdr; + u32 offset, cmd = 0, tunnel = 0; + __be16 frag_off; + u8 l4_proto = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; if (skb->encapsulation) { - switch (ip_hdr(skb)->protocol) { + /* define outer network header type */ + if (*tx_flags & I40E_TX_FLAGS_IPV4) { + tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_CTX_EXT_IP_IPV4 : + I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { + tunnel |= I40E_TX_CTX_EXT_IP_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + } + + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* define outer transport */ + switch (l4_proto) { case IPPROTO_UDP: - oudph = udp_hdr(skb); - oiph = ip_hdr(skb); - l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; + tunnel |= I40E_TXD_CTX_UDP_TUNNELING; + *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; + break; + case IPPROTO_GRE: + tunnel |= I40E_TXD_CTX_GRE_TUNNELING; *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; break; default: - return; - } - network_hdr_len = skb_inner_network_header_len(skb); - this_ip_hdr = inner_ip_hdr(skb); - this_ipv6_hdr = inner_ipv6_hdr(skb); - this_tcp_hdrlen = inner_tcp_hdrlen(skb); - - if (*tx_flags & I40E_TX_FLAGS_IPV4) { - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; - ip_hdr(skb)->check = 0; - } else { - *cd_tunneling |= - I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - } - } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; if (*tx_flags & I40E_TX_FLAGS_TSO) - ip_hdr(skb)->check = 0; - } + return -1; - /* Now set the ctx descriptor fields */ - *cd_tunneling |= (skb_network_header_len(skb) >> 2) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | - l4_tunnel | - ((skb_inner_network_offset(skb) - - skb_transport_offset(skb)) >> 1) << - I40E_TXD_CTX_QW0_NATLEN_SHIFT; - if (this_ip_hdr->version == 6) { - *tx_flags &= ~I40E_TX_FLAGS_IPV4; - *tx_flags |= I40E_TX_FLAGS_IPV6; + skb_checksum_help(skb); + return 0; } - if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && - (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && - (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { - oudph->check = ~csum_tcpudp_magic(oiph->saddr, - oiph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, 0); - *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK; - } - } else { - network_hdr_len = skb_network_header_len(skb); - this_ip_hdr = ip_hdr(skb); - this_ipv6_hdr = ipv6_hdr(skb); - this_tcp_hdrlen = tcp_hdrlen(skb); + /* compute tunnel header size */ + tunnel |= ((ip.hdr - l4.hdr) / 2) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; + + /* indicate if we need to offload outer UDP header */ + if ((*tx_flags & I40E_TX_FLAGS_TSO) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; + + /* record tunnel offload values */ + *cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + *tx_flags |= I40E_TX_FLAGS_IPV4; + if (ip.v6->version == 6) + *tx_flags |= I40E_TX_FLAGS_IPV6; } /* Enable IP checksum offloads */ if (*tx_flags & I40E_TX_FLAGS_IPV4) { - l4_hdr = this_ip_hdr->protocol; + l4_proto = ip.v4->protocol; /* the stack computes the IP header already, the only time we * need the hardware to recompute it is in the case of TSO. */ - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; - this_ip_hdr->check = 0; - } else { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; - } - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_DESC_CMD_IIPT_IPV4_CSUM : + I40E_TX_DESC_CMD_IIPT_IPV4; } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - l4_hdr = this_ipv6_hdr->nexthdr; - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); } - /* words in MACLEN + dwords in IPLEN + dwords in L4Len */ - *td_offset |= (skb_network_offset(skb) >> 1) << - I40E_TX_DESC_LENGTH_MACLEN_SHIFT; + + /* compute inner L3 header size */ + offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; /* Enable L4 checksum offloads */ - switch (l4_hdr) { + switch (l4_proto) { case IPPROTO_TCP: /* enable checksum offloads */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; - *td_offset |= (this_tcp_hdrlen >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; + offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_SCTP: /* enable SCTP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; - *td_offset |= (sizeof(struct sctphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; + offset |= (sizeof(struct sctphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_UDP: /* enable UDP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; - *td_offset |= (sizeof(struct udphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; + offset |= (sizeof(struct udphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; default: - break; + if (*tx_flags & I40E_TX_FLAGS_TSO) + return -1; + skb_checksum_help(skb); + return 0; } + + *td_cmd |= cmd; + *td_offset |= offset; + + return 1; } /** @@ -1656,59 +1796,71 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, } /** - * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * __i40evf_chk_linearize - Check if there are more than 8 fragments per packet * @skb: send buffer - * @tx_flags: collected send information * * Note: Our HW can't scatter-gather more than 8 fragments to build * a packet on the wire and so we need to figure out the cases where we * need to linearize the skb. **/ -static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) +bool __i40evf_chk_linearize(struct sk_buff *skb) { - struct skb_frag_struct *frag; - bool linearize = false; - unsigned int size = 0; - u16 num_frags; - u16 gso_segs; + const struct skb_frag_struct *frag, *stale; + int gso_size, nr_frags, sum; - num_frags = skb_shinfo(skb)->nr_frags; - gso_segs = skb_shinfo(skb)->gso_segs; + /* check to see if TSO is enabled, if so we may get a repreive */ + gso_size = skb_shinfo(skb)->gso_size; + if (unlikely(!gso_size)) + return true; - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { - u16 j = 0; + /* no need to check if number of frags is less than 8 */ + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags < I40E_MAX_BUFFER_TXD) + return false; - if (num_frags < (I40E_MAX_BUFFER_TXD)) - goto linearize_chk_done; - /* try the simple math, if we have too many frags per segment */ - if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > - I40E_MAX_BUFFER_TXD) { - linearize = true; - goto linearize_chk_done; - } - frag = &skb_shinfo(skb)->frags[0]; - /* we might still have more fragments per segment */ - do { - size += skb_frag_size(frag); - frag++; j++; - if ((size >= skb_shinfo(skb)->gso_size) && - (j < I40E_MAX_BUFFER_TXD)) { - size = (size % skb_shinfo(skb)->gso_size); - j = (size) ? 1 : 0; - } - if (j == I40E_MAX_BUFFER_TXD) { - linearize = true; - break; - } - num_frags--; - } while (num_frags); - } else { - if (num_frags >= I40E_MAX_BUFFER_TXD) - linearize = true; + /* We need to walk through the list and validate that each group + * of 6 fragments totals at least gso_size. However we don't need + * to perform such validation on the first or last 6 since the first + * 6 cannot inherit any data from a descriptor before them, and the + * last 6 cannot inherit any data from a descriptor after them. + */ + nr_frags -= I40E_MAX_BUFFER_TXD - 1; + frag = &skb_shinfo(skb)->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We + * use this as the worst case scenerio in which the frag ahead + * of us only provides one byte which is why we are limited to 6 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - gso_size; + + /* Add size of frags 1 through 5 to create our initial sum */ + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + stale = &skb_shinfo(skb)->frags[0]; + for (;;) { + sum += skb_frag_size(++frag); + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + /* use pre-decrement to avoid processing last fragment */ + if (!--nr_frags) + break; + + sum -= skb_frag_size(++stale); } -linearize_chk_done: - return linearize; + return false; } /** @@ -1718,7 +1870,7 @@ linearize_chk_done: * * Returns -EBUSY if a stop is needed, else 0 **/ -static inline int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); /* Memory barrier before checking head and tail */ @@ -1735,20 +1887,6 @@ static inline int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) } /** - * i40evf_maybe_stop_tx - 1st level check for tx stop conditions - * @tx_ring: the ring to be checked - * @size: the size buffer we want to assure is available - * - * Returns 0 if stop is not needed - **/ -static inline int i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -{ - if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) - return 0; - return __i40evf_maybe_stop_tx(tx_ring, size); -} - -/** * i40evf_tx_map - Build the Tx descriptor * @tx_ring: ring to send buffer on * @skb: send buffer @@ -1863,7 +2001,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), first->bytecount); - i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED); + i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: * if xmit_more is supported @@ -1946,38 +2084,6 @@ dma_error: } /** - * i40evf_xmit_descriptor_count - calculate number of tx descriptors needed - * @skb: send buffer - * @tx_ring: ring to send buffer on - * - * Returns number of data descriptors needed for this skb. Returns 0 to indicate - * there is not enough descriptors available in this ring since we need at least - * one descriptor. - **/ -static inline int i40evf_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -{ - unsigned int f; - int count = 0; - - /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, - * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 4 desc gap to avoid the cache line where head is, - * + 1 desc for context descriptor, - * otherwise try next time - */ - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); - - count += TXD_USE_COUNT(skb_headlen(skb)); - if (i40evf_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; - return 0; - } - return count; -} - -/** * i40e_xmit_frame_ring - Sends buffer on Tx ring * @skb: send buffer * @tx_ring: ring to send buffer on @@ -1995,13 +2101,29 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, __be16 protocol; u32 td_cmd = 0; u8 hdr_len = 0; - int tso; + int tso, count; /* prefetch the data, we'll need it later */ prefetch(skb->data); - if (0 == i40evf_xmit_descriptor_count(skb, tx_ring)) + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = TXD_USE_COUNT(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* prepare the xmit flags */ if (i40evf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) @@ -2026,24 +2148,17 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (tso) tx_flags |= I40E_TX_FLAGS_TSO; - if (i40e_chk_linearize(skb, tx_flags)) { - if (skb_linearize(skb)) - goto out_drop; - tx_ring->tx_stats.tx_linearize++; - } + /* Always offload the checksum, since it's in the data descriptor */ + tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, + tx_ring, &cd_tunneling); + if (tso < 0) + goto out_drop; + skb_tx_timestamp(skb); /* always enable CRC insertion offload */ td_cmd |= I40E_TX_DESC_CMD_ICRC; - /* Always offload the checksum, since it's in the data descriptor */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - tx_flags |= I40E_TX_FLAGS_CSUM; - - i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, - tx_ring, &cd_tunneling); - } - i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, cd_tunneling, cd_l2tag2); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index e29bb3e86cfd..c1dd8c5c9666 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -153,7 +153,6 @@ enum i40e_dyn_idx_t { #define DESC_NEEDED (MAX_SKB_FRAGS + 4) #define I40E_MIN_DESC_PENDING 4 -#define I40E_TX_FLAGS_CSUM BIT(0) #define I40E_TX_FLAGS_HW_VLAN BIT(1) #define I40E_TX_FLAGS_SW_VLAN BIT(2) #define I40E_TX_FLAGS_TSO BIT(3) @@ -202,12 +201,15 @@ struct i40e_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; + u64 tx_lost_interrupt; }; struct i40e_rx_queue_stats { u64 non_eop_descs; u64 alloc_page_failed; u64 alloc_buff_failed; + u64 page_reuse_count; + u64 realloc_count; }; enum i40e_ring_state_t { @@ -253,7 +255,6 @@ struct i40e_ring { #define I40E_RX_DTYPE_NO_SPLIT 0 #define I40E_RX_DTYPE_HEADER_SPLIT 1 #define I40E_RX_DTYPE_SPLIT_ALWAYS 2 - u8 hsplit; #define I40E_RX_SPLIT_L2 0x1 #define I40E_RX_SPLIT_IP 0x2 #define I40E_RX_SPLIT_TCP_UDP 0x4 @@ -273,7 +274,6 @@ struct i40e_ring { u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) -#define I40E_TXR_FLAGS_OUTER_UDP_CSUM BIT(1) /* stats structs */ struct i40e_queue_stats stats; @@ -313,8 +313,8 @@ struct i40e_ring_container { #define i40e_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) -void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); -void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); +bool i40evf_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); +bool i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); void i40evf_alloc_rx_headers(struct i40e_ring *rxr); netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void i40evf_clean_tx_ring(struct i40e_ring *tx_ring); @@ -324,7 +324,10 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring); void i40evf_free_tx_resources(struct i40e_ring *tx_ring); void i40evf_free_rx_resources(struct i40e_ring *rx_ring); int i40evf_napi_poll(struct napi_struct *napi, int budget); -u32 i40evf_get_tx_pending(struct i40e_ring *ring); +void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); +u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw); +int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size); +bool __i40evf_chk_linearize(struct sk_buff *skb); /** * i40e_get_head - Retrieve head from head writeback @@ -339,4 +342,63 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring) return le32_to_cpu(*(volatile __le32 *)head); } + +/** + * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed + * @skb: send buffer + * @tx_ring: ring to send buffer on + * + * Returns number of data descriptors needed for this skb. Returns 0 to indicate + * there is not enough descriptors available in this ring since we need at least + * one descriptor. + **/ +static inline int i40e_xmit_descriptor_count(struct sk_buff *skb) +{ + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + int count = 0, size = skb_headlen(skb); + + for (;;) { + count += TXD_USE_COUNT(size); + + if (!nr_frags--) + break; + + size = skb_frag_size(frag++); + } + + return count; +} + +/** + * i40e_maybe_stop_tx - 1st level check for Tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns 0 if stop is not needed + **/ +static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +{ + if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) + return 0; + return __i40evf_maybe_stop_tx(tx_ring, size); +} + +/** + * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @count: number of buffers used + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + **/ +static inline bool i40e_chk_linearize(struct sk_buff *skb, int count) +{ + /* we can only support up to 8 data buffers for a single send */ + if (likely(count <= I40E_MAX_BUFFER_TXD)) + return false; + + return __i40evf_chk_linearize(skb); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index be1b72b93888..e657eccd232c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -173,6 +173,7 @@ enum i40evf_state_t { __I40EVF_RESETTING, /* in reset */ /* Below here, watchdog is running */ __I40EVF_DOWN, /* ready, can be opened */ + __I40EVF_DOWN_PENDING, /* descending, waiting for watchdog */ __I40EVF_TESTING, /* in ethtool self-test */ __I40EVF_RUNNING, /* opened, working */ }; @@ -273,6 +274,9 @@ struct i40evf_adapter { }; +/* Ethtool Private Flags */ +#define I40EVF_PRIV_FLAGS_PS BIT(0) + /* needed by i40evf_ethtool.c */ extern char i40evf_driver_name[]; extern const char i40evf_driver_version[]; @@ -280,6 +284,7 @@ extern const char i40evf_driver_version[]; int i40evf_up(struct i40evf_adapter *adapter); void i40evf_down(struct i40evf_adapter *adapter); int i40evf_process_config(struct i40evf_adapter *adapter); +void i40evf_schedule_reset(struct i40evf_adapter *adapter); void i40evf_reset(struct i40evf_adapter *adapter); void i40evf_set_ethtool_ops(struct net_device *netdev); void i40evf_update_stats(struct i40evf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index a4c9feb589e7..dd4430aae7fa 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -63,6 +63,12 @@ static const struct i40evf_stats i40evf_gstrings_stats[] = { #define I40EVF_STATS_LEN(_dev) \ (I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN(_dev)) +static const char i40evf_priv_flags_strings[][ETH_GSTRING_LEN] = { + "packet-split", +}; + +#define I40EVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40evf_priv_flags_strings) + /** * i40evf_get_settings - Get Link Speed and Duplex settings * @netdev: network interface device structure @@ -97,6 +103,8 @@ static int i40evf_get_sset_count(struct net_device *netdev, int sset) { if (sset == ETH_SS_STATS) return I40EVF_STATS_LEN(netdev); + else if (sset == ETH_SS_PRIV_FLAGS) + return I40EVF_PRIV_FLAGS_STR_LEN; else return -EINVAL; } @@ -162,6 +170,12 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data) snprintf(p, ETH_GSTRING_LEN, "rx-%u.bytes", i); p += ETH_GSTRING_LEN; } + } else if (sset == ETH_SS_PRIV_FLAGS) { + for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) { + memcpy(data, i40evf_priv_flags_strings[i], + ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } } } @@ -211,6 +225,7 @@ static void i40evf_get_drvinfo(struct net_device *netdev, strlcpy(drvinfo->version, i40evf_driver_version, 32); strlcpy(drvinfo->fw_version, "N/A", 4); strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), 32); + drvinfo->n_priv_flags = I40EVF_PRIV_FLAGS_STR_LEN; } /** @@ -459,6 +474,7 @@ static int i40evf_set_rss_hash_opt(struct i40evf_adapter *adapter, struct ethtool_rxnfc *nfc) { struct i40e_hw *hw = &adapter->hw; + u32 flags = adapter->vf_res->vf_offload_flags; u64 hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) | ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32); @@ -477,54 +493,50 @@ static int i40evf_set_rss_hash_opt(struct i40evf_adapter *adapter, switch (nfc->flow_type) { case TCP_V4_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; - default: + } else { return -EINVAL; } break; case TCP_V6_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; - default: + } else { return -EINVAL; } break; case UDP_V4_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; - default: + } else { return -EINVAL; } break; case UDP_V6_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; - default: + } else { return -EINVAL; } break; @@ -713,6 +725,54 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, I40EVF_HLUT_ARRAY_SIZE); } +/** + * i40evf_get_priv_flags - report device private flags + * @dev: network interface device structure + * + * The get string set count and the string set should be matched for each + * flag returned. Add new strings for each flag to the i40e_priv_flags_strings + * array. + * + * Returns a u32 bitmap of flags. + **/ +static u32 i40evf_get_priv_flags(struct net_device *dev) +{ + struct i40evf_adapter *adapter = netdev_priv(dev); + u32 ret_flags = 0; + + ret_flags |= adapter->flags & I40EVF_FLAG_RX_PS_ENABLED ? + I40EVF_PRIV_FLAGS_PS : 0; + + return ret_flags; +} + +/** + * i40evf_set_priv_flags - set private flags + * @dev: network interface device structure + * @flags: bit flags to be set + **/ +static int i40evf_set_priv_flags(struct net_device *dev, u32 flags) +{ + struct i40evf_adapter *adapter = netdev_priv(dev); + bool reset_required = false; + + if ((flags & I40EVF_PRIV_FLAGS_PS) && + !(adapter->flags & I40EVF_FLAG_RX_PS_ENABLED)) { + adapter->flags |= I40EVF_FLAG_RX_PS_ENABLED; + reset_required = true; + } else if (!(flags & I40EVF_PRIV_FLAGS_PS) && + (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED)) { + adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; + reset_required = true; + } + + /* if needed, issue reset to cause things to take effect */ + if (reset_required) + i40evf_schedule_reset(adapter); + + return 0; +} + static const struct ethtool_ops i40evf_ethtool_ops = { .get_settings = i40evf_get_settings, .get_drvinfo = i40evf_get_drvinfo, @@ -722,6 +782,8 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .get_strings = i40evf_get_strings, .get_ethtool_stats = i40evf_get_ethtool_stats, .get_sset_count = i40evf_get_sset_count, + .get_priv_flags = i40evf_get_priv_flags, + .set_priv_flags = i40evf_set_priv_flags, .get_msglevel = i40evf_get_msglevel, .set_msglevel = i40evf_set_msglevel, .get_coalesce = i40evf_get_coalesce, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 94da913b151d..4b70aae2fa84 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -32,13 +32,13 @@ static int i40evf_close(struct net_device *netdev); char i40evf_driver_name[] = "i40evf"; static const char i40evf_driver_string[] = - "Intel(R) XL710/X710 Virtual Function Network Driver"; + "Intel(R) 40-10 Gigabit Virtual Function Network Driver"; #define DRV_KERN "-k" #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 4 -#define DRV_VERSION_BUILD 4 +#define DRV_VERSION_BUILD 15 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -69,6 +69,8 @@ MODULE_DESCRIPTION("Intel(R) XL710 X710 Virtual Function Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static struct workqueue_struct *i40evf_wq; + /** * i40evf_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure @@ -171,6 +173,19 @@ void i40evf_debug_d(void *hw, u32 mask, char *fmt_str, ...) } /** + * i40evf_schedule_reset - Set the flags and schedule a reset event + * @adapter: board private structure + **/ +void i40evf_schedule_reset(struct i40evf_adapter *adapter) +{ + if (!(adapter->flags & + (I40EVF_FLAG_RESET_PENDING | I40EVF_FLAG_RESET_NEEDED))) { + adapter->flags |= I40EVF_FLAG_RESET_NEEDED; + schedule_work(&adapter->reset_task); + } +} + +/** * i40evf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ @@ -179,11 +194,7 @@ static void i40evf_tx_timeout(struct net_device *netdev) struct i40evf_adapter *adapter = netdev_priv(netdev); adapter->tx_timeout_count++; - if (!(adapter->flags & (I40EVF_FLAG_RESET_PENDING | - I40EVF_FLAG_RESET_NEEDED))) { - adapter->flags |= I40EVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); - } + i40evf_schedule_reset(adapter); } /** @@ -636,35 +647,22 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter) int rx_buf_len; - adapter->flags &= ~I40EVF_FLAG_RX_PS_CAPABLE; - adapter->flags |= I40EVF_FLAG_RX_1BUF_CAPABLE; - - /* Decide whether to use packet split mode or not */ - if (netdev->mtu > ETH_DATA_LEN) { - if (adapter->flags & I40EVF_FLAG_RX_PS_CAPABLE) - adapter->flags |= I40EVF_FLAG_RX_PS_ENABLED; - else - adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; - } else { - if (adapter->flags & I40EVF_FLAG_RX_1BUF_CAPABLE) - adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; - else - adapter->flags |= I40EVF_FLAG_RX_PS_ENABLED; - } - /* Set the RX buffer length according to the mode */ - if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { - rx_buf_len = I40E_RX_HDR_SIZE; - } else { - if (netdev->mtu <= ETH_DATA_LEN) - rx_buf_len = I40EVF_RXBUFFER_2048; - else - rx_buf_len = ALIGN(max_frame, 1024); - } + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED || + netdev->mtu <= ETH_DATA_LEN) + rx_buf_len = I40EVF_RXBUFFER_2048; + else + rx_buf_len = ALIGN(max_frame, 1024); for (i = 0; i < adapter->num_active_queues; i++) { adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i); adapter->rx_rings[i].rx_buf_len = rx_buf_len; + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { + set_ring_ps_enabled(&adapter->rx_rings[i]); + adapter->rx_rings[i].rx_hdr_len = I40E_RX_HDR_SIZE; + } else { + clear_ring_ps_enabled(&adapter->rx_rings[i]); + } } } @@ -1001,7 +999,12 @@ static void i40evf_configure(struct i40evf_adapter *adapter) for (i = 0; i < adapter->num_active_queues; i++) { struct i40e_ring *ring = &adapter->rx_rings[i]; + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { + i40evf_alloc_rx_headers(ring); + i40evf_alloc_rx_buffers_ps(ring, ring->count); + } else { i40evf_alloc_rx_buffers_1buf(ring, ring->count); + } ring->next_to_use = ring->count - 1; writel(ring->next_to_use, ring->tail); } @@ -1032,7 +1035,7 @@ void i40evf_down(struct i40evf_adapter *adapter) struct net_device *netdev = adapter->netdev; struct i40evf_mac_filter *f; - if (adapter->state == __I40EVF_DOWN) + if (adapter->state <= __I40EVF_DOWN_PENDING) return; while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, @@ -1122,7 +1125,9 @@ static void i40evf_free_queues(struct i40evf_adapter *adapter) if (!adapter->vsi_res) return; kfree(adapter->tx_rings); + adapter->tx_rings = NULL; kfree(adapter->rx_rings); + adapter->rx_rings = NULL; } /** @@ -1454,7 +1459,11 @@ static int i40evf_init_rss(struct i40evf_adapter *adapter) int ret; /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ - hena = I40E_DEFAULT_RSS_HENA; + if (adapter->vf_res->vf_offload_flags & + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena = I40E_DEFAULT_RSS_HENA_EXPANDED; + else + hena = I40E_DEFAULT_RSS_HENA; wr32(hw, I40E_VFQF_HENA(0), (u32)hena); wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); @@ -1829,6 +1838,7 @@ static void i40evf_reset_task(struct work_struct *work) break; msleep(I40EVF_RESET_WAIT_MS); } + pci_set_master(adapter->pdev); /* extra wait to make sure minimum wait is met */ msleep(I40EVF_RESET_WAIT_MS); if (i == I40EVF_RESET_WAIT_COUNT) { @@ -1873,6 +1883,7 @@ static void i40evf_reset_task(struct work_struct *work) adapter->netdev->flags &= ~IFF_UP; clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); adapter->flags &= ~I40EVF_FLAG_RESET_PENDING; + adapter->state = __I40EVF_DOWN; dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -2142,7 +2153,8 @@ static int i40evf_open(struct net_device *netdev) dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n"); return -EIO; } - if (adapter->state != __I40EVF_DOWN || adapter->aq_required) + + if (adapter->state != __I40EVF_DOWN) return -EBUSY; /* allocate transmit descriptors */ @@ -2197,14 +2209,14 @@ static int i40evf_close(struct net_device *netdev) { struct i40evf_adapter *adapter = netdev_priv(netdev); - if (adapter->state <= __I40EVF_DOWN) + if (adapter->state <= __I40EVF_DOWN_PENDING) return 0; set_bit(__I40E_DOWN, &adapter->vsi.state); i40evf_down(adapter); - adapter->state = __I40EVF_DOWN; + adapter->state = __I40EVF_DOWN_PENDING; i40evf_free_traffic_irqs(adapter); return 0; @@ -2325,9 +2337,24 @@ int i40evf_process_config(struct i40evf_adapter *adapter) NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXCSUM | NETIF_F_GRO; + netdev->hw_enc_features |= NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + + if (adapter->flags & I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE) + netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; netdev->hw_features &= ~NETIF_F_RXCSUM; @@ -2466,11 +2493,20 @@ static void i40evf_init_task(struct work_struct *work) default: goto err_alloc; } + + if (hw->mac.type == I40E_MAC_X722_VF) + adapter->flags |= I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE; + if (i40evf_process_config(adapter)) goto err_alloc; adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; adapter->flags |= I40EVF_FLAG_RX_CSUM_ENABLED; + adapter->flags |= I40EVF_FLAG_RX_1BUF_CAPABLE; + adapter->flags |= I40EVF_FLAG_RX_PS_CAPABLE; + + /* Default to single buffer rx, can be changed through ethtool. */ + adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; netdev->netdev_ops = &i40evf_netdev_ops; i40evf_set_ethtool_ops(netdev); @@ -2502,10 +2538,9 @@ static void i40evf_init_task(struct work_struct *work) goto err_sw_init; i40evf_map_rings_to_vectors(adapter); if (adapter->vf_res->vf_offload_flags & - I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) + I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) adapter->flags |= I40EVF_FLAG_WB_ON_ITR_CAPABLE; - if (!RSS_AQ(adapter)) - i40evf_init_rss(adapter); + err = i40evf_request_misc_irq(adapter); if (err) goto err_sw_init; @@ -2885,6 +2920,11 @@ static int __init i40evf_init_module(void) pr_info("%s\n", i40evf_copyright); + i40evf_wq = create_singlethread_workqueue(i40evf_driver_name); + if (!i40evf_wq) { + pr_err("%s: Failed to create workqueue\n", i40evf_driver_name); + return -ENOMEM; + } ret = pci_register_driver(&i40evf_driver); return ret; } @@ -2900,6 +2940,7 @@ module_init(i40evf_init_module); static void __exit i40evf_exit_module(void) { pci_unregister_driver(&i40evf_driver); + destroy_workqueue(i40evf_wq); } module_exit(i40evf_exit_module); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index c1c526283757..488e738f76c6 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -270,6 +270,10 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter) vqpi->rxq.max_pkt_size = adapter->netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len; + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { + vqpi->rxq.splithdr_enabled = true; + vqpi->rxq.hdr_size = I40E_RX_HDR_SIZE; + } vqpi++; } @@ -804,6 +808,8 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case I40E_VIRTCHNL_OP_DISABLE_QUEUES: i40evf_free_all_tx_resources(adapter); i40evf_free_all_rx_resources(adapter); + if (adapter->state == __I40EVF_DOWN_PENDING) + adapter->state = __I40EVF_DOWN; break; case I40E_VIRTCHNL_OP_VERSION: case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index adb33e2a0137..9a1a9c7b0748 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -34,6 +34,7 @@ #include "e1000_mac.h" #include "e1000_82575.h" #include "e1000_i210.h" +#include "igb.h" static s32 igb_get_invariants_82575(struct e1000_hw *); static s32 igb_acquire_phy_82575(struct e1000_hw *); @@ -71,6 +72,32 @@ static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw); static const u16 e1000_82580_rxpbs_table[] = { 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; +/* Due to a hw errata, if the host tries to configure the VFTA register + * while performing queries from the BMC or DMA, then the VFTA in some + * cases won't be written. + */ + +/** + * igb_write_vfta_i350 - Write value to VLAN filter table + * @hw: pointer to the HW structure + * @offset: register offset in VLAN filter table + * @value: register value written to VLAN filter table + * + * Writes value at the given offset in the register array which stores + * the VLAN filter table. + **/ +static void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) +{ + struct igb_adapter *adapter = hw->back; + int i; + + for (i = 10; i--;) + array_wr32(E1000_VFTA, offset, value); + + wrfl(); + adapter->shadow_vfta[offset] = value; +} + /** * igb_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO * @hw: pointer to the HW structure @@ -398,6 +425,8 @@ static s32 igb_init_mac_params_82575(struct e1000_hw *hw) /* Set mta register count */ mac->mta_reg_count = 128; + /* Set uta register count */ + mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128; /* Set rar entry count */ switch (mac->type) { case e1000_82576: @@ -429,6 +458,11 @@ static s32 igb_init_mac_params_82575(struct e1000_hw *hw) mac->ops.release_swfw_sync = igb_release_swfw_sync_82575; } + if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) + mac->ops.write_vfta = igb_write_vfta_i350; + else + mac->ops.write_vfta = igb_write_vfta; + /* Set if part includes ASF firmware */ mac->asf_firmware_present = true; /* Set if manageability features are enabled. */ @@ -1517,10 +1551,7 @@ static s32 igb_init_hw_82575(struct e1000_hw *hw) /* Disabling VLAN filtering */ hw_dbg("Initializing the IEEE VLAN\n"); - if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) - igb_clear_vfta_i350(hw); - else - igb_clear_vfta(hw); + igb_clear_vfta(hw); /* Setup the receive address */ igb_init_rx_addrs(hw, rar_count); diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index c3c598c347a9..e9f23ee8f15e 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -356,7 +356,8 @@ /* Ethertype field values */ #define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ -#define MAX_JUMBO_FRAME_SIZE 0x3F00 +/* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */ +#define MAX_JUMBO_FRAME_SIZE 0x2600 /* PBA constants */ #define E1000_PBA_34K 0x0022 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 4034207eb5cc..f0c416e21d2c 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -325,7 +325,7 @@ struct e1000_mac_operations { s32 (*get_thermal_sensor_data)(struct e1000_hw *); s32 (*init_thermal_sensor_thresh)(struct e1000_hw *); #endif - + void (*write_vfta)(struct e1000_hw *, u32, u32); }; struct e1000_phy_operations { diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 2a88595f956c..07cf4fe58338 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -92,10 +92,8 @@ void igb_clear_vfta(struct e1000_hw *hw) { u32 offset; - for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { - array_wr32(E1000_VFTA, offset, 0); - wrfl(); - } + for (offset = E1000_VLAN_FILTER_TBL_SIZE; offset--;) + hw->mac.ops.write_vfta(hw, offset, 0); } /** @@ -107,54 +105,14 @@ void igb_clear_vfta(struct e1000_hw *hw) * Writes value at the given offset in the register array which stores * the VLAN filter table. **/ -static void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) +void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) { + struct igb_adapter *adapter = hw->back; + array_wr32(E1000_VFTA, offset, value); wrfl(); -} - -/* Due to a hw errata, if the host tries to configure the VFTA register - * while performing queries from the BMC or DMA, then the VFTA in some - * cases won't be written. - */ -/** - * igb_clear_vfta_i350 - Clear VLAN filter table - * @hw: pointer to the HW structure - * - * Clears the register array which contains the VLAN filter table by - * setting all the values to 0. - **/ -void igb_clear_vfta_i350(struct e1000_hw *hw) -{ - u32 offset; - int i; - - for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { - for (i = 0; i < 10; i++) - array_wr32(E1000_VFTA, offset, 0); - - wrfl(); - } -} - -/** - * igb_write_vfta_i350 - Write value to VLAN filter table - * @hw: pointer to the HW structure - * @offset: register offset in VLAN filter table - * @value: register value written to VLAN filter table - * - * Writes value at the given offset in the register array which stores - * the VLAN filter table. - **/ -static void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) -{ - int i; - - for (i = 0; i < 10; i++) - array_wr32(E1000_VFTA, offset, value); - - wrfl(); + adapter->shadow_vfta[offset] = value; } /** @@ -183,40 +141,155 @@ void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count) } /** + * igb_find_vlvf_slot - find the VLAN id or the first empty slot + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vlvf_bypass: skip VLVF if no match is found + * + * return the VLVF index where this VLAN id should be placed + * + **/ +static s32 igb_find_vlvf_slot(struct e1000_hw *hw, u32 vlan, bool vlvf_bypass) +{ + s32 regindex, first_empty_slot; + u32 bits; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* if vlvf_bypass is set we don't want to use an empty slot, we + * will simply bypass the VLVF if there are no entries present in the + * VLVF that contain our VLAN + */ + first_empty_slot = vlvf_bypass ? -E1000_ERR_NO_SPACE : 0; + + /* Search for the VLAN id in the VLVF entries. Save off the first empty + * slot found along the way. + * + * pre-decrement loop covering (IXGBE_VLVF_ENTRIES - 1) .. 1 + */ + for (regindex = E1000_VLVF_ARRAY_SIZE; --regindex > 0;) { + bits = rd32(E1000_VLVF(regindex)) & E1000_VLVF_VLANID_MASK; + if (bits == vlan) + return regindex; + if (!first_empty_slot && !bits) + first_empty_slot = regindex; + } + + return first_empty_slot ? : -E1000_ERR_NO_SPACE; +} + +/** * igb_vfta_set - enable or disable vlan in VLAN filter table * @hw: pointer to the HW structure - * @vid: VLAN id to add or remove - * @add: if true add filter, if false remove + * @vlan: VLAN id to add or remove + * @vind: VMDq output index that maps queue to VLAN id + * @vlan_on: if true add filter, if false remove * * Sets or clears a bit in the VLAN filter table array based on VLAN id * and if we are adding or removing the filter **/ -s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add) +s32 igb_vfta_set(struct e1000_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool vlvf_bypass) { - u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK; - u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK); - u32 vfta; struct igb_adapter *adapter = hw->back; - s32 ret_val = 0; + u32 regidx, vfta_delta, vfta, bits; + s32 vlvf_index; - vfta = adapter->shadow_vfta[index]; + if ((vlan > 4095) || (vind > 7)) + return -E1000_ERR_PARAM; - /* bit was set/cleared before we started */ - if ((!!(vfta & mask)) == add) { - ret_val = -E1000_ERR_CONFIG; - } else { - if (add) - vfta |= mask; - else - vfta &= ~mask; + /* this is a 2 part operation - first the VFTA, then the + * VLVF and VLVFB if VT Mode is set + * We don't write the VFTA until we know the VLVF part succeeded. + */ + + /* Part 1 + * The VFTA is a bitstring made up of 128 32-bit registers + * that enable the particular VLAN id, much like the MTA: + * bits[11-5]: which register + * bits[4-0]: which bit in the register + */ + regidx = vlan / 32; + vfta_delta = 1 << (vlan % 32); + vfta = adapter->shadow_vfta[regidx]; + + /* vfta_delta represents the difference between the current value + * of vfta and the value we want in the register. Since the diff + * is an XOR mask we can just update vfta using an XOR. + */ + vfta_delta &= vlan_on ? ~vfta : vfta; + vfta ^= vfta_delta; + + /* Part 2 + * If VT Mode is set + * Either vlan_on + * make sure the VLAN is in VLVF + * set the vind bit in the matching VLVFB + * Or !vlan_on + * clear the pool bit and possibly the vind + */ + if (!adapter->vfs_allocated_count) + goto vfta_update; + + vlvf_index = igb_find_vlvf_slot(hw, vlan, vlvf_bypass); + if (vlvf_index < 0) { + if (vlvf_bypass) + goto vfta_update; + return vlvf_index; } - if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) - igb_write_vfta_i350(hw, index, vfta); - else - igb_write_vfta(hw, index, vfta); - adapter->shadow_vfta[index] = vfta; - return ret_val; + bits = rd32(E1000_VLVF(vlvf_index)); + + /* set the pool bit */ + bits |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vind); + if (vlan_on) + goto vlvf_update; + + /* clear the pool bit */ + bits ^= 1 << (E1000_VLVF_POOLSEL_SHIFT + vind); + + if (!(bits & E1000_VLVF_POOLSEL_MASK)) { + /* Clear VFTA first, then disable VLVF. Otherwise + * we run the risk of stray packets leaking into + * the PF via the default pool + */ + if (vfta_delta) + hw->mac.ops.write_vfta(hw, regidx, vfta); + + /* disable VLVF and clear remaining bit from pool */ + wr32(E1000_VLVF(vlvf_index), 0); + + return 0; + } + + /* If there are still bits set in the VLVFB registers + * for the VLAN ID indicated we need to see if the + * caller is requesting that we clear the VFTA entry bit. + * If the caller has requested that we clear the VFTA + * entry bit but there are still pools/VFs using this VLAN + * ID entry then ignore the request. We're not worried + * about the case where we're turning the VFTA VLAN ID + * entry bit on, only when requested to turn it off as + * there may be multiple pools and/or VFs using the + * VLAN ID entry. In that case we cannot clear the + * VFTA bit until all pools/VFs using that VLAN ID have also + * been cleared. This will be indicated by "bits" being + * zero. + */ + vfta_delta = 0; + +vlvf_update: + /* record pool change and enable VLAN ID if not already enabled */ + wr32(E1000_VLVF(vlvf_index), bits | vlan | E1000_VLVF_VLANID_ENABLE); + +vfta_update: + /* bit was set/cleared before we started */ + if (vfta_delta) + hw->mac.ops.write_vfta(hw, regidx, vfta); + + return 0; } /** diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h index ea24961b0d70..90c8893c3eed 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.h +++ b/drivers/net/ethernet/intel/igb/e1000_mac.h @@ -56,8 +56,9 @@ s32 igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, void igb_clear_hw_cntrs_base(struct e1000_hw *hw); void igb_clear_vfta(struct e1000_hw *hw); -void igb_clear_vfta_i350(struct e1000_hw *hw); -s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add); +void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value); +s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, u32 vind, + bool vlan_on, bool vlvf_bypass); void igb_config_collision_dist(struct e1000_hw *hw); void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count); void igb_mta_set(struct e1000_hw *hw, u32 hash_value); diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c index 162cc49345d0..10f5c9e016a9 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.c +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c @@ -322,14 +322,20 @@ static s32 igb_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) { s32 ret_val = -E1000_ERR_MBX; u32 p2v_mailbox; + int count = 10; - /* Take ownership of the buffer */ - wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); + do { + /* Take ownership of the buffer */ + wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); - /* reserve mailbox for vf use */ - p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number)); - if (p2v_mailbox & E1000_P2VMAILBOX_PFU) - ret_val = 0; + /* reserve mailbox for vf use */ + p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number)); + if (p2v_mailbox & E1000_P2VMAILBOX_PFU) { + ret_val = 0; + break; + } + udelay(1000); + } while (count-- > 0); return ret_val; } diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index e3cb93bdb21a..707ae5c297ea 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -95,7 +95,6 @@ struct vf_data_storage { unsigned char vf_mac_addresses[ETH_ALEN]; u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES]; u16 num_vf_mc_hashes; - u16 vlans_enabled; u32 flags; unsigned long last_nack; u16 pf_vlan; /* When set, guest VLAN config not allowed. */ @@ -482,6 +481,7 @@ struct igb_adapter { #define IGB_FLAG_MAS_ENABLE (1 << 12) #define IGB_FLAG_HAS_MSIX (1 << 13) #define IGB_FLAG_EEE (1 << 14) +#define IGB_FLAG_VLAN_PROMISC BIT(15) /* Media Auto Sense */ #define IGB_MAS_ENABLE_0 0X0001 diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 31e5f3942839..af46fcf8a50e 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -140,7 +140,7 @@ static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); static int igb_change_mtu(struct net_device *, int); static int igb_set_mac(struct net_device *, void *); -static void igb_set_uta(struct igb_adapter *adapter); +static void igb_set_uta(struct igb_adapter *adapter, bool set); static irqreturn_t igb_intr(int irq, void *); static irqreturn_t igb_intr_msi(int irq, void *); static irqreturn_t igb_msix_other(int irq, void *); @@ -1534,12 +1534,13 @@ static void igb_irq_enable(struct igb_adapter *adapter) static void igb_update_mng_vlan(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + u16 pf_id = adapter->vfs_allocated_count; u16 vid = adapter->hw.mng_cookie.vlan_id; u16 old_vid = adapter->mng_vlan_id; if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { /* add VID to filter table */ - igb_vfta_set(hw, vid, true); + igb_vfta_set(hw, vid, pf_id, true, true); adapter->mng_vlan_id = vid; } else { adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; @@ -1549,7 +1550,7 @@ static void igb_update_mng_vlan(struct igb_adapter *adapter) (vid != old_vid) && !test_bit(old_vid, adapter->active_vlans)) { /* remove VID from filter table */ - igb_vfta_set(hw, old_vid, false); + igb_vfta_set(hw, vid, pf_id, false, true); } } @@ -1818,6 +1819,10 @@ void igb_down(struct igb_adapter *adapter) if (!pci_channel_offline(adapter->pdev)) igb_reset(adapter); + + /* clear VLAN promisc flag so VFTA will be updated if necessary */ + adapter->flags &= ~IGB_FLAG_VLAN_PROMISC; + igb_clean_all_tx_rings(adapter); igb_clean_all_rx_rings(adapter); #ifdef CONFIG_IGB_DCA @@ -1862,7 +1867,7 @@ void igb_reset(struct igb_adapter *adapter) struct e1000_hw *hw = &adapter->hw; struct e1000_mac_info *mac = &hw->mac; struct e1000_fc_info *fc = &hw->fc; - u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm; + u32 pba, hwm; /* Repartition Pba for greater than 9k mtu * To take effect CTRL.RST is required. @@ -1886,9 +1891,10 @@ void igb_reset(struct igb_adapter *adapter) break; } - if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && - (mac->type < e1000_82576)) { - /* adjust PBA for jumbo frames */ + if (mac->type == e1000_82575) { + u32 min_rx_space, min_tx_space, needed_tx_space; + + /* write Rx PBA so that hardware can report correct Tx PBA */ wr32(E1000_PBA, pba); /* To maintain wire speed transmits, the Tx FIFO should be @@ -1898,31 +1904,26 @@ void igb_reset(struct igb_adapter *adapter) * one full receive packet and is similarly rounded up and * expressed in KB. */ - pba = rd32(E1000_PBA); - /* upper 16 bits has Tx packet buffer allocation size in KB */ - tx_space = pba >> 16; - /* lower 16 bits has Rx packet buffer allocation size in KB */ - pba &= 0xffff; - /* the Tx fifo also stores 16 bytes of information about the Tx - * but don't include ethernet FCS because hardware appends it + min_rx_space = DIV_ROUND_UP(MAX_JUMBO_FRAME_SIZE, 1024); + + /* The Tx FIFO also stores 16 bytes of information about the Tx + * but don't include Ethernet FCS because hardware appends it. + * We only need to round down to the nearest 512 byte block + * count since the value we care about is 2 frames, not 1. */ - min_tx_space = (adapter->max_frame_size + - sizeof(union e1000_adv_tx_desc) - - ETH_FCS_LEN) * 2; - min_tx_space = ALIGN(min_tx_space, 1024); - min_tx_space >>= 10; - /* software strips receive CRC, so leave room for it */ - min_rx_space = adapter->max_frame_size; - min_rx_space = ALIGN(min_rx_space, 1024); - min_rx_space >>= 10; + min_tx_space = adapter->max_frame_size; + min_tx_space += sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN; + min_tx_space = DIV_ROUND_UP(min_tx_space, 512); + + /* upper 16 bits has Tx packet buffer allocation size in KB */ + needed_tx_space = min_tx_space - (rd32(E1000_PBA) >> 16); /* If current Tx allocation is less than the min Tx FIFO size, * and the min Tx FIFO size is less than the current Rx FIFO - * allocation, take space away from current Rx allocation + * allocation, take space away from current Rx allocation. */ - if (tx_space < min_tx_space && - ((min_tx_space - tx_space) < pba)) { - pba = pba - (min_tx_space - tx_space); + if (needed_tx_space < pba) { + pba -= needed_tx_space; /* if short on Rx space, Rx wins and must trump Tx * adjustment @@ -1930,18 +1931,20 @@ void igb_reset(struct igb_adapter *adapter) if (pba < min_rx_space) pba = min_rx_space; } + + /* adjust PBA for jumbo frames */ wr32(E1000_PBA, pba); } - /* flow control settings */ - /* The high water mark must be low enough to fit one full frame - * (or the size used for early receive) above it in the Rx FIFO. - * Set it to the lower of: - * - 90% of the Rx FIFO size, or - * - the full Rx FIFO size minus one full frame + /* flow control settings + * The high water mark must be low enough to fit one full frame + * after transmitting the pause frame. As such we must have enough + * space to allow for us to complete our current transmit and then + * receive the frame that is in progress from the link partner. + * Set it to: + * - the full Rx FIFO size minus one full Tx plus one full Rx frame */ - hwm = min(((pba << 10) * 9 / 10), - ((pba << 10) - 2 * adapter->max_frame_size)); + hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ fc->low_water = fc->high_water - 16; @@ -2051,7 +2054,7 @@ static int igb_set_features(struct net_device *netdev, if (changed & NETIF_F_HW_VLAN_CTAG_RX) igb_vlan_mode(netdev, features); - if (!(changed & NETIF_F_RXALL)) + if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) return 0; netdev->features = features; @@ -2064,6 +2067,25 @@ static int igb_set_features(struct net_device *netdev, return 0; } +static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, + u16 flags) +{ + /* guarantee we can provide a unique filter for the unicast address */ + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { + struct igb_adapter *adapter = netdev_priv(dev); + struct e1000_hw *hw = &adapter->hw; + int vfn = adapter->vfs_allocated_count; + int rar_entries = hw->mac.rar_entry_count - (vfn + 1); + + if (netdev_uc_count(dev) >= rar_entries) + return -ENOMEM; + } + + return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags); +} + static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, @@ -2087,6 +2109,7 @@ static const struct net_device_ops igb_netdev_ops = { #endif .ndo_fix_features = igb_fix_features, .ndo_set_features = igb_set_features, + .ndo_fdb_add = igb_ndo_fdb_add, .ndo_features_check = passthru_features_check, }; @@ -2921,14 +2944,6 @@ void igb_set_flag_queue_pairs(struct igb_adapter *adapter, /* Device supports enough interrupts without queue pairing. */ break; case e1000_82576: - /* If VFs are going to be allocated with RSS queues then we - * should pair the queues in order to conserve interrupts due - * to limited supply. - */ - if ((adapter->rss_queues > 1) && - (adapter->vfs_allocated_count > 6)) - adapter->flags |= IGB_FLAG_QUEUE_PAIRS; - /* fall through */ case e1000_82580: case e1000_i350: case e1000_i354: @@ -2939,6 +2954,8 @@ void igb_set_flag_queue_pairs(struct igb_adapter *adapter, */ if (adapter->rss_queues > (max_rss_queues / 2)) adapter->flags |= IGB_FLAG_QUEUE_PAIRS; + else + adapter->flags &= ~IGB_FLAG_QUEUE_PAIRS; break; } } @@ -3498,7 +3515,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) /* disable store bad packets and clear size bits. */ rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); - /* enable LPE to prevent packets larger than max_frame_size */ + /* enable LPE to allow for reception of jumbo frames */ rctl |= E1000_RCTL_LPE; /* disable queue 0 to prevent tail write w/o re-config */ @@ -3522,8 +3539,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) E1000_RCTL_BAM | /* RX All Bcast Pkts */ E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ - rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */ - E1000_RCTL_DPF | /* Allow filtered pause */ + rctl &= ~(E1000_RCTL_DPF | /* Allow filtered pause */ E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ /* Do not mess with E1000_CTRL_VME, it affects transmit as well, * and that breaks VLANs. @@ -3539,12 +3555,8 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, struct e1000_hw *hw = &adapter->hw; u32 vmolr; - /* if it isn't the PF check to see if VFs are enabled and - * increase the size to support vlan tags - */ - if (vfn < adapter->vfs_allocated_count && - adapter->vf_data[vfn].vlans_enabled) - size += VLAN_TAG_SIZE; + if (size > MAX_JUMBO_FRAME_SIZE) + size = MAX_JUMBO_FRAME_SIZE; vmolr = rd32(E1000_VMOLR(vfn)); vmolr &= ~E1000_VMOLR_RLPML_MASK; @@ -3554,32 +3566,6 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, return 0; } -/** - * igb_rlpml_set - set maximum receive packet size - * @adapter: board private structure - * - * Configure maximum receivable packet size. - **/ -static void igb_rlpml_set(struct igb_adapter *adapter) -{ - u32 max_frame_size = adapter->max_frame_size; - struct e1000_hw *hw = &adapter->hw; - u16 pf_id = adapter->vfs_allocated_count; - - if (pf_id) { - igb_set_vf_rlpml(adapter, max_frame_size, pf_id); - /* If we're in VMDQ or SR-IOV mode, then set global RLPML - * to our max jumbo frame size, in case we need to enable - * jumbo frames on one of the rings later. - * This will not pass over-length frames into the default - * queue because it's gated by the VMOLR.RLPML. - */ - max_frame_size = MAX_JUMBO_FRAME_SIZE; - } - - wr32(E1000_RLPML, max_frame_size); -} - static inline void igb_set_vmolr(struct igb_adapter *adapter, int vfn, bool aupe) { @@ -3684,9 +3670,6 @@ static void igb_configure_rx(struct igb_adapter *adapter) { int i; - /* set UTA to appropriate mode */ - igb_set_uta(adapter); - /* set the correct pool for the PF default MAC address in entry 0 */ igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, adapter->vfs_allocated_count); @@ -4004,6 +3987,130 @@ static int igb_write_uc_addr_list(struct net_device *netdev) return count; } +static int igb_vlan_promisc_enable(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 i, pf_id; + + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + case e1000_i350: + /* VLAN filtering needed for VLAN prio filter */ + if (adapter->netdev->features & NETIF_F_NTUPLE) + break; + /* fall through */ + case e1000_82576: + case e1000_82580: + case e1000_i354: + /* VLAN filtering needed for pool filtering */ + if (adapter->vfs_allocated_count) + break; + /* fall through */ + default: + return 1; + } + + /* We are already in VLAN promisc, nothing to do */ + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + return 0; + + if (!adapter->vfs_allocated_count) + goto set_vfta; + + /* Add PF to all active pools */ + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + + for (i = E1000_VLVF_ARRAY_SIZE; --i;) { + u32 vlvf = rd32(E1000_VLVF(i)); + + vlvf |= 1 << pf_id; + wr32(E1000_VLVF(i), vlvf); + } + +set_vfta: + /* Set all bits in the VLAN filter table array */ + for (i = E1000_VLAN_FILTER_TBL_SIZE; i--;) + hw->mac.ops.write_vfta(hw, i, ~0U); + + /* Set flag so we don't redo unnecessary work */ + adapter->flags |= IGB_FLAG_VLAN_PROMISC; + + return 0; +} + +#define VFTA_BLOCK_SIZE 8 +static void igb_scrub_vfta(struct igb_adapter *adapter, u32 vfta_offset) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vfta[VFTA_BLOCK_SIZE] = { 0 }; + u32 vid_start = vfta_offset * 32; + u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32); + u32 i, vid, word, bits, pf_id; + + /* guarantee that we don't scrub out management VLAN */ + vid = adapter->mng_vlan_id; + if (vid >= vid_start && vid < vid_end) + vfta[(vid - vid_start) / 32] |= 1 << (vid % 32); + + if (!adapter->vfs_allocated_count) + goto set_vfta; + + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + + for (i = E1000_VLVF_ARRAY_SIZE; --i;) { + u32 vlvf = rd32(E1000_VLVF(i)); + + /* pull VLAN ID from VLVF */ + vid = vlvf & VLAN_VID_MASK; + + /* only concern ourselves with a certain range */ + if (vid < vid_start || vid >= vid_end) + continue; + + if (vlvf & E1000_VLVF_VLANID_ENABLE) { + /* record VLAN ID in VFTA */ + vfta[(vid - vid_start) / 32] |= 1 << (vid % 32); + + /* if PF is part of this then continue */ + if (test_bit(vid, adapter->active_vlans)) + continue; + } + + /* remove PF from the pool */ + bits = ~(1 << pf_id); + bits &= rd32(E1000_VLVF(i)); + wr32(E1000_VLVF(i), bits); + } + +set_vfta: + /* extract values from active_vlans and write back to VFTA */ + for (i = VFTA_BLOCK_SIZE; i--;) { + vid = (vfta_offset + i) * 32; + word = vid / BITS_PER_LONG; + bits = vid % BITS_PER_LONG; + + vfta[i] |= adapter->active_vlans[word] >> bits; + + hw->mac.ops.write_vfta(hw, vfta_offset + i, vfta[i]); + } +} + +static void igb_vlan_promisc_disable(struct igb_adapter *adapter) +{ + u32 i; + + /* We are not in VLAN promisc, nothing to do */ + if (!(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + return; + + /* Set flag so we don't redo unnecessary work */ + adapter->flags &= ~IGB_FLAG_VLAN_PROMISC; + + for (i = 0; i < E1000_VLAN_FILTER_TBL_SIZE; i += VFTA_BLOCK_SIZE) + igb_scrub_vfta(adapter, i); +} + /** * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure @@ -4018,21 +4125,17 @@ static void igb_set_rx_mode(struct net_device *netdev) struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; unsigned int vfn = adapter->vfs_allocated_count; - u32 rctl, vmolr = 0; + u32 rctl = 0, vmolr = 0; int count; /* Check for Promiscuous and All Multicast modes */ - rctl = rd32(E1000_RCTL); - - /* clear the effected bits */ - rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); - if (netdev->flags & IFF_PROMISC) { - /* retain VLAN HW filtering if in VT mode */ - if (adapter->vfs_allocated_count) - rctl |= E1000_RCTL_VFE; - rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); - vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); + rctl |= E1000_RCTL_UPE | E1000_RCTL_MPE; + vmolr |= E1000_VMOLR_MPME; + + /* enable use of UTA filter to force packets to default pool */ + if (hw->mac.type == e1000_82576) + vmolr |= E1000_VMOLR_ROPE; } else { if (netdev->flags & IFF_ALLMULTI) { rctl |= E1000_RCTL_MPE; @@ -4050,17 +4153,34 @@ static void igb_set_rx_mode(struct net_device *netdev) vmolr |= E1000_VMOLR_ROMPE; } } - /* Write addresses to available RAR registers, if there is not - * sufficient space to store all the addresses then enable - * unicast promiscuous mode - */ - count = igb_write_uc_addr_list(netdev); - if (count < 0) { - rctl |= E1000_RCTL_UPE; - vmolr |= E1000_VMOLR_ROPE; - } - rctl |= E1000_RCTL_VFE; } + + /* Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + count = igb_write_uc_addr_list(netdev); + if (count < 0) { + rctl |= E1000_RCTL_UPE; + vmolr |= E1000_VMOLR_ROPE; + } + + /* enable VLAN filtering by default */ + rctl |= E1000_RCTL_VFE; + + /* disable VLAN filtering for modes that require it */ + if ((netdev->flags & IFF_PROMISC) || + (netdev->features & NETIF_F_RXALL)) { + /* if we fail to set all rules then just clear VFE */ + if (igb_vlan_promisc_enable(adapter)) + rctl &= ~E1000_RCTL_VFE; + } else { + igb_vlan_promisc_disable(adapter); + } + + /* update state of unicast, multicast, and VLAN filtering modes */ + rctl |= rd32(E1000_RCTL) & ~(E1000_RCTL_UPE | E1000_RCTL_MPE | + E1000_RCTL_VFE); wr32(E1000_RCTL, rctl); /* In order to support SR-IOV and eventually VMDq it is necessary to set @@ -4071,9 +4191,19 @@ static void igb_set_rx_mode(struct net_device *netdev) if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350)) return; + /* set UTA to appropriate mode */ + igb_set_uta(adapter, !!(vmolr & E1000_VMOLR_ROPE)); + vmolr |= rd32(E1000_VMOLR(vfn)) & ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); + + /* enable Rx jumbo frames, no need for restriction */ + vmolr &= ~E1000_VMOLR_RLPML_MASK; + vmolr |= MAX_JUMBO_FRAME_SIZE | E1000_VMOLR_LPE; + wr32(E1000_VMOLR(vfn), vmolr); + wr32(E1000_RLPML, MAX_JUMBO_FRAME_SIZE); + igb_restore_vf_multicasts(adapter); } @@ -5088,16 +5218,6 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, { struct igb_adapter *adapter = netdev_priv(netdev); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - - if (skb->len <= 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - /* The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. */ @@ -5792,125 +5912,132 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter) static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) { struct e1000_hw *hw = &adapter->hw; - u32 pool_mask, reg, vid; - int i; + u32 pool_mask, vlvf_mask, i; - pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + /* create mask for VF and other pools */ + pool_mask = E1000_VLVF_POOLSEL_MASK; + vlvf_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* drop PF from pool bits */ + pool_mask &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + + adapter->vfs_allocated_count)); /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); + for (i = E1000_VLVF_ARRAY_SIZE; i--;) { + u32 vlvf = rd32(E1000_VLVF(i)); + u32 vfta_mask, vid, vfta; /* remove the vf from the pool */ - reg &= ~pool_mask; - - /* if pool is empty then remove entry from vfta */ - if (!(reg & E1000_VLVF_POOLSEL_MASK) && - (reg & E1000_VLVF_VLANID_ENABLE)) { - reg = 0; - vid = reg & E1000_VLVF_VLANID_MASK; - igb_vfta_set(hw, vid, false); - } + if (!(vlvf & vlvf_mask)) + continue; + + /* clear out bit from VLVF */ + vlvf ^= vlvf_mask; + + /* if other pools are present, just remove ourselves */ + if (vlvf & pool_mask) + goto update_vlvfb; + + /* if PF is present, leave VFTA */ + if (vlvf & E1000_VLVF_POOLSEL_MASK) + goto update_vlvf; + + vid = vlvf & E1000_VLVF_VLANID_MASK; + vfta_mask = 1 << (vid % 32); - wr32(E1000_VLVF(i), reg); + /* clear bit from VFTA */ + vfta = adapter->shadow_vfta[vid / 32]; + if (vfta & vfta_mask) + hw->mac.ops.write_vfta(hw, vid / 32, vfta ^ vfta_mask); +update_vlvf: + /* clear pool selection enable */ + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + vlvf &= E1000_VLVF_POOLSEL_MASK; + else + vlvf = 0; +update_vlvfb: + /* clear pool bits */ + wr32(E1000_VLVF(i), vlvf); } +} + +static int igb_find_vlvf_entry(struct e1000_hw *hw, u32 vlan) +{ + u32 vlvf; + int idx; - adapter->vf_data[vf].vlans_enabled = 0; + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* Search for the VLAN id in the VLVF entries */ + for (idx = E1000_VLVF_ARRAY_SIZE; --idx;) { + vlvf = rd32(E1000_VLVF(idx)); + if ((vlvf & VLAN_VID_MASK) == vlan) + break; + } + + return idx; } -static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) +void igb_update_pf_vlvf(struct igb_adapter *adapter, u32 vid) { struct e1000_hw *hw = &adapter->hw; - u32 reg, i; - - /* The vlvf table only exists on 82576 hardware and newer */ - if (hw->mac.type < e1000_82576) - return -1; + u32 bits, pf_id; + int idx; - /* we only need to do this if VMDq is enabled */ - if (!adapter->vfs_allocated_count) - return -1; + idx = igb_find_vlvf_entry(hw, vid); + if (!idx) + return; - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if ((reg & E1000_VLVF_VLANID_ENABLE) && - vid == (reg & E1000_VLVF_VLANID_MASK)) - break; + /* See if any other pools are set for this VLAN filter + * entry other than the PF. + */ + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + bits = ~(1 << pf_id) & E1000_VLVF_POOLSEL_MASK; + bits &= rd32(E1000_VLVF(idx)); + + /* Disable the filter so this falls into the default pool. */ + if (!bits) { + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + wr32(E1000_VLVF(idx), 1 << pf_id); + else + wr32(E1000_VLVF(idx), 0); } +} - if (add) { - if (i == E1000_VLVF_ARRAY_SIZE) { - /* Did not find a matching VLAN ID entry that was - * enabled. Search for a free filter entry, i.e. - * one without the enable bit set - */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if (!(reg & E1000_VLVF_VLANID_ENABLE)) - break; - } - } - if (i < E1000_VLVF_ARRAY_SIZE) { - /* Found an enabled/available entry */ - reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); - - /* if !enabled we need to set this up in vfta */ - if (!(reg & E1000_VLVF_VLANID_ENABLE)) { - /* add VID to filter table */ - igb_vfta_set(hw, vid, true); - reg |= E1000_VLVF_VLANID_ENABLE; - } - reg &= ~E1000_VLVF_VLANID_MASK; - reg |= vid; - wr32(E1000_VLVF(i), reg); - - /* do not modify RLPML for PF devices */ - if (vf >= adapter->vfs_allocated_count) - return 0; - - if (!adapter->vf_data[vf].vlans_enabled) { - u32 size; - - reg = rd32(E1000_VMOLR(vf)); - size = reg & E1000_VMOLR_RLPML_MASK; - size += 4; - reg &= ~E1000_VMOLR_RLPML_MASK; - reg |= size; - wr32(E1000_VMOLR(vf), reg); - } +static s32 igb_set_vf_vlan(struct igb_adapter *adapter, u32 vid, + bool add, u32 vf) +{ + int pf_id = adapter->vfs_allocated_count; + struct e1000_hw *hw = &adapter->hw; + int err; - adapter->vf_data[vf].vlans_enabled++; - } - } else { - if (i < E1000_VLVF_ARRAY_SIZE) { - /* remove vf from the pool */ - reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); - /* if pool is empty then remove entry from vfta */ - if (!(reg & E1000_VLVF_POOLSEL_MASK)) { - reg = 0; - igb_vfta_set(hw, vid, false); - } - wr32(E1000_VLVF(i), reg); - - /* do not modify RLPML for PF devices */ - if (vf >= adapter->vfs_allocated_count) - return 0; - - adapter->vf_data[vf].vlans_enabled--; - if (!adapter->vf_data[vf].vlans_enabled) { - u32 size; - - reg = rd32(E1000_VMOLR(vf)); - size = reg & E1000_VMOLR_RLPML_MASK; - size -= 4; - reg &= ~E1000_VMOLR_RLPML_MASK; - reg |= size; - wr32(E1000_VMOLR(vf), reg); - } - } + /* If VLAN overlaps with one the PF is currently monitoring make + * sure that we are able to allocate a VLVF entry. This may be + * redundant but it guarantees PF will maintain visibility to + * the VLAN. + */ + if (add && test_bit(vid, adapter->active_vlans)) { + err = igb_vfta_set(hw, vid, pf_id, true, false); + if (err) + return err; } - return 0; + + err = igb_vfta_set(hw, vid, vf, add, false); + + if (add && !err) + return err; + + /* If we failed to add the VF VLAN or we are removing the VF VLAN + * we may need to drop the PF pool bit in order to allow us to free + * up the VLVF resources. + */ + if (test_bit(vid, adapter->active_vlans) || + (adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_update_pf_vlvf(adapter, vid); + + return err; } static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) @@ -5923,130 +6050,97 @@ static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) wr32(E1000_VMVIR(vf), 0); } -static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos) +static int igb_enable_port_vlan(struct igb_adapter *adapter, int vf, + u16 vlan, u8 qos) { - int err = 0; - struct igb_adapter *adapter = netdev_priv(netdev); + int err; - if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) - return -EINVAL; - if (vlan || qos) { - err = igb_vlvf_set(adapter, vlan, !!vlan, vf); - if (err) - goto out; - igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); - igb_set_vmolr(adapter, vf, !vlan); - adapter->vf_data[vf].pf_vlan = vlan; - adapter->vf_data[vf].pf_qos = qos; - dev_info(&adapter->pdev->dev, - "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_warn(&adapter->pdev->dev, - "The VF VLAN has been set, but the PF device is not up.\n"); - dev_warn(&adapter->pdev->dev, - "Bring the PF device up before attempting to use the VF device.\n"); - } - } else { - igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, - false, vf); - igb_set_vmvir(adapter, vlan, vf); - igb_set_vmolr(adapter, vf, true); - adapter->vf_data[vf].pf_vlan = 0; - adapter->vf_data[vf].pf_qos = 0; + err = igb_set_vf_vlan(adapter, vlan, true, vf); + if (err) + return err; + + igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); + igb_set_vmolr(adapter, vf, !vlan); + + /* revoke access to previous VLAN */ + if (vlan != adapter->vf_data[vf].pf_vlan) + igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan, + false, vf); + + adapter->vf_data[vf].pf_vlan = vlan; + adapter->vf_data[vf].pf_qos = qos; + dev_info(&adapter->pdev->dev, + "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_warn(&adapter->pdev->dev, + "The VF VLAN has been set, but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, + "Bring the PF device up before attempting to use the VF device.\n"); } -out: + return err; } -static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid) +static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf) { - struct e1000_hw *hw = &adapter->hw; - int i; - u32 reg; + /* Restore tagless access via VLAN 0 */ + igb_set_vf_vlan(adapter, 0, true, vf); - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if ((reg & E1000_VLVF_VLANID_ENABLE) && - vid == (reg & E1000_VLVF_VLANID_MASK)) - break; - } + igb_set_vmvir(adapter, 0, vf); + igb_set_vmolr(adapter, vf, true); - if (i >= E1000_VLVF_ARRAY_SIZE) - i = -1; + /* Remove any PF assigned VLAN */ + if (adapter->vf_data[vf].pf_vlan) + igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan, + false, vf); + + adapter->vf_data[vf].pf_vlan = 0; + adapter->vf_data[vf].pf_qos = 0; - return i; + return 0; } -static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +static int igb_ndo_set_vf_vlan(struct net_device *netdev, + int vf, u16 vlan, u8 qos) { - struct e1000_hw *hw = &adapter->hw; - int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; - int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); - int err = 0; + struct igb_adapter *adapter = netdev_priv(netdev); - /* If in promiscuous mode we need to make sure the PF also has - * the VLAN filter set. - */ - if (add && (adapter->netdev->flags & IFF_PROMISC)) - err = igb_vlvf_set(adapter, vid, add, - adapter->vfs_allocated_count); - if (err) - goto out; + if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) + return -EINVAL; - err = igb_vlvf_set(adapter, vid, add, vf); + return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) : + igb_disable_port_vlan(adapter, vf); +} - if (err) - goto out; +static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +{ + int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); - /* Go through all the checks to see if the VLAN filter should - * be wiped completely. - */ - if (!add && (adapter->netdev->flags & IFF_PROMISC)) { - u32 vlvf, bits; - int regndx = igb_find_vlvf_entry(adapter, vid); - - if (regndx < 0) - goto out; - /* See if any other pools are set for this VLAN filter - * entry other than the PF. - */ - vlvf = bits = rd32(E1000_VLVF(regndx)); - bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT + - adapter->vfs_allocated_count); - /* If the filter was removed then ensure PF pool bit - * is cleared if the PF only added itself to the pool - * because the PF is in promiscuous mode. - */ - if ((vlvf & VLAN_VID_MASK) == vid && - !test_bit(vid, adapter->active_vlans) && - !bits) - igb_vlvf_set(adapter, vid, add, - adapter->vfs_allocated_count); - } + if (adapter->vf_data[vf].pf_vlan) + return -1; -out: - return err; + /* VLAN 0 is a special case, don't allow it to be removed */ + if (!vid && !add) + return 0; + + return igb_set_vf_vlan(adapter, vid, !!add, vf); } static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) { - /* clear flags - except flag that indicates PF has set the MAC */ - adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC; - adapter->vf_data[vf].last_nack = jiffies; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; - /* reset offloads to defaults */ - igb_set_vmolr(adapter, vf, true); + /* clear flags - except flag that indicates PF has set the MAC */ + vf_data->flags &= IGB_VF_FLAG_PF_SET_MAC; + vf_data->last_nack = jiffies; /* reset vlans for device */ igb_clear_vf_vfta(adapter, vf); - if (adapter->vf_data[vf].pf_vlan) - igb_ndo_set_vf_vlan(adapter->netdev, vf, - adapter->vf_data[vf].pf_vlan, - adapter->vf_data[vf].pf_qos); - else - igb_clear_vf_vfta(adapter, vf); + igb_set_vf_vlan(adapter, vf_data->pf_vlan, true, vf); + igb_set_vmvir(adapter, vf_data->pf_vlan | + (vf_data->pf_qos << VLAN_PRIO_SHIFT), vf); + igb_set_vmolr(adapter, vf, !vf_data->pf_vlan); /* reset multicast table array for vf */ adapter->vf_data[vf].num_vf_mc_hashes = 0; @@ -6191,7 +6285,7 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n", vf); else - retval = igb_set_vf_vlan(adapter, msgbuf, vf); + retval = igb_set_vf_vlan_msg(adapter, msgbuf, vf); break; default: dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]); @@ -6233,6 +6327,7 @@ static void igb_msg_task(struct igb_adapter *adapter) /** * igb_set_uta - Set unicast filter table address * @adapter: board private structure + * @set: boolean indicating if we are setting or clearing bits * * The unicast table address is a register array of 32-bit registers. * The table is meant to be used in a way similar to how the MTA is used @@ -6240,21 +6335,18 @@ static void igb_msg_task(struct igb_adapter *adapter) * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous * enable bit to allow vlan tag stripping when promiscuous mode is enabled **/ -static void igb_set_uta(struct igb_adapter *adapter) +static void igb_set_uta(struct igb_adapter *adapter, bool set) { struct e1000_hw *hw = &adapter->hw; + u32 uta = set ? ~0 : 0; int i; - /* The UTA table only exists on 82576 hardware and newer */ - if (hw->mac.type < e1000_82576) - return; - /* we only need to do this if VMDq is enabled */ if (!adapter->vfs_allocated_count) return; - for (i = 0; i < hw->mac.uta_reg_count; i++) - array_wr32(E1000_UTA, i, ~0); + for (i = hw->mac.uta_reg_count; i--;) + array_wr32(E1000_UTA, i, uta); } /** @@ -7201,8 +7293,6 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) ctrl &= ~E1000_CTRL_VME; wr32(E1000_CTRL, ctrl); } - - igb_rlpml_set(adapter); } static int igb_vlan_rx_add_vid(struct net_device *netdev, @@ -7212,11 +7302,9 @@ static int igb_vlan_rx_add_vid(struct net_device *netdev, struct e1000_hw *hw = &adapter->hw; int pf_id = adapter->vfs_allocated_count; - /* attempt to add filter to vlvf array */ - igb_vlvf_set(adapter, vid, true, pf_id); - /* add the filter since PF can receive vlans w/o entry in vlvf */ - igb_vfta_set(hw, vid, true); + if (!vid || !(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_vfta_set(hw, vid, pf_id, true, !!vid); set_bit(vid, adapter->active_vlans); @@ -7227,16 +7315,12 @@ static int igb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; int pf_id = adapter->vfs_allocated_count; - s32 err; - - /* remove vlan from VLVF table array */ - err = igb_vlvf_set(adapter, vid, false, pf_id); + struct e1000_hw *hw = &adapter->hw; - /* if vid was not present in VLVF just remove it from table */ - if (err) - igb_vfta_set(hw, vid, false); + /* remove VID from filter table */ + if (vid && !(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_vfta_set(hw, vid, pf_id, false, true); clear_bit(vid, adapter->active_vlans); @@ -7245,11 +7329,12 @@ static int igb_vlan_rx_kill_vid(struct net_device *netdev, static void igb_restore_vlan(struct igb_adapter *adapter) { - u16 vid; + u16 vid = 1; igb_vlan_mode(adapter->netdev, adapter->netdev->features); + igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0); - for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) + for_each_set_bit_from(vid, adapter->active_vlans, VLAN_N_VID) igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } @@ -7704,15 +7789,14 @@ static void igb_io_resume(struct pci_dev *pdev) static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, u8 qsel) { - u32 rar_low, rar_high; struct e1000_hw *hw = &adapter->hw; + u32 rar_low, rar_high; /* HW expects these in little endian so we reverse the byte order - * from network order (big endian) to little endian + * from network order (big endian) to CPU endian */ - rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | - ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); - rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); + rar_low = le32_to_cpup((__be32 *)(addr)); + rar_high = le16_to_cpup((__be16 *)(addr + 4)); /* Indicate to hardware the Address is Valid. */ rar_high |= E1000_RAH_AV; @@ -7959,9 +8043,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) * than the Rx threshold. Set hwm to PBA - max frame * size in 16B units, capping it at PBA - 6KB. */ - hwm = 64 * pba - adapter->max_frame_size / 16; - if (hwm < 64 * (pba - 6)) - hwm = 64 * (pba - 6); + hwm = 64 * (pba - 6); reg = rd32(E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) @@ -7971,9 +8053,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* Set the DMA Coalescing Rx threshold to PBA - 2 * max * frame size, capping it at PBA - 10KB. */ - dmac_thr = pba - adapter->max_frame_size / 512; - if (dmac_thr < pba - 10) - dmac_thr = pba - 10; + dmac_thr = pba - 10; reg = rd32(E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c index 7b6cb4c3764c..01752f44ace2 100644 --- a/drivers/net/ethernet/intel/igbvf/mbx.c +++ b/drivers/net/ethernet/intel/igbvf/mbx.c @@ -234,13 +234,19 @@ static s32 e1000_check_for_rst_vf(struct e1000_hw *hw) static s32 e1000_obtain_mbx_lock_vf(struct e1000_hw *hw) { s32 ret_val = -E1000_ERR_MBX; - - /* Take ownership of the buffer */ - ew32(V2PMAILBOX(0), E1000_V2PMAILBOX_VFU); - - /* reserve mailbox for VF use */ - if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) - ret_val = E1000_SUCCESS; + int count = 10; + + do { + /* Take ownership of the buffer */ + ew32(V2PMAILBOX(0), E1000_V2PMAILBOX_VFU); + + /* reserve mailbox for VF use */ + if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) { + ret_val = 0; + break; + } + udelay(1000); + } while (count-- > 0); return ret_val; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 4b9156cd8b93..84fa28ceb200 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -796,6 +796,10 @@ struct ixgbe_adapter { u8 default_up; unsigned long fwd_bitmask; /* Bitmask indicating in use pools */ +#define IXGBE_MAX_LINK_HANDLE 10 + struct ixgbe_mat_field *jump_tables[IXGBE_MAX_LINK_HANDLE]; + unsigned long tables; + /* maximum number of RETA entries among all devices supported by ixgbe * driver: currently it's x550 device in non-SRIOV mode */ @@ -925,6 +929,9 @@ s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw, u16 soft_id); void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, union ixgbe_atr_input *mask); +int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ixgbe_fdir_filter *input, + u16 sw_idx); void ixgbe_set_rx_mode(struct net_device *netdev); #ifdef CONFIG_IXGBE_DCB void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index bea96b3bc90c..726e0eeee63b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2520,9 +2520,9 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, return ret; } -static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, - struct ixgbe_fdir_filter *input, - u16 sw_idx) +int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ixgbe_fdir_filter *input, + u16 sw_idx) { struct ixgbe_hw *hw = &adapter->hw; struct hlist_node *node2; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c4003a88bbf6..cf4b729c92d7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -51,6 +51,8 @@ #include <linux/prefetch.h> #include <scsi/fc/fc_fcoe.h> #include <net/vxlan.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> #ifdef CONFIG_OF #include <linux/of_net.h> @@ -65,6 +67,7 @@ #include "ixgbe_common.h" #include "ixgbe_dcb_82599.h" #include "ixgbe_sriov.h" +#include "ixgbe_model.h" char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = @@ -1089,7 +1092,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter) * @tx_ring: tx ring to clean **/ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - struct ixgbe_ring *tx_ring) + struct ixgbe_ring *tx_ring, int napi_budget) { struct ixgbe_adapter *adapter = q_vector->adapter; struct ixgbe_tx_buffer *tx_buffer; @@ -1127,7 +1130,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, total_packets += tx_buffer->gso_segs; /* free the skb */ - dev_consume_skb_any(tx_buffer->skb); + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -2784,7 +2787,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget) #endif ixgbe_for_each_ring(ring, q_vector->tx) - clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring); + clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring, budget); /* Exit if we are called by netpoll or busy polling is active */ if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector)) @@ -5545,6 +5548,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) #endif /* CONFIG_IXGBE_DCB */ #endif /* IXGBE_FCOE */ + /* initialize static ixgbe jump table entries */ + adapter->jump_tables[0] = ixgbe_ipv4_fields; + adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) * hw->mac.num_rar_entries, GFP_ATOMIC); @@ -8200,6 +8206,228 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) return 0; } +static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter, + struct tc_cls_u32_offload *cls) +{ + int err; + + spin_lock(&adapter->fdir_perfect_lock); + err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, cls->knode.handle); + spin_unlock(&adapter->fdir_perfect_lock); + return err; +} + +static int ixgbe_configure_clsu32_add_hnode(struct ixgbe_adapter *adapter, + __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + /* This ixgbe devices do not support hash tables at the moment + * so abort when given hash tables. + */ + if (cls->hnode.divisor > 0) + return -EINVAL; + + set_bit(TC_U32_USERHTID(cls->hnode.handle), &adapter->tables); + return 0; +} + +static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter, + struct tc_cls_u32_offload *cls) +{ + clear_bit(TC_U32_USERHTID(cls->hnode.handle), &adapter->tables); + return 0; +} + +static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, + __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + u32 loc = cls->knode.handle & 0xfffff; + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_mat_field *field_ptr; + struct ixgbe_fdir_filter *input; + union ixgbe_atr_input mask; +#ifdef CONFIG_NET_CLS_ACT + const struct tc_action *a; +#endif + int i, err = 0; + u8 queue; + u32 handle; + + memset(&mask, 0, sizeof(union ixgbe_atr_input)); + handle = cls->knode.handle; + + /* At the moment cls_u32 jumps to transport layer and skips past + * L2 headers. The canonical method to match L2 frames is to use + * negative values. However this is error prone at best but really + * just broken because there is no way to "know" what sort of hdr + * is in front of the transport layer. Fix cls_u32 to support L2 + * headers when needed. + */ + if (protocol != htons(ETH_P_IP)) + return -EINVAL; + + if (cls->knode.link_handle || + cls->knode.link_handle >= IXGBE_MAX_LINK_HANDLE) { + struct ixgbe_nexthdr *nexthdr = ixgbe_ipv4_jumps; + u32 uhtid = TC_U32_USERHTID(cls->knode.link_handle); + + if (!test_bit(uhtid, &adapter->tables)) + return -EINVAL; + + for (i = 0; nexthdr[i].jump; i++) { + if (nexthdr->o != cls->knode.sel->offoff || + nexthdr->s != cls->knode.sel->offshift || + nexthdr->m != cls->knode.sel->offmask || + /* do not support multiple key jumps its just mad */ + cls->knode.sel->nkeys > 1) + return -EINVAL; + + if (nexthdr->off != cls->knode.sel->keys[0].off || + nexthdr->val != cls->knode.sel->keys[0].val || + nexthdr->mask != cls->knode.sel->keys[0].mask) + return -EINVAL; + + if (uhtid >= IXGBE_MAX_LINK_HANDLE) + return -EINVAL; + + adapter->jump_tables[uhtid] = nexthdr->jump; + } + return 0; + } + + if (loc >= ((1024 << adapter->fdir_pballoc) - 2)) { + e_err(drv, "Location out of range\n"); + return -EINVAL; + } + + /* cls u32 is a graph starting at root node 0x800. The driver tracks + * links and also the fields used to advance the parser across each + * link (e.g. nexthdr/eat parameters from 'tc'). This way we can map + * the u32 graph onto the hardware parse graph denoted in ixgbe_model.h + * To add support for new nodes update ixgbe_model.h parse structures + * this function _should_ be generic try not to hardcode values here. + */ + if (TC_U32_USERHTID(handle) == 0x800) { + field_ptr = adapter->jump_tables[0]; + } else { + if (TC_U32_USERHTID(handle) >= ARRAY_SIZE(adapter->jump_tables)) + return -EINVAL; + + field_ptr = adapter->jump_tables[TC_U32_USERHTID(handle)]; + } + + if (!field_ptr) + return -EINVAL; + + input = kzalloc(sizeof(*input), GFP_KERNEL); + if (!input) + return -ENOMEM; + + for (i = 0; i < cls->knode.sel->nkeys; i++) { + int off = cls->knode.sel->keys[i].off; + __be32 val = cls->knode.sel->keys[i].val; + __be32 m = cls->knode.sel->keys[i].mask; + bool found_entry = false; + int j; + + for (j = 0; field_ptr[j].val; j++) { + if (field_ptr[j].off == off && + field_ptr[j].mask == m) { + field_ptr[j].val(input, &mask, val, m); + input->filter.formatted.flow_type |= + field_ptr[j].type; + found_entry = true; + break; + } + } + + if (!found_entry) + goto err_out; + } + + mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK | + IXGBE_ATR_L4TYPE_MASK; + + if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4) + mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK; + +#ifdef CONFIG_NET_CLS_ACT + if (list_empty(&cls->knode.exts->actions)) + goto err_out; + + list_for_each_entry(a, &cls->knode.exts->actions, list) { + if (!is_tcf_gact_shot(a)) + goto err_out; + } +#endif + + input->action = IXGBE_FDIR_DROP_QUEUE; + queue = IXGBE_FDIR_DROP_QUEUE; + input->sw_idx = loc; + + spin_lock(&adapter->fdir_perfect_lock); + + if (hlist_empty(&adapter->fdir_filter_list)) { + memcpy(&adapter->fdir_mask, &mask, sizeof(mask)); + err = ixgbe_fdir_set_input_mask_82599(hw, &mask); + if (err) + goto err_out_w_lock; + } else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) { + err = -EINVAL; + goto err_out_w_lock; + } + + ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask); + err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter, + input->sw_idx, queue); + if (!err) + ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); + spin_unlock(&adapter->fdir_perfect_lock); + + return err; +err_out_w_lock: + spin_unlock(&adapter->fdir_perfect_lock); +err_out: + kfree(input); + return -EINVAL; +} + +int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + + if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) && + tc->type == TC_SETUP_CLSU32) { + if (!(dev->features & NETIF_F_HW_TC)) + return -EINVAL; + + switch (tc->cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return ixgbe_configure_clsu32(adapter, + proto, tc->cls_u32); + case TC_CLSU32_DELETE_KNODE: + return ixgbe_delete_clsu32(adapter, tc->cls_u32); + case TC_CLSU32_NEW_HNODE: + case TC_CLSU32_REPLACE_HNODE: + return ixgbe_configure_clsu32_add_hnode(adapter, proto, + tc->cls_u32); + case TC_CLSU32_DELETE_HNODE: + return ixgbe_configure_clsu32_del_hnode(adapter, + tc->cls_u32); + default: + return -EINVAL; + } + } + + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + return ixgbe_setup_tc(dev, tc->tc); +} + #ifdef CONFIG_PCI_IOV void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter) { @@ -8262,19 +8490,17 @@ static int ixgbe_set_features(struct net_device *netdev, } /* - * Check if Flow Director n-tuple support was enabled or disabled. If - * the state changed, we need to reset. + * Check if Flow Director n-tuple support or hw_tc support was + * enabled or disabled. If the state changed, we need to reset. */ - switch (features & NETIF_F_NTUPLE) { - case NETIF_F_NTUPLE: + if ((features & NETIF_F_NTUPLE) || (features & NETIF_F_HW_TC)) { /* turn off ATR, enable perfect filters and reset */ if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) need_reset = true; adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE; - break; - default: + } else { /* turn off perfect filters, enable ATR and reset */ if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) need_reset = true; @@ -8282,23 +8508,16 @@ static int ixgbe_set_features(struct net_device *netdev, adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; /* We cannot enable ATR if SR-IOV is enabled */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - break; - - /* We cannot enable ATR if we have 2 or more traffic classes */ - if (netdev_get_num_tc(netdev) > 1) - break; - - /* We cannot enable ATR if RSS is disabled */ - if (adapter->ring_feature[RING_F_RSS].limit <= 1) - break; - - /* A sample rate of 0 indicates ATR disabled */ - if (!adapter->atr_sample_rate) - break; - - adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; - break; + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED || + /* We cannot enable ATR if we have 2 or more tcs */ + (netdev_get_num_tc(netdev) > 1) || + /* We cannot enable ATR if RSS is disabled */ + (adapter->ring_feature[RING_F_RSS].limit <= 1) || + /* A sample rate of 0 indicates ATR disabled */ + (!adapter->atr_sample_rate)) + ; /* do nothing not supported */ + else /* otherwise supported and set the flag */ + adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; } if (features & NETIF_F_HW_VLAN_CTAG_RX) @@ -8657,9 +8876,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_vf_trust = ixgbe_ndo_set_vf_trust, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, .ndo_get_stats64 = ixgbe_get_stats64, -#ifdef CONFIG_IXGBE_DCB - .ndo_setup_tc = ixgbe_setup_tc, -#endif + .ndo_setup_tc = __ixgbe_setup_tc, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbe_netpoll, #endif @@ -9030,7 +9247,8 @@ skip_sriov: case ixgbe_mac_X550EM_x: netdev->features |= NETIF_F_SCTP_CRC; netdev->hw_features |= NETIF_F_SCTP_CRC | - NETIF_F_NTUPLE; + NETIF_F_NTUPLE | + NETIF_F_HW_TC; break; default: break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h new file mode 100644 index 000000000000..ce48872d4782 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h @@ -0,0 +1,112 @@ +/******************************************************************************* + * + * Intel 10 Gigabit PCI Express Linux drive + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Contact Information: + * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + ******************************************************************************/ + +#ifndef _IXGBE_MODEL_H_ +#define _IXGBE_MODEL_H_ + +#include "ixgbe.h" +#include "ixgbe_type.h" + +struct ixgbe_mat_field { + unsigned int off; + unsigned int mask; + int (*val)(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m); + unsigned int type; +}; + +static inline int ixgbe_mat_prgm_sip(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.src_ip[0] = val; + mask->formatted.src_ip[0] = m; + return 0; +} + +static inline int ixgbe_mat_prgm_dip(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.dst_ip[0] = val; + mask->formatted.dst_ip[0] = m; + return 0; +} + +static struct ixgbe_mat_field ixgbe_ipv4_fields[] = { + { .off = 12, .mask = -1, .val = ixgbe_mat_prgm_sip, + .type = IXGBE_ATR_FLOW_TYPE_IPV4}, + { .off = 16, .mask = -1, .val = ixgbe_mat_prgm_dip, + .type = IXGBE_ATR_FLOW_TYPE_IPV4}, + { .val = NULL } /* terminal node */ +}; + +static inline int ixgbe_mat_prgm_sport(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.src_port = val & 0xffff; + mask->formatted.src_port = m & 0xffff; + return 0; +}; + +static inline int ixgbe_mat_prgm_dport(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.dst_port = val & 0xffff; + mask->formatted.dst_port = m & 0xffff; + return 0; +}; + +static struct ixgbe_mat_field ixgbe_tcp_fields[] = { + {.off = 0, .mask = 0xffff, .val = ixgbe_mat_prgm_sport, + .type = IXGBE_ATR_FLOW_TYPE_TCPV4}, + {.off = 2, .mask = 0xffff, .val = ixgbe_mat_prgm_dport, + .type = IXGBE_ATR_FLOW_TYPE_TCPV4}, + { .val = NULL } /* terminal node */ +}; + +struct ixgbe_nexthdr { + /* offset, shift, and mask of position to next header */ + unsigned int o; + u32 s; + u32 m; + /* match criteria to make this jump*/ + unsigned int off; + u32 val; + u32 mask; + /* location of jump to make */ + struct ixgbe_mat_field *jump; +}; + +static struct ixgbe_nexthdr ixgbe_ipv4_jumps[] = { + { .o = 0, .s = 6, .m = 0xf, + .off = 8, .val = 0x600, .mask = 0xff00, .jump = ixgbe_tcp_fields}, + { .jump = NULL } /* terminal node */ +}; +#endif /* _IXGBE_MODEL_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index f191a1612589..96d95cb36c52 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -69,6 +69,15 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } +static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + return mlx4_en_setup_tc(dev, tc->tc); +} + #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { @@ -2462,7 +2471,7 @@ static const struct net_device_ops mlx4_netdev_ops = { #endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, - .ndo_setup_tc = mlx4_en_setup_tc, + .ndo_setup_tc = __mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif @@ -2500,7 +2509,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { #endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, - .ndo_setup_tc = mlx4_en_setup_tc, + .ndo_setup_tc = __mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif diff --git a/drivers/net/ethernet/pasemi/Kconfig b/drivers/net/ethernet/pasemi/Kconfig index db19c6f49859..7c92e8306c19 100644 --- a/drivers/net/ethernet/pasemi/Kconfig +++ b/drivers/net/ethernet/pasemi/Kconfig @@ -5,7 +5,7 @@ config NET_VENDOR_PASEMI bool "PA Semi devices" default y - depends on PPC_PASEMI && PCI && INET + depends on PPC_PASEMI && PCI ---help--- If you have a network (Ethernet) card belonging to this class, say Y. @@ -18,9 +18,8 @@ if NET_VENDOR_PASEMI config PASEMI_MAC tristate "PA Semi 1/10Gbit MAC" - depends on PPC_PASEMI && PCI && INET + depends on PPC_PASEMI && PCI select PHYLIB - select INET_LRO ---help--- This driver supports the on-chip 1/10Gbit Ethernet controller on PA Semi's PWRficient line of chips. diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index 57a6e6cd74fc..af54df52aa6b 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -30,9 +30,7 @@ #include <linux/skbuff.h> #include <linux/ip.h> -#include <linux/tcp.h> #include <net/checksum.h> -#include <linux/inet_lro.h> #include <linux/prefetch.h> #include <asm/irq.h> @@ -52,12 +50,9 @@ * * - Multicast support * - Large MTU support - * - SW LRO * - Multiqueue RX/TX */ -#define LRO_MAX_AGGR 64 - #define PE_MIN_MTU 64 #define PE_MAX_MTU 9000 #define PE_DEF_MTU ETH_DATA_LEN @@ -257,37 +252,6 @@ static int pasemi_mac_set_mac_addr(struct net_device *dev, void *p) return 0; } -static int get_skb_hdr(struct sk_buff *skb, void **iphdr, - void **tcph, u64 *hdr_flags, void *data) -{ - u64 macrx = (u64) data; - unsigned int ip_len; - struct iphdr *iph; - - /* IPv4 header checksum failed */ - if ((macrx & XCT_MACRX_HTY_M) != XCT_MACRX_HTY_IPV4_OK) - return -1; - - /* non tcp packet */ - skb_reset_network_header(skb); - iph = ip_hdr(skb); - if (iph->protocol != IPPROTO_TCP) - return -1; - - ip_len = ip_hdrlen(skb); - skb_set_transport_header(skb, ip_len); - *tcph = tcp_hdr(skb); - - /* check if ip header and tcp header are complete */ - if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb)) - return -1; - - *hdr_flags = LRO_IPV4 | LRO_TCP; - *iphdr = iph; - - return 0; -} - static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac, const int nfrags, struct sk_buff *skb, @@ -817,7 +781,7 @@ static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx, skb_put(skb, len-4); skb->protocol = eth_type_trans(skb, mac->netdev); - lro_receive_skb(&mac->lro_mgr, skb, (void *)macrx); + napi_gro_receive(&mac->napi, skb); next: RX_DESC(rx, n) = 0; @@ -839,8 +803,6 @@ next: rx_ring(mac)->next_to_clean = n; - lro_flush_all(&mac->lro_mgr); - /* Increase is in number of 16-byte entries, and since each descriptor * with an 8BRES takes up 3x8 bytes (padded to 4x8), increase with * count*2. @@ -1754,16 +1716,6 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev->features = NETIF_F_IP_CSUM | NETIF_F_LLTX | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_GSO; - mac->lro_mgr.max_aggr = LRO_MAX_AGGR; - mac->lro_mgr.max_desc = MAX_LRO_DESCRIPTORS; - mac->lro_mgr.lro_arr = mac->lro_desc; - mac->lro_mgr.get_skb_header = get_skb_hdr; - mac->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; - mac->lro_mgr.dev = mac->netdev; - mac->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; - mac->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; - - mac->dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL); if (!mac->dma_pdev) { dev_err(&mac->pdev->dev, "Can't find DMA Controller\n"); diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.h b/drivers/net/ethernet/pasemi/pasemi_mac.h index a5807703ab96..161c99a98403 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.h +++ b/drivers/net/ethernet/pasemi/pasemi_mac.h @@ -31,7 +31,6 @@ #define CS_RING_SIZE (TX_RING_SIZE*2) -#define MAX_LRO_DESCRIPTORS 8 #define MAX_CS 2 struct pasemi_mac_txring { @@ -84,10 +83,7 @@ struct pasemi_mac { u8 mac_addr[ETH_ALEN]; - struct net_lro_mgr lro_mgr; - struct net_lro_desc lro_desc[MAX_LRO_DESCRIPTORS]; struct timer_list rxtimer; - unsigned int lro_max_aggr; struct pasemi_mac_txring *tx; struct pasemi_mac_rxring *rx; diff --git a/drivers/net/ethernet/pasemi/pasemi_mac_ethtool.c b/drivers/net/ethernet/pasemi/pasemi_mac_ethtool.c index 25fae568261f..f046bfc18e7d 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac_ethtool.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac_ethtool.c @@ -20,7 +20,6 @@ #include <linux/netdevice.h> #include <linux/ethtool.h> #include <linux/pci.h> -#include <linux/inet_lro.h> #include <asm/pasemi_dma.h> #include "pasemi_mac.h" diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 1292c360390c..66b021e3c1be 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -26,7 +26,7 @@ #include "qed_hsi.h" extern const struct qed_common_ops qed_common_ops_pass; -#define DRV_MODULE_VERSION "8.4.0.0" +#define DRV_MODULE_VERSION "8.7.0.0" #define MAX_HWFNS_PER_DEVICE (4) #define NAME_SIZE 16 @@ -70,8 +70,8 @@ struct qed_sb_sp_info; struct qed_mcp_info; struct qed_rt_data { - u32 init_val; - bool b_valid; + u32 *init_val; + bool *b_valid; }; /* The PCI personality is not quite synonymous to protocol ID: @@ -120,6 +120,10 @@ enum QED_PORT_MODE { QED_PORT_MODE_DE_1X25G }; +enum qed_dev_cap { + QED_DEV_CAP_ETH, +}; + struct qed_hw_info { /* PCI personality */ enum qed_pci_personality personality; @@ -151,6 +155,7 @@ struct qed_hw_info { u32 port_mode; u32 hw_mode; + unsigned long device_capabilities; }; struct qed_hw_cid_data { @@ -267,7 +272,7 @@ struct qed_hwfn { struct qed_hw_info hw_info; /* rt_array (for init-tool) */ - struct qed_rt_data *rt_data; + struct qed_rt_data rt_data; /* SPQ */ struct qed_spq *p_spq; @@ -350,9 +355,20 @@ struct qed_dev { char name[NAME_SIZE]; u8 type; -#define QED_DEV_TYPE_BB_A0 (0 << 0) -#define QED_DEV_TYPE_MASK (0x3) -#define QED_DEV_TYPE_SHIFT (0) +#define QED_DEV_TYPE_BB (0 << 0) +#define QED_DEV_TYPE_AH BIT(0) +/* Translate type/revision combo into the proper conditions */ +#define QED_IS_BB(dev) ((dev)->type == QED_DEV_TYPE_BB) +#define QED_IS_BB_A0(dev) (QED_IS_BB(dev) && \ + CHIP_REV_IS_A0(dev)) +#define QED_IS_BB_B0(dev) (QED_IS_BB(dev) && \ + CHIP_REV_IS_B0(dev)) + +#define QED_GET_TYPE(dev) (QED_IS_BB_A0(dev) ? CHIP_BB_A0 : \ + QED_IS_BB_B0(dev) ? CHIP_BB_B0 : CHIP_K2) + + u16 vendor_id; + u16 device_id; u16 chip_num; #define CHIP_NUM_MASK 0xffff @@ -361,6 +377,8 @@ struct qed_dev { u16 chip_rev; #define CHIP_REV_MASK 0xf #define CHIP_REV_SHIFT 12 +#define CHIP_REV_IS_A0(_cdev) (!(_cdev)->chip_rev) +#define CHIP_REV_IS_B0(_cdev) ((_cdev)->chip_rev == 1) u16 chip_metal; #define CHIP_METAL_MASK 0xff @@ -375,10 +393,10 @@ struct qed_dev { u8 num_funcs_in_port; u8 path_id; - enum mf_mode mf_mode; -#define IS_MF(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode != SF) -#define IS_MF_SI(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == MF_NPAR) -#define IS_MF_SD(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == MF_OVLAN) + enum qed_mf_mode mf_mode; +#define IS_MF_DEFAULT(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == QED_MF_DEFAULT) +#define IS_MF_SI(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == QED_MF_NPAR) +#define IS_MF_SD(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == QED_MF_OVLAN) int pcie_width; int pcie_speed; @@ -441,11 +459,6 @@ struct qed_dev { const struct firmware *firmware; }; -#define QED_GET_TYPE(dev) (((dev)->type & QED_DEV_TYPE_MASK) >> \ - QED_DEV_TYPE_SHIFT) -#define QED_IS_BB_A0(dev) (QED_GET_TYPE(dev) == QED_DEV_TYPE_BB_A0) -#define QED_IS_BB(dev) (QED_IS_BB_A0(dev)) - #define NUM_OF_SBS(dev) MAX_SB_PER_PATH_BB #define NUM_OF_ENG_PFS(dev) MAX_NUM_PFS_BB diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 7ccdb46c6764..fc767c07a264 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -448,7 +448,7 @@ int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn) struct qed_cxt_mngr *p_mngr; u32 i; - p_mngr = kzalloc(sizeof(*p_mngr), GFP_ATOMIC); + p_mngr = kzalloc(sizeof(*p_mngr), GFP_KERNEL); if (!p_mngr) { DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_cxt_mngr'\n"); return -ENOMEM; @@ -581,7 +581,8 @@ void qed_qm_init_pf(struct qed_hwfn *p_hwfn) params.num_pf_cids = iids.cids; params.start_pq = qm_info->start_pq; params.num_pf_pqs = qm_info->num_pqs; - params.start_vport = qm_info->num_vports; + params.start_vport = qm_info->start_vport; + params.num_vports = qm_info->num_vports; params.pf_wfq = qm_info->pf_wfq; params.pf_rl = qm_info->pf_rl; params.pq_params = qm_info->qm_pq_params; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 817bbd5476ff..acfe7be49a58 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -134,17 +134,17 @@ static int qed_init_qm_info(struct qed_hwfn *p_hwfn) /* PQs will be arranged as follows: First per-TC PQ then pure-LB quete. */ qm_info->qm_pq_params = kzalloc(sizeof(*qm_info->qm_pq_params) * - num_pqs, GFP_ATOMIC); + num_pqs, GFP_KERNEL); if (!qm_info->qm_pq_params) goto alloc_err; qm_info->qm_vport_params = kzalloc(sizeof(*qm_info->qm_vport_params) * - num_vports, GFP_ATOMIC); + num_vports, GFP_KERNEL); if (!qm_info->qm_vport_params) goto alloc_err; qm_info->qm_port_params = kzalloc(sizeof(*qm_info->qm_port_params) * - MAX_NUM_PORTS, GFP_ATOMIC); + MAX_NUM_PORTS, GFP_KERNEL); if (!qm_info->qm_port_params) goto alloc_err; @@ -341,11 +341,6 @@ void qed_resc_setup(struct qed_dev *cdev) } } -#define FINAL_CLEANUP_CMD_OFFSET (0) -#define FINAL_CLEANUP_CMD (0x1) -#define FINAL_CLEANUP_VALID_OFFSET (6) -#define FINAL_CLEANUP_VFPF_ID_SHIFT (7) -#define FINAL_CLEANUP_COMP (0x2) #define FINAL_CLEANUP_POLL_CNT (100) #define FINAL_CLEANUP_POLL_TIME (10) int qed_final_cleanup(struct qed_hwfn *p_hwfn, @@ -355,12 +350,14 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn, u32 command = 0, addr, count = FINAL_CLEANUP_POLL_CNT; int rc = -EBUSY; - addr = GTT_BAR0_MAP_REG_USDM_RAM + USTORM_FLR_FINAL_ACK_OFFSET; + addr = GTT_BAR0_MAP_REG_USDM_RAM + + USTORM_FLR_FINAL_ACK_OFFSET(p_hwfn->rel_pf_id); - command |= FINAL_CLEANUP_CMD << FINAL_CLEANUP_CMD_OFFSET; - command |= 1 << FINAL_CLEANUP_VALID_OFFSET; - command |= id << FINAL_CLEANUP_VFPF_ID_SHIFT; - command |= FINAL_CLEANUP_COMP << SDM_OP_GEN_COMP_TYPE_SHIFT; + command |= X_FINAL_CLEANUP_AGG_INT << + SDM_AGG_INT_COMP_PARAMS_AGG_INT_INDEX_SHIFT; + command |= 1 << SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_ENABLE_SHIFT; + command |= id << SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_BIT_SHIFT; + command |= SDM_COMP_TYPE_AGG_INT << SDM_OP_GEN_COMP_TYPE_SHIFT; /* Make sure notification is not set before initiating final cleanup */ if (REG_RD(p_hwfn, addr)) { @@ -415,18 +412,16 @@ static void qed_calc_hw_mode(struct qed_hwfn *p_hwfn) } switch (p_hwfn->cdev->mf_mode) { - case SF: - hw_mode |= 1 << MODE_SF; + case QED_MF_DEFAULT: + case QED_MF_NPAR: + hw_mode |= 1 << MODE_MF_SI; break; - case MF_OVLAN: + case QED_MF_OVLAN: hw_mode |= 1 << MODE_MF_SD; break; - case MF_NPAR: - hw_mode |= 1 << MODE_MF_SI; - break; default: - DP_NOTICE(p_hwfn, "Unsupported MF mode, init as SF\n"); - hw_mode |= 1 << MODE_SF; + DP_NOTICE(p_hwfn, "Unsupported MF mode, init as DEFAULT\n"); + hw_mode |= 1 << MODE_MF_SI; } hw_mode |= 1 << MODE_ASIC; @@ -1018,8 +1013,7 @@ static void qed_hw_get_resc(struct qed_hwfn *p_hwfn) u32 *resc_num = p_hwfn->hw_info.resc_num; int num_funcs, i; - num_funcs = IS_MF(p_hwfn) ? MAX_NUM_PFS_BB - : p_hwfn->cdev->num_ports_in_engines; + num_funcs = MAX_NUM_PFS_BB; resc_num[QED_SB] = min_t(u32, (MAX_SB_PER_PATH_BB / num_funcs), @@ -1071,7 +1065,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg; - u32 port_cfg_addr, link_temp, val, nvm_cfg_addr; + u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities; struct qed_mcp_link_params *link; /* Read global nvm_cfg address */ @@ -1134,21 +1128,6 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, break; } - addr = MCP_REG_SCRATCH + nvm_cfg1_offset + - offsetof(struct nvm_cfg1, func[MCP_PF_ID(p_hwfn)]) + - offsetof(struct nvm_cfg1_func, device_id); - val = qed_rd(p_hwfn, p_ptt, addr); - - if (IS_MF(p_hwfn)) { - p_hwfn->hw_info.device_id = - (val & NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_MASK) >> - NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_OFFSET; - } else { - p_hwfn->hw_info.device_id = - (val & NVM_CFG1_FUNC_VENDOR_DEVICE_ID_MASK) >> - NVM_CFG1_FUNC_VENDOR_DEVICE_ID_OFFSET; - } - /* Read default link configuration */ link = &p_hwfn->mcp_info->link_input; port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset + @@ -1220,18 +1199,28 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, switch (mf_mode) { case NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED: - p_hwfn->cdev->mf_mode = MF_OVLAN; + p_hwfn->cdev->mf_mode = QED_MF_OVLAN; break; case NVM_CFG1_GLOB_MF_MODE_NPAR1_0: - p_hwfn->cdev->mf_mode = MF_NPAR; + p_hwfn->cdev->mf_mode = QED_MF_NPAR; break; - case NVM_CFG1_GLOB_MF_MODE_FORCED_SF: - p_hwfn->cdev->mf_mode = SF; + case NVM_CFG1_GLOB_MF_MODE_DEFAULT: + p_hwfn->cdev->mf_mode = QED_MF_DEFAULT; break; } DP_INFO(p_hwfn, "Multi function mode is %08x\n", p_hwfn->cdev->mf_mode); + /* Read Multi-function information from shmem */ + addr = MCP_REG_SCRATCH + nvm_cfg1_offset + + offsetof(struct nvm_cfg1, glob) + + offsetof(struct nvm_cfg1_glob, device_capabilities); + + device_capabilities = qed_rd(p_hwfn, p_ptt, addr); + if (device_capabilities & NVM_CFG1_GLOB_DEVICE_CAPABILITIES_ETHERNET) + __set_bit(QED_DEV_CAP_ETH, + &p_hwfn->hw_info.device_capabilities); + return qed_mcp_fill_shmem_func_info(p_hwfn, p_ptt); } @@ -1293,29 +1282,36 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn, static void qed_get_dev_info(struct qed_dev *cdev) { + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); u32 tmp; - cdev->chip_num = (u16)qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + /* Read Vendor Id / Device Id */ + pci_read_config_word(cdev->pdev, PCI_VENDOR_ID, + &cdev->vendor_id); + pci_read_config_word(cdev->pdev, PCI_DEVICE_ID, + &cdev->device_id); + cdev->chip_num = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CHIP_NUM); - cdev->chip_rev = (u16)qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + cdev->chip_rev = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CHIP_REV); MASK_FIELD(CHIP_REV, cdev->chip_rev); + cdev->type = QED_DEV_TYPE_BB; /* Learn number of HW-functions */ - tmp = qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + tmp = qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CMT_ENABLED_FOR_PAIR); - if (tmp & (1 << cdev->hwfns[0].rel_pf_id)) { + if (tmp & (1 << p_hwfn->rel_pf_id)) { DP_NOTICE(cdev->hwfns, "device in CMT mode\n"); cdev->num_hwfns = 2; } else { cdev->num_hwfns = 1; } - cdev->chip_bond_id = qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + cdev->chip_bond_id = qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CHIP_TEST_REG) >> 4; MASK_FIELD(CHIP_BOND_ID, cdev->chip_bond_id); - cdev->chip_metal = (u16)qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + cdev->chip_metal = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CHIP_METAL); MASK_FIELD(CHIP_METAL, cdev->chip_metal); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 264e954675d1..49bbf696a16d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -34,6 +34,8 @@ enum common_event_opcode { COMMON_EVENT_RESERVED3, COMMON_EVENT_RESERVED4, COMMON_EVENT_RESERVED5, + COMMON_EVENT_RESERVED6, + COMMON_EVENT_EMPTY, MAX_COMMON_EVENT_OPCODE }; @@ -45,6 +47,7 @@ enum common_ramrod_cmd_id { COMMON_RAMROD_RESERVED, COMMON_RAMROD_RESERVED2, COMMON_RAMROD_RESERVED3, + COMMON_RAMROD_EMPTY, MAX_COMMON_RAMROD_CMD_ID }; @@ -331,6 +334,179 @@ struct xstorm_core_conn_ag_ctx { __le16 word15 /* word15 */; }; +struct tstorm_core_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define TSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 +#define TSTORM_CORE_CONN_AG_CTX_BIT2_MASK 0x1 /* bit2 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT2_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_BIT3_MASK 0x1 /* bit3 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT3_SHIFT 3 +#define TSTORM_CORE_CONN_AG_CTX_BIT4_MASK 0x1 /* bit4 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT4_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_BIT5_MASK 0x1 /* bit5 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT5_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define TSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 6 + u8 flags1; +#define TSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define TSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define TSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define TSTORM_CORE_CONN_AG_CTX_CF3_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define TSTORM_CORE_CONN_AG_CTX_CF4_SHIFT 6 + u8 flags2; +#define TSTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define TSTORM_CORE_CONN_AG_CTX_CF5_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define TSTORM_CORE_CONN_AG_CTX_CF6_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ +#define TSTORM_CORE_CONN_AG_CTX_CF7_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ +#define TSTORM_CORE_CONN_AG_CTX_CF8_SHIFT 6 + u8 flags3; +#define TSTORM_CORE_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ +#define TSTORM_CORE_CONN_AG_CTX_CF9_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ +#define TSTORM_CORE_CONN_AG_CTX_CF10_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define TSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define TSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define TSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 6 +#define TSTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define TSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 7 + u8 flags4; +#define TSTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define TSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ +#define TSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 1 +#define TSTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define TSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ +#define TSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT 3 +#define TSTORM_CORE_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ +#define TSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ +#define TSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ +#define TSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT 6 +#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags5; +#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7 + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le32 reg4 /* reg4 */; + __le32 reg5 /* reg5 */; + __le32 reg6 /* reg6 */; + __le32 reg7 /* reg7 */; + __le32 reg8 /* reg8 */; + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* word0 */; + u8 byte4 /* byte4 */; + u8 byte5 /* byte5 */; + __le16 word1 /* word1 */; + __le16 word2 /* conn_dpi */; + __le16 word3 /* word3 */; + __le32 reg9 /* reg9 */; + __le32 reg10 /* reg10 */; +}; + +struct ustorm_core_conn_ag_ctx { + u8 reserved /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define USTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define USTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define USTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 +#define USTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define USTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define USTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define USTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define USTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define USTORM_CORE_CONN_AG_CTX_CF3_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define USTORM_CORE_CONN_AG_CTX_CF4_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define USTORM_CORE_CONN_AG_CTX_CF5_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define USTORM_CORE_CONN_AG_CTX_CF6_SHIFT 6 + u8 flags2; +#define USTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define USTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define USTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 +#define USTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define USTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define USTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define USTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ +#define USTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 5 +#define USTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define USTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 6 +#define USTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define USTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags3; +#define USTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define USTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define USTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define USTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define USTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define USTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define USTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define USTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define USTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5 +#define USTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define USTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define USTORM_CORE_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define USTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7 + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* conn_dpi */; + __le16 word1 /* word1 */; + __le32 rx_producers /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le16 word2 /* word2 */; + __le16 word3 /* word3 */; +}; + /* The core storm context for the Mstorm */ struct mstorm_core_conn_st_ctx { __le32 reserved[24]; @@ -349,8 +525,9 @@ struct core_conn_context { struct regpair pstorm_st_padding[2]; struct xstorm_core_conn_st_ctx xstorm_st_context; struct xstorm_core_conn_ag_ctx xstorm_ag_context; + struct tstorm_core_conn_ag_ctx tstorm_ag_context; + struct ustorm_core_conn_ag_ctx ustorm_ag_context; struct mstorm_core_conn_st_ctx mstorm_st_context; - struct regpair mstorm_st_padding[2]; struct ustorm_core_conn_st_ctx ustorm_st_context; struct regpair ustorm_st_padding[2] /* padding */; }; @@ -397,10 +574,12 @@ union event_ring_element { }; enum personality_type { + BAD_PERSONALITY_TYP, PERSONALITY_RESERVED, PERSONALITY_RESERVED2, PERSONALITY_RDMA_AND_ETH /* Roce or Iwarp */, PERSONALITY_RESERVED3, + PERSONALITY_CORE, PERSONALITY_ETH /* Ethernet */, PERSONALITY_RESERVED4, MAX_PERSONALITY_TYPE @@ -570,7 +749,7 @@ enum block_addr { GRCBASE_NWM = 0x800000, GRCBASE_NWS = 0x700000, GRCBASE_MS = 0x6a0000, - GRCBASE_PHY_PCIE = 0x618000, + GRCBASE_PHY_PCIE = 0x620000, GRCBASE_MISC_AEU = 0x8000, GRCBASE_BAR0_MAP = 0x1c00000, MAX_BLOCK_ADDR @@ -795,13 +974,13 @@ enum init_modes { MODE_RESERVED3, MODE_RESERVED4, MODE_RESERVED5, + MODE_RESERVED6, MODE_SF, MODE_MF_SD, MODE_MF_SI, MODE_PORTS_PER_ENG_1, MODE_PORTS_PER_ENG_2, MODE_PORTS_PER_ENG_4, - MODE_40G, MODE_100G, MODE_EAGLE_ENG1_WORKAROUND, MAX_INIT_MODES @@ -816,43 +995,6 @@ enum init_phases { MAX_INIT_PHASES }; -struct mstorm_core_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define MSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ -#define MSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 -#define MSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define MSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 -#define MSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* cf0 */ -#define MSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 -#define MSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* cf1 */ -#define MSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 -#define MSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* cf2 */ -#define MSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 - u8 flags1; -#define MSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define MSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 -#define MSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define MSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 -#define MSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define MSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 -#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 3 -#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 4 -#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 5 -#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 6 -#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 7 - __le16 word0 /* word0 */; - __le16 word1 /* word1 */; - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; -}; - /* per encapsulation type enabling flags */ struct prs_reg_encapsulation_type_en { u8 flags; @@ -945,6 +1087,17 @@ struct qm_rf_pq_map { #define QM_RF_PQ_MAP_RESERVED_SHIFT 26 }; +/* Completion params for aggregated interrupt completion */ +struct sdm_agg_int_comp_params { + __le16 params; +#define SDM_AGG_INT_COMP_PARAMS_AGG_INT_INDEX_MASK 0x3F +#define SDM_AGG_INT_COMP_PARAMS_AGG_INT_INDEX_SHIFT 0 +#define SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_ENABLE_MASK 0x1 +#define SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_ENABLE_SHIFT 6 +#define SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_BIT_MASK 0x1FF +#define SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_BIT_SHIFT 7 +}; + /* SDM operation gen command (generate aggregative interrupt) */ struct sdm_op_gen { __le32 command; @@ -956,223 +1109,6 @@ struct sdm_op_gen { #define SDM_OP_GEN_RESERVED_SHIFT 20 }; -struct tstorm_core_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define TSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 -#define TSTORM_CORE_CONN_AG_CTX_BIT2_MASK 0x1 /* bit2 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT2_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_BIT3_MASK 0x1 /* bit3 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT3_SHIFT 3 -#define TSTORM_CORE_CONN_AG_CTX_BIT4_MASK 0x1 /* bit4 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT4_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_BIT5_MASK 0x1 /* bit5 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT5_SHIFT 5 -#define TSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ -#define TSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 6 - u8 flags1; -#define TSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ -#define TSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ -#define TSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ -#define TSTORM_CORE_CONN_AG_CTX_CF3_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ -#define TSTORM_CORE_CONN_AG_CTX_CF4_SHIFT 6 - u8 flags2; -#define TSTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ -#define TSTORM_CORE_CONN_AG_CTX_CF5_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ -#define TSTORM_CORE_CONN_AG_CTX_CF6_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ -#define TSTORM_CORE_CONN_AG_CTX_CF7_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ -#define TSTORM_CORE_CONN_AG_CTX_CF8_SHIFT 6 - u8 flags3; -#define TSTORM_CORE_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ -#define TSTORM_CORE_CONN_AG_CTX_CF9_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ -#define TSTORM_CORE_CONN_AG_CTX_CF10_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define TSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define TSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 5 -#define TSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define TSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 6 -#define TSTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ -#define TSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 7 - u8 flags4; -#define TSTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ -#define TSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ -#define TSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 1 -#define TSTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ -#define TSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ -#define TSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT 3 -#define TSTORM_CORE_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ -#define TSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ -#define TSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT 5 -#define TSTORM_CORE_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ -#define TSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT 6 -#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7 - u8 flags5; -#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1 -#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3 -#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5 -#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6 -#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7 - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; - __le32 reg2 /* reg2 */; - __le32 reg3 /* reg3 */; - __le32 reg4 /* reg4 */; - __le32 reg5 /* reg5 */; - __le32 reg6 /* reg6 */; - __le32 reg7 /* reg7 */; - __le32 reg8 /* reg8 */; - u8 byte2 /* byte2 */; - u8 byte3 /* byte3 */; - __le16 word0 /* word0 */; - u8 byte4 /* byte4 */; - u8 byte5 /* byte5 */; - __le16 word1 /* word1 */; - __le16 word2 /* conn_dpi */; - __le16 word3 /* word3 */; - __le32 reg9 /* reg9 */; - __le32 reg10 /* reg10 */; -}; - -struct ustorm_core_conn_ag_ctx { - u8 reserved /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define USTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ -#define USTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 -#define USTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define USTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 -#define USTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ -#define USTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 -#define USTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ -#define USTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 -#define USTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ -#define USTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 - u8 flags1; -#define USTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ -#define USTORM_CORE_CONN_AG_CTX_CF3_SHIFT 0 -#define USTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ -#define USTORM_CORE_CONN_AG_CTX_CF4_SHIFT 2 -#define USTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ -#define USTORM_CORE_CONN_AG_CTX_CF5_SHIFT 4 -#define USTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ -#define USTORM_CORE_CONN_AG_CTX_CF6_SHIFT 6 - u8 flags2; -#define USTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define USTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 -#define USTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define USTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 -#define USTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define USTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 -#define USTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ -#define USTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 3 -#define USTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ -#define USTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 4 -#define USTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ -#define USTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 5 -#define USTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ -#define USTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 6 -#define USTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define USTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7 - u8 flags3; -#define USTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define USTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0 -#define USTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define USTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1 -#define USTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define USTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2 -#define USTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define USTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3 -#define USTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ -#define USTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4 -#define USTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ -#define USTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5 -#define USTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ -#define USTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6 -#define USTORM_CORE_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ -#define USTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7 - u8 byte2 /* byte2 */; - u8 byte3 /* byte3 */; - __le16 word0 /* conn_dpi */; - __le16 word1 /* word1 */; - __le32 rx_producers /* reg0 */; - __le32 reg1 /* reg1 */; - __le32 reg2 /* reg2 */; - __le32 reg3 /* reg3 */; - __le16 word2 /* word2 */; - __le16 word3 /* word3 */; -}; - -struct ystorm_core_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define YSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ -#define YSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 -#define YSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define YSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 -#define YSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* cf0 */ -#define YSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 -#define YSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* cf1 */ -#define YSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 -#define YSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* cf2 */ -#define YSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 - u8 flags1; -#define YSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define YSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 -#define YSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define YSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 -#define YSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define YSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 -#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 3 -#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 4 -#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 5 -#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 6 -#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 7 - u8 byte2 /* byte2 */; - u8 byte3 /* byte3 */; - __le16 word0 /* word0 */; - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; - __le16 word1 /* word1 */; - __le16 word2 /* word2 */; - __le16 word3 /* word3 */; - __le16 word4 /* word4 */; - __le32 reg2 /* reg2 */; - __le32 reg3 /* reg3 */; -}; - /*********************************** Init ************************************/ /* Width of GRC address in bits (addresses are specified in dwords) */ @@ -1274,13 +1210,6 @@ enum chip_ids { MAX_CHIP_IDS }; -enum idle_chk_severity_types { - IDLE_CHK_SEVERITY_ERROR /* idle check failure should cause an error */, - IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC, - IDLE_CHK_SEVERITY_WARNING, - MAX_IDLE_CHK_SEVERITY_TYPES -}; - struct init_array_raw_hdr { __le32 data; #define INIT_ARRAY_RAW_HDR_TYPE_MASK 0xF @@ -1340,14 +1269,6 @@ struct init_callback_op { __le16 block_id /* Blocks ID */; }; -/* init comparison types */ -enum init_comparison_types { - INIT_COMPARISON_EQ /* init value is included in the init command */, - INIT_COMPARISON_OR /* init value is all zeros */, - INIT_COMPARISON_AND /* init value is an array of values */, - MAX_INIT_COMPARISON_TYPES -}; - /* init operation: delay */ struct init_delay_op { __le32 op_data; @@ -1444,12 +1365,10 @@ struct init_read_op { __le32 op_data; #define INIT_READ_OP_OP_MASK 0xF #define INIT_READ_OP_OP_SHIFT 0 -#define INIT_READ_OP_POLL_COMP_MASK 0x7 -#define INIT_READ_OP_POLL_COMP_SHIFT 4 +#define INIT_READ_OP_POLL_TYPE_MASK 0xF +#define INIT_READ_OP_POLL_TYPE_SHIFT 4 #define INIT_READ_OP_RESERVED_MASK 0x1 -#define INIT_READ_OP_RESERVED_SHIFT 7 -#define INIT_READ_OP_POLL_MASK 0x1 -#define INIT_READ_OP_POLL_SHIFT 8 +#define INIT_READ_OP_RESERVED_SHIFT 8 #define INIT_READ_OP_ADDRESS_MASK 0x7FFFFF #define INIT_READ_OP_ADDRESS_SHIFT 9 __le32 expected_val; @@ -1477,6 +1396,14 @@ enum init_op_types { MAX_INIT_OP_TYPES }; +enum init_poll_types { + INIT_POLL_NONE /* No polling */, + INIT_POLL_EQ /* init value is included in the init command */, + INIT_POLL_OR /* init value is all zeros */, + INIT_POLL_AND /* init value is an array of values */, + MAX_INIT_POLL_TYPES +}; + /* init source types */ enum init_source_types { INIT_SRC_INLINE /* init value is included in the init command */, @@ -1677,175 +1604,213 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn, u16 num_pqs); /* Ystorm flow control mode. Use enum fw_flow_ctrl_mode */ -#define YSTORM_FLOW_CONTROL_MODE_OFFSET (IRO[0].base) -#define YSTORM_FLOW_CONTROL_MODE_SIZE (IRO[0].size) +#define YSTORM_FLOW_CONTROL_MODE_OFFSET (IRO[0].base) +#define YSTORM_FLOW_CONTROL_MODE_SIZE (IRO[0].size) /* Tstorm port statistics */ -#define TSTORM_PORT_STAT_OFFSET(port_id) (IRO[1].base + \ - ((port_id) * \ - IRO[1].m1)) -#define TSTORM_PORT_STAT_SIZE (IRO[1].size) +#define TSTORM_PORT_STAT_OFFSET(port_id) (IRO[1].base + ((port_id) * IRO[1].m1)) +#define TSTORM_PORT_STAT_SIZE (IRO[1].size) +/* Tstorm ll2 port statistics */ +#define TSTORM_LL2_PORT_STAT_OFFSET(port_id) \ + (IRO[2].base + ((port_id) * IRO[2].m1)) +#define TSTORM_LL2_PORT_STAT_SIZE (IRO[2].size) /* Ustorm VF-PF Channel ready flag */ -#define USTORM_VF_PF_CHANNEL_READY_OFFSET(vf_id) (IRO[2].base + \ - ((vf_id) * \ - IRO[2].m1)) -#define USTORM_VF_PF_CHANNEL_READY_SIZE (IRO[2].size) +#define USTORM_VF_PF_CHANNEL_READY_OFFSET(vf_id) \ + (IRO[3].base + ((vf_id) * IRO[3].m1)) +#define USTORM_VF_PF_CHANNEL_READY_SIZE (IRO[3].size) /* Ustorm Final flr cleanup ack */ -#define USTORM_FLR_FINAL_ACK_OFFSET (IRO[3].base) -#define USTORM_FLR_FINAL_ACK_SIZE (IRO[3].size) +#define USTORM_FLR_FINAL_ACK_OFFSET(pf_id) (IRO[4].base + ((pf_id) * IRO[4].m1)) +#define USTORM_FLR_FINAL_ACK_SIZE (IRO[4].size) /* Ustorm Event ring consumer */ -#define USTORM_EQE_CONS_OFFSET(pf_id) (IRO[4].base + \ - ((pf_id) * \ - IRO[4].m1)) -#define USTORM_EQE_CONS_SIZE (IRO[4].size) -/* Ustorm Completion ring consumer */ -#define USTORM_CQ_CONS_OFFSET(global_queue_id) (IRO[5].base + \ - ((global_queue_id) * \ - IRO[5].m1)) -#define USTORM_CQ_CONS_SIZE (IRO[5].size) +#define USTORM_EQE_CONS_OFFSET(pf_id) (IRO[5].base + ((pf_id) * IRO[5].m1)) +#define USTORM_EQE_CONS_SIZE (IRO[5].size) +/* Ustorm Common Queue ring consumer */ +#define USTORM_COMMON_QUEUE_CONS_OFFSET(global_queue_id) \ + (IRO[6].base + ((global_queue_id) * IRO[6].m1)) +#define USTORM_COMMON_QUEUE_CONS_SIZE (IRO[6].size) /* Xstorm Integration Test Data */ -#define XSTORM_INTEG_TEST_DATA_OFFSET (IRO[6].base) -#define XSTORM_INTEG_TEST_DATA_SIZE (IRO[6].size) +#define XSTORM_INTEG_TEST_DATA_OFFSET (IRO[7].base) +#define XSTORM_INTEG_TEST_DATA_SIZE (IRO[7].size) /* Ystorm Integration Test Data */ -#define YSTORM_INTEG_TEST_DATA_OFFSET (IRO[7].base) -#define YSTORM_INTEG_TEST_DATA_SIZE (IRO[7].size) +#define YSTORM_INTEG_TEST_DATA_OFFSET (IRO[8].base) +#define YSTORM_INTEG_TEST_DATA_SIZE (IRO[8].size) /* Pstorm Integration Test Data */ -#define PSTORM_INTEG_TEST_DATA_OFFSET (IRO[8].base) -#define PSTORM_INTEG_TEST_DATA_SIZE (IRO[8].size) +#define PSTORM_INTEG_TEST_DATA_OFFSET (IRO[9].base) +#define PSTORM_INTEG_TEST_DATA_SIZE (IRO[9].size) /* Tstorm Integration Test Data */ -#define TSTORM_INTEG_TEST_DATA_OFFSET (IRO[9].base) -#define TSTORM_INTEG_TEST_DATA_SIZE (IRO[9].size) +#define TSTORM_INTEG_TEST_DATA_OFFSET (IRO[10].base) +#define TSTORM_INTEG_TEST_DATA_SIZE (IRO[10].size) /* Mstorm Integration Test Data */ -#define MSTORM_INTEG_TEST_DATA_OFFSET (IRO[10].base) -#define MSTORM_INTEG_TEST_DATA_SIZE (IRO[10].size) +#define MSTORM_INTEG_TEST_DATA_OFFSET (IRO[11].base) +#define MSTORM_INTEG_TEST_DATA_SIZE (IRO[11].size) /* Ustorm Integration Test Data */ -#define USTORM_INTEG_TEST_DATA_OFFSET (IRO[11].base) -#define USTORM_INTEG_TEST_DATA_SIZE (IRO[11].size) +#define USTORM_INTEG_TEST_DATA_OFFSET (IRO[12].base) +#define USTORM_INTEG_TEST_DATA_SIZE (IRO[12].size) /* Tstorm producers */ -#define TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) (IRO[12].base + \ - ((core_rx_queue_id) * \ - IRO[12].m1)) -#define TSTORM_LL2_RX_PRODS_SIZE (IRO[12].size) -/* Tstorm LiteL2 queue statistics */ -#define CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_q_id) (IRO[13].base + \ - ((core_rx_q_id) * \ - IRO[13].m1)) -#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE (IRO[13].size) +#define TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) \ + (IRO[13].base + ((core_rx_queue_id) * IRO[13].m1)) +#define TSTORM_LL2_RX_PRODS_SIZE (IRO[13].size) +/* Tstorm LightL2 queue statistics */ +#define CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \ + (IRO[14].base + ((core_rx_queue_id) * IRO[14].m1)) +#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE (IRO[14].size) /* Ustorm LiteL2 queue statistics */ -#define CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_q_id) (IRO[14].base + \ - ((core_rx_q_id) * \ - IRO[14].m1)) -#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE (IRO[14].size) +#define CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \ + (IRO[15].base + ((core_rx_queue_id) * IRO[15].m1)) +#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE (IRO[15].size) /* Pstorm LiteL2 queue statistics */ -#define CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_txst_id) (IRO[15].base + \ - ((core_txst_id) * \ - IRO[15].m1)) -#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE (IRO[15].size) +#define CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_tx_stats_id) \ + (IRO[16].base + ((core_tx_stats_id) * IRO[16].m1)) +#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE (IRO[16].size) /* Mstorm queue statistics */ -#define MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) (IRO[16].base + \ - ((stat_counter_id) * \ - IRO[16].m1)) -#define MSTORM_QUEUE_STAT_SIZE (IRO[16].size) +#define MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ + (IRO[17].base + ((stat_counter_id) * IRO[17].m1)) +#define MSTORM_QUEUE_STAT_SIZE (IRO[17].size) /* Mstorm producers */ -#define MSTORM_PRODS_OFFSET(queue_id) (IRO[17].base + \ - ((queue_id) * \ - IRO[17].m1)) -#define MSTORM_PRODS_SIZE (IRO[17].size) +#define MSTORM_PRODS_OFFSET(queue_id) (IRO[18].base + ((queue_id) * IRO[18].m1)) +#define MSTORM_PRODS_SIZE (IRO[18].size) /* TPA agregation timeout in us resolution (on ASIC) */ -#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[18].base) -#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[18].size) +#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[19].base) +#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[19].size) /* Ustorm queue statistics */ -#define USTORM_QUEUE_STAT_OFFSET(stat_counter_id) (IRO[19].base + \ - ((stat_counter_id) * \ - IRO[19].m1)) -#define USTORM_QUEUE_STAT_SIZE (IRO[19].size) +#define USTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ + (IRO[20].base + ((stat_counter_id) * IRO[20].m1)) +#define USTORM_QUEUE_STAT_SIZE (IRO[20].size) /* Ustorm queue zone */ -#define USTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) (IRO[20].base + \ - ((queue_id) * \ - IRO[20].m1)) -#define USTORM_ETH_QUEUE_ZONE_SIZE (IRO[20].size) +#define USTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \ + (IRO[21].base + ((queue_id) * IRO[21].m1)) +#define USTORM_ETH_QUEUE_ZONE_SIZE (IRO[21].size) /* Pstorm queue statistics */ -#define PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) (IRO[21].base + \ - ((stat_counter_id) * \ - IRO[21].m1)) -#define PSTORM_QUEUE_STAT_SIZE (IRO[21].size) +#define PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ + (IRO[22].base + ((stat_counter_id) * IRO[22].m1)) +#define PSTORM_QUEUE_STAT_SIZE (IRO[22].size) /* Tstorm last parser message */ -#define TSTORM_ETH_PRS_INPUT_OFFSET(pf_id) (IRO[22].base + \ - ((pf_id) * \ - IRO[22].m1)) -#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[22].size) +#define TSTORM_ETH_PRS_INPUT_OFFSET (IRO[23].base) +#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[23].size) +/* Tstorm Eth limit Rx rate */ +#define ETH_RX_RATE_LIMIT_OFFSET(pf_id) (IRO[24].base + ((pf_id) * IRO[24].m1)) +#define ETH_RX_RATE_LIMIT_SIZE (IRO[24].size) /* Ystorm queue zone */ -#define YSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) (IRO[23].base + \ - ((queue_id) * \ - IRO[23].m1)) -#define YSTORM_ETH_QUEUE_ZONE_SIZE (IRO[23].size) +#define YSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \ + (IRO[25].base + ((queue_id) * IRO[25].m1)) +#define YSTORM_ETH_QUEUE_ZONE_SIZE (IRO[25].size) /* Ystorm cqe producer */ -#define YSTORM_TOE_CQ_PROD_OFFSET(rss_id) (IRO[24].base + \ - ((rss_id) * \ - IRO[24].m1)) -#define YSTORM_TOE_CQ_PROD_SIZE (IRO[24].size) +#define YSTORM_TOE_CQ_PROD_OFFSET(rss_id) \ + (IRO[26].base + ((rss_id) * IRO[26].m1)) +#define YSTORM_TOE_CQ_PROD_SIZE (IRO[26].size) /* Ustorm cqe producer */ -#define USTORM_TOE_CQ_PROD_OFFSET(rss_id) (IRO[25].base + \ - ((rss_id) * \ - IRO[25].m1)) -#define USTORM_TOE_CQ_PROD_SIZE (IRO[25].size) +#define USTORM_TOE_CQ_PROD_OFFSET(rss_id) \ + (IRO[27].base + ((rss_id) * IRO[27].m1)) +#define USTORM_TOE_CQ_PROD_SIZE (IRO[27].size) /* Ustorm grq producer */ -#define USTORM_TOE_GRQ_PROD_OFFSET(pf_id) (IRO[26].base + \ - ((pf_id) * \ - IRO[26].m1)) -#define USTORM_TOE_GRQ_PROD_SIZE (IRO[26].size) +#define USTORM_TOE_GRQ_PROD_OFFSET(pf_id) \ + (IRO[28].base + ((pf_id) * IRO[28].m1)) +#define USTORM_TOE_GRQ_PROD_SIZE (IRO[28].size) /* Tstorm cmdq-cons of given command queue-id */ -#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) (IRO[27].base + \ - ((cmdq_queue_id) * \ - IRO[27].m1)) -#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[27].size) +#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) \ + (IRO[29].base + ((cmdq_queue_id) * IRO[29].m1)) +#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[29].size) /* Mstorm rq-cons of given queue-id */ -#define MSTORM_SCSI_RQ_CONS_OFFSET(rq_queue_id) (IRO[28].base + \ - ((rq_queue_id) * \ - IRO[28].m1)) -#define MSTORM_SCSI_RQ_CONS_SIZE (IRO[28].size) +#define MSTORM_SCSI_RQ_CONS_OFFSET(rq_queue_id) \ + (IRO[30].base + ((rq_queue_id) * IRO[30].m1)) +#define MSTORM_SCSI_RQ_CONS_SIZE (IRO[30].size) +/* Mstorm bdq-external-producer of given BDQ function ID, BDqueue-id */ +#define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \ + (IRO[31].base + ((func_id) * IRO[31].m1) + ((bdq_id) * IRO[31].m2)) +#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[31].size) +/* Tstorm (reflects M-Storm) bdq-external-producer of given fn ID, BDqueue-id */ +#define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \ + (IRO[32].base + ((func_id) * IRO[32].m1) + ((bdq_id) * IRO[32].m2)) +#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[32].size) +/* Tstorm iSCSI RX stats */ +#define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[33].base + ((pf_id) * IRO[33].m1)) +#define TSTORM_ISCSI_RX_STATS_SIZE (IRO[33].size) +/* Mstorm iSCSI RX stats */ +#define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[34].base + ((pf_id) * IRO[34].m1)) +#define MSTORM_ISCSI_RX_STATS_SIZE (IRO[34].size) +/* Ustorm iSCSI RX stats */ +#define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[35].base + ((pf_id) * IRO[35].m1)) +#define USTORM_ISCSI_RX_STATS_SIZE (IRO[35].size) +/* Xstorm iSCSI TX stats */ +#define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[36].base + ((pf_id) * IRO[36].m1)) +#define XSTORM_ISCSI_TX_STATS_SIZE (IRO[36].size) +/* Ystorm iSCSI TX stats */ +#define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[37].base + ((pf_id) * IRO[37].m1)) +#define YSTORM_ISCSI_TX_STATS_SIZE (IRO[37].size) +/* Pstorm iSCSI TX stats */ +#define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[38].base + ((pf_id) * IRO[38].m1)) +#define PSTORM_ISCSI_TX_STATS_SIZE (IRO[38].size) +/* Tstorm FCoE RX stats */ +#define TSTORM_FCOE_RX_STATS_OFFSET(pf_id) \ + (IRO[39].base + ((pf_id) * IRO[39].m1)) +#define TSTORM_FCOE_RX_STATS_SIZE (IRO[39].size) +/* Mstorm FCoE RX stats */ +#define MSTORM_FCOE_RX_STATS_OFFSET(pf_id) \ + (IRO[40].base + ((pf_id) * IRO[40].m1)) +#define MSTORM_FCOE_RX_STATS_SIZE (IRO[40].size) +/* Pstorm FCoE TX stats */ +#define PSTORM_FCOE_TX_STATS_OFFSET(pf_id) \ + (IRO[41].base + ((pf_id) * IRO[41].m1)) +#define PSTORM_FCOE_TX_STATS_SIZE (IRO[41].size) /* Pstorm RoCE statistics */ -#define PSTORM_ROCE_STAT_OFFSET(stat_counter_id) (IRO[29].base + \ - ((stat_counter_id) * \ - IRO[29].m1)) -#define PSTORM_ROCE_STAT_SIZE (IRO[29].size) +#define PSTORM_ROCE_STAT_OFFSET(stat_counter_id) \ + (IRO[42].base + ((stat_counter_id) * IRO[42].m1)) +#define PSTORM_ROCE_STAT_SIZE (IRO[42].size) /* Tstorm RoCE statistics */ -#define TSTORM_ROCE_STAT_OFFSET(stat_counter_id) (IRO[30].base + \ - ((stat_counter_id) * \ - IRO[30].m1)) -#define TSTORM_ROCE_STAT_SIZE (IRO[30].size) - -static const struct iro iro_arr[31] = { - { 0x10, 0x0, 0x0, 0x0, 0x8 }, - { 0x4448, 0x60, 0x0, 0x0, 0x60 }, - { 0x498, 0x8, 0x0, 0x0, 0x4 }, - { 0x494, 0x0, 0x0, 0x0, 0x4 }, - { 0x10, 0x8, 0x0, 0x0, 0x2 }, - { 0x90, 0x8, 0x0, 0x0, 0x2 }, - { 0x4540, 0x0, 0x0, 0x0, 0xf8 }, - { 0x39e0, 0x0, 0x0, 0x0, 0xf8 }, - { 0x2598, 0x0, 0x0, 0x0, 0xf8 }, - { 0x4350, 0x0, 0x0, 0x0, 0xf8 }, - { 0x52d0, 0x0, 0x0, 0x0, 0xf8 }, - { 0x7a48, 0x0, 0x0, 0x0, 0xf8 }, - { 0x100, 0x8, 0x0, 0x0, 0x8 }, - { 0x5808, 0x10, 0x0, 0x0, 0x10 }, - { 0xb100, 0x30, 0x0, 0x0, 0x30 }, - { 0x95c0, 0x30, 0x0, 0x0, 0x30 }, - { 0x54f8, 0x40, 0x0, 0x0, 0x40 }, - { 0x200, 0x10, 0x0, 0x0, 0x8 }, - { 0x9e70, 0x0, 0x0, 0x0, 0x4 }, - { 0x7ca0, 0x40, 0x0, 0x0, 0x30 }, - { 0xd00, 0x8, 0x0, 0x0, 0x8 }, - { 0x2790, 0x80, 0x0, 0x0, 0x38 }, - { 0xa520, 0xf0, 0x0, 0x0, 0xf0 }, - { 0x80, 0x8, 0x0, 0x0, 0x8 }, - { 0xac0, 0x8, 0x0, 0x0, 0x8 }, - { 0x2580, 0x8, 0x0, 0x0, 0x8 }, - { 0x2500, 0x8, 0x0, 0x0, 0x8 }, - { 0x440, 0x8, 0x0, 0x0, 0x2 }, - { 0x1800, 0x8, 0x0, 0x0, 0x2 }, - { 0x27c8, 0x80, 0x0, 0x0, 0x10 }, - { 0x4710, 0x10, 0x0, 0x0, 0x10 }, +#define TSTORM_ROCE_STAT_OFFSET(stat_counter_id) \ + (IRO[43].base + ((stat_counter_id) * IRO[43].m1)) +#define TSTORM_ROCE_STAT_SIZE (IRO[43].size) + +static const struct iro iro_arr[44] = { + { 0x10, 0x0, 0x0, 0x0, 0x8 }, + { 0x47c8, 0x60, 0x0, 0x0, 0x60 }, + { 0x5e30, 0x20, 0x0, 0x0, 0x20 }, + { 0x510, 0x8, 0x0, 0x0, 0x4 }, + { 0x490, 0x8, 0x0, 0x0, 0x4 }, + { 0x10, 0x8, 0x0, 0x0, 0x2 }, + { 0x90, 0x8, 0x0, 0x0, 0x2 }, + { 0x4940, 0x0, 0x0, 0x0, 0x78 }, + { 0x3de0, 0x0, 0x0, 0x0, 0x78 }, + { 0x2998, 0x0, 0x0, 0x0, 0x78 }, + { 0x4750, 0x0, 0x0, 0x0, 0x78 }, + { 0x56d0, 0x0, 0x0, 0x0, 0x78 }, + { 0x7e50, 0x0, 0x0, 0x0, 0x78 }, + { 0x100, 0x8, 0x0, 0x0, 0x8 }, + { 0x5c10, 0x10, 0x0, 0x0, 0x10 }, + { 0xb508, 0x30, 0x0, 0x0, 0x30 }, + { 0x95c0, 0x30, 0x0, 0x0, 0x30 }, + { 0x58a0, 0x40, 0x0, 0x0, 0x40 }, + { 0x200, 0x10, 0x0, 0x0, 0x8 }, + { 0xa230, 0x0, 0x0, 0x0, 0x4 }, + { 0x8058, 0x40, 0x0, 0x0, 0x30 }, + { 0xd00, 0x8, 0x0, 0x0, 0x8 }, + { 0x2b30, 0x80, 0x0, 0x0, 0x38 }, + { 0xa808, 0x0, 0x0, 0x0, 0xf0 }, + { 0xa8f8, 0x8, 0x0, 0x0, 0x8 }, + { 0x80, 0x8, 0x0, 0x0, 0x8 }, + { 0xac0, 0x8, 0x0, 0x0, 0x8 }, + { 0x2580, 0x8, 0x0, 0x0, 0x8 }, + { 0x2500, 0x8, 0x0, 0x0, 0x8 }, + { 0x440, 0x8, 0x0, 0x0, 0x2 }, + { 0x1800, 0x8, 0x0, 0x0, 0x2 }, + { 0x1a00, 0x10, 0x8, 0x0, 0x2 }, + { 0x640, 0x10, 0x8, 0x0, 0x2 }, + { 0xd9b8, 0x38, 0x0, 0x0, 0x24 }, + { 0x11048, 0x10, 0x0, 0x0, 0x8 }, + { 0x11678, 0x38, 0x0, 0x0, 0x18 }, + { 0xaec0, 0x30, 0x0, 0x0, 0x10 }, + { 0x8700, 0x28, 0x0, 0x0, 0x18 }, + { 0xec00, 0x10, 0x0, 0x0, 0x10 }, + { 0xde38, 0x40, 0x0, 0x0, 0x30 }, + { 0x121a8, 0x38, 0x0, 0x0, 0x8 }, + { 0xf068, 0x20, 0x0, 0x0, 0x20 }, + { 0x2b68, 0x80, 0x0, 0x0, 0x10 }, + { 0x4ab8, 0x10, 0x0, 0x0, 0x10 }, }; /* Runtime array offsets */ @@ -1866,426 +1831,427 @@ static const struct iro iro_arr[31] = { #define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14 #define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15 #define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16 -#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 17 -#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 18 -#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 19 -#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 20 -#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 21 -#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 22 -#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 23 -#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 760 +#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 17 +#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 18 +#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 19 +#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 20 +#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 21 +#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 22 +#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 23 +#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 24 +#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 #define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 -#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 760 +#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 #define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 -#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1496 +#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1497 #define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736 -#define CAU_REG_PI_MEMORY_RT_OFFSET 2232 +#define CAU_REG_PI_MEMORY_RT_OFFSET 2233 #define CAU_REG_PI_MEMORY_RT_SIZE 4416 -#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6648 -#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6649 -#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6650 -#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6651 -#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6652 -#define PRS_REG_SEARCH_TCP_RT_OFFSET 6653 -#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6654 -#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6655 -#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6656 -#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6657 -#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6658 -#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6659 -#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6660 -#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6661 -#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6662 -#define SRC_REG_FIRSTFREE_RT_OFFSET 6663 +#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6649 +#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6650 +#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6651 +#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6652 +#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6653 +#define PRS_REG_SEARCH_TCP_RT_OFFSET 6654 +#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6655 +#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6656 +#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6657 +#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6658 +#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6659 +#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6660 +#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6661 +#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6662 +#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6663 +#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6664 +#define SRC_REG_FIRSTFREE_RT_OFFSET 6665 #define SRC_REG_FIRSTFREE_RT_SIZE 2 -#define SRC_REG_LASTFREE_RT_OFFSET 6665 +#define SRC_REG_LASTFREE_RT_OFFSET 6667 #define SRC_REG_LASTFREE_RT_SIZE 2 -#define SRC_REG_COUNTFREE_RT_OFFSET 6667 -#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6668 -#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6669 -#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6670 -#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6671 -#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6672 -#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6673 -#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6674 -#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6675 -#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6676 -#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6677 -#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6678 -#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6679 -#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6680 -#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6681 -#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6682 -#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6683 -#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6684 -#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6685 -#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6686 -#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6687 -#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6688 -#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6689 -#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6690 -#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6691 -#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6692 -#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6693 -#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6694 -#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6695 -#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6696 -#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6697 -#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6698 -#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6699 -#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6700 -#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6701 +#define SRC_REG_COUNTFREE_RT_OFFSET 6669 +#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6670 +#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6671 +#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6672 +#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6673 +#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6674 +#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6675 +#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6676 +#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6677 +#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6678 +#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6679 +#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6680 +#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6681 +#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6682 +#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6683 +#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6684 +#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6685 +#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6686 +#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6687 +#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6688 +#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6689 +#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6690 +#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6691 +#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6692 +#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6693 +#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6694 +#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6695 +#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6696 +#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6697 +#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6698 +#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6699 +#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6700 +#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6701 +#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6702 +#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6703 #define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000 -#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28701 -#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28702 -#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28703 -#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28704 -#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28705 -#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28706 -#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28707 -#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28708 -#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28709 -#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28710 -#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28711 +#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28703 +#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28704 +#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28705 +#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28706 +#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28707 +#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28708 +#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28709 +#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28710 +#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28711 +#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28712 +#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28713 #define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416 -#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29127 +#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29129 #define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512 -#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29639 -#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29640 -#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29641 -#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29642 -#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29643 -#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29644 -#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29645 -#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29646 -#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29647 -#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29648 -#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29649 -#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29650 -#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29651 -#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29652 -#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29653 -#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29654 -#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29655 -#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29656 -#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29657 -#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29658 -#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29659 -#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29660 -#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29661 -#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29662 -#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29663 -#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29664 -#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29665 -#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29666 -#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29667 -#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29668 -#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29669 -#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29670 -#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29671 -#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29672 -#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29673 -#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29674 -#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29675 -#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29676 -#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29677 -#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29678 -#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29679 -#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29680 -#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29681 -#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29682 -#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29683 -#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29684 -#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29685 -#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29686 -#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29687 -#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29688 -#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29689 -#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29690 -#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29691 -#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29692 -#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29693 -#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29694 -#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29695 -#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29696 -#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29697 -#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29698 -#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29699 -#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29700 -#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29701 -#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29702 -#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29703 -#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29704 -#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29705 -#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29706 +#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29641 +#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29642 +#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29643 +#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29644 +#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29645 +#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29646 +#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29647 +#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29648 +#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29649 +#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29650 +#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29651 +#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29652 +#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29653 +#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29654 +#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29655 +#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29656 +#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29657 +#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29658 +#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29659 +#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29660 +#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29661 +#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29662 +#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29663 +#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29664 +#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29665 +#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29666 +#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29667 +#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29668 +#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29669 +#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29670 +#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29671 +#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29672 +#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29673 +#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29674 +#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29675 +#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29676 +#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29677 +#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29678 +#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29679 +#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29680 +#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29681 +#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29682 +#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29683 +#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29684 +#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29685 +#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29686 +#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29687 +#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29688 +#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29689 +#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29690 +#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29691 +#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29692 +#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29693 +#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29694 +#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29695 +#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29696 +#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29697 +#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29698 +#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29699 +#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29700 +#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29701 +#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29702 +#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29703 +#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29704 +#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29705 +#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29706 +#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29707 +#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29708 #define QM_REG_BASEADDROTHERPQ_RT_SIZE 128 -#define QM_REG_VOQCRDLINE_RT_OFFSET 29834 +#define QM_REG_VOQCRDLINE_RT_OFFSET 29836 #define QM_REG_VOQCRDLINE_RT_SIZE 20 -#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29854 +#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29856 #define QM_REG_VOQINITCRDLINE_RT_SIZE 20 -#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29874 -#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29875 -#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29876 -#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29877 -#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29878 -#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29879 -#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29880 -#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29881 -#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29882 -#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29883 -#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29884 -#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29885 -#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29886 -#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29887 -#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29888 -#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29889 -#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29890 -#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29891 -#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29892 -#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29893 -#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29894 -#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29895 -#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29896 -#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29897 -#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29898 -#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29899 -#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29900 -#define QM_REG_PQTX2PF_0_RT_OFFSET 29901 -#define QM_REG_PQTX2PF_1_RT_OFFSET 29902 -#define QM_REG_PQTX2PF_2_RT_OFFSET 29903 -#define QM_REG_PQTX2PF_3_RT_OFFSET 29904 -#define QM_REG_PQTX2PF_4_RT_OFFSET 29905 -#define QM_REG_PQTX2PF_5_RT_OFFSET 29906 -#define QM_REG_PQTX2PF_6_RT_OFFSET 29907 -#define QM_REG_PQTX2PF_7_RT_OFFSET 29908 -#define QM_REG_PQTX2PF_8_RT_OFFSET 29909 -#define QM_REG_PQTX2PF_9_RT_OFFSET 29910 -#define QM_REG_PQTX2PF_10_RT_OFFSET 29911 -#define QM_REG_PQTX2PF_11_RT_OFFSET 29912 -#define QM_REG_PQTX2PF_12_RT_OFFSET 29913 -#define QM_REG_PQTX2PF_13_RT_OFFSET 29914 -#define QM_REG_PQTX2PF_14_RT_OFFSET 29915 -#define QM_REG_PQTX2PF_15_RT_OFFSET 29916 -#define QM_REG_PQTX2PF_16_RT_OFFSET 29917 -#define QM_REG_PQTX2PF_17_RT_OFFSET 29918 -#define QM_REG_PQTX2PF_18_RT_OFFSET 29919 -#define QM_REG_PQTX2PF_19_RT_OFFSET 29920 -#define QM_REG_PQTX2PF_20_RT_OFFSET 29921 -#define QM_REG_PQTX2PF_21_RT_OFFSET 29922 -#define QM_REG_PQTX2PF_22_RT_OFFSET 29923 -#define QM_REG_PQTX2PF_23_RT_OFFSET 29924 -#define QM_REG_PQTX2PF_24_RT_OFFSET 29925 -#define QM_REG_PQTX2PF_25_RT_OFFSET 29926 -#define QM_REG_PQTX2PF_26_RT_OFFSET 29927 -#define QM_REG_PQTX2PF_27_RT_OFFSET 29928 -#define QM_REG_PQTX2PF_28_RT_OFFSET 29929 -#define QM_REG_PQTX2PF_29_RT_OFFSET 29930 -#define QM_REG_PQTX2PF_30_RT_OFFSET 29931 -#define QM_REG_PQTX2PF_31_RT_OFFSET 29932 -#define QM_REG_PQTX2PF_32_RT_OFFSET 29933 -#define QM_REG_PQTX2PF_33_RT_OFFSET 29934 -#define QM_REG_PQTX2PF_34_RT_OFFSET 29935 -#define QM_REG_PQTX2PF_35_RT_OFFSET 29936 -#define QM_REG_PQTX2PF_36_RT_OFFSET 29937 -#define QM_REG_PQTX2PF_37_RT_OFFSET 29938 -#define QM_REG_PQTX2PF_38_RT_OFFSET 29939 -#define QM_REG_PQTX2PF_39_RT_OFFSET 29940 -#define QM_REG_PQTX2PF_40_RT_OFFSET 29941 -#define QM_REG_PQTX2PF_41_RT_OFFSET 29942 -#define QM_REG_PQTX2PF_42_RT_OFFSET 29943 -#define QM_REG_PQTX2PF_43_RT_OFFSET 29944 -#define QM_REG_PQTX2PF_44_RT_OFFSET 29945 -#define QM_REG_PQTX2PF_45_RT_OFFSET 29946 -#define QM_REG_PQTX2PF_46_RT_OFFSET 29947 -#define QM_REG_PQTX2PF_47_RT_OFFSET 29948 -#define QM_REG_PQTX2PF_48_RT_OFFSET 29949 -#define QM_REG_PQTX2PF_49_RT_OFFSET 29950 -#define QM_REG_PQTX2PF_50_RT_OFFSET 29951 -#define QM_REG_PQTX2PF_51_RT_OFFSET 29952 -#define QM_REG_PQTX2PF_52_RT_OFFSET 29953 -#define QM_REG_PQTX2PF_53_RT_OFFSET 29954 -#define QM_REG_PQTX2PF_54_RT_OFFSET 29955 -#define QM_REG_PQTX2PF_55_RT_OFFSET 29956 -#define QM_REG_PQTX2PF_56_RT_OFFSET 29957 -#define QM_REG_PQTX2PF_57_RT_OFFSET 29958 -#define QM_REG_PQTX2PF_58_RT_OFFSET 29959 -#define QM_REG_PQTX2PF_59_RT_OFFSET 29960 -#define QM_REG_PQTX2PF_60_RT_OFFSET 29961 -#define QM_REG_PQTX2PF_61_RT_OFFSET 29962 -#define QM_REG_PQTX2PF_62_RT_OFFSET 29963 -#define QM_REG_PQTX2PF_63_RT_OFFSET 29964 -#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29965 -#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29966 -#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29967 -#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29968 -#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29969 -#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29970 -#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29971 -#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29972 -#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29973 -#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29974 -#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29975 -#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29976 -#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29977 -#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29978 -#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29979 -#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29980 -#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29981 -#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29982 -#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29983 -#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29984 -#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29985 -#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29986 -#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29987 -#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29988 -#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29989 -#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29990 -#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29991 -#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29992 -#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29993 +#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29876 +#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29877 +#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29878 +#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29879 +#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29880 +#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29881 +#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29882 +#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29883 +#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29884 +#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29885 +#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29886 +#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29887 +#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29888 +#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29889 +#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29890 +#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29891 +#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29892 +#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29893 +#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29894 +#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29895 +#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29896 +#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29897 +#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29898 +#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29899 +#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29900 +#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29901 +#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29902 +#define QM_REG_PQTX2PF_0_RT_OFFSET 29903 +#define QM_REG_PQTX2PF_1_RT_OFFSET 29904 +#define QM_REG_PQTX2PF_2_RT_OFFSET 29905 +#define QM_REG_PQTX2PF_3_RT_OFFSET 29906 +#define QM_REG_PQTX2PF_4_RT_OFFSET 29907 +#define QM_REG_PQTX2PF_5_RT_OFFSET 29908 +#define QM_REG_PQTX2PF_6_RT_OFFSET 29909 +#define QM_REG_PQTX2PF_7_RT_OFFSET 29910 +#define QM_REG_PQTX2PF_8_RT_OFFSET 29911 +#define QM_REG_PQTX2PF_9_RT_OFFSET 29912 +#define QM_REG_PQTX2PF_10_RT_OFFSET 29913 +#define QM_REG_PQTX2PF_11_RT_OFFSET 29914 +#define QM_REG_PQTX2PF_12_RT_OFFSET 29915 +#define QM_REG_PQTX2PF_13_RT_OFFSET 29916 +#define QM_REG_PQTX2PF_14_RT_OFFSET 29917 +#define QM_REG_PQTX2PF_15_RT_OFFSET 29918 +#define QM_REG_PQTX2PF_16_RT_OFFSET 29919 +#define QM_REG_PQTX2PF_17_RT_OFFSET 29920 +#define QM_REG_PQTX2PF_18_RT_OFFSET 29921 +#define QM_REG_PQTX2PF_19_RT_OFFSET 29922 +#define QM_REG_PQTX2PF_20_RT_OFFSET 29923 +#define QM_REG_PQTX2PF_21_RT_OFFSET 29924 +#define QM_REG_PQTX2PF_22_RT_OFFSET 29925 +#define QM_REG_PQTX2PF_23_RT_OFFSET 29926 +#define QM_REG_PQTX2PF_24_RT_OFFSET 29927 +#define QM_REG_PQTX2PF_25_RT_OFFSET 29928 +#define QM_REG_PQTX2PF_26_RT_OFFSET 29929 +#define QM_REG_PQTX2PF_27_RT_OFFSET 29930 +#define QM_REG_PQTX2PF_28_RT_OFFSET 29931 +#define QM_REG_PQTX2PF_29_RT_OFFSET 29932 +#define QM_REG_PQTX2PF_30_RT_OFFSET 29933 +#define QM_REG_PQTX2PF_31_RT_OFFSET 29934 +#define QM_REG_PQTX2PF_32_RT_OFFSET 29935 +#define QM_REG_PQTX2PF_33_RT_OFFSET 29936 +#define QM_REG_PQTX2PF_34_RT_OFFSET 29937 +#define QM_REG_PQTX2PF_35_RT_OFFSET 29938 +#define QM_REG_PQTX2PF_36_RT_OFFSET 29939 +#define QM_REG_PQTX2PF_37_RT_OFFSET 29940 +#define QM_REG_PQTX2PF_38_RT_OFFSET 29941 +#define QM_REG_PQTX2PF_39_RT_OFFSET 29942 +#define QM_REG_PQTX2PF_40_RT_OFFSET 29943 +#define QM_REG_PQTX2PF_41_RT_OFFSET 29944 +#define QM_REG_PQTX2PF_42_RT_OFFSET 29945 +#define QM_REG_PQTX2PF_43_RT_OFFSET 29946 +#define QM_REG_PQTX2PF_44_RT_OFFSET 29947 +#define QM_REG_PQTX2PF_45_RT_OFFSET 29948 +#define QM_REG_PQTX2PF_46_RT_OFFSET 29949 +#define QM_REG_PQTX2PF_47_RT_OFFSET 29950 +#define QM_REG_PQTX2PF_48_RT_OFFSET 29951 +#define QM_REG_PQTX2PF_49_RT_OFFSET 29952 +#define QM_REG_PQTX2PF_50_RT_OFFSET 29953 +#define QM_REG_PQTX2PF_51_RT_OFFSET 29954 +#define QM_REG_PQTX2PF_52_RT_OFFSET 29955 +#define QM_REG_PQTX2PF_53_RT_OFFSET 29956 +#define QM_REG_PQTX2PF_54_RT_OFFSET 29957 +#define QM_REG_PQTX2PF_55_RT_OFFSET 29958 +#define QM_REG_PQTX2PF_56_RT_OFFSET 29959 +#define QM_REG_PQTX2PF_57_RT_OFFSET 29960 +#define QM_REG_PQTX2PF_58_RT_OFFSET 29961 +#define QM_REG_PQTX2PF_59_RT_OFFSET 29962 +#define QM_REG_PQTX2PF_60_RT_OFFSET 29963 +#define QM_REG_PQTX2PF_61_RT_OFFSET 29964 +#define QM_REG_PQTX2PF_62_RT_OFFSET 29965 +#define QM_REG_PQTX2PF_63_RT_OFFSET 29966 +#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29967 +#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29968 +#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29969 +#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29970 +#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29971 +#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29972 +#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29973 +#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29974 +#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29975 +#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29976 +#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29977 +#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29978 +#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29979 +#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29980 +#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29981 +#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29982 +#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29983 +#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29984 +#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29985 +#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29986 +#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29987 +#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29988 +#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29989 +#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29990 +#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29991 +#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29992 +#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29993 +#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29994 +#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29995 #define QM_REG_RLGLBLINCVAL_RT_SIZE 256 -#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30249 +#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30251 #define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256 -#define QM_REG_RLGLBLCRD_RT_OFFSET 30505 +#define QM_REG_RLGLBLCRD_RT_OFFSET 30507 #define QM_REG_RLGLBLCRD_RT_SIZE 256 -#define QM_REG_RLGLBLENABLE_RT_OFFSET 30761 -#define QM_REG_RLPFPERIOD_RT_OFFSET 30762 -#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30763 -#define QM_REG_RLPFINCVAL_RT_OFFSET 30764 +#define QM_REG_RLGLBLENABLE_RT_OFFSET 30763 +#define QM_REG_RLPFPERIOD_RT_OFFSET 30764 +#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30765 +#define QM_REG_RLPFINCVAL_RT_OFFSET 30766 #define QM_REG_RLPFINCVAL_RT_SIZE 16 -#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30780 +#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30782 #define QM_REG_RLPFUPPERBOUND_RT_SIZE 16 -#define QM_REG_RLPFCRD_RT_OFFSET 30796 +#define QM_REG_RLPFCRD_RT_OFFSET 30798 #define QM_REG_RLPFCRD_RT_SIZE 16 -#define QM_REG_RLPFENABLE_RT_OFFSET 30812 -#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30813 -#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30814 +#define QM_REG_RLPFENABLE_RT_OFFSET 30814 +#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30815 +#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30816 #define QM_REG_WFQPFWEIGHT_RT_SIZE 16 -#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30830 +#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30832 #define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16 -#define QM_REG_WFQPFCRD_RT_OFFSET 30846 +#define QM_REG_WFQPFCRD_RT_OFFSET 30848 #define QM_REG_WFQPFCRD_RT_SIZE 160 -#define QM_REG_WFQPFENABLE_RT_OFFSET 31006 -#define QM_REG_WFQVPENABLE_RT_OFFSET 31007 -#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31008 +#define QM_REG_WFQPFENABLE_RT_OFFSET 31008 +#define QM_REG_WFQVPENABLE_RT_OFFSET 31009 +#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31010 #define QM_REG_BASEADDRTXPQ_RT_SIZE 512 -#define QM_REG_TXPQMAP_RT_OFFSET 31520 +#define QM_REG_TXPQMAP_RT_OFFSET 31522 #define QM_REG_TXPQMAP_RT_SIZE 512 -#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32032 +#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32034 #define QM_REG_WFQVPWEIGHT_RT_SIZE 512 -#define QM_REG_WFQVPUPPERBOUND_RT_OFFSET 32544 -#define QM_REG_WFQVPUPPERBOUND_RT_SIZE 512 -#define QM_REG_WFQVPCRD_RT_OFFSET 33056 +#define QM_REG_WFQVPCRD_RT_OFFSET 32546 #define QM_REG_WFQVPCRD_RT_SIZE 512 -#define QM_REG_WFQVPMAP_RT_OFFSET 33568 +#define QM_REG_WFQVPMAP_RT_OFFSET 33058 #define QM_REG_WFQVPMAP_RT_SIZE 512 -#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 34080 +#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 33570 #define QM_REG_WFQPFCRD_MSB_RT_SIZE 160 -#define NIG_REG_LLH_CLS_TYPE_DUALMODE_RT_OFFSET 34240 -#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 34241 -#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 34242 -#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 34243 -#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 34244 -#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 34245 -#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 34246 -#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 34247 +#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 33730 +#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 33731 +#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 33732 +#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 33733 +#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 33734 +#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 33735 +#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 33736 +#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 33737 #define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 34251 +#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 33741 #define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 34255 +#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 33745 #define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 34259 -#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 34260 +#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 33749 +#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 33750 #define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32 -#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 34292 +#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 33782 #define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 34308 +#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 33798 #define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 34324 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 33814 #define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 34340 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 33830 #define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16 -#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 34356 -#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 34357 -#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 34358 -#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 34359 -#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 34360 -#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 34361 -#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 34362 -#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 34363 -#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 34364 -#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 34365 -#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 34366 -#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 34367 -#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 34368 -#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 34369 -#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 34370 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 34371 -#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 34372 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 34373 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 34374 -#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 34375 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 34376 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 34377 -#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 34378 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 34379 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 34380 -#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 34381 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 34382 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 34383 -#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 34384 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 34385 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 34386 -#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 34387 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 34388 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 34389 -#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 34390 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 34391 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 34392 -#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 34393 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 34394 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 34395 -#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 34396 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 34397 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 34398 -#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 34399 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 34400 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 34401 -#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 34402 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 34403 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 34404 -#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 34405 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 34406 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 34407 -#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 34408 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 34409 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 34410 -#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 34411 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 34412 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 34413 -#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 34414 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 34415 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 34416 -#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 34417 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 34418 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 34419 -#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 34420 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 34421 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 34422 -#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 34423 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 34424 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 34425 -#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 34426 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 34427 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 34428 -#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 34429 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 34430 -#define XCM_REG_CON_PHY_Q3_RT_OFFSET 34431 - -#define RUNTIME_ARRAY_SIZE 34432 +#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 33846 +#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 33847 +#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 33848 +#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 33849 +#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 33850 +#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 33851 +#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 33852 +#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 33853 +#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 33854 +#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 33855 +#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 33856 +#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 33857 +#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 33858 +#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 33859 +#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 33860 +#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 33861 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 33862 +#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 33863 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 33864 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 33865 +#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 33866 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 33867 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 33868 +#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 33869 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 33870 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 33871 +#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 33872 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 33873 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 33874 +#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 33875 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 33876 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 33877 +#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 33878 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 33879 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 33880 +#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 33881 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 33882 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 33883 +#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 33884 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 33885 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 33886 +#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 33887 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 33888 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 33889 +#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 33890 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 33891 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 33892 +#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 33893 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 33894 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 33895 +#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 33896 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 33897 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 33898 +#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 33899 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 33900 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 33901 +#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 33902 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 33903 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 33904 +#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 33905 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 33906 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 33907 +#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 33908 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 33909 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 33910 +#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 33911 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 33912 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 33913 +#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 33914 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 33915 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 33916 +#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 33917 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 33918 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 33919 +#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 33920 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 33921 +#define XCM_REG_CON_PHY_Q3_RT_OFFSET 33922 + +#define RUNTIME_ARRAY_SIZE 33923 -/* The eth storm context for the Ystorm */ -struct ystorm_eth_conn_st_ctx { +/* The eth storm context for the Tstorm */ +struct tstorm_eth_conn_st_ctx { __le32 reserved[4]; }; @@ -2562,14 +2528,226 @@ struct xstorm_eth_conn_ag_ctx { __le16 word15 /* word15 */; }; -/* The eth storm context for the Tstorm */ -struct tstorm_eth_conn_st_ctx { - __le32 reserved[4]; +/* The eth storm context for the Ystorm */ +struct ystorm_eth_conn_st_ctx { + __le32 reserved[8]; }; -/* The eth storm context for the Mstorm */ -struct mstorm_eth_conn_st_ctx { - __le32 reserved[8]; +struct ystorm_eth_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define YSTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define YSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 +#define YSTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define YSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 +#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK 0x3 /* cf0 */ +#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT 2 +#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_MASK 0x3 /* cf1 */ +#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_SHIFT 4 +#define YSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* cf2 */ +#define YSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK 0x1 /* cf0en */ +#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT 0 +#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_EN_MASK 0x1 /* cf1en */ +#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_EN_SHIFT 1 +#define YSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define YSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 2 +#define YSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define YSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 3 +#define YSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define YSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 4 +#define YSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define YSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 5 +#define YSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define YSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 6 +#define YSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define YSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 7 + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* word0 */; + __le32 terminate_spqe /* reg0 */; + __le32 reg1 /* reg1 */; + __le16 tx_bd_cons_upd /* word1 */; + __le16 word2 /* word2 */; + __le16 word3 /* word3 */; + __le16 word4 /* word4 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; +}; + +struct tstorm_eth_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define TSTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 +#define TSTORM_ETH_CONN_AG_CTX_BIT2_MASK 0x1 /* bit2 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT2_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_BIT3_MASK 0x1 /* bit3 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT3_SHIFT 3 +#define TSTORM_ETH_CONN_AG_CTX_BIT4_MASK 0x1 /* bit4 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT4_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_BIT5_MASK 0x1 /* bit5 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT5_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define TSTORM_ETH_CONN_AG_CTX_CF0_SHIFT 6 + u8 flags1; +#define TSTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define TSTORM_ETH_CONN_AG_CTX_CF1_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define TSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define TSTORM_ETH_CONN_AG_CTX_CF3_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define TSTORM_ETH_CONN_AG_CTX_CF4_SHIFT 6 + u8 flags2; +#define TSTORM_ETH_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define TSTORM_ETH_CONN_AG_CTX_CF5_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define TSTORM_ETH_CONN_AG_CTX_CF6_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ +#define TSTORM_ETH_CONN_AG_CTX_CF7_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ +#define TSTORM_ETH_CONN_AG_CTX_CF8_SHIFT 6 + u8 flags3; +#define TSTORM_ETH_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ +#define TSTORM_ETH_CONN_AG_CTX_CF9_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ +#define TSTORM_ETH_CONN_AG_CTX_CF10_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define TSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define TSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define TSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 6 +#define TSTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define TSTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 7 + u8 flags4; +#define TSTORM_ETH_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define TSTORM_ETH_CONN_AG_CTX_CF4EN_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ +#define TSTORM_ETH_CONN_AG_CTX_CF5EN_SHIFT 1 +#define TSTORM_ETH_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define TSTORM_ETH_CONN_AG_CTX_CF6EN_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ +#define TSTORM_ETH_CONN_AG_CTX_CF7EN_SHIFT 3 +#define TSTORM_ETH_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ +#define TSTORM_ETH_CONN_AG_CTX_CF8EN_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ +#define TSTORM_ETH_CONN_AG_CTX_CF9EN_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ +#define TSTORM_ETH_CONN_AG_CTX_CF10EN_SHIFT 6 +#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags5; +#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_MASK 0x1 /* rule6en */ +#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT 7 + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le32 reg4 /* reg4 */; + __le32 reg5 /* reg5 */; + __le32 reg6 /* reg6 */; + __le32 reg7 /* reg7 */; + __le32 reg8 /* reg8 */; + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 rx_bd_cons /* word0 */; + u8 byte4 /* byte4 */; + u8 byte5 /* byte5 */; + __le16 rx_bd_prod /* word1 */; + __le16 word2 /* conn_dpi */; + __le16 word3 /* word3 */; + __le32 reg9 /* reg9 */; + __le32 reg10 /* reg10 */; +}; + +struct ustorm_eth_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define USTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define USTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define USTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 +#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_MASK 0x3 /* timer0cf */ +#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_MASK 0x3 /* timer1cf */ +#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define USTORM_ETH_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define USTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define USTORM_ETH_CONN_AG_CTX_CF3_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_MASK 0x3 /* cf4 */ +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_MASK 0x3 /* cf5 */ +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK 0x3 /* cf6 */ +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT 6 + u8 flags2; +#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_EN_MASK 0x1 /* cf0en */ +#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_EN_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_EN_MASK 0x1 /* cf1en */ +#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_EN_SHIFT 1 +#define USTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define USTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define USTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_MASK 0x1 /* cf4en */ +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_MASK 0x1 /* cf5en */ +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_SHIFT 5 +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK 0x1 /* cf6en */ +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT 6 +#define USTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define USTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags3; +#define USTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define USTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define USTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define USTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define USTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define USTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define USTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define USTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define USTORM_ETH_CONN_AG_CTX_RULE6EN_SHIFT 5 +#define USTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define USTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define USTORM_ETH_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define USTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT 7 + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* conn_dpi */; + __le16 tx_bd_cons /* word1 */; + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 tx_int_coallecing_timeset /* reg3 */; + __le16 tx_drv_bd_cons /* word2 */; + __le16 rx_drv_cqe_cons /* word3 */; }; /* The eth storm context for the Ustorm */ @@ -2577,24 +2755,30 @@ struct ustorm_eth_conn_st_ctx { __le32 reserved[40]; }; +/* The eth storm context for the Mstorm */ +struct mstorm_eth_conn_st_ctx { + __le32 reserved[8]; +}; + /* eth connection context */ struct eth_conn_context { - struct ystorm_eth_conn_st_ctx ystorm_st_context; - struct regpair ystorm_st_padding[2] /* padding */; + struct tstorm_eth_conn_st_ctx tstorm_st_context; + struct regpair tstorm_st_padding[2]; struct pstorm_eth_conn_st_ctx pstorm_st_context; - struct regpair pstorm_st_padding[2] /* padding */; struct xstorm_eth_conn_st_ctx xstorm_st_context; struct xstorm_eth_conn_ag_ctx xstorm_ag_context; - struct tstorm_eth_conn_st_ctx tstorm_st_context; - struct regpair tstorm_st_padding[2] /* padding */; - struct mstorm_eth_conn_st_ctx mstorm_st_context; + struct ystorm_eth_conn_st_ctx ystorm_st_context; + struct ystorm_eth_conn_ag_ctx ystorm_ag_context; + struct tstorm_eth_conn_ag_ctx tstorm_ag_context; + struct ustorm_eth_conn_ag_ctx ustorm_ag_context; struct ustorm_eth_conn_st_ctx ustorm_st_context; + struct mstorm_eth_conn_st_ctx mstorm_st_context; }; enum eth_filter_action { ETH_FILTER_ACTION_REMOVE, ETH_FILTER_ACTION_ADD, - ETH_FILTER_ACTION_REPLACE, + ETH_FILTER_ACTION_REMOVE_ALL, MAX_ETH_FILTER_ACTION }; @@ -2653,6 +2837,32 @@ enum eth_ramrod_cmd_id { MAX_ETH_RAMROD_CMD_ID }; +enum eth_tx_err { + ETH_TX_ERR_DROP /* Drop erronous packet. */, + ETH_TX_ERR_ASSERT_MALICIOUS, + MAX_ETH_TX_ERR +}; + +struct eth_tx_err_vals { + __le16 values; +#define ETH_TX_ERR_VALS_ILLEGAL_VLAN_MODE_MASK 0x1 +#define ETH_TX_ERR_VALS_ILLEGAL_VLAN_MODE_SHIFT 0 +#define ETH_TX_ERR_VALS_PACKET_TOO_SMALL_MASK 0x1 +#define ETH_TX_ERR_VALS_PACKET_TOO_SMALL_SHIFT 1 +#define ETH_TX_ERR_VALS_ANTI_SPOOFING_ERR_MASK 0x1 +#define ETH_TX_ERR_VALS_ANTI_SPOOFING_ERR_SHIFT 2 +#define ETH_TX_ERR_VALS_ILLEGAL_INBAND_TAGS_MASK 0x1 +#define ETH_TX_ERR_VALS_ILLEGAL_INBAND_TAGS_SHIFT 3 +#define ETH_TX_ERR_VALS_VLAN_INSERTION_W_INBAND_TAG_MASK 0x1 +#define ETH_TX_ERR_VALS_VLAN_INSERTION_W_INBAND_TAG_SHIFT 4 +#define ETH_TX_ERR_VALS_MTU_VIOLATION_MASK 0x1 +#define ETH_TX_ERR_VALS_MTU_VIOLATION_SHIFT 5 +#define ETH_TX_ERR_VALS_ILLEGAL_CONTROL_FRAME_MASK 0x1 +#define ETH_TX_ERR_VALS_ILLEGAL_CONTROL_FRAME_SHIFT 6 +#define ETH_TX_ERR_VALS_RESERVED_MASK 0x1FF +#define ETH_TX_ERR_VALS_RESERVED_SHIFT 7 +}; + struct eth_vport_rss_config { __le16 capabilities; #define ETH_VPORT_RSS_CONFIG_IPV4_CAPABILITY_MASK 0x1 @@ -2669,12 +2879,8 @@ struct eth_vport_rss_config { #define ETH_VPORT_RSS_CONFIG_IPV6_UDP_CAPABILITY_SHIFT 5 #define ETH_VPORT_RSS_CONFIG_EN_5_TUPLE_CAPABILITY_MASK 0x1 #define ETH_VPORT_RSS_CONFIG_EN_5_TUPLE_CAPABILITY_SHIFT 6 -#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_TCP_FRAG_MASK 0x1 -#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_TCP_FRAG_SHIFT 7 -#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_UDP_FRAG_MASK 0x1 -#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_UDP_FRAG_SHIFT 8 -#define ETH_VPORT_RSS_CONFIG_RESERVED0_MASK 0x7F -#define ETH_VPORT_RSS_CONFIG_RESERVED0_SHIFT 9 +#define ETH_VPORT_RSS_CONFIG_RESERVED0_MASK 0x1FF +#define ETH_VPORT_RSS_CONFIG_RESERVED0_SHIFT 7 u8 rss_id; u8 rss_mode; u8 update_rss_key; @@ -2749,10 +2955,14 @@ struct rx_queue_start_ramrod_data { u8 pxp_tph_valid_pkt; u8 pxp_st_hint; __le16 pxp_st_index; - u8 reserved[4]; - struct regpair cqe_pbl_addr; - struct regpair bd_base; - struct regpair sge_base; + u8 pmd_mode; + u8 notify_en; + u8 toggle_val; + u8 reserved[7]; + __le16 reserved1; + struct regpair cqe_pbl_addr; + struct regpair bd_base; + struct regpair reserved2; }; struct rx_queue_stop_ramrod_data { @@ -2764,23 +2974,24 @@ struct rx_queue_stop_ramrod_data { }; struct rx_queue_update_ramrod_data { - __le16 rx_queue_id; - u8 complete_cqe_flg; - u8 complete_event_flg; - u8 init_sge_ring_flg; - u8 vport_id; - u8 pxp_tph_valid_sge; - u8 pxp_st_hint; - __le16 pxp_st_index; - u8 reserved[6]; - struct regpair sge_base; + __le16 rx_queue_id; + u8 complete_cqe_flg; + u8 complete_event_flg; + u8 vport_id; + u8 reserved[4]; + u8 reserved1; + u8 reserved2; + u8 reserved3; + __le16 reserved4; + __le16 reserved5; + struct regpair reserved6; }; struct tx_queue_start_ramrod_data { __le16 sb_id; u8 sb_index; u8 vport_id; - u8 tc; + u8 reserved0; u8 stats_counter_id; __le16 qm_pq_id; u8 flags; @@ -2790,18 +3001,25 @@ struct tx_queue_start_ramrod_data { #define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_PKT_DUP_SHIFT 1 #define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_TX_DEST_MASK 0x1 #define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_TX_DEST_SHIFT 2 -#define TX_QUEUE_START_RAMROD_DATA_RESERVED0_MASK 0x1F -#define TX_QUEUE_START_RAMROD_DATA_RESERVED0_SHIFT 3 - u8 pin_context; - u8 pxp_tph_valid_bd; - u8 pxp_tph_valid_pkt; - __le16 pxp_st_index; - u8 pxp_st_hint; - u8 reserved1[3]; - __le16 queue_zone_id; - __le16 test_dup_count; - __le16 pbl_size; - struct regpair pbl_base_addr; +#define TX_QUEUE_START_RAMROD_DATA_PMD_MODE_MASK 0x1 +#define TX_QUEUE_START_RAMROD_DATA_PMD_MODE_SHIFT 3 +#define TX_QUEUE_START_RAMROD_DATA_NOTIFY_EN_MASK 0x1 +#define TX_QUEUE_START_RAMROD_DATA_NOTIFY_EN_SHIFT 4 +#define TX_QUEUE_START_RAMROD_DATA_PIN_CONTEXT_MASK 0x1 +#define TX_QUEUE_START_RAMROD_DATA_PIN_CONTEXT_SHIFT 5 +#define TX_QUEUE_START_RAMROD_DATA_RESERVED1_MASK 0x3 +#define TX_QUEUE_START_RAMROD_DATA_RESERVED1_SHIFT 6 + u8 pxp_st_hint; + u8 pxp_tph_valid_bd; + u8 pxp_tph_valid_pkt; + __le16 pxp_st_index; + __le16 comp_agg_size; + __le16 queue_zone_id; + __le16 test_dup_count; + __le16 pbl_size; + __le16 tx_queue_id; + struct regpair pbl_base_addr; + struct regpair bd_cons_address; }; struct tx_queue_stop_ramrod_data { @@ -2822,16 +3040,16 @@ struct vport_start_ramrod_data { struct eth_vport_rx_mode rx_mode; struct eth_vport_tx_mode tx_mode; struct eth_vport_tpa_param tpa_param; - __le16 sge_buff_size; - u8 max_sges_num; - u8 tx_switching_en; - u8 anti_spoofing_en; - u8 default_vlan_en; - u8 handle_ptp_pkts; - u8 silent_vlan_removal_en; - __le16 default_vlan; - u8 untagged; - u8 reserved[7]; + __le16 default_vlan; + u8 tx_switching_en; + u8 anti_spoofing_en; + u8 default_vlan_en; + u8 handle_ptp_pkts; + u8 silent_vlan_removal_en; + u8 untagged; + struct eth_tx_err_vals tx_err_behav; + u8 zero_placement_offset; + u8 reserved[7]; }; struct vport_stop_ramrod_data { @@ -2840,36 +3058,35 @@ struct vport_stop_ramrod_data { }; struct vport_update_ramrod_data_cmn { - u8 vport_id; - u8 update_rx_active_flg; - u8 rx_active_flg; - u8 update_tx_active_flg; - u8 tx_active_flg; - u8 update_rx_mode_flg; - u8 update_tx_mode_flg; - u8 update_approx_mcast_flg; - u8 update_rss_flg; - u8 update_inner_vlan_removal_en_flg; - u8 inner_vlan_removal_en; - u8 update_tpa_param_flg; - u8 update_tpa_en_flg; - u8 update_sge_param_flg; - __le16 sge_buff_size; - u8 max_sges_num; - u8 update_tx_switching_en_flg; - u8 tx_switching_en; - u8 update_anti_spoofing_en_flg; - u8 anti_spoofing_en; - u8 update_handle_ptp_pkts; - u8 handle_ptp_pkts; - u8 update_default_vlan_en_flg; - u8 default_vlan_en; - u8 update_default_vlan_flg; - __le16 default_vlan; - u8 update_accept_any_vlan_flg; - u8 accept_any_vlan; - u8 silent_vlan_removal_en; - u8 reserved; + u8 vport_id; + u8 update_rx_active_flg; + u8 rx_active_flg; + u8 update_tx_active_flg; + u8 tx_active_flg; + u8 update_rx_mode_flg; + u8 update_tx_mode_flg; + u8 update_approx_mcast_flg; + u8 update_rss_flg; + u8 update_inner_vlan_removal_en_flg; + u8 inner_vlan_removal_en; + u8 update_tpa_param_flg; + u8 update_tpa_en_flg; + u8 update_tx_switching_en_flg; + u8 tx_switching_en; + u8 update_anti_spoofing_en_flg; + u8 anti_spoofing_en; + u8 update_handle_ptp_pkts; + u8 handle_ptp_pkts; + u8 update_default_vlan_en_flg; + u8 default_vlan_en; + u8 update_default_vlan_flg; + __le16 default_vlan; + u8 update_accept_any_vlan_flg; + u8 accept_any_vlan; + u8 silent_vlan_removal_en; + u8 update_mtu_flg; + __le16 mtu; + u8 reserved[2]; }; struct vport_update_ramrod_mcast { @@ -2885,436 +3102,6 @@ struct vport_update_ramrod_data { struct eth_vport_rss_config rss_config; }; -struct mstorm_eth_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK 0x1 /* exist_in_qm0 */ -#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0 -#define MSTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define MSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 -#define MSTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* cf0 */ -#define MSTORM_ETH_CONN_AG_CTX_CF0_SHIFT 2 -#define MSTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* cf1 */ -#define MSTORM_ETH_CONN_AG_CTX_CF1_SHIFT 4 -#define MSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* cf2 */ -#define MSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 6 - u8 flags1; -#define MSTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define MSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 0 -#define MSTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define MSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 1 -#define MSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define MSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 2 -#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 3 -#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 4 -#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 5 -#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 6 -#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 7 - __le16 word0 /* word0 */; - __le16 word1 /* word1 */; - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; -}; - -struct tstorm_eth_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define TSTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 -#define TSTORM_ETH_CONN_AG_CTX_BIT2_MASK 0x1 /* bit2 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT2_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_BIT3_MASK 0x1 /* bit3 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT3_SHIFT 3 -#define TSTORM_ETH_CONN_AG_CTX_BIT4_MASK 0x1 /* bit4 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT4_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_BIT5_MASK 0x1 /* bit5 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT5_SHIFT 5 -#define TSTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ -#define TSTORM_ETH_CONN_AG_CTX_CF0_SHIFT 6 - u8 flags1; -#define TSTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ -#define TSTORM_ETH_CONN_AG_CTX_CF1_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ -#define TSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ -#define TSTORM_ETH_CONN_AG_CTX_CF3_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ -#define TSTORM_ETH_CONN_AG_CTX_CF4_SHIFT 6 - u8 flags2; -#define TSTORM_ETH_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ -#define TSTORM_ETH_CONN_AG_CTX_CF5_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ -#define TSTORM_ETH_CONN_AG_CTX_CF6_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ -#define TSTORM_ETH_CONN_AG_CTX_CF7_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ -#define TSTORM_ETH_CONN_AG_CTX_CF8_SHIFT 6 - u8 flags3; -#define TSTORM_ETH_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ -#define TSTORM_ETH_CONN_AG_CTX_CF9_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ -#define TSTORM_ETH_CONN_AG_CTX_CF10_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define TSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define TSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 5 -#define TSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define TSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 6 -#define TSTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ -#define TSTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 7 - u8 flags4; -#define TSTORM_ETH_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ -#define TSTORM_ETH_CONN_AG_CTX_CF4EN_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ -#define TSTORM_ETH_CONN_AG_CTX_CF5EN_SHIFT 1 -#define TSTORM_ETH_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ -#define TSTORM_ETH_CONN_AG_CTX_CF6EN_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ -#define TSTORM_ETH_CONN_AG_CTX_CF7EN_SHIFT 3 -#define TSTORM_ETH_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ -#define TSTORM_ETH_CONN_AG_CTX_CF8EN_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ -#define TSTORM_ETH_CONN_AG_CTX_CF9EN_SHIFT 5 -#define TSTORM_ETH_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ -#define TSTORM_ETH_CONN_AG_CTX_CF10EN_SHIFT 6 -#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 7 - u8 flags5; -#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 1 -#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 3 -#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_MASK 0x1 /* rule6en */ -#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_SHIFT 5 -#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 6 -#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT 7 - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; - __le32 reg2 /* reg2 */; - __le32 reg3 /* reg3 */; - __le32 reg4 /* reg4 */; - __le32 reg5 /* reg5 */; - __le32 reg6 /* reg6 */; - __le32 reg7 /* reg7 */; - __le32 reg8 /* reg8 */; - u8 byte2 /* byte2 */; - u8 byte3 /* byte3 */; - __le16 rx_bd_cons /* word0 */; - u8 byte4 /* byte4 */; - u8 byte5 /* byte5 */; - __le16 rx_bd_prod /* word1 */; - __le16 word2 /* conn_dpi */; - __le16 word3 /* word3 */; - __le32 reg9 /* reg9 */; - __le32 reg10 /* reg10 */; -}; - -struct ustorm_eth_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define USTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 -#define USTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 -#define USTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 -#define USTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 -#define USTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ -#define USTORM_ETH_CONN_AG_CTX_CF0_SHIFT 2 -#define USTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ -#define USTORM_ETH_CONN_AG_CTX_CF1_SHIFT 4 -#define USTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ -#define USTORM_ETH_CONN_AG_CTX_CF2_SHIFT 6 - u8 flags1; -#define USTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 -#define USTORM_ETH_CONN_AG_CTX_CF3_SHIFT 0 -#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_MASK 0x3 /* cf4 */ -#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_SHIFT 2 -#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_MASK 0x3 /* cf5 */ -#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_SHIFT 4 -#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK 0x3 /* cf6 */ -#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT 6 - u8 flags2; -#define USTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define USTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 0 -#define USTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define USTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 1 -#define USTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define USTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 2 -#define USTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ -#define USTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 3 -#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_MASK 0x1 /* cf4en */ -#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_SHIFT 4 -#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_MASK 0x1 /* cf5en */ -#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_SHIFT 5 -#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK 0x1 /* cf6en */ -#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT 6 -#define USTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define USTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 7 - u8 flags3; -#define USTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define USTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 0 -#define USTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define USTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 1 -#define USTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define USTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 2 -#define USTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define USTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 3 -#define USTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ -#define USTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 4 -#define USTORM_ETH_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ -#define USTORM_ETH_CONN_AG_CTX_RULE6EN_SHIFT 5 -#define USTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ -#define USTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 6 -#define USTORM_ETH_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ -#define USTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT 7 - u8 byte2 /* byte2 */; - u8 byte3 /* byte3 */; - __le16 word0 /* conn_dpi */; - __le16 tx_bd_cons /* word1 */; - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; - __le32 reg2 /* reg2 */; - __le32 reg3 /* reg3 */; - __le16 tx_drv_bd_cons /* word2 */; - __le16 rx_drv_cqe_cons /* word3 */; -}; - -struct xstorm_eth_hw_conn_ag_ctx { - u8 reserved0 /* cdu_validation */; - u8 eth_state /* state */; - u8 flags0; -#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_SHIFT 7 - u8 flags1; -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT11_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT11_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT12_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT12_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT13_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT13_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT 7 - u8 flags2; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF0_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF0_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF1_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF1_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF2_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF2_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF3_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF3_SHIFT 6 - u8 flags3; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF4_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF4_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF5_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF5_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF6_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF6_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF7_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF7_SHIFT 6 - u8 flags4; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF8_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF8_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF9_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF9_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF10_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF10_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF11_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF11_SHIFT 6 - u8 flags5; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF12_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF12_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF13_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF13_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF14_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF14_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF15_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF15_SHIFT 6 - u8 flags6; -#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_SHIFT 6 - u8 flags7; -#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_SHIFT 7 - u8 flags8; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_SHIFT 7 - u8 flags9; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_SHIFT 7 - u8 flags10; -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_SHIFT 7 - u8 flags11; -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_SHIFT 7 - u8 flags12; -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_SHIFT 7 - u8 flags13; -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_SHIFT 7 - u8 flags14; -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_SHIFT 6 - u8 edpm_event_id /* byte2 */; - __le16 physical_q0 /* physical_q0 */; - __le16 word1 /* physical_q1 */; - __le16 edpm_num_bds /* physical_q2 */; - __le16 tx_bd_cons /* word3 */; - __le16 tx_bd_prod /* word4 */; - __le16 go_to_bd_cons /* word5 */; - __le16 conn_dpi /* conn_dpi */; -}; - #define VF_MAX_STATIC 192 /* In case of K2 */ #define MCP_GLOB_PATH_MAX 2 @@ -3818,6 +3605,10 @@ struct public_port { struct dcbx_local_params local_admin_dcbx_mib; struct dcbx_mib remote_dcbx_mib; struct dcbx_mib operational_dcbx_mib; + + u32 fc_npiv_nvram_tbl_addr; + u32 fc_npiv_nvram_tbl_size; + u32 transceiver_data; }; /**************************************/ @@ -3830,7 +3621,11 @@ struct public_func { u32 iscsi_boot_signature; u32 iscsi_boot_block_offset; - u32 reserved[8]; + u32 mtu_size; + u32 c2s_pcp_map_lower; + u32 c2s_pcp_map_upper; + u32 c2s_pcp_map_default; + u32 reserved[4]; u32 config; @@ -3894,10 +3689,10 @@ struct public_func { #define DRV_ID_MCP_HSI_VER_SHIFT 16 #define DRV_ID_MCP_HSI_VER_CURRENT BIT(DRV_ID_MCP_HSI_VER_SHIFT) -#define DRV_ID_DRV_TYPE_MASK 0xff000000 +#define DRV_ID_DRV_TYPE_MASK 0x7f000000 #define DRV_ID_DRV_TYPE_SHIFT 24 #define DRV_ID_DRV_TYPE_UNKNOWN (0 << DRV_ID_DRV_TYPE_SHIFT) -#define DRV_ID_DRV_TYPE_LINUX BIT(DRV_ID_DRV_TYPE_SHIFT) +#define DRV_ID_DRV_TYPE_LINUX (1 << DRV_ID_DRV_TYPE_SHIFT) #define DRV_ID_DRV_TYPE_WINDOWS (2 << DRV_ID_DRV_TYPE_SHIFT) #define DRV_ID_DRV_TYPE_DIAG (3 << DRV_ID_DRV_TYPE_SHIFT) #define DRV_ID_DRV_TYPE_PREBOOT (4 << DRV_ID_DRV_TYPE_SHIFT) @@ -3905,6 +3700,10 @@ struct public_func { #define DRV_ID_DRV_TYPE_VMWARE (6 << DRV_ID_DRV_TYPE_SHIFT) #define DRV_ID_DRV_TYPE_FREEBSD (7 << DRV_ID_DRV_TYPE_SHIFT) #define DRV_ID_DRV_TYPE_AIX (8 << DRV_ID_DRV_TYPE_SHIFT) + +#define DRV_ID_DRV_INIT_HW_MASK 0x80000000 +#define DRV_ID_DRV_INIT_HW_SHIFT 31 +#define DRV_ID_DRV_INIT_HW_FLAG BIT(DRV_ID_DRV_INIT_HW_SHIFT) }; /**************************************/ @@ -3964,6 +3763,7 @@ struct public_drv_mb { #define DRV_MSG_CODE_MASK 0xffff0000 #define DRV_MSG_CODE_LOAD_REQ 0x10000000 #define DRV_MSG_CODE_LOAD_DONE 0x11000000 +#define DRV_MSG_CODE_INIT_HW 0x12000000 #define DRV_MSG_CODE_UNLOAD_REQ 0x20000000 #define DRV_MSG_CODE_UNLOAD_DONE 0x21000000 #define DRV_MSG_CODE_INIT_PHY 0x22000000 @@ -4100,6 +3900,7 @@ struct public_drv_mb { #define FW_MSG_CODE_SET_SECURE_MODE_ERROR 0x00130000 #define FW_MSG_CODE_SET_SECURE_MODE_OK 0x00140000 #define FW_MSG_MODE_PHY_PRIVILEGE_ERROR 0x00150000 +#define FW_MSG_CODE_OK 0x00160000 #define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff @@ -4212,7 +4013,7 @@ struct nvm_cfg1_glob { #define NVM_CFG1_GLOB_MF_MODE_MASK 0x00000FF0 #define NVM_CFG1_GLOB_MF_MODE_OFFSET 4 #define NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED 0x0 -#define NVM_CFG1_GLOB_MF_MODE_FORCED_SF 0x1 +#define NVM_CFG1_GLOB_MF_MODE_DEFAULT 0x1 #define NVM_CFG1_GLOB_MF_MODE_SPIO4 0x2 #define NVM_CFG1_GLOB_MF_MODE_NPAR1_0 0x3 #define NVM_CFG1_GLOB_MF_MODE_NPAR1_5 0x4 @@ -4643,8 +4444,12 @@ struct nvm_cfg1_glob { #define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO29 0x1E #define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO30 0x1F #define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO31 0x20 - - u32 reserved[46]; /* 0x88 */ + u32 device_capabilities; /* 0x88 */ +#define NVM_CFG1_GLOB_DEVICE_CAPABILITIES_ETHERNET 0x1 + u32 power_dissipated; /* 0x8C */ + u32 power_consumed; /* 0x90 */ + u32 efi_version; /* 0x94 */ + u32 reserved[42]; /* 0x98 */ }; struct nvm_cfg1_path { @@ -4652,26 +4457,8 @@ struct nvm_cfg1_path { }; struct nvm_cfg1_port { - u32 power_dissipated; /* 0x0 */ -#define NVM_CFG1_PORT_POWER_DIS_D0_MASK 0x000000FF -#define NVM_CFG1_PORT_POWER_DIS_D0_OFFSET 0 -#define NVM_CFG1_PORT_POWER_DIS_D1_MASK 0x0000FF00 -#define NVM_CFG1_PORT_POWER_DIS_D1_OFFSET 8 -#define NVM_CFG1_PORT_POWER_DIS_D2_MASK 0x00FF0000 -#define NVM_CFG1_PORT_POWER_DIS_D2_OFFSET 16 -#define NVM_CFG1_PORT_POWER_DIS_D3_MASK 0xFF000000 -#define NVM_CFG1_PORT_POWER_DIS_D3_OFFSET 24 - - u32 power_consumed; /* 0x4 */ -#define NVM_CFG1_PORT_POWER_CONS_D0_MASK 0x000000FF -#define NVM_CFG1_PORT_POWER_CONS_D0_OFFSET 0 -#define NVM_CFG1_PORT_POWER_CONS_D1_MASK 0x0000FF00 -#define NVM_CFG1_PORT_POWER_CONS_D1_OFFSET 8 -#define NVM_CFG1_PORT_POWER_CONS_D2_MASK 0x00FF0000 -#define NVM_CFG1_PORT_POWER_CONS_D2_OFFSET 16 -#define NVM_CFG1_PORT_POWER_CONS_D3_MASK 0xFF000000 -#define NVM_CFG1_PORT_POWER_CONS_D3_OFFSET 24 - + u32 reserved__m_relocated_to_option_123; /* 0x0 */ + u32 reserved__m_relocated_to_option_124; /* 0x4 */ u32 generic_cont0; /* 0x8 */ #define NVM_CFG1_PORT_LED_MODE_MASK 0x000000FF #define NVM_CFG1_PORT_LED_MODE_OFFSET 0 @@ -4699,7 +4486,9 @@ struct nvm_cfg1_port { #define NVM_CFG1_PORT_DCBX_MODE_IEEE 0x1 #define NVM_CFG1_PORT_DCBX_MODE_CEE 0x2 #define NVM_CFG1_PORT_DCBX_MODE_DYNAMIC 0x3 - +#define NVM_CFG1_PORT_DEFAULT_ENABLED_PROTOCOLS_MASK 0x00F00000 +#define NVM_CFG1_PORT_DEFAULT_ENABLED_PROTOCOLS_OFFSET 20 +#define NVM_CFG1_PORT_DEFAULT_ENABLED_PROTOCOLS_ETHERNET 0x1 u32 pcie_cfg; /* 0xC */ #define NVM_CFG1_PORT_RESERVED15_MASK 0x00000007 #define NVM_CFG1_PORT_RESERVED15_OFFSET 0 @@ -4784,10 +4573,11 @@ struct nvm_cfg1_port { #define NVM_CFG1_PORT_SERDES_NET_INTERFACE_SFI 0x9 #define NVM_CFG1_PORT_SERDES_NET_INTERFACE_1000X 0xB #define NVM_CFG1_PORT_SERDES_NET_INTERFACE_SGMII 0xC -#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XLAUI 0xD -#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_CAUI 0xE -#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XLPPI 0xF -#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_CPPI 0x10 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XLAUI 0x11 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XLPPI 0x12 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_CAUI 0x21 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_CPPI 0x22 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_25GAUI 0x31 #define NVM_CFG1_PORT_AN_MODE_MASK 0xFF000000 #define NVM_CFG1_PORT_AN_MODE_OFFSET 24 #define NVM_CFG1_PORT_AN_MODE_NONE 0x0 @@ -4801,9 +4591,6 @@ struct nvm_cfg1_port { u32 mgmt_traffic; /* 0x20 */ #define NVM_CFG1_PORT_RESERVED61_MASK 0x0000000F #define NVM_CFG1_PORT_RESERVED61_OFFSET 0 -#define NVM_CFG1_PORT_RESERVED61_DISABLED 0x0 -#define NVM_CFG1_PORT_RESERVED61_NCSI_OVER_RMII 0x1 -#define NVM_CFG1_PORT_RESERVED61_NCSI_OVER_SMBUS 0x2 u32 ext_phy; /* 0x24 */ #define NVM_CFG1_PORT_EXTERNAL_PHY_TYPE_MASK 0x000000FF @@ -4814,16 +4601,12 @@ struct nvm_cfg1_port { #define NVM_CFG1_PORT_EXTERNAL_PHY_ADDRESS_OFFSET 8 u32 mba_cfg1; /* 0x28 */ -#define NVM_CFG1_PORT_MBA_MASK 0x00000001 -#define NVM_CFG1_PORT_MBA_OFFSET 0 -#define NVM_CFG1_PORT_MBA_DISABLED 0x0 -#define NVM_CFG1_PORT_MBA_ENABLED 0x1 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_MASK 0x00000006 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_OFFSET 1 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_AUTO 0x0 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_BBS 0x1 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_INT18H 0x2 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_INT19H 0x3 +#define NVM_CFG1_PORT_PREBOOT_OPROM_MASK 0x00000001 +#define NVM_CFG1_PORT_PREBOOT_OPROM_OFFSET 0 +#define NVM_CFG1_PORT_PREBOOT_OPROM_DISABLED 0x0 +#define NVM_CFG1_PORT_PREBOOT_OPROM_ENABLED 0x1 +#define NVM_CFG1_PORT_RESERVED__M_MBA_BOOT_TYPE_MASK 0x00000006 +#define NVM_CFG1_PORT_RESERVED__M_MBA_BOOT_TYPE_OFFSET 1 #define NVM_CFG1_PORT_MBA_DELAY_TIME_MASK 0x00000078 #define NVM_CFG1_PORT_MBA_DELAY_TIME_OFFSET 3 #define NVM_CFG1_PORT_MBA_SETUP_HOT_KEY_MASK 0x00000080 @@ -4836,61 +4619,30 @@ struct nvm_cfg1_port { #define NVM_CFG1_PORT_MBA_HIDE_SETUP_PROMPT_ENABLED 0x1 #define NVM_CFG1_PORT_RESERVED5_MASK 0x0001FE00 #define NVM_CFG1_PORT_RESERVED5_OFFSET 9 -#define NVM_CFG1_PORT_RESERVED5_DISABLED 0x0 -#define NVM_CFG1_PORT_RESERVED5_2K 0x1 -#define NVM_CFG1_PORT_RESERVED5_4K 0x2 -#define NVM_CFG1_PORT_RESERVED5_8K 0x3 -#define NVM_CFG1_PORT_RESERVED5_16K 0x4 -#define NVM_CFG1_PORT_RESERVED5_32K 0x5 -#define NVM_CFG1_PORT_RESERVED5_64K 0x6 -#define NVM_CFG1_PORT_RESERVED5_128K 0x7 -#define NVM_CFG1_PORT_RESERVED5_256K 0x8 -#define NVM_CFG1_PORT_RESERVED5_512K 0x9 -#define NVM_CFG1_PORT_RESERVED5_1M 0xA -#define NVM_CFG1_PORT_RESERVED5_2M 0xB -#define NVM_CFG1_PORT_RESERVED5_4M 0xC -#define NVM_CFG1_PORT_RESERVED5_8M 0xD -#define NVM_CFG1_PORT_RESERVED5_16M 0xE -#define NVM_CFG1_PORT_RESERVED5_32M 0xF -#define NVM_CFG1_PORT_MBA_LINK_SPEED_MASK 0x001E0000 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_OFFSET 17 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_AUTONEG 0x0 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_1G 0x1 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_10G 0x2 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_25G 0x4 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_40G 0x5 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_50G 0x6 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_100G 0x7 -#define NVM_CFG1_PORT_MBA_BOOT_RETRY_COUNT_MASK 0x00E00000 -#define NVM_CFG1_PORT_MBA_BOOT_RETRY_COUNT_OFFSET 21 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_MASK 0x001E0000 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_OFFSET 17 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_AUTONEG 0x0 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_1G 0x1 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_10G 0x2 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_25G 0x4 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_40G 0x5 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_50G 0x6 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_100G 0x7 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_SMARTLINQ 0x8 +#define NVM_CFG1_PORT_RESERVED__M_MBA_BOOT_RETRY_COUNT_MASK 0x00E00000 +#define NVM_CFG1_PORT_RESERVED__M_MBA_BOOT_RETRY_COUNT_OFFSET 21 u32 mba_cfg2; /* 0x2C */ -#define NVM_CFG1_PORT_MBA_VLAN_VALUE_MASK 0x0000FFFF -#define NVM_CFG1_PORT_MBA_VLAN_VALUE_OFFSET 0 -#define NVM_CFG1_PORT_MBA_VLAN_MASK 0x00010000 -#define NVM_CFG1_PORT_MBA_VLAN_OFFSET 16 +#define NVM_CFG1_PORT_RESERVED65_MASK 0x0000FFFF +#define NVM_CFG1_PORT_RESERVED65_OFFSET 0 +#define NVM_CFG1_PORT_RESERVED66_MASK 0x00010000 +#define NVM_CFG1_PORT_RESERVED66_OFFSET 16 u32 vf_cfg; /* 0x30 */ #define NVM_CFG1_PORT_RESERVED8_MASK 0x0000FFFF #define NVM_CFG1_PORT_RESERVED8_OFFSET 0 #define NVM_CFG1_PORT_RESERVED6_MASK 0x000F0000 #define NVM_CFG1_PORT_RESERVED6_OFFSET 16 -#define NVM_CFG1_PORT_RESERVED6_DISABLED 0x0 -#define NVM_CFG1_PORT_RESERVED6_4K 0x1 -#define NVM_CFG1_PORT_RESERVED6_8K 0x2 -#define NVM_CFG1_PORT_RESERVED6_16K 0x3 -#define NVM_CFG1_PORT_RESERVED6_32K 0x4 -#define NVM_CFG1_PORT_RESERVED6_64K 0x5 -#define NVM_CFG1_PORT_RESERVED6_128K 0x6 -#define NVM_CFG1_PORT_RESERVED6_256K 0x7 -#define NVM_CFG1_PORT_RESERVED6_512K 0x8 -#define NVM_CFG1_PORT_RESERVED6_1M 0x9 -#define NVM_CFG1_PORT_RESERVED6_2M 0xA -#define NVM_CFG1_PORT_RESERVED6_4M 0xB -#define NVM_CFG1_PORT_RESERVED6_8M 0xC -#define NVM_CFG1_PORT_RESERVED6_16M 0xD -#define NVM_CFG1_PORT_RESERVED6_32M 0xE -#define NVM_CFG1_PORT_RESERVED6_64M 0xF struct nvm_cfg_mac_address lldp_mac_address; /* 0x34 */ @@ -4973,18 +4725,16 @@ struct nvm_cfg1_func { u32 device_id; /* 0x10 */ #define NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_MASK 0x0000FFFF #define NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_OFFSET 0 -#define NVM_CFG1_FUNC_VENDOR_DEVICE_ID_MASK 0xFFFF0000 -#define NVM_CFG1_FUNC_VENDOR_DEVICE_ID_OFFSET 16 +#define NVM_CFG1_FUNC_RESERVED77_MASK 0xFFFF0000 +#define NVM_CFG1_FUNC_RESERVED77_OFFSET 16 u32 cmn_cfg; /* 0x14 */ -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_MASK 0x00000007 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_OFFSET 0 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_PXE 0x0 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_RPL 0x1 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_BOOTP 0x2 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_ISCSI_BOOT 0x3 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_FCOE_BOOT 0x4 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_NONE 0x7 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_MASK 0x00000007 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_OFFSET 0 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_PXE 0x0 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_ISCSI_BOOT 0x3 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_FCOE_BOOT 0x4 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_NONE 0x7 #define NVM_CFG1_FUNC_VF_PCI_DEVICE_ID_MASK 0x0007FFF8 #define NVM_CFG1_FUNC_VF_PCI_DEVICE_ID_OFFSET 3 #define NVM_CFG1_FUNC_PERSONALITY_MASK 0x00780000 @@ -5029,8 +4779,8 @@ struct nvm_cfg1_func { struct nvm_cfg_mac_address fcoe_node_wwn_mac_addr; /* 0x1C */ struct nvm_cfg_mac_address fcoe_port_wwn_mac_addr; /* 0x24 */ - - u32 reserved[9]; /* 0x2C */ + u32 preboot_generic_cfg; /* 0x2C */ + u32 reserved[8]; /* 0x30 */ }; struct nvm_cfg1 { diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index ffa99273b353..a95a3e4b3101 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -44,7 +44,7 @@ struct qed_ptt_pool { int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn) { struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool), - GFP_ATOMIC); + GFP_KERNEL); int i; if (!p_pool) diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c index 0b21a553cc7d..f55ebdc3c832 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c @@ -513,17 +513,14 @@ static int qed_pf_rl_rt_init(struct qed_hwfn *p_hwfn, * Return -1 on error. */ static int qed_vp_wfq_rt_init(struct qed_hwfn *p_hwfn, - u8 start_vport, u8 num_vports, struct init_qm_vport_params *vport_params) { - u8 tc, i, vport_id; u32 inc_val; + u8 tc, i; /* go over all PF VPORTs */ - for (i = 0, vport_id = start_vport; i < num_vports; i++, vport_id++) { - u32 temp = QM_REG_WFQVPUPPERBOUND_RT_OFFSET; - u16 *pq_ids = &vport_params[i].first_tx_pq_id[0]; + for (i = 0; i < num_vports; i++) { if (!vport_params[i].vport_wfq) continue; @@ -539,20 +536,16 @@ static int qed_vp_wfq_rt_init(struct qed_hwfn *p_hwfn, * different TCs */ for (tc = 0; tc < NUM_OF_TCS; tc++) { - u16 vport_pq_id = pq_ids[tc]; + u16 vport_pq_id = vport_params[i].first_tx_pq_id[tc]; if (vport_pq_id != QM_INVALID_PQ_ID) { STORE_RT_REG(p_hwfn, - QM_REG_WFQVPWEIGHT_RT_OFFSET + - vport_pq_id, inc_val); - STORE_RT_REG(p_hwfn, temp + vport_pq_id, - QM_WFQ_UPPER_BOUND | - QM_WFQ_CRD_REG_SIGN_BIT); - STORE_RT_REG(p_hwfn, QM_REG_WFQVPCRD_RT_OFFSET + vport_pq_id, - QM_WFQ_INIT_CRD(inc_val) | QM_WFQ_CRD_REG_SIGN_BIT); + STORE_RT_REG(p_hwfn, + QM_REG_WFQVPWEIGHT_RT_OFFSET + + vport_pq_id, inc_val); } } } @@ -709,8 +702,7 @@ int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn, if (qed_pf_rl_rt_init(p_hwfn, p_params->pf_id, p_params->pf_rl)) return -1; - if (qed_vp_wfq_rt_init(p_hwfn, p_params->start_vport, - p_params->num_vports, vport_params)) + if (qed_vp_wfq_rt_init(p_hwfn, p_params->num_vports, vport_params)) return -1; if (qed_vport_rl_rt_init(p_hwfn, p_params->start_vport, diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c index 796f1390e598..3269b3610e03 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -55,63 +55,98 @@ void qed_init_clear_rt_data(struct qed_hwfn *p_hwfn) int i; for (i = 0; i < RUNTIME_ARRAY_SIZE; i++) - p_hwfn->rt_data[i].b_valid = false; + p_hwfn->rt_data.b_valid[i] = false; } void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, u32 rt_offset, u32 val) { - p_hwfn->rt_data[rt_offset].init_val = val; - p_hwfn->rt_data[rt_offset].b_valid = true; + p_hwfn->rt_data.init_val[rt_offset] = val; + p_hwfn->rt_data.b_valid[rt_offset] = true; } void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn, - u32 rt_offset, - u32 *val, + u32 rt_offset, u32 *p_val, size_t size) { size_t i; for (i = 0; i < size / sizeof(u32); i++) { - p_hwfn->rt_data[rt_offset + i].init_val = val[i]; - p_hwfn->rt_data[rt_offset + i].b_valid = true; + p_hwfn->rt_data.init_val[rt_offset + i] = p_val[i]; + p_hwfn->rt_data.b_valid[rt_offset + i] = true; } } -static void qed_init_rt(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 addr, - u32 rt_offset, - u32 size) +static int qed_init_rt(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 addr, + u16 rt_offset, + u16 size, + bool b_must_dmae) { - struct qed_rt_data *rt_data = p_hwfn->rt_data + rt_offset; - u32 i; + u32 *p_init_val = &p_hwfn->rt_data.init_val[rt_offset]; + bool *p_valid = &p_hwfn->rt_data.b_valid[rt_offset]; + u16 i, segment; + int rc = 0; + /* Since not all RT entries are initialized, go over the RT and + * for each segment of initialized values use DMA. + */ for (i = 0; i < size; i++) { - if (!rt_data[i].b_valid) + if (!p_valid[i]) continue; - qed_wr(p_hwfn, p_ptt, addr + (i << 2), rt_data[i].init_val); + + /* In case there isn't any wide-bus configuration here, + * simply write the data instead of using dmae. + */ + if (!b_must_dmae) { + qed_wr(p_hwfn, p_ptt, addr + (i << 2), + p_init_val[i]); + continue; + } + + /* Start of a new segment */ + for (segment = 1; i + segment < size; segment++) + if (!p_valid[i + segment]) + break; + + rc = qed_dmae_host2grc(p_hwfn, p_ptt, + (uintptr_t)(p_init_val + i), + addr + (i << 2), segment, 0); + if (rc != 0) + return rc; + + /* Jump over the entire segment, including invalid entry */ + i += segment; } + + return rc; } int qed_init_alloc(struct qed_hwfn *p_hwfn) { - struct qed_rt_data *rt_data; + struct qed_rt_data *rt_data = &p_hwfn->rt_data; - rt_data = kzalloc(sizeof(*rt_data) * RUNTIME_ARRAY_SIZE, GFP_ATOMIC); - if (!rt_data) + rt_data->b_valid = kzalloc(sizeof(bool) * RUNTIME_ARRAY_SIZE, + GFP_KERNEL); + if (!rt_data->b_valid) return -ENOMEM; - p_hwfn->rt_data = rt_data; + rt_data->init_val = kzalloc(sizeof(u32) * RUNTIME_ARRAY_SIZE, + GFP_KERNEL); + if (!rt_data->init_val) { + kfree(rt_data->b_valid); + return -ENOMEM; + } return 0; } void qed_init_free(struct qed_hwfn *p_hwfn) { - kfree(p_hwfn->rt_data); - p_hwfn->rt_data = NULL; + kfree(p_hwfn->rt_data.init_val); + kfree(p_hwfn->rt_data.b_valid); } static int qed_init_array_dmae(struct qed_hwfn *p_hwfn, @@ -289,7 +324,8 @@ static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn, case INIT_SRC_RUNTIME: qed_init_rt(p_hwfn, p_ptt, addr, le16_to_cpu(arg->runtime.offset), - le16_to_cpu(arg->runtime.size)); + le16_to_cpu(arg->runtime.size), + b_must_dmae); break; } @@ -316,49 +352,50 @@ static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct init_read_op *cmd) { - u32 data = le32_to_cpu(cmd->op_data); - u32 addr = GET_FIELD(data, INIT_READ_OP_ADDRESS) << 2; + bool (*comp_check)(u32 val, u32 expected_val); + u32 delay = QED_INIT_POLL_PERIOD_US, val; + u32 data, addr, poll; + int i; + + data = le32_to_cpu(cmd->op_data); + addr = GET_FIELD(data, INIT_READ_OP_ADDRESS) << 2; + poll = GET_FIELD(data, INIT_READ_OP_POLL_TYPE); - bool (*comp_check)(u32 val, - u32 expected_val); - u32 delay = QED_INIT_POLL_PERIOD_US, val; val = qed_rd(p_hwfn, p_ptt, addr); - data = le32_to_cpu(cmd->op_data); - if (GET_FIELD(data, INIT_READ_OP_POLL)) { - int i; + if (poll == INIT_POLL_NONE) + return; - switch (GET_FIELD(data, INIT_READ_OP_POLL_COMP)) { - case INIT_COMPARISON_EQ: - comp_check = comp_eq; - break; - case INIT_COMPARISON_OR: - comp_check = comp_or; - break; - case INIT_COMPARISON_AND: - comp_check = comp_and; - break; - default: - comp_check = NULL; - DP_ERR(p_hwfn, "Invalid poll comparison type %08x\n", - data); - return; - } + switch (poll) { + case INIT_POLL_EQ: + comp_check = comp_eq; + break; + case INIT_POLL_OR: + comp_check = comp_or; + break; + case INIT_POLL_AND: + comp_check = comp_and; + break; + default: + DP_ERR(p_hwfn, "Invalid poll comparison type %08x\n", + cmd->op_data); + return; + } - for (i = 0; - i < QED_INIT_MAX_POLL_COUNT && - !comp_check(val, le32_to_cpu(cmd->expected_val)); - i++) { - udelay(delay); - val = qed_rd(p_hwfn, p_ptt, addr); - } + data = le32_to_cpu(cmd->expected_val); + for (i = 0; + i < QED_INIT_MAX_POLL_COUNT && !comp_check(val, data); + i++) { + udelay(delay); + val = qed_rd(p_hwfn, p_ptt, addr); + } - if (i == QED_INIT_MAX_POLL_COUNT) - DP_ERR(p_hwfn, - "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparsion %08x)]\n", - addr, le32_to_cpu(cmd->expected_val), - val, data); + if (i == QED_INIT_MAX_POLL_COUNT) { + DP_ERR(p_hwfn, + "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparsion %08x)]\n", + addr, le32_to_cpu(cmd->expected_val), + val, le32_to_cpu(cmd->op_data)); } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 9cc9d62c1fec..fa73daa94655 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -399,7 +399,7 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn, dma_addr_t p_phys = 0; /* SB struct */ - p_sb = kmalloc(sizeof(*p_sb), GFP_ATOMIC); + p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL); if (!p_sb) { DP_NOTICE(cdev, "Failed to allocate `struct qed_sb_attn_info'\n"); return -ENOMEM; @@ -473,20 +473,20 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn, u8 vf_valid) { struct cau_sb_entry sb_entry; - u32 val; qed_init_cau_sb_entry(p_hwfn, &sb_entry, p_hwfn->rel_pf_id, vf_number, vf_valid); if (p_hwfn->hw_init_done) { - val = CAU_REG_SB_ADDR_MEMORY + igu_sb_id * sizeof(u64); - qed_wr(p_hwfn, p_ptt, val, lower_32_bits(sb_phys)); - qed_wr(p_hwfn, p_ptt, val + sizeof(u32), - upper_32_bits(sb_phys)); - - val = CAU_REG_SB_VAR_MEMORY + igu_sb_id * sizeof(u64); - qed_wr(p_hwfn, p_ptt, val, sb_entry.data); - qed_wr(p_hwfn, p_ptt, val + sizeof(u32), sb_entry.params); + /* Wide-bus, initialize via DMAE */ + u64 phys_addr = (u64)sb_phys; + + qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&phys_addr, + CAU_REG_SB_ADDR_MEMORY + + igu_sb_id * sizeof(u64), 2, 0); + qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&sb_entry, + CAU_REG_SB_VAR_MEMORY + + igu_sb_id * sizeof(u64), 2, 0); } else { /* Initialize Status Block Address */ STORE_RT_REG_AGG(p_hwfn, @@ -666,7 +666,7 @@ static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, void *p_virt; /* SB struct */ - p_sb = kmalloc(sizeof(*p_sb), GFP_ATOMIC); + p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL); if (!p_sb) { DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sb_info'\n"); return -ENOMEM; @@ -946,7 +946,7 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, u16 sb_id; u16 prev_sb_id = 0xFF; - p_hwfn->hw_info.p_igu_info = kzalloc(sizeof(*p_igu_info), GFP_ATOMIC); + p_hwfn->hw_info.p_igu_info = kzalloc(sizeof(*p_igu_info), GFP_KERNEL); if (!p_hwfn->hw_info.p_igu_info) return -ENOMEM; @@ -1072,7 +1072,7 @@ static void qed_int_sp_dpc_setup(struct qed_hwfn *p_hwfn) static int qed_int_sp_dpc_alloc(struct qed_hwfn *p_hwfn) { - p_hwfn->sp_dpc = kmalloc(sizeof(*p_hwfn->sp_dpc), GFP_ATOMIC); + p_hwfn->sp_dpc = kmalloc(sizeof(*p_hwfn->sp_dpc), GFP_KERNEL); if (!p_hwfn->sp_dpc) return -ENOMEM; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index f72036a2ef5b..bba59c51f72c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -124,6 +124,8 @@ struct qed_sp_vport_update_params { u8 update_vport_active_tx_flg; u8 vport_active_tx_flg; u8 update_approx_mcast_flg; + u8 update_accept_any_vlan_flg; + u8 accept_any_vlan; unsigned long bins[8]; struct qed_rss_params *rss_params; struct qed_filter_accept_flags accept_flags; @@ -140,9 +142,9 @@ static int qed_sp_vport_start(struct qed_hwfn *p_hwfn, u8 drop_ttl0_flg, u8 inner_vlan_removal_en_flg) { - struct qed_sp_init_request_params params; struct vport_start_ramrod_data *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; int rc = -EINVAL; u16 rx_mode = 0; u8 abs_vport_id = 0; @@ -151,16 +153,14 @@ static int qed_sp_vport_start(struct qed_hwfn *p_hwfn, if (rc != 0) return rc; - memset(¶ms, 0, sizeof(params)); - params.ramrod_data_size = sizeof(*p_ramrod); - params.comp_mode = QED_SPQ_MODE_EBLOCK; + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qed_spq_get_cid(p_hwfn); + init_data.opaque_fid = opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; rc = qed_sp_init_request(p_hwfn, &p_ent, - qed_spq_get_cid(p_hwfn), - opaque_fid, ETH_RAMROD_VPORT_START, - PROTOCOLID_ETH, - ¶ms); + PROTOCOLID_ETH, &init_data); if (rc) return rc; @@ -360,7 +360,7 @@ qed_sp_vport_update(struct qed_hwfn *p_hwfn, { struct qed_rss_params *p_rss_params = p_params->rss_params; struct vport_update_ramrod_data_cmn *p_cmn; - struct qed_sp_init_request_params sp_params; + struct qed_sp_init_data init_data; struct vport_update_ramrod_data *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; u8 abs_vport_id = 0; @@ -370,17 +370,15 @@ qed_sp_vport_update(struct qed_hwfn *p_hwfn, if (rc != 0) return rc; - memset(&sp_params, 0, sizeof(sp_params)); - sp_params.ramrod_data_size = sizeof(*p_ramrod); - sp_params.comp_mode = comp_mode; - sp_params.p_comp_data = p_comp_data; + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qed_spq_get_cid(p_hwfn); + init_data.opaque_fid = p_params->opaque_fid; + init_data.comp_mode = comp_mode; + init_data.p_comp_data = p_comp_data; rc = qed_sp_init_request(p_hwfn, &p_ent, - qed_spq_get_cid(p_hwfn), - p_params->opaque_fid, ETH_RAMROD_VPORT_UPDATE, - PROTOCOLID_ETH, - &sp_params); + PROTOCOLID_ETH, &init_data); if (rc) return rc; @@ -393,7 +391,9 @@ qed_sp_vport_update(struct qed_hwfn *p_hwfn, p_cmn->update_rx_active_flg = p_params->update_vport_active_rx_flg; p_cmn->tx_active_flg = p_params->vport_active_tx_flg; p_cmn->update_tx_active_flg = p_params->update_vport_active_tx_flg; - + p_cmn->accept_any_vlan = p_params->accept_any_vlan; + p_cmn->update_accept_any_vlan_flg = + p_params->update_accept_any_vlan_flg; rc = qed_sp_vport_update_rss(p_hwfn, p_ramrod, p_rss_params); if (rc) { /* Return spq entry which is taken in qed_sp_init_request()*/ @@ -412,8 +412,8 @@ static int qed_sp_vport_stop(struct qed_hwfn *p_hwfn, u16 opaque_fid, u8 vport_id) { - struct qed_sp_init_request_params sp_params; struct vport_stop_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; struct qed_spq_entry *p_ent; u8 abs_vport_id = 0; int rc; @@ -422,16 +422,14 @@ static int qed_sp_vport_stop(struct qed_hwfn *p_hwfn, if (rc != 0) return rc; - memset(&sp_params, 0, sizeof(sp_params)); - sp_params.ramrod_data_size = sizeof(*p_ramrod); - sp_params.comp_mode = QED_SPQ_MODE_EBLOCK; + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qed_spq_get_cid(p_hwfn); + init_data.opaque_fid = opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; rc = qed_sp_init_request(p_hwfn, &p_ent, - qed_spq_get_cid(p_hwfn), - opaque_fid, ETH_RAMROD_VPORT_STOP, - PROTOCOLID_ETH, - &sp_params); + PROTOCOLID_ETH, &init_data); if (rc) return rc; @@ -444,8 +442,10 @@ static int qed_sp_vport_stop(struct qed_hwfn *p_hwfn, static int qed_filter_accept_cmd(struct qed_dev *cdev, u8 vport, struct qed_filter_accept_flags accept_flags, - enum spq_mode comp_mode, - struct qed_spq_comp_cb *p_comp_data) + u8 update_accept_any_vlan, + u8 accept_any_vlan, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) { struct qed_sp_vport_update_params vport_update_params; int i, rc; @@ -454,6 +454,8 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev, memset(&vport_update_params, 0, sizeof(vport_update_params)); vport_update_params.vport_id = vport; vport_update_params.accept_flags = accept_flags; + vport_update_params.update_accept_any_vlan_flg = update_accept_any_vlan; + vport_update_params.accept_any_vlan = accept_any_vlan; for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; @@ -471,6 +473,10 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev, "Accept filter configured, flags = [Rx]%x [Tx]%x\n", accept_flags.rx_accept_filter, accept_flags.tx_accept_filter); + if (update_accept_any_vlan) + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "accept_any_vlan=%d configured\n", + accept_any_vlan); } return 0; @@ -502,8 +508,8 @@ qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, u16 cqe_pbl_size) { struct rx_queue_start_ramrod_data *p_ramrod = NULL; - struct qed_sp_init_request_params sp_params; struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; struct qed_hw_cid_data *p_rx_cid; u16 abs_rx_q_id = 0; u8 abs_vport_id = 0; @@ -528,15 +534,15 @@ qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, opaque_fid, cid, params->queue_id, params->vport_id, params->sb); - memset(&sp_params, 0, sizeof(params)); - sp_params.comp_mode = QED_SPQ_MODE_EBLOCK; - sp_params.ramrod_data_size = sizeof(*p_ramrod); + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = cid; + init_data.opaque_fid = opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; rc = qed_sp_init_request(p_hwfn, &p_ent, - cid, opaque_fid, ETH_RAMROD_RX_QUEUE_START, - PROTOCOLID_ETH, - &sp_params); + PROTOCOLID_ETH, &init_data); if (rc) return rc; @@ -551,12 +557,10 @@ qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, p_ramrod->complete_event_flg = 1; p_ramrod->bd_max_bytes = cpu_to_le16(bd_max_bytes); - p_ramrod->bd_base.hi = DMA_HI_LE(bd_chain_phys_addr); - p_ramrod->bd_base.lo = DMA_LO_LE(bd_chain_phys_addr); + DMA_REGPAIR_LE(p_ramrod->bd_base, bd_chain_phys_addr); p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); - p_ramrod->cqe_pbl_addr.hi = DMA_HI_LE(cqe_pbl_addr); - p_ramrod->cqe_pbl_addr.lo = DMA_LO_LE(cqe_pbl_addr); + DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr); rc = qed_spq_post(p_hwfn, p_ent, NULL); @@ -628,21 +632,20 @@ static int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn, { struct qed_hw_cid_data *p_rx_cid = &p_hwfn->p_rx_cids[rx_queue_id]; struct rx_queue_stop_ramrod_data *p_ramrod = NULL; - struct qed_sp_init_request_params sp_params; struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; u16 abs_rx_q_id = 0; int rc = -EINVAL; - memset(&sp_params, 0, sizeof(sp_params)); - sp_params.ramrod_data_size = sizeof(*p_ramrod); - sp_params.comp_mode = QED_SPQ_MODE_EBLOCK; + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_rx_cid->cid; + init_data.opaque_fid = p_rx_cid->opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; rc = qed_sp_init_request(p_hwfn, &p_ent, - p_rx_cid->cid, - p_rx_cid->opaque_fid, ETH_RAMROD_RX_QUEUE_STOP, - PROTOCOLID_ETH, - &sp_params); + PROTOCOLID_ETH, &init_data); if (rc) return rc; @@ -680,8 +683,8 @@ qed_sp_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn, union qed_qm_pq_params *p_pq_params) { struct tx_queue_start_ramrod_data *p_ramrod = NULL; - struct qed_sp_init_request_params sp_params; struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; struct qed_hw_cid_data *p_tx_cid; u8 abs_vport_id; int rc = -EINVAL; @@ -696,15 +699,15 @@ qed_sp_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn, if (rc) return rc; - memset(&sp_params, 0, sizeof(sp_params)); - sp_params.ramrod_data_size = sizeof(*p_ramrod); - sp_params.comp_mode = QED_SPQ_MODE_EBLOCK; + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = cid; + init_data.opaque_fid = opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; - rc = qed_sp_init_request(p_hwfn, &p_ent, cid, - opaque_fid, + rc = qed_sp_init_request(p_hwfn, &p_ent, ETH_RAMROD_TX_QUEUE_START, - PROTOCOLID_ETH, - &sp_params); + PROTOCOLID_ETH, &init_data); if (rc) return rc; @@ -714,11 +717,9 @@ qed_sp_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn, p_ramrod->sb_id = cpu_to_le16(p_params->sb); p_ramrod->sb_index = p_params->sb_idx; p_ramrod->stats_counter_id = stats_id; - p_ramrod->tc = p_pq_params->eth.tc; p_ramrod->pbl_size = cpu_to_le16(pbl_size); - p_ramrod->pbl_base_addr.hi = DMA_HI_LE(pbl_addr); - p_ramrod->pbl_base_addr.lo = DMA_LO_LE(pbl_addr); + DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, pbl_addr); pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, @@ -785,20 +786,19 @@ static int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id) { struct qed_hw_cid_data *p_tx_cid = &p_hwfn->p_tx_cids[tx_queue_id]; - struct qed_sp_init_request_params sp_params; struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; int rc = -EINVAL; - memset(&sp_params, 0, sizeof(sp_params)); - sp_params.ramrod_data_size = sizeof(struct tx_queue_stop_ramrod_data); - sp_params.comp_mode = QED_SPQ_MODE_EBLOCK; + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_tx_cid->cid; + init_data.opaque_fid = p_tx_cid->opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; rc = qed_sp_init_request(p_hwfn, &p_ent, - p_tx_cid->cid, - p_tx_cid->opaque_fid, ETH_RAMROD_TX_QUEUE_STOP, - PROTOCOLID_ETH, - &sp_params); + PROTOCOLID_ETH, &init_data); if (rc) return rc; @@ -821,9 +821,8 @@ qed_filter_action(enum qed_filter_opcode opcode) case QED_FILTER_REMOVE: action = ETH_FILTER_ACTION_REMOVE; break; - case QED_FILTER_REPLACE: case QED_FILTER_FLUSH: - action = ETH_FILTER_ACTION_REPLACE; + action = ETH_FILTER_ACTION_REMOVE_ALL; break; default: action = MAX_ETH_FILTER_ACTION; @@ -856,9 +855,9 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn, { u8 vport_to_add_to = 0, vport_to_remove_from = 0; struct vport_filter_update_ramrod_data *p_ramrod; - struct qed_sp_init_request_params sp_params; struct eth_filter_cmd *p_first_filter; struct eth_filter_cmd *p_second_filter; + struct qed_sp_init_data init_data; enum eth_filter_action action; int rc; @@ -872,17 +871,16 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn, if (rc) return rc; - memset(&sp_params, 0, sizeof(sp_params)); - sp_params.ramrod_data_size = sizeof(**pp_ramrod); - sp_params.comp_mode = comp_mode; - sp_params.p_comp_data = p_comp_data; + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qed_spq_get_cid(p_hwfn); + init_data.opaque_fid = opaque_fid; + init_data.comp_mode = comp_mode; + init_data.p_comp_data = p_comp_data; rc = qed_sp_init_request(p_hwfn, pp_ent, - qed_spq_get_cid(p_hwfn), - opaque_fid, ETH_RAMROD_FILTERS_UPDATE, - PROTOCOLID_ETH, - &sp_params); + PROTOCOLID_ETH, &init_data); if (rc) return rc; @@ -892,8 +890,7 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn, p_ramrod->filter_cmd_hdr.tx = p_filter_cmd->is_tx_filter ? 1 : 0; switch (p_filter_cmd->opcode) { - case QED_FILTER_FLUSH: - p_ramrod->filter_cmd_hdr.cmd_cnt = 0; break; + case QED_FILTER_REPLACE: case QED_FILTER_MOVE: p_ramrod->filter_cmd_hdr.cmd_cnt = 2; break; default: @@ -962,6 +959,12 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn, p_second_filter->action = ETH_FILTER_ACTION_ADD; p_second_filter->vport_id = vport_to_add_to; + } else if (p_filter_cmd->opcode == QED_FILTER_REPLACE) { + p_first_filter->vport_id = vport_to_add_to; + memcpy(p_second_filter, p_first_filter, + sizeof(*p_second_filter)); + p_first_filter->action = ETH_FILTER_ACTION_REMOVE_ALL; + p_second_filter->action = ETH_FILTER_ACTION_ADD; } else { action = qed_filter_action(p_filter_cmd->opcode); @@ -1101,8 +1104,8 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, { unsigned long bins[ETH_MULTICAST_MAC_BINS_IN_REGS]; struct vport_update_ramrod_data *p_ramrod = NULL; - struct qed_sp_init_request_params sp_params; struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; u8 abs_vport_id = 0; int rc, i; @@ -1118,18 +1121,16 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, return rc; } - memset(&sp_params, 0, sizeof(sp_params)); - sp_params.ramrod_data_size = sizeof(*p_ramrod); - sp_params.comp_mode = comp_mode; - sp_params.p_comp_data = p_comp_data; + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qed_spq_get_cid(p_hwfn); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = comp_mode; + init_data.p_comp_data = p_comp_data; rc = qed_sp_init_request(p_hwfn, &p_ent, - qed_spq_get_cid(p_hwfn), - p_hwfn->hw_info.opaque_fid, ETH_RAMROD_VPORT_UPDATE, - PROTOCOLID_ETH, - &sp_params); - + PROTOCOLID_ETH, &init_data); if (rc) { DP_ERR(p_hwfn, "Multi-cast command failed %d\n", rc); return rc; @@ -1344,6 +1345,9 @@ static int qed_update_vport(struct qed_dev *cdev, params->update_vport_active_flg; sp_params.vport_active_rx_flg = params->vport_active_flg; sp_params.vport_active_tx_flg = params->vport_active_flg; + sp_params.accept_any_vlan = params->accept_any_vlan; + sp_params.update_accept_any_vlan_flg = + params->update_accept_any_vlan_flg; /* RSS - is a bit tricky, since upper-layer isn't familiar with hwfns. * We need to re-fix the rss values per engine for CMT. @@ -1563,7 +1567,7 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev, else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; - return qed_filter_accept_cmd(cdev, 0, accept_flags, + return qed_filter_accept_cmd(cdev, 0, accept_flags, false, false, QED_SPQ_MODE_CB, NULL); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 9d76ce249277..593f8871adb6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -190,7 +190,7 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->pci_mem_start = cdev->pci_params.mem_start; dev_info->pci_mem_end = cdev->pci_params.mem_end; dev_info->pci_irq = cdev->pci_params.irq; - dev_info->is_mf = IS_MF(&cdev->hwfns[0]); + dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]); ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr); dev_info->fw_major = FW_MAJOR_VERSION; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index ba1b1f1ef789..f23ce734ab63 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -147,7 +147,7 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, u32 size; /* Allocate mcp_info structure */ - p_hwfn->mcp_info = kzalloc(sizeof(*p_hwfn->mcp_info), GFP_ATOMIC); + p_hwfn->mcp_info = kzalloc(sizeof(*p_hwfn->mcp_info), GFP_KERNEL); if (!p_hwfn->mcp_info) goto err; p_info = p_hwfn->mcp_info; @@ -161,10 +161,10 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, } size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32); - p_info->mfw_mb_cur = kzalloc(size, GFP_ATOMIC); + p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL); p_info->mfw_mb_shadow = kzalloc(sizeof(u32) * MFW_DRV_MSG_MAX_DWORDS( - p_info->mfw_mb_length), GFP_ATOMIC); + p_info->mfw_mb_length), GFP_KERNEL); if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr) goto err; @@ -720,26 +720,25 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, return -EINVAL; } - if (p_hwfn->cdev->mf_mode != SF) { - info->bandwidth_min = (shmem_info.config & - FUNC_MF_CFG_MIN_BW_MASK) >> - FUNC_MF_CFG_MIN_BW_SHIFT; - if (info->bandwidth_min < 1 || info->bandwidth_min > 100) { - DP_INFO(p_hwfn, - "bandwidth minimum out of bounds [%02x]. Set to 1\n", - info->bandwidth_min); - info->bandwidth_min = 1; - } - info->bandwidth_max = (shmem_info.config & - FUNC_MF_CFG_MAX_BW_MASK) >> - FUNC_MF_CFG_MAX_BW_SHIFT; - if (info->bandwidth_max < 1 || info->bandwidth_max > 100) { - DP_INFO(p_hwfn, - "bandwidth maximum out of bounds [%02x]. Set to 100\n", - info->bandwidth_max); - info->bandwidth_max = 100; - } + info->bandwidth_min = (shmem_info.config & + FUNC_MF_CFG_MIN_BW_MASK) >> + FUNC_MF_CFG_MIN_BW_SHIFT; + if (info->bandwidth_min < 1 || info->bandwidth_min > 100) { + DP_INFO(p_hwfn, + "bandwidth minimum out of bounds [%02x]. Set to 1\n", + info->bandwidth_min); + info->bandwidth_min = 1; + } + + info->bandwidth_max = (shmem_info.config & + FUNC_MF_CFG_MAX_BW_MASK) >> + FUNC_MF_CFG_MAX_BW_SHIFT; + if (info->bandwidth_max < 1 || info->bandwidth_max > 100) { + DP_INFO(p_hwfn, + "bandwidth maximum out of bounds [%02x]. Set to 100\n", + info->bandwidth_max); + info->bandwidth_max = 100; } if (shmem_info.mac_upper || shmem_info.mac_lower) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 287fadfab52d..d39f914b66ee 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -311,19 +311,20 @@ void qed_consq_free(struct qed_hwfn *p_hwfn, #define QED_SP_EQ_COMPLETION 0x01 #define QED_SP_CQE_COMPLETION 0x02 -struct qed_sp_init_request_params { - size_t ramrod_data_size; +struct qed_sp_init_data { + u32 cid; + u16 opaque_fid; + + /* Information regarding operation upon sending & completion */ enum spq_mode comp_mode; struct qed_spq_comp_cb *p_comp_data; }; int qed_sp_init_request(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent, - u32 cid, - u16 opaque_fid, u8 cmd, u8 protocol, - struct qed_sp_init_request_params *p_params); + struct qed_sp_init_data *p_data); /** * @brief qed_sp_pf_start - PF Function Start Ramrod @@ -343,7 +344,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, */ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, - enum mf_mode mode); + enum qed_mf_mode mode); /** * @brief qed_sp_pf_stop - PF Function Stop Ramrod diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 6f7879136633..1c06c37d4c3d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -23,15 +23,13 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent, - u32 cid, - u16 opaque_fid, u8 cmd, u8 protocol, - struct qed_sp_init_request_params *p_params) + struct qed_sp_init_data *p_data) { - int rc = -EINVAL; + u32 opaque_cid = p_data->opaque_fid << 16 | p_data->cid; struct qed_spq_entry *p_ent = NULL; - u32 opaque_cid = opaque_fid << 16 | cid; + int rc; if (!pp_ent) return -ENOMEM; @@ -48,7 +46,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, p_ent->elem.hdr.protocol_id = protocol; p_ent->priority = QED_SPQ_PRIORITY_NORMAL; - p_ent->comp_mode = p_params->comp_mode; + p_ent->comp_mode = p_data->comp_mode; p_ent->comp_done.done = 0; switch (p_ent->comp_mode) { @@ -57,17 +55,17 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, break; case QED_SPQ_MODE_BLOCK: - if (!p_params->p_comp_data) + if (!p_data->p_comp_data) return -EINVAL; - p_ent->comp_cb.cookie = p_params->p_comp_data->cookie; + p_ent->comp_cb.cookie = p_data->p_comp_data->cookie; break; case QED_SPQ_MODE_CB: - if (!p_params->p_comp_data) + if (!p_data->p_comp_data) p_ent->comp_cb.function = NULL; else - p_ent->comp_cb = *p_params->p_comp_data; + p_ent->comp_cb = *p_data->p_comp_data; break; default: @@ -83,37 +81,35 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, D_TRINE(p_ent->comp_mode, QED_SPQ_MODE_EBLOCK, QED_SPQ_MODE_BLOCK, "MODE_EBLOCK", "MODE_BLOCK", "MODE_CB")); - if (p_params->ramrod_data_size) - memset(&p_ent->ramrod, 0, p_params->ramrod_data_size); + + memset(&p_ent->ramrod, 0, sizeof(p_ent->ramrod)); return 0; } int qed_sp_pf_start(struct qed_hwfn *p_hwfn, - enum mf_mode mode) + enum qed_mf_mode mode) { - struct qed_sp_init_request_params params; struct pf_start_ramrod_data *p_ramrod = NULL; u16 sb = qed_int_get_sp_sb_id(p_hwfn); u8 sb_index = p_hwfn->p_eq->eq_sb_index; struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; int rc = -EINVAL; /* update initial eq producer */ qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(&p_hwfn->p_eq->chain)); - memset(¶ms, 0, sizeof(params)); - params.ramrod_data_size = sizeof(*p_ramrod); - params.comp_mode = QED_SPQ_MODE_EBLOCK; + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qed_spq_get_cid(p_hwfn); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; - rc = qed_sp_init_request(p_hwfn, - &p_ent, - qed_spq_get_cid(p_hwfn), - p_hwfn->hw_info.opaque_fid, + rc = qed_sp_init_request(p_hwfn, &p_ent, COMMON_RAMROD_PF_START, PROTOCOLID_COMMON, - ¶ms); + &init_data); if (rc) return rc; @@ -125,26 +121,33 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, p_ramrod->dont_log_ramrods = 0; p_ramrod->log_type_mask = cpu_to_le16(0xf); p_ramrod->mf_mode = mode; + switch (mode) { + case QED_MF_DEFAULT: + case QED_MF_NPAR: + p_ramrod->mf_mode = MF_NPAR; + break; + case QED_MF_OVLAN: + p_ramrod->mf_mode = MF_OVLAN; + break; + default: + DP_NOTICE(p_hwfn, "Unsupported MF mode, init as DEFAULT\n"); + p_ramrod->mf_mode = MF_NPAR; + } p_ramrod->outer_tag = p_hwfn->hw_info.ovlan; /* Place EQ address in RAMROD */ - p_ramrod->event_ring_pbl_addr.hi = - DMA_HI_LE(p_hwfn->p_eq->chain.pbl.p_phys_table); - p_ramrod->event_ring_pbl_addr.lo = - DMA_LO_LE(p_hwfn->p_eq->chain.pbl.p_phys_table); + DMA_REGPAIR_LE(p_ramrod->event_ring_pbl_addr, + p_hwfn->p_eq->chain.pbl.p_phys_table); p_ramrod->event_ring_num_pages = (u8)p_hwfn->p_eq->chain.page_cnt; - p_ramrod->consolid_q_pbl_addr.hi = - DMA_HI_LE(p_hwfn->p_consq->chain.pbl.p_phys_table); - p_ramrod->consolid_q_pbl_addr.lo = - DMA_LO_LE(p_hwfn->p_consq->chain.pbl.p_phys_table); + DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr, + p_hwfn->p_consq->chain.pbl.p_phys_table); p_hwfn->hw_info.personality = PERSONALITY_ETH; DP_VERBOSE(p_hwfn, QED_MSG_SPQ, - "Setting event_ring_sb [id %04x index %02x], mf [%s] outer_tag [%d]\n", + "Setting event_ring_sb [id %04x index %02x], outer_tag [%d]\n", sb, sb_index, - (p_ramrod->mf_mode == SF) ? "SF" : "Multi-Pf", p_ramrod->outer_tag); return qed_spq_post(p_hwfn, p_ent, NULL); @@ -152,17 +155,19 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, int qed_sp_pf_stop(struct qed_hwfn *p_hwfn) { - struct qed_sp_init_request_params params; struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; int rc = -EINVAL; - memset(¶ms, 0, sizeof(params)); - params.comp_mode = QED_SPQ_MODE_EBLOCK; + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qed_spq_get_cid(p_hwfn); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; - rc = qed_sp_init_request(p_hwfn, &p_ent, qed_spq_get_cid(p_hwfn), - p_hwfn->hw_info.opaque_fid, + rc = qed_sp_init_request(p_hwfn, &p_ent, COMMON_RAMROD_PF_STOP, PROTOCOLID_COMMON, - ¶ms); + &init_data); if (rc) return rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 3dd548ab8df1..89469d5aae25 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -183,10 +183,8 @@ static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn, p_cxt->xstorm_st_context.spq_base_hi = DMA_HI_LE(p_spq->chain.p_phys_addr); - p_cxt->xstorm_st_context.consolid_base_addr.lo = - DMA_LO_LE(p_hwfn->p_consq->chain.p_phys_addr); - p_cxt->xstorm_st_context.consolid_base_addr.hi = - DMA_HI_LE(p_hwfn->p_consq->chain.p_phys_addr); + DMA_REGPAIR_LE(p_cxt->xstorm_st_context.consolid_base_addr, + p_hwfn->p_consq->chain.p_phys_addr); } static int qed_spq_hw_post(struct qed_hwfn *p_hwfn, @@ -327,7 +325,7 @@ struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq; /* Allocate EQ struct */ - p_eq = kzalloc(sizeof(*p_eq), GFP_ATOMIC); + p_eq = kzalloc(sizeof(*p_eq), GFP_KERNEL); if (!p_eq) { DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_eq'\n"); return NULL; @@ -423,8 +421,7 @@ void qed_spq_setup(struct qed_hwfn *p_hwfn) p_virt = p_spq->p_virt; for (i = 0; i < p_spq->chain.capacity; i++) { - p_virt->elem.data_ptr.hi = DMA_HI_LE(p_phys); - p_virt->elem.data_ptr.lo = DMA_LO_LE(p_phys); + DMA_REGPAIR_LE(p_virt->elem.data_ptr, p_phys); list_add_tail(&p_virt->list, &p_spq->free_pool); @@ -457,7 +454,7 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn) /* SPQ struct */ p_spq = - kzalloc(sizeof(struct qed_spq), GFP_ATOMIC); + kzalloc(sizeof(struct qed_spq), GFP_KERNEL); if (!p_spq) { DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_spq'\n"); return -ENOMEM; @@ -853,7 +850,7 @@ struct qed_consq *qed_consq_alloc(struct qed_hwfn *p_hwfn) struct qed_consq *p_consq; /* Allocate ConsQ struct */ - p_consq = kzalloc(sizeof(*p_consq), GFP_ATOMIC); + p_consq = kzalloc(sizeof(*p_consq), GFP_KERNEL); if (!p_consq) { DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_consq'\n"); return NULL; diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 7c6caf7f6612..02e17d331f22 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -24,7 +24,7 @@ #include <linux/qed/qed_eth_if.h> #define QEDE_MAJOR_VERSION 8 -#define QEDE_MINOR_VERSION 4 +#define QEDE_MINOR_VERSION 7 #define QEDE_REVISION_VERSION 0 #define QEDE_ENGINEERING_VERSION 0 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \ @@ -100,6 +100,12 @@ struct qede_stats { u64 tx_mac_ctrl_frames; }; +struct qede_vlan { + struct list_head list; + u16 vid; + bool configured; +}; + struct qede_dev { struct qed_dev *cdev; struct net_device *ndev; @@ -154,6 +160,10 @@ struct qede_dev { u16 q_num_rx_buffers; /* Must be a power of two */ u16 q_num_tx_buffers; /* Must be a power of two */ + struct list_head vlan_list; + u16 configured_vlans; + u16 non_configured_vlans; + bool accept_any_vlan; struct delayed_work sp_task; unsigned long sp_flags; }; @@ -173,9 +183,9 @@ enum QEDE_STATE { * skb are built only after the frame was DMA-ed. */ struct sw_rx_data { - u8 *data; - - DEFINE_DMA_UNMAP_ADDR(mapping); + struct page *data; + dma_addr_t mapping; + unsigned int page_offset; }; struct qede_rx_queue { @@ -188,6 +198,7 @@ struct qede_rx_queue { void __iomem *hw_rxq_prod_addr; int rx_buf_size; + unsigned int rx_buf_seg_size; u16 num_rx_buffers; u16 rxq_id; @@ -281,6 +292,7 @@ void qede_fill_by_demand_stats(struct qede_dev *edev); #define NUM_TX_BDS_MIN 128 #define NUM_TX_BDS_DEF NUM_TX_BDS_MAX +#define QEDE_RX_HDR_SIZE 256 #define for_each_rss(i) for (i = 0; i < edev->num_rss; i++) #endif /* _QEDE_H_ */ diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index e442b85c9a5e..c49dc10ce151 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -217,9 +217,9 @@ static int qede_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct qed_link_params params; u32 speed; - if (edev->dev_info.common.is_mf) { + if (!edev->dev_info.common.is_mf_default) { DP_INFO(edev, - "Link parameters can not be changed in MF mode\n"); + "Link parameters can not be changed in non-default mode\n"); return -EOPNOTSUPP; } @@ -428,7 +428,7 @@ static int qede_set_pauseparam(struct net_device *dev, struct qed_link_params params; struct qed_link_output current_link; - if (!edev->dev_info.common.is_mf) { + if (!edev->dev_info.common.is_mf_default) { DP_INFO(edev, "Pause parameters can not be updated in non-default mode\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 6237f10b5119..5f15e23a0f7d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -330,15 +330,15 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb, struct eth_tx_3rd_bd *third_bd) { u8 l4_proto; - u16 bd2_bits = 0, bd2_bits2 = 0; + u16 bd2_bits1 = 0, bd2_bits2 = 0; - bd2_bits2 |= (1 << ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT); + bd2_bits1 |= (1 << ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT); - bd2_bits |= ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) & + bd2_bits2 |= ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) & ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) << ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT; - bd2_bits2 |= (ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH << + bd2_bits1 |= (ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH << ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT); if (vlan_get_protocol(skb) == htons(ETH_P_IPV6)) @@ -347,16 +347,15 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb, l4_proto = ip_hdr(skb)->protocol; if (l4_proto == IPPROTO_UDP) - bd2_bits2 |= 1 << ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT; + bd2_bits1 |= 1 << ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT; - if (third_bd) { + if (third_bd) third_bd->data.bitfields |= - ((tcp_hdrlen(skb) / 4) & - ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK) << - ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT; - } + cpu_to_le16(((tcp_hdrlen(skb) / 4) & + ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK) << + ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT); - second_bd->data.bitfields = cpu_to_le16(bd2_bits); + second_bd->data.bitfields1 = cpu_to_le16(bd2_bits1); second_bd->data.bitfields2 = cpu_to_le16(bd2_bits2); } @@ -464,12 +463,16 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, /* Fill the parsing flags & params according to the requested offload */ if (xmit_type & XMIT_L4_CSUM) { + u16 temp = 1 << ETH_TX_DATA_1ST_BD_TUNN_CFG_OVERRIDE_SHIFT; + /* We don't re-calculate IP checksum as it is already done by * the upper stack */ first_bd->data.bd_flags.bitfields |= 1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT; + first_bd->data.bitfields |= cpu_to_le16(temp); + /* If the packet is IPv6 with extension header, indicate that * to FW and pass few params, since the device cracker doesn't * support parsing IPv6 with extension header/s. @@ -491,7 +494,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, /* @@@TBD - if will not be removed need to check */ third_bd->data.bitfields |= - (1 << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT); + cpu_to_le16((1 << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT)); /* Make life easier for FW guys who can't deal with header and * data on same BD. If we need to split, use the second bd... @@ -719,26 +722,52 @@ static bool qede_has_tx_work(struct qede_fastpath *fp) return false; } -/* This function copies the Rx buffer from the CONS position to the PROD - * position, since we failed to allocate a new Rx buffer. +/* This function reuses the buffer(from an offset) from + * consumer index to producer index in the bd ring */ -static void qede_reuse_rx_data(struct qede_rx_queue *rxq) +static inline void qede_reuse_page(struct qede_dev *edev, + struct qede_rx_queue *rxq, + struct sw_rx_data *curr_cons) { - struct eth_rx_bd *rx_bd_cons = qed_chain_consume(&rxq->rx_bd_ring); struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring); - struct sw_rx_data *sw_rx_data_cons = - &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX]; - struct sw_rx_data *sw_rx_data_prod = - &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX]; + struct sw_rx_data *curr_prod; + dma_addr_t new_mapping; - dma_unmap_addr_set(sw_rx_data_prod, mapping, - dma_unmap_addr(sw_rx_data_cons, mapping)); + curr_prod = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX]; + *curr_prod = *curr_cons; - sw_rx_data_prod->data = sw_rx_data_cons->data; - memcpy(rx_bd_prod, rx_bd_cons, sizeof(struct eth_rx_bd)); + new_mapping = curr_prod->mapping + curr_prod->page_offset; + + rx_bd_prod->addr.hi = cpu_to_le32(upper_32_bits(new_mapping)); + rx_bd_prod->addr.lo = cpu_to_le32(lower_32_bits(new_mapping)); - rxq->sw_rx_cons++; rxq->sw_rx_prod++; + curr_cons->data = NULL; +} + +static inline int qede_realloc_rx_buffer(struct qede_dev *edev, + struct qede_rx_queue *rxq, + struct sw_rx_data *curr_cons) +{ + /* Move to the next segment in the page */ + curr_cons->page_offset += rxq->rx_buf_seg_size; + + if (curr_cons->page_offset == PAGE_SIZE) { + if (unlikely(qede_alloc_rx_buffer(edev, rxq))) + return -ENOMEM; + + dma_unmap_page(&edev->pdev->dev, curr_cons->mapping, + PAGE_SIZE, DMA_FROM_DEVICE); + } else { + /* Increment refcount of the page as we don't want + * network stack to take the ownership of the page + * which can be recycled multiple times by the driver. + */ + atomic_inc(&curr_cons->data->_count); + qede_reuse_page(edev, rxq, curr_cons); + } + + return 0; } static inline void qede_update_rx_prod(struct qede_dev *edev, @@ -857,9 +886,10 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) struct sw_rx_data *sw_rx_data; union eth_rx_cqe *cqe; struct sk_buff *skb; + struct page *data; + __le16 flags; u16 len, pad; u32 rx_hash; - u8 *data; /* Get the CQE from the completion ring */ cqe = (union eth_rx_cqe *) @@ -879,56 +909,110 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) data = sw_rx_data->data; fp_cqe = &cqe->fast_path_regular; - len = le16_to_cpu(fp_cqe->pkt_len); + len = le16_to_cpu(fp_cqe->len_on_first_bd); pad = fp_cqe->placement_offset; + flags = cqe->fast_path_regular.pars_flags.flags; - /* For every Rx BD consumed, we allocate a new BD so the BD ring - * is always with a fixed size. If allocation fails, we take the - * consumed BD and return it to the ring in the PROD position. - * The packet that was received on that BD will be dropped (and - * not passed to the upper stack). - */ - if (likely(qede_alloc_rx_buffer(edev, rxq) == 0)) { - dma_unmap_single(&edev->pdev->dev, - dma_unmap_addr(sw_rx_data, mapping), - rxq->rx_buf_size, DMA_FROM_DEVICE); - - /* If this is an error packet then drop it */ - parse_flag = - le16_to_cpu(cqe->fast_path_regular.pars_flags.flags); - csum_flag = qede_check_csum(parse_flag); - if (csum_flag == QEDE_CSUM_ERROR) { - DP_NOTICE(edev, - "CQE in CONS = %u has error, flags = %x, dropping incoming packet\n", - sw_comp_cons, parse_flag); - rxq->rx_hw_errors++; - kfree(data); - goto next_rx; - } - - skb = build_skb(data, 0); - - if (unlikely(!skb)) { - DP_NOTICE(edev, - "Build_skb failed, dropping incoming packet\n"); - kfree(data); - rxq->rx_alloc_errors++; - goto next_rx; - } + /* If this is an error packet then drop it */ + parse_flag = le16_to_cpu(flags); - skb_reserve(skb, pad); + csum_flag = qede_check_csum(parse_flag); + if (unlikely(csum_flag == QEDE_CSUM_ERROR)) { + DP_NOTICE(edev, + "CQE in CONS = %u has error, flags = %x, dropping incoming packet\n", + sw_comp_cons, parse_flag); + rxq->rx_hw_errors++; + qede_reuse_page(edev, rxq, sw_rx_data); + goto next_rx; + } - } else { + skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE); + if (unlikely(!skb)) { DP_NOTICE(edev, - "New buffer allocation failed, dropping incoming packet and reusing its buffer\n"); - qede_reuse_rx_data(rxq); + "Build_skb failed, dropping incoming packet\n"); + qede_reuse_page(edev, rxq, sw_rx_data); rxq->rx_alloc_errors++; - goto next_cqe; + goto next_rx; + } + + /* Copy data into SKB */ + if (len + pad <= QEDE_RX_HDR_SIZE) { + memcpy(skb_put(skb, len), + page_address(data) + pad + + sw_rx_data->page_offset, len); + qede_reuse_page(edev, rxq, sw_rx_data); + } else { + struct skb_frag_struct *frag; + unsigned int pull_len; + unsigned char *va; + + frag = &skb_shinfo(skb)->frags[0]; + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, data, + pad + sw_rx_data->page_offset, + len, rxq->rx_buf_seg_size); + + va = skb_frag_address(frag); + pull_len = eth_get_headlen(va, QEDE_RX_HDR_SIZE); + + /* Align the pull_len to optimize memcpy */ + memcpy(skb->data, va, ALIGN(pull_len, sizeof(long))); + + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; + + if (unlikely(qede_realloc_rx_buffer(edev, rxq, + sw_rx_data))) { + DP_ERR(edev, "Failed to allocate rx buffer\n"); + rxq->rx_alloc_errors++; + goto next_cqe; + } } - sw_rx_data->data = NULL; + if (fp_cqe->bd_num != 1) { + u16 pkt_len = le16_to_cpu(fp_cqe->pkt_len); + u8 num_frags; + + pkt_len -= len; + + for (num_frags = fp_cqe->bd_num - 1; num_frags > 0; + num_frags--) { + u16 cur_size = pkt_len > rxq->rx_buf_size ? + rxq->rx_buf_size : pkt_len; + + WARN_ONCE(!cur_size, + "Still got %d BDs for mapping jumbo, but length became 0\n", + num_frags); + + if (unlikely(qede_alloc_rx_buffer(edev, rxq))) + goto next_cqe; + + rxq->sw_rx_cons++; + sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX; + sw_rx_data = &rxq->sw_rx_ring[sw_rx_index]; + qed_chain_consume(&rxq->rx_bd_ring); + dma_unmap_page(&edev->pdev->dev, + sw_rx_data->mapping, + PAGE_SIZE, DMA_FROM_DEVICE); + + skb_fill_page_desc(skb, + skb_shinfo(skb)->nr_frags++, + sw_rx_data->data, 0, + cur_size); + + skb->truesize += PAGE_SIZE; + skb->data_len += cur_size; + skb->len += cur_size; + pkt_len -= cur_size; + } - skb_put(skb, len); + if (pkt_len) + DP_ERR(edev, + "Mapped all BDs of jumbo, but still have %d bytes\n", + pkt_len); + } skb->protocol = eth_type_trans(skb, edev->ndev); @@ -1056,6 +1140,21 @@ static int qede_set_ucast_rx_mac(struct qede_dev *edev, return edev->ops->filter_config(edev->cdev, &filter_cmd); } +static int qede_set_ucast_rx_vlan(struct qede_dev *edev, + enum qed_filter_xcast_params_type opcode, + u16 vid) +{ + struct qed_filter_params filter_cmd; + + memset(&filter_cmd, 0, sizeof(filter_cmd)); + filter_cmd.type = QED_FILTER_TYPE_UCAST; + filter_cmd.filter.ucast.type = opcode; + filter_cmd.filter.ucast.vlan_valid = 1; + filter_cmd.filter.ucast.vlan = vid; + + return edev->ops->filter_config(edev->cdev, &filter_cmd); +} + void qede_fill_by_demand_stats(struct qede_dev *edev) { struct qed_eth_stats stats; @@ -1168,6 +1267,247 @@ static struct rtnl_link_stats64 *qede_get_stats64( return stats; } +static void qede_config_accept_any_vlan(struct qede_dev *edev, bool action) +{ + struct qed_update_vport_params params; + int rc; + + /* Proceed only if action actually needs to be performed */ + if (edev->accept_any_vlan == action) + return; + + memset(¶ms, 0, sizeof(params)); + + params.vport_id = 0; + params.accept_any_vlan = action; + params.update_accept_any_vlan_flg = 1; + + rc = edev->ops->vport_update(edev->cdev, ¶ms); + if (rc) { + DP_ERR(edev, "Failed to %s accept-any-vlan\n", + action ? "enable" : "disable"); + } else { + DP_INFO(edev, "%s accept-any-vlan\n", + action ? "enabled" : "disabled"); + edev->accept_any_vlan = action; + } +} + +static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qede_vlan *vlan, *tmp; + int rc; + + DP_VERBOSE(edev, NETIF_MSG_IFUP, "Adding vlan 0x%04x\n", vid); + + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) { + DP_INFO(edev, "Failed to allocate struct for vlan\n"); + return -ENOMEM; + } + INIT_LIST_HEAD(&vlan->list); + vlan->vid = vid; + vlan->configured = false; + + /* Verify vlan isn't already configured */ + list_for_each_entry(tmp, &edev->vlan_list, list) { + if (tmp->vid == vlan->vid) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "vlan already configured\n"); + kfree(vlan); + return -EEXIST; + } + } + + /* If interface is down, cache this VLAN ID and return */ + if (edev->state != QEDE_STATE_OPEN) { + DP_VERBOSE(edev, NETIF_MSG_IFDOWN, + "Interface is down, VLAN %d will be configured when interface is up\n", + vid); + if (vid != 0) + edev->non_configured_vlans++; + list_add(&vlan->list, &edev->vlan_list); + + return 0; + } + + /* Check for the filter limit. + * Note - vlan0 has a reserved filter and can be added without + * worrying about quota + */ + if ((edev->configured_vlans < edev->dev_info.num_vlan_filters) || + (vlan->vid == 0)) { + rc = qede_set_ucast_rx_vlan(edev, + QED_FILTER_XCAST_TYPE_ADD, + vlan->vid); + if (rc) { + DP_ERR(edev, "Failed to configure VLAN %d\n", + vlan->vid); + kfree(vlan); + return -EINVAL; + } + vlan->configured = true; + + /* vlan0 filter isn't consuming out of our quota */ + if (vlan->vid != 0) + edev->configured_vlans++; + } else { + /* Out of quota; Activate accept-any-VLAN mode */ + if (!edev->non_configured_vlans) + qede_config_accept_any_vlan(edev, true); + + edev->non_configured_vlans++; + } + + list_add(&vlan->list, &edev->vlan_list); + + return 0; +} + +static void qede_del_vlan_from_list(struct qede_dev *edev, + struct qede_vlan *vlan) +{ + /* vlan0 filter isn't consuming out of our quota */ + if (vlan->vid != 0) { + if (vlan->configured) + edev->configured_vlans--; + else + edev->non_configured_vlans--; + } + + list_del(&vlan->list); + kfree(vlan); +} + +static int qede_configure_vlan_filters(struct qede_dev *edev) +{ + int rc = 0, real_rc = 0, accept_any_vlan = 0; + struct qed_dev_eth_info *dev_info; + struct qede_vlan *vlan = NULL; + + if (list_empty(&edev->vlan_list)) + return 0; + + dev_info = &edev->dev_info; + + /* Configure non-configured vlans */ + list_for_each_entry(vlan, &edev->vlan_list, list) { + if (vlan->configured) + continue; + + /* We have used all our credits, now enable accept_any_vlan */ + if ((vlan->vid != 0) && + (edev->configured_vlans == dev_info->num_vlan_filters)) { + accept_any_vlan = 1; + continue; + } + + DP_VERBOSE(edev, NETIF_MSG_IFUP, "Adding vlan %d\n", vlan->vid); + + rc = qede_set_ucast_rx_vlan(edev, QED_FILTER_XCAST_TYPE_ADD, + vlan->vid); + if (rc) { + DP_ERR(edev, "Failed to configure VLAN %u\n", + vlan->vid); + real_rc = rc; + continue; + } + + vlan->configured = true; + /* vlan0 filter doesn't consume our VLAN filter's quota */ + if (vlan->vid != 0) { + edev->non_configured_vlans--; + edev->configured_vlans++; + } + } + + /* enable accept_any_vlan mode if we have more VLANs than credits, + * or remove accept_any_vlan mode if we've actually removed + * a non-configured vlan, and all remaining vlans are truly configured. + */ + + if (accept_any_vlan) + qede_config_accept_any_vlan(edev, true); + else if (!edev->non_configured_vlans) + qede_config_accept_any_vlan(edev, false); + + return real_rc; +} + +static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qede_vlan *vlan = NULL; + int rc; + + DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid); + + /* Find whether entry exists */ + list_for_each_entry(vlan, &edev->vlan_list, list) + if (vlan->vid == vid) + break; + + if (!vlan || (vlan->vid != vid)) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "Vlan isn't configured\n"); + return 0; + } + + if (edev->state != QEDE_STATE_OPEN) { + /* As interface is already down, we don't have a VPORT + * instance to remove vlan filter. So just update vlan list + */ + DP_VERBOSE(edev, NETIF_MSG_IFDOWN, + "Interface is down, removing VLAN from list only\n"); + qede_del_vlan_from_list(edev, vlan); + return 0; + } + + /* Remove vlan */ + rc = qede_set_ucast_rx_vlan(edev, QED_FILTER_XCAST_TYPE_DEL, vid); + if (rc) { + DP_ERR(edev, "Failed to remove VLAN %d\n", vid); + return -EINVAL; + } + + qede_del_vlan_from_list(edev, vlan); + + /* We have removed a VLAN - try to see if we can + * configure non-configured VLAN from the list. + */ + rc = qede_configure_vlan_filters(edev); + + return rc; +} + +static void qede_vlan_mark_nonconfigured(struct qede_dev *edev) +{ + struct qede_vlan *vlan = NULL; + + if (list_empty(&edev->vlan_list)) + return; + + list_for_each_entry(vlan, &edev->vlan_list, list) { + if (!vlan->configured) + continue; + + vlan->configured = false; + + /* vlan0 filter isn't consuming out of our quota */ + if (vlan->vid != 0) { + edev->non_configured_vlans++; + edev->configured_vlans--; + } + + DP_VERBOSE(edev, NETIF_MSG_IFDOWN, + "marked vlan %d as non-configured\n", + vlan->vid); + } + + edev->accept_any_vlan = false; +} + static const struct net_device_ops qede_netdev_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, @@ -1176,6 +1516,8 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = qede_change_mtu, + .ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid, .ndo_get_stats64 = qede_get_stats64, }; @@ -1220,6 +1562,8 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev, edev->num_tc = edev->dev_info.num_tc; + INIT_LIST_HEAD(&edev->vlan_list); + return edev; } @@ -1251,7 +1595,7 @@ static void qede_init_ndev(struct qede_dev *edev) NETIF_F_HIGHDMA; ndev->features = hw_features | NETIF_F_RXHASH | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HIGHDMA | - NETIF_F_HW_VLAN_CTAG_TX; + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_TX; ndev->hw_features = hw_features; @@ -1566,17 +1910,17 @@ static void qede_free_rx_buffers(struct qede_dev *edev, for (i = rxq->sw_rx_cons; i != rxq->sw_rx_prod; i++) { struct sw_rx_data *rx_buf; - u8 *data; + struct page *data; rx_buf = &rxq->sw_rx_ring[i & NUM_RX_BDS_MAX]; data = rx_buf->data; - dma_unmap_single(&edev->pdev->dev, - dma_unmap_addr(rx_buf, mapping), - rxq->rx_buf_size, DMA_FROM_DEVICE); + dma_unmap_page(&edev->pdev->dev, + rx_buf->mapping, + PAGE_SIZE, DMA_FROM_DEVICE); rx_buf->data = NULL; - kfree(data); + __free_page(data); } } @@ -1600,29 +1944,32 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev, struct sw_rx_data *sw_rx_data; struct eth_rx_bd *rx_bd; dma_addr_t mapping; + struct page *data; u16 rx_buf_size; - u8 *data; rx_buf_size = rxq->rx_buf_size; - data = kmalloc(rx_buf_size, GFP_ATOMIC); + data = alloc_pages(GFP_ATOMIC, 0); if (unlikely(!data)) { - DP_NOTICE(edev, "Failed to allocate Rx data\n"); + DP_NOTICE(edev, "Failed to allocate Rx data [page]\n"); return -ENOMEM; } - mapping = dma_map_single(&edev->pdev->dev, data, - rx_buf_size, DMA_FROM_DEVICE); + /* Map the entire page as it would be used + * for multiple RX buffer segment size mapping. + */ + mapping = dma_map_page(&edev->pdev->dev, data, 0, + PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) { - kfree(data); + __free_page(data); DP_NOTICE(edev, "Failed to map Rx buffer\n"); return -ENOMEM; } sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX]; + sw_rx_data->page_offset = 0; sw_rx_data->data = data; - - dma_unmap_addr_set(sw_rx_data, mapping, mapping); + sw_rx_data->mapping = mapping; /* Advance PROD and get BD pointer */ rx_bd = (struct eth_rx_bd *)qed_chain_produce(&rxq->rx_bd_ring); @@ -1643,13 +1990,16 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, rxq->num_rx_buffers = edev->q_num_rx_buffers; - rxq->rx_buf_size = NET_IP_ALIGN + - ETH_OVERHEAD + - edev->ndev->mtu + - QEDE_FW_RX_ALIGN_END; + rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + + edev->ndev->mtu; + if (rxq->rx_buf_size > PAGE_SIZE) + rxq->rx_buf_size = PAGE_SIZE; + + /* Segment size to spilt a page in multiple equal parts */ + rxq->rx_buf_seg_size = roundup_pow_of_two(rxq->rx_buf_size); /* Allocate the parallel driver ring for Rx buffers */ - size = sizeof(*rxq->sw_rx_ring) * NUM_RX_BDS_MAX; + size = sizeof(*rxq->sw_rx_ring) * RX_RING_SIZE; rxq->sw_rx_ring = kzalloc(size, GFP_KERNEL); if (!rxq->sw_rx_ring) { DP_ERR(edev, "Rx buffers ring allocation failed\n"); @@ -1660,7 +2010,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, rc = edev->ops->common->chain_alloc(edev->cdev, QED_CHAIN_USE_TO_CONSUME_PRODUCE, QED_CHAIN_MODE_NEXT_PTR, - NUM_RX_BDS_MAX, + RX_RING_SIZE, sizeof(struct eth_rx_bd), &rxq->rx_bd_ring); @@ -1671,7 +2021,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, rc = edev->ops->common->chain_alloc(edev->cdev, QED_CHAIN_USE_TO_CONSUME, QED_CHAIN_MODE_PBL, - NUM_RX_BDS_MAX, + RX_RING_SIZE, sizeof(union eth_rx_cqe), &rxq->rx_comp_ring); if (rc) @@ -2252,6 +2602,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode) DP_INFO(edev, "Stopped Queues\n"); + qede_vlan_mark_nonconfigured(edev); edev->ops->fastpath_stop(edev->cdev); /* Release the interrupts */ @@ -2320,6 +2671,9 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) edev->state = QEDE_STATE_OPEN; mutex_unlock(&edev->qede_lock); + /* Program un-configured VLANs */ + qede_configure_vlan_filters(edev); + /* Ask for link-up using current configuration */ memset(&link_params, 0, sizeof(link_params)); link_params.link_up = true; @@ -2580,6 +2934,17 @@ static void qede_config_rx_mode(struct net_device *ndev) goto out; } + /* take care of VLAN mode */ + if (ndev->flags & IFF_PROMISC) { + qede_config_accept_any_vlan(edev, true); + } else if (!edev->non_configured_vlans) { + /* It's possible that accept_any_vlan mode is set due to a + * previous setting of IFF_PROMISC. If vlan credits are + * sufficient, disable accept_any_vlan. + */ + qede_config_accept_any_vlan(edev, false); + } + rx_mode.filter.accept_flags = accept_flags; edev->ops->filter_config(edev->cdev, &rx_mode); out: diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 9fbe92ac225b..b2160d1b9c71 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -2,7 +2,7 @@ * * Copyright (C) 2014-2015 Renesas Electronics Corporation * Copyright (C) 2015 Renesas Solutions Corp. - * Copyright (C) 2015 Cogent Embedded, Inc. <source@cogentembedded.com> + * Copyright (C) 2015-2016 Cogent Embedded, Inc. <source@cogentembedded.com> * * Based on the SuperH Ethernet driver * @@ -837,6 +837,8 @@ static inline void ravb_write(struct net_device *ndev, u32 data, iowrite32(data, priv->addr + reg); } +void ravb_modify(struct net_device *ndev, enum ravb_reg reg, u32 clear, + u32 set); int ravb_wait(struct net_device *ndev, enum ravb_reg reg, u32 mask, u32 value); irqreturn_t ravb_ptp_interrupt(struct net_device *ndev); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 744d7806a9ee..88656ceb6e29 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2,7 +2,7 @@ * * Copyright (C) 2014-2015 Renesas Electronics Corporation * Copyright (C) 2015 Renesas Solutions Corp. - * Copyright (C) 2015 Cogent Embedded, Inc. <source@cogentembedded.com> + * Copyright (C) 2015-2016 Cogent Embedded, Inc. <source@cogentembedded.com> * * Based on the SuperH Ethernet driver * @@ -42,6 +42,12 @@ NETIF_MSG_RX_ERR | \ NETIF_MSG_TX_ERR) +void ravb_modify(struct net_device *ndev, enum ravb_reg reg, u32 clear, + u32 set) +{ + ravb_write(ndev, (ravb_read(ndev, reg) & ~clear) | set, reg); +} + int ravb_wait(struct net_device *ndev, enum ravb_reg reg, u32 mask, u32 value) { int i; @@ -59,8 +65,7 @@ static int ravb_config(struct net_device *ndev) int error; /* Set config mode */ - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_CONFIG, - CCC); + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); /* Check if the operating mode is changed to the config mode */ error = ravb_wait(ndev, CSR, CSR_OPS, CSR_OPS_CONFIG); if (error) @@ -72,13 +77,8 @@ static int ravb_config(struct net_device *ndev) static void ravb_set_duplex(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); - u32 ecmr = ravb_read(ndev, ECMR); - if (priv->duplex) /* Full */ - ecmr |= ECMR_DM; - else /* Half */ - ecmr &= ~ECMR_DM; - ravb_write(ndev, ecmr, ECMR); + ravb_modify(ndev, ECMR, ECMR_DM, priv->duplex ? ECMR_DM : 0); } static void ravb_set_rate(struct net_device *ndev) @@ -92,8 +92,6 @@ static void ravb_set_rate(struct net_device *ndev) case 1000: /* 1000BASE */ ravb_write(ndev, GECMR_SPEED_1000, GECMR); break; - default: - break; } } @@ -131,13 +129,8 @@ static void ravb_mdio_ctrl(struct mdiobb_ctrl *ctrl, u32 mask, int set) { struct ravb_private *priv = container_of(ctrl, struct ravb_private, mdiobb); - u32 pir = ravb_read(priv->ndev, PIR); - if (set) - pir |= mask; - else - pir &= ~mask; - ravb_write(priv->ndev, pir, PIR); + ravb_modify(priv->ndev, PIR, mask, set ? mask : 0); } /* MDC pin control */ @@ -393,9 +386,9 @@ static int ravb_dmac_init(struct net_device *ndev) ravb_ring_format(ndev, RAVB_NC); #if defined(__LITTLE_ENDIAN) - ravb_write(ndev, ravb_read(ndev, CCC) & ~CCC_BOC, CCC); + ravb_modify(ndev, CCC, CCC_BOC, 0); #else - ravb_write(ndev, ravb_read(ndev, CCC) | CCC_BOC, CCC); + ravb_modify(ndev, CCC, CCC_BOC, CCC_BOC); #endif /* Set AVB RX */ @@ -418,8 +411,7 @@ static int ravb_dmac_init(struct net_device *ndev) ravb_write(ndev, TIC_FTE0 | TIC_FTE1 | TIC_TFUE, TIC); /* Setting the control will start the AVB-DMAC process. */ - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_OPERATION, - CCC); + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_OPERATION); return 0; } @@ -493,7 +485,7 @@ static void ravb_get_tx_tstamp(struct net_device *ndev) break; } } - ravb_write(ndev, ravb_read(ndev, TCCR) | TCCR_TFR, TCCR); + ravb_modify(ndev, TCCR, TCCR_TFR, TCCR_TFR); } } @@ -613,13 +605,13 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) static void ravb_rcv_snd_disable(struct net_device *ndev) { /* Disable TX and RX */ - ravb_write(ndev, ravb_read(ndev, ECMR) & ~(ECMR_RE | ECMR_TE), ECMR); + ravb_modify(ndev, ECMR, ECMR_RE | ECMR_TE, 0); } static void ravb_rcv_snd_enable(struct net_device *ndev) { /* Enable TX and RX */ - ravb_write(ndev, ravb_read(ndev, ECMR) | ECMR_RE | ECMR_TE, ECMR); + ravb_modify(ndev, ECMR, ECMR_RE | ECMR_TE, ECMR_RE | ECMR_TE); } /* function for waiting dma process finished */ @@ -812,8 +804,8 @@ static int ravb_poll(struct napi_struct *napi, int budget) /* Re-enable RX/TX interrupts */ spin_lock_irqsave(&priv->lock, flags); - ravb_write(ndev, ravb_read(ndev, RIC0) | mask, RIC0); - ravb_write(ndev, ravb_read(ndev, TIC) | mask, TIC); + ravb_modify(ndev, RIC0, mask, mask); + ravb_modify(ndev, TIC, mask, mask); mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); @@ -852,8 +844,7 @@ static void ravb_adjust_link(struct net_device *ndev) ravb_set_rate(ndev); } if (!priv->link) { - ravb_write(ndev, ravb_read(ndev, ECMR) & ~ECMR_TXF, - ECMR); + ravb_modify(ndev, ECMR, ECMR_TXF, 0); new_state = true; priv->link = phydev->link; if (priv->no_avb_link) @@ -1397,7 +1388,7 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) desc--; desc->die_dt = DT_FSTART; - ravb_write(ndev, ravb_read(ndev, TCCR) | (TCCR_TSRQ0 << q), TCCR); + ravb_modify(ndev, TCCR, TCCR_TSRQ0 << q, TCCR_TSRQ0 << q); priv->cur_tx[q] += NUM_TX_DESC; if (priv->cur_tx[q] - priv->dirty_tx[q] > @@ -1472,15 +1463,10 @@ static void ravb_set_rx_mode(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); unsigned long flags; - u32 ecmr; spin_lock_irqsave(&priv->lock, flags); - ecmr = ravb_read(ndev, ECMR); - if (ndev->flags & IFF_PROMISC) - ecmr |= ECMR_PRM; - else - ecmr &= ~ECMR_PRM; - ravb_write(ndev, ecmr, ECMR); + ravb_modify(ndev, ECMR, ECMR_PRM, + ndev->flags & IFF_PROMISC ? ECMR_PRM : 0); mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); } @@ -1808,14 +1794,12 @@ static int ravb_probe(struct platform_device *pdev) /* Set AVB config mode */ if (chip_id == RCAR_GEN2) { - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | - CCC_OPC_CONFIG, CCC); + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); /* Set CSEL value */ - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | - CCC_CSEL_HPB, CCC); + ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB); } else { - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | - CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB, CCC); + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG | + CCC_GAC | CCC_CSEL_HPB); } /* Set GTI value */ @@ -1824,7 +1808,7 @@ static int ravb_probe(struct platform_device *pdev) goto out_release; /* Request GTI loading */ - ravb_write(ndev, ravb_read(ndev, GCCR) | GCCR_LTI, GCCR); + ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI); /* Allocate descriptor base address table */ priv->desc_bat_size = sizeof(struct ravb_desc) * DBAT_ENTRY_NUM; diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c index 7a8ce920c49e..57992ccc4657 100644 --- a/drivers/net/ethernet/renesas/ravb_ptp.c +++ b/drivers/net/ethernet/renesas/ravb_ptp.c @@ -2,7 +2,7 @@ * * Copyright (C) 2013-2015 Renesas Electronics Corporation * Copyright (C) 2015 Renesas Solutions Corp. - * Copyright (C) 2015 Cogent Embedded, Inc. <source@cogentembedded.com> + * Copyright (C) 2015-2016 Cogent Embedded, Inc. <source@cogentembedded.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ static int ravb_ptp_tcr_request(struct ravb_private *priv, u32 request) if (error) return error; - ravb_write(ndev, ravb_read(ndev, GCCR) | request, GCCR); + ravb_modify(ndev, GCCR, request, request); return ravb_wait(ndev, GCCR, GCCR_TCR, GCCR_TCR_NOREQ); } @@ -185,7 +185,6 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp, ptp.info); struct net_device *ndev = priv->ndev; unsigned long flags; - u32 gic; if (req->index) return -EINVAL; @@ -195,12 +194,7 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp, priv->ptp.extts[req->index] = on; spin_lock_irqsave(&priv->lock, flags); - gic = ravb_read(ndev, GIC); - if (on) - gic |= GIC_PTCE; - else - gic &= ~GIC_PTCE; - ravb_write(ndev, gic, GIC); + ravb_modify(ndev, GIC, GIC_PTCE, on ? GIC_PTCE : 0); mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); @@ -216,7 +210,6 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp, struct ravb_ptp_perout *perout; unsigned long flags; int error = 0; - u32 gic; if (req->index) return -EINVAL; @@ -248,9 +241,7 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp, error = ravb_ptp_update_compare(priv, (u32)start_ns); if (!error) { /* Unmask interrupt */ - gic = ravb_read(ndev, GIC); - gic |= GIC_PTME; - ravb_write(ndev, gic, GIC); + ravb_modify(ndev, GIC, GIC_PTME, GIC_PTME); } } else { spin_lock_irqsave(&priv->lock, flags); @@ -259,9 +250,7 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp, perout->period = 0; /* Mask interrupt */ - gic = ravb_read(ndev, GIC); - gic &= ~GIC_PTME; - ravb_write(ndev, gic, GIC); + ravb_modify(ndev, GIC, GIC_PTME, 0); } mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); @@ -331,7 +320,6 @@ void ravb_ptp_init(struct net_device *ndev, struct platform_device *pdev) { struct ravb_private *priv = netdev_priv(ndev); unsigned long flags; - u32 gccr; priv->ptp.info = ravb_ptp_info; @@ -340,8 +328,7 @@ void ravb_ptp_init(struct net_device *ndev, struct platform_device *pdev) spin_lock_irqsave(&priv->lock, flags); ravb_wait(ndev, GCCR, GCCR_TCR, GCCR_TCR_NOREQ); - gccr = ravb_read(ndev, GCCR) & ~GCCR_TCSS; - ravb_write(ndev, gccr | GCCR_TCSS_ADJGPTP, GCCR); + ravb_modify(ndev, GCCR, GCCR_TCSS, GCCR_TCSS_ADJGPTP); mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index dfa9e59c9442..a2767336b7c5 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3,7 +3,7 @@ * Copyright (C) 2014 Renesas Electronics Corporation * Copyright (C) 2006-2012 Nobuhiro Iwamatsu * Copyright (C) 2008-2014 Renesas Solutions Corp. - * Copyright (C) 2013-2014 Cogent Embedded, Inc. + * Copyright (C) 2013-2016 Cogent Embedded, Inc. * Copyright (C) 2014 Codethink Limited * * This program is free software; you can redistribute it and/or modify it @@ -428,6 +428,13 @@ static u32 sh_eth_read(struct net_device *ndev, int enum_index) return ioread32(mdp->addr + offset); } +static void sh_eth_modify(struct net_device *ndev, int enum_index, u32 clear, + u32 set) +{ + sh_eth_write(ndev, (sh_eth_read(ndev, enum_index) & ~clear) | set, + enum_index); +} + static bool sh_eth_is_gether(struct sh_eth_private *mdp) { return mdp->reg_offset == sh_eth_offset_gigabit; @@ -467,10 +474,7 @@ static void sh_eth_set_duplex(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); - if (mdp->duplex) /* Full */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_DM, ECMR); - else /* Half */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_DM, ECMR); + sh_eth_modify(ndev, ECMR, ECMR_DM, mdp->duplex ? ECMR_DM : 0); } static void sh_eth_chip_reset(struct net_device *ndev) @@ -496,8 +500,6 @@ static void sh_eth_set_rate_gether(struct net_device *ndev) case 1000: /* 1000BASE */ sh_eth_write(ndev, GECMR_1000, GECMR); break; - default: - break; } } @@ -583,12 +585,10 @@ static void sh_eth_set_rate_r8a777x(struct net_device *ndev) switch (mdp->speed) { case 10: /* 10BASE */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_ELB, ECMR); + sh_eth_modify(ndev, ECMR, ECMR_ELB, 0); break; case 100:/* 100BASE */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_ELB, ECMR); - break; - default: + sh_eth_modify(ndev, ECMR, ECMR_ELB, ECMR_ELB); break; } } @@ -649,12 +649,10 @@ static void sh_eth_set_rate_sh7724(struct net_device *ndev) switch (mdp->speed) { case 10: /* 10BASE */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_RTM, ECMR); + sh_eth_modify(ndev, ECMR, ECMR_RTM, 0); break; case 100:/* 100BASE */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_RTM, ECMR); - break; - default: + sh_eth_modify(ndev, ECMR, ECMR_RTM, ECMR_RTM); break; } } @@ -694,8 +692,6 @@ static void sh_eth_set_rate_sh7757(struct net_device *ndev) case 100:/* 100BASE */ sh_eth_write(ndev, 1, RTRATE); break; - default: - break; } } @@ -763,8 +759,6 @@ static void sh_eth_set_rate_giga(struct net_device *ndev) case 1000: /* 1000BASE */ sh_eth_write(ndev, 0x00000020, GECMR); break; - default: - break; } } @@ -924,8 +918,7 @@ static int sh_eth_reset(struct net_device *ndev) if (sh_eth_is_gether(mdp) || sh_eth_is_rz_fast_ether(mdp)) { sh_eth_write(ndev, EDSR_ENALL, EDSR); - sh_eth_write(ndev, sh_eth_read(ndev, EDMR) | EDMR_SRST_GETHER, - EDMR); + sh_eth_modify(ndev, EDMR, EDMR_SRST_GETHER, EDMR_SRST_GETHER); ret = sh_eth_check_reset(ndev); if (ret) @@ -949,11 +942,9 @@ static int sh_eth_reset(struct net_device *ndev) if (mdp->cd->select_mii) sh_eth_select_mii(ndev); } else { - sh_eth_write(ndev, sh_eth_read(ndev, EDMR) | EDMR_SRST_ETHER, - EDMR); + sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, EDMR_SRST_ETHER); mdelay(3); - sh_eth_write(ndev, sh_eth_read(ndev, EDMR) & ~EDMR_SRST_ETHER, - EDMR); + sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, 0); } return ret; @@ -1285,7 +1276,7 @@ static int sh_eth_dev_init(struct net_device *ndev, bool start) sh_eth_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR); - sh_eth_write(ndev, sh_eth_read(ndev, EESR), EESR); + sh_eth_modify(ndev, EESR, 0, 0); if (start) { mdp->irq_enabled = true; sh_eth_write(ndev, mdp->cd->eesipr_value, EESIPR); @@ -1532,15 +1523,13 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) static void sh_eth_rcv_snd_disable(struct net_device *ndev) { /* disable tx and rx */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & - ~(ECMR_RE | ECMR_TE), ECMR); + sh_eth_modify(ndev, ECMR, ECMR_RE | ECMR_TE, 0); } static void sh_eth_rcv_snd_enable(struct net_device *ndev) { /* enable tx and rx */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | - (ECMR_RE | ECMR_TE), ECMR); + sh_eth_modify(ndev, ECMR, ECMR_RE | ECMR_TE, ECMR_RE | ECMR_TE); } /* error control function */ @@ -1569,13 +1558,11 @@ static void sh_eth_error(struct net_device *ndev, u32 intr_status) sh_eth_rcv_snd_disable(ndev); } else { /* Link Up */ - sh_eth_write(ndev, sh_eth_read(ndev, EESIPR) & - ~DMAC_M_ECI, EESIPR); + sh_eth_modify(ndev, EESIPR, DMAC_M_ECI, 0); /* clear int */ - sh_eth_write(ndev, sh_eth_read(ndev, ECSR), - ECSR); - sh_eth_write(ndev, sh_eth_read(ndev, EESIPR) | - DMAC_M_ECI, EESIPR); + sh_eth_modify(ndev, ECSR, 0, 0); + sh_eth_modify(ndev, EESIPR, DMAC_M_ECI, + DMAC_M_ECI); /* enable tx and rx */ sh_eth_rcv_snd_enable(ndev); } @@ -1765,9 +1752,7 @@ static void sh_eth_adjust_link(struct net_device *ndev) mdp->cd->set_rate(ndev); } if (!mdp->link) { - sh_eth_write(ndev, - sh_eth_read(ndev, ECMR) & ~ECMR_TXF, - ECMR); + sh_eth_modify(ndev, ECMR, ECMR_TXF, 0); new_state = 1; mdp->link = phydev->link; if (mdp->cd->no_psr || mdp->no_ether_link) @@ -2922,8 +2907,6 @@ static const u16 *sh_eth_get_register_offset(int register_type) case SH_ETH_REG_FAST_SH3_SH2: reg_offset = sh_eth_offset_fast_sh3_sh2; break; - default: - break; } return reg_offset; diff --git a/drivers/net/ethernet/rocker/Makefile b/drivers/net/ethernet/rocker/Makefile index f85fb12f36f1..faa36acee223 100644 --- a/drivers/net/ethernet/rocker/Makefile +++ b/drivers/net/ethernet/rocker/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_ROCKER) += rocker.o +rocker-y := rocker_main.o rocker_tlv.o rocker_ofdpa.o diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c deleted file mode 100644 index 166a7fc87e2f..000000000000 --- a/drivers/net/ethernet/rocker/rocker.c +++ /dev/null @@ -1,5495 +0,0 @@ -/* - * drivers/net/ethernet/rocker/rocker.c - Rocker switch device driver - * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> - * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/sched.h> -#include <linux/wait.h> -#include <linux/spinlock.h> -#include <linux/hashtable.h> -#include <linux/crc32.h> -#include <linux/sort.h> -#include <linux/random.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/skbuff.h> -#include <linux/socket.h> -#include <linux/etherdevice.h> -#include <linux/ethtool.h> -#include <linux/if_ether.h> -#include <linux/if_vlan.h> -#include <linux/if_bridge.h> -#include <linux/bitops.h> -#include <linux/ctype.h> -#include <net/switchdev.h> -#include <net/rtnetlink.h> -#include <net/ip_fib.h> -#include <net/netevent.h> -#include <net/arp.h> -#include <linux/io-64-nonatomic-lo-hi.h> -#include <generated/utsrelease.h> - -#include "rocker.h" - -static const char rocker_driver_name[] = "rocker"; - -static const struct pci_device_id rocker_pci_id_table[] = { - {PCI_VDEVICE(REDHAT, PCI_DEVICE_ID_REDHAT_ROCKER), 0}, - {0, } -}; - -struct rocker_flow_tbl_key { - u32 priority; - enum rocker_of_dpa_table_id tbl_id; - union { - struct { - u32 in_pport; - u32 in_pport_mask; - enum rocker_of_dpa_table_id goto_tbl; - } ig_port; - struct { - u32 in_pport; - __be16 vlan_id; - __be16 vlan_id_mask; - enum rocker_of_dpa_table_id goto_tbl; - bool untagged; - __be16 new_vlan_id; - } vlan; - struct { - u32 in_pport; - u32 in_pport_mask; - __be16 eth_type; - u8 eth_dst[ETH_ALEN]; - u8 eth_dst_mask[ETH_ALEN]; - __be16 vlan_id; - __be16 vlan_id_mask; - enum rocker_of_dpa_table_id goto_tbl; - bool copy_to_cpu; - } term_mac; - struct { - __be16 eth_type; - __be32 dst4; - __be32 dst4_mask; - enum rocker_of_dpa_table_id goto_tbl; - u32 group_id; - } ucast_routing; - struct { - u8 eth_dst[ETH_ALEN]; - u8 eth_dst_mask[ETH_ALEN]; - int has_eth_dst; - int has_eth_dst_mask; - __be16 vlan_id; - u32 tunnel_id; - enum rocker_of_dpa_table_id goto_tbl; - u32 group_id; - bool copy_to_cpu; - } bridge; - struct { - u32 in_pport; - u32 in_pport_mask; - u8 eth_src[ETH_ALEN]; - u8 eth_src_mask[ETH_ALEN]; - u8 eth_dst[ETH_ALEN]; - u8 eth_dst_mask[ETH_ALEN]; - __be16 eth_type; - __be16 vlan_id; - __be16 vlan_id_mask; - u8 ip_proto; - u8 ip_proto_mask; - u8 ip_tos; - u8 ip_tos_mask; - u32 group_id; - } acl; - }; -}; - -struct rocker_flow_tbl_entry { - struct hlist_node entry; - u32 cmd; - u64 cookie; - struct rocker_flow_tbl_key key; - size_t key_len; - u32 key_crc32; /* key */ -}; - -struct rocker_group_tbl_entry { - struct hlist_node entry; - u32 cmd; - u32 group_id; /* key */ - u16 group_count; - u32 *group_ids; - union { - struct { - u8 pop_vlan; - } l2_interface; - struct { - u8 eth_src[ETH_ALEN]; - u8 eth_dst[ETH_ALEN]; - __be16 vlan_id; - u32 group_id; - } l2_rewrite; - struct { - u8 eth_src[ETH_ALEN]; - u8 eth_dst[ETH_ALEN]; - __be16 vlan_id; - bool ttl_check; - u32 group_id; - } l3_unicast; - }; -}; - -struct rocker_fdb_tbl_entry { - struct hlist_node entry; - u32 key_crc32; /* key */ - bool learned; - unsigned long touched; - struct rocker_fdb_tbl_key { - struct rocker_port *rocker_port; - u8 addr[ETH_ALEN]; - __be16 vlan_id; - } key; -}; - -struct rocker_internal_vlan_tbl_entry { - struct hlist_node entry; - int ifindex; /* key */ - u32 ref_count; - __be16 vlan_id; -}; - -struct rocker_neigh_tbl_entry { - struct hlist_node entry; - __be32 ip_addr; /* key */ - struct net_device *dev; - u32 ref_count; - u32 index; - u8 eth_dst[ETH_ALEN]; - bool ttl_check; -}; - -struct rocker_desc_info { - char *data; /* mapped */ - size_t data_size; - size_t tlv_size; - struct rocker_desc *desc; - dma_addr_t mapaddr; -}; - -struct rocker_dma_ring_info { - size_t size; - u32 head; - u32 tail; - struct rocker_desc *desc; /* mapped */ - dma_addr_t mapaddr; - struct rocker_desc_info *desc_info; - unsigned int type; -}; - -struct rocker; - -enum { - ROCKER_CTRL_LINK_LOCAL_MCAST, - ROCKER_CTRL_LOCAL_ARP, - ROCKER_CTRL_IPV4_MCAST, - ROCKER_CTRL_IPV6_MCAST, - ROCKER_CTRL_DFLT_BRIDGING, - ROCKER_CTRL_DFLT_OVS, - ROCKER_CTRL_MAX, -}; - -#define ROCKER_INTERNAL_VLAN_ID_BASE 0x0f00 -#define ROCKER_N_INTERNAL_VLANS 255 -#define ROCKER_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID) -#define ROCKER_INTERNAL_VLAN_BITMAP_LEN BITS_TO_LONGS(ROCKER_N_INTERNAL_VLANS) - -struct rocker_port { - struct net_device *dev; - struct net_device *bridge_dev; - struct rocker *rocker; - unsigned int port_number; - u32 pport; - __be16 internal_vlan_id; - int stp_state; - u32 brport_flags; - unsigned long ageing_time; - bool ctrls[ROCKER_CTRL_MAX]; - unsigned long vlan_bitmap[ROCKER_VLAN_BITMAP_LEN]; - struct napi_struct napi_tx; - struct napi_struct napi_rx; - struct rocker_dma_ring_info tx_ring; - struct rocker_dma_ring_info rx_ring; -}; - -struct rocker { - struct pci_dev *pdev; - u8 __iomem *hw_addr; - struct msix_entry *msix_entries; - unsigned int port_count; - struct rocker_port **ports; - struct { - u64 id; - } hw; - spinlock_t cmd_ring_lock; /* for cmd ring accesses */ - struct rocker_dma_ring_info cmd_ring; - struct rocker_dma_ring_info event_ring; - DECLARE_HASHTABLE(flow_tbl, 16); - spinlock_t flow_tbl_lock; /* for flow tbl accesses */ - u64 flow_tbl_next_cookie; - DECLARE_HASHTABLE(group_tbl, 16); - spinlock_t group_tbl_lock; /* for group tbl accesses */ - struct timer_list fdb_cleanup_timer; - DECLARE_HASHTABLE(fdb_tbl, 16); - spinlock_t fdb_tbl_lock; /* for fdb tbl accesses */ - unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN]; - DECLARE_HASHTABLE(internal_vlan_tbl, 8); - spinlock_t internal_vlan_tbl_lock; /* for vlan tbl accesses */ - DECLARE_HASHTABLE(neigh_tbl, 16); - spinlock_t neigh_tbl_lock; /* for neigh tbl accesses */ - u32 neigh_tbl_next_index; -}; - -static const u8 zero_mac[ETH_ALEN] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; -static const u8 ff_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -static const u8 ll_mac[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; -static const u8 ll_mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0 }; -static const u8 mcast_mac[ETH_ALEN] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 }; -static const u8 ipv4_mcast[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 }; -static const u8 ipv4_mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0x80, 0x00, 0x00 }; -static const u8 ipv6_mcast[ETH_ALEN] = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 }; -static const u8 ipv6_mask[ETH_ALEN] = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }; - -/* Rocker priority levels for flow table entries. Higher - * priority match takes precedence over lower priority match. - */ - -enum { - ROCKER_PRIORITY_UNKNOWN = 0, - ROCKER_PRIORITY_IG_PORT = 1, - ROCKER_PRIORITY_VLAN = 1, - ROCKER_PRIORITY_TERM_MAC_UCAST = 0, - ROCKER_PRIORITY_TERM_MAC_MCAST = 1, - ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_EXACT = 1, - ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_WILD = 2, - ROCKER_PRIORITY_BRIDGING_VLAN = 3, - ROCKER_PRIORITY_BRIDGING_TENANT_DFLT_EXACT = 1, - ROCKER_PRIORITY_BRIDGING_TENANT_DFLT_WILD = 2, - ROCKER_PRIORITY_BRIDGING_TENANT = 3, - ROCKER_PRIORITY_ACL_CTRL = 3, - ROCKER_PRIORITY_ACL_NORMAL = 2, - ROCKER_PRIORITY_ACL_DFLT = 1, -}; - -static bool rocker_vlan_id_is_internal(__be16 vlan_id) -{ - u16 start = ROCKER_INTERNAL_VLAN_ID_BASE; - u16 end = 0xffe; - u16 _vlan_id = ntohs(vlan_id); - - return (_vlan_id >= start && _vlan_id <= end); -} - -static __be16 rocker_port_vid_to_vlan(const struct rocker_port *rocker_port, - u16 vid, bool *pop_vlan) -{ - __be16 vlan_id; - - if (pop_vlan) - *pop_vlan = false; - vlan_id = htons(vid); - if (!vlan_id) { - vlan_id = rocker_port->internal_vlan_id; - if (pop_vlan) - *pop_vlan = true; - } - - return vlan_id; -} - -static u16 rocker_port_vlan_to_vid(const struct rocker_port *rocker_port, - __be16 vlan_id) -{ - if (rocker_vlan_id_is_internal(vlan_id)) - return 0; - - return ntohs(vlan_id); -} - -static bool rocker_port_is_bridged(const struct rocker_port *rocker_port) -{ - return rocker_port->bridge_dev && - netif_is_bridge_master(rocker_port->bridge_dev); -} - -static bool rocker_port_is_ovsed(const struct rocker_port *rocker_port) -{ - return rocker_port->bridge_dev && - netif_is_ovs_master(rocker_port->bridge_dev); -} - -#define ROCKER_OP_FLAG_REMOVE BIT(0) -#define ROCKER_OP_FLAG_NOWAIT BIT(1) -#define ROCKER_OP_FLAG_LEARNED BIT(2) -#define ROCKER_OP_FLAG_REFRESH BIT(3) - -static void *__rocker_port_mem_alloc(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - size_t size) -{ - struct switchdev_trans_item *elem = NULL; - gfp_t gfp_flags = (flags & ROCKER_OP_FLAG_NOWAIT) ? - GFP_ATOMIC : GFP_KERNEL; - - /* If in transaction prepare phase, allocate the memory - * and enqueue it on a transaction. If in transaction - * commit phase, dequeue the memory from the transaction - * rather than re-allocating the memory. The idea is the - * driver code paths for prepare and commit are identical - * so the memory allocated in the prepare phase is the - * memory used in the commit phase. - */ - - if (!trans) { - elem = kzalloc(size + sizeof(*elem), gfp_flags); - } else if (switchdev_trans_ph_prepare(trans)) { - elem = kzalloc(size + sizeof(*elem), gfp_flags); - if (!elem) - return NULL; - switchdev_trans_item_enqueue(trans, elem, kfree, elem); - } else { - elem = switchdev_trans_item_dequeue(trans); - } - - return elem ? elem + 1 : NULL; -} - -static void *rocker_port_kzalloc(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - size_t size) -{ - return __rocker_port_mem_alloc(rocker_port, trans, flags, size); -} - -static void *rocker_port_kcalloc(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - size_t n, size_t size) -{ - return __rocker_port_mem_alloc(rocker_port, trans, flags, n * size); -} - -static void rocker_port_kfree(struct switchdev_trans *trans, const void *mem) -{ - struct switchdev_trans_item *elem; - - /* Frees are ignored if in transaction prepare phase. The - * memory remains on the per-port list until freed in the - * commit phase. - */ - - if (switchdev_trans_ph_prepare(trans)) - return; - - elem = (struct switchdev_trans_item *) mem - 1; - kfree(elem); -} - -struct rocker_wait { - wait_queue_head_t wait; - bool done; - bool nowait; -}; - -static void rocker_wait_reset(struct rocker_wait *wait) -{ - wait->done = false; - wait->nowait = false; -} - -static void rocker_wait_init(struct rocker_wait *wait) -{ - init_waitqueue_head(&wait->wait); - rocker_wait_reset(wait); -} - -static struct rocker_wait *rocker_wait_create(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - int flags) -{ - struct rocker_wait *wait; - - wait = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*wait)); - if (!wait) - return NULL; - rocker_wait_init(wait); - return wait; -} - -static void rocker_wait_destroy(struct switchdev_trans *trans, - struct rocker_wait *wait) -{ - rocker_port_kfree(trans, wait); -} - -static bool rocker_wait_event_timeout(struct rocker_wait *wait, - unsigned long timeout) -{ - wait_event_timeout(wait->wait, wait->done, HZ / 10); - if (!wait->done) - return false; - return true; -} - -static void rocker_wait_wake_up(struct rocker_wait *wait) -{ - wait->done = true; - wake_up(&wait->wait); -} - -static u32 rocker_msix_vector(const struct rocker *rocker, unsigned int vector) -{ - return rocker->msix_entries[vector].vector; -} - -static u32 rocker_msix_tx_vector(const struct rocker_port *rocker_port) -{ - return rocker_msix_vector(rocker_port->rocker, - ROCKER_MSIX_VEC_TX(rocker_port->port_number)); -} - -static u32 rocker_msix_rx_vector(const struct rocker_port *rocker_port) -{ - return rocker_msix_vector(rocker_port->rocker, - ROCKER_MSIX_VEC_RX(rocker_port->port_number)); -} - -#define rocker_write32(rocker, reg, val) \ - writel((val), (rocker)->hw_addr + (ROCKER_ ## reg)) -#define rocker_read32(rocker, reg) \ - readl((rocker)->hw_addr + (ROCKER_ ## reg)) -#define rocker_write64(rocker, reg, val) \ - writeq((val), (rocker)->hw_addr + (ROCKER_ ## reg)) -#define rocker_read64(rocker, reg) \ - readq((rocker)->hw_addr + (ROCKER_ ## reg)) - -/***************************** - * HW basic testing functions - *****************************/ - -static int rocker_reg_test(const struct rocker *rocker) -{ - const struct pci_dev *pdev = rocker->pdev; - u64 test_reg; - u64 rnd; - - rnd = prandom_u32(); - rnd >>= 1; - rocker_write32(rocker, TEST_REG, rnd); - test_reg = rocker_read32(rocker, TEST_REG); - if (test_reg != rnd * 2) { - dev_err(&pdev->dev, "unexpected 32bit register value %08llx, expected %08llx\n", - test_reg, rnd * 2); - return -EIO; - } - - rnd = prandom_u32(); - rnd <<= 31; - rnd |= prandom_u32(); - rocker_write64(rocker, TEST_REG64, rnd); - test_reg = rocker_read64(rocker, TEST_REG64); - if (test_reg != rnd * 2) { - dev_err(&pdev->dev, "unexpected 64bit register value %16llx, expected %16llx\n", - test_reg, rnd * 2); - return -EIO; - } - - return 0; -} - -static int rocker_dma_test_one(const struct rocker *rocker, - struct rocker_wait *wait, u32 test_type, - dma_addr_t dma_handle, const unsigned char *buf, - const unsigned char *expect, size_t size) -{ - const struct pci_dev *pdev = rocker->pdev; - int i; - - rocker_wait_reset(wait); - rocker_write32(rocker, TEST_DMA_CTRL, test_type); - - if (!rocker_wait_event_timeout(wait, HZ / 10)) { - dev_err(&pdev->dev, "no interrupt received within a timeout\n"); - return -EIO; - } - - for (i = 0; i < size; i++) { - if (buf[i] != expect[i]) { - dev_err(&pdev->dev, "unexpected memory content %02x at byte %x\n, %02x expected", - buf[i], i, expect[i]); - return -EIO; - } - } - return 0; -} - -#define ROCKER_TEST_DMA_BUF_SIZE (PAGE_SIZE * 4) -#define ROCKER_TEST_DMA_FILL_PATTERN 0x96 - -static int rocker_dma_test_offset(const struct rocker *rocker, - struct rocker_wait *wait, int offset) -{ - struct pci_dev *pdev = rocker->pdev; - unsigned char *alloc; - unsigned char *buf; - unsigned char *expect; - dma_addr_t dma_handle; - int i; - int err; - - alloc = kzalloc(ROCKER_TEST_DMA_BUF_SIZE * 2 + offset, - GFP_KERNEL | GFP_DMA); - if (!alloc) - return -ENOMEM; - buf = alloc + offset; - expect = buf + ROCKER_TEST_DMA_BUF_SIZE; - - dma_handle = pci_map_single(pdev, buf, ROCKER_TEST_DMA_BUF_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(pdev, dma_handle)) { - err = -EIO; - goto free_alloc; - } - - rocker_write64(rocker, TEST_DMA_ADDR, dma_handle); - rocker_write32(rocker, TEST_DMA_SIZE, ROCKER_TEST_DMA_BUF_SIZE); - - memset(expect, ROCKER_TEST_DMA_FILL_PATTERN, ROCKER_TEST_DMA_BUF_SIZE); - err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_FILL, - dma_handle, buf, expect, - ROCKER_TEST_DMA_BUF_SIZE); - if (err) - goto unmap; - - memset(expect, 0, ROCKER_TEST_DMA_BUF_SIZE); - err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_CLEAR, - dma_handle, buf, expect, - ROCKER_TEST_DMA_BUF_SIZE); - if (err) - goto unmap; - - prandom_bytes(buf, ROCKER_TEST_DMA_BUF_SIZE); - for (i = 0; i < ROCKER_TEST_DMA_BUF_SIZE; i++) - expect[i] = ~buf[i]; - err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_INVERT, - dma_handle, buf, expect, - ROCKER_TEST_DMA_BUF_SIZE); - if (err) - goto unmap; - -unmap: - pci_unmap_single(pdev, dma_handle, ROCKER_TEST_DMA_BUF_SIZE, - PCI_DMA_BIDIRECTIONAL); -free_alloc: - kfree(alloc); - - return err; -} - -static int rocker_dma_test(const struct rocker *rocker, - struct rocker_wait *wait) -{ - int i; - int err; - - for (i = 0; i < 8; i++) { - err = rocker_dma_test_offset(rocker, wait, i); - if (err) - return err; - } - return 0; -} - -static irqreturn_t rocker_test_irq_handler(int irq, void *dev_id) -{ - struct rocker_wait *wait = dev_id; - - rocker_wait_wake_up(wait); - - return IRQ_HANDLED; -} - -static int rocker_basic_hw_test(const struct rocker *rocker) -{ - const struct pci_dev *pdev = rocker->pdev; - struct rocker_wait wait; - int err; - - err = rocker_reg_test(rocker); - if (err) { - dev_err(&pdev->dev, "reg test failed\n"); - return err; - } - - err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_TEST), - rocker_test_irq_handler, 0, - rocker_driver_name, &wait); - if (err) { - dev_err(&pdev->dev, "cannot assign test irq\n"); - return err; - } - - rocker_wait_init(&wait); - rocker_write32(rocker, TEST_IRQ, ROCKER_MSIX_VEC_TEST); - - if (!rocker_wait_event_timeout(&wait, HZ / 10)) { - dev_err(&pdev->dev, "no interrupt received within a timeout\n"); - err = -EIO; - goto free_irq; - } - - err = rocker_dma_test(rocker, &wait); - if (err) - dev_err(&pdev->dev, "dma test failed\n"); - -free_irq: - free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_TEST), &wait); - return err; -} - -/****** - * TLV - ******/ - -#define ROCKER_TLV_ALIGNTO 8U -#define ROCKER_TLV_ALIGN(len) \ - (((len) + ROCKER_TLV_ALIGNTO - 1) & ~(ROCKER_TLV_ALIGNTO - 1)) -#define ROCKER_TLV_HDRLEN ROCKER_TLV_ALIGN(sizeof(struct rocker_tlv)) - -/* <------- ROCKER_TLV_HDRLEN -------> <--- ROCKER_TLV_ALIGN(payload) ---> - * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+ - * | Header | Pad | Payload | Pad | - * | (struct rocker_tlv) | ing | | ing | - * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+ - * <--------------------------- tlv->len --------------------------> - */ - -static struct rocker_tlv *rocker_tlv_next(const struct rocker_tlv *tlv, - int *remaining) -{ - int totlen = ROCKER_TLV_ALIGN(tlv->len); - - *remaining -= totlen; - return (struct rocker_tlv *) ((char *) tlv + totlen); -} - -static int rocker_tlv_ok(const struct rocker_tlv *tlv, int remaining) -{ - return remaining >= (int) ROCKER_TLV_HDRLEN && - tlv->len >= ROCKER_TLV_HDRLEN && - tlv->len <= remaining; -} - -#define rocker_tlv_for_each(pos, head, len, rem) \ - for (pos = head, rem = len; \ - rocker_tlv_ok(pos, rem); \ - pos = rocker_tlv_next(pos, &(rem))) - -#define rocker_tlv_for_each_nested(pos, tlv, rem) \ - rocker_tlv_for_each(pos, rocker_tlv_data(tlv), \ - rocker_tlv_len(tlv), rem) - -static int rocker_tlv_attr_size(int payload) -{ - return ROCKER_TLV_HDRLEN + payload; -} - -static int rocker_tlv_total_size(int payload) -{ - return ROCKER_TLV_ALIGN(rocker_tlv_attr_size(payload)); -} - -static int rocker_tlv_padlen(int payload) -{ - return rocker_tlv_total_size(payload) - rocker_tlv_attr_size(payload); -} - -static int rocker_tlv_type(const struct rocker_tlv *tlv) -{ - return tlv->type; -} - -static void *rocker_tlv_data(const struct rocker_tlv *tlv) -{ - return (char *) tlv + ROCKER_TLV_HDRLEN; -} - -static int rocker_tlv_len(const struct rocker_tlv *tlv) -{ - return tlv->len - ROCKER_TLV_HDRLEN; -} - -static u8 rocker_tlv_get_u8(const struct rocker_tlv *tlv) -{ - return *(u8 *) rocker_tlv_data(tlv); -} - -static u16 rocker_tlv_get_u16(const struct rocker_tlv *tlv) -{ - return *(u16 *) rocker_tlv_data(tlv); -} - -static __be16 rocker_tlv_get_be16(const struct rocker_tlv *tlv) -{ - return *(__be16 *) rocker_tlv_data(tlv); -} - -static u32 rocker_tlv_get_u32(const struct rocker_tlv *tlv) -{ - return *(u32 *) rocker_tlv_data(tlv); -} - -static u64 rocker_tlv_get_u64(const struct rocker_tlv *tlv) -{ - return *(u64 *) rocker_tlv_data(tlv); -} - -static void rocker_tlv_parse(const struct rocker_tlv **tb, int maxtype, - const char *buf, int buf_len) -{ - const struct rocker_tlv *tlv; - const struct rocker_tlv *head = (const struct rocker_tlv *) buf; - int rem; - - memset(tb, 0, sizeof(struct rocker_tlv *) * (maxtype + 1)); - - rocker_tlv_for_each(tlv, head, buf_len, rem) { - u32 type = rocker_tlv_type(tlv); - - if (type > 0 && type <= maxtype) - tb[type] = tlv; - } -} - -static void rocker_tlv_parse_nested(const struct rocker_tlv **tb, int maxtype, - const struct rocker_tlv *tlv) -{ - rocker_tlv_parse(tb, maxtype, rocker_tlv_data(tlv), - rocker_tlv_len(tlv)); -} - -static void rocker_tlv_parse_desc(const struct rocker_tlv **tb, int maxtype, - const struct rocker_desc_info *desc_info) -{ - rocker_tlv_parse(tb, maxtype, desc_info->data, - desc_info->desc->tlv_size); -} - -static struct rocker_tlv *rocker_tlv_start(struct rocker_desc_info *desc_info) -{ - return (struct rocker_tlv *) ((char *) desc_info->data + - desc_info->tlv_size); -} - -static int rocker_tlv_put(struct rocker_desc_info *desc_info, - int attrtype, int attrlen, const void *data) -{ - int tail_room = desc_info->data_size - desc_info->tlv_size; - int total_size = rocker_tlv_total_size(attrlen); - struct rocker_tlv *tlv; - - if (unlikely(tail_room < total_size)) - return -EMSGSIZE; - - tlv = rocker_tlv_start(desc_info); - desc_info->tlv_size += total_size; - tlv->type = attrtype; - tlv->len = rocker_tlv_attr_size(attrlen); - memcpy(rocker_tlv_data(tlv), data, attrlen); - memset((char *) tlv + tlv->len, 0, rocker_tlv_padlen(attrlen)); - return 0; -} - -static int rocker_tlv_put_u8(struct rocker_desc_info *desc_info, - int attrtype, u8 value) -{ - return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &value); -} - -static int rocker_tlv_put_u16(struct rocker_desc_info *desc_info, - int attrtype, u16 value) -{ - return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &value); -} - -static int rocker_tlv_put_be16(struct rocker_desc_info *desc_info, - int attrtype, __be16 value) -{ - return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &value); -} - -static int rocker_tlv_put_u32(struct rocker_desc_info *desc_info, - int attrtype, u32 value) -{ - return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &value); -} - -static int rocker_tlv_put_be32(struct rocker_desc_info *desc_info, - int attrtype, __be32 value) -{ - return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &value); -} - -static int rocker_tlv_put_u64(struct rocker_desc_info *desc_info, - int attrtype, u64 value) -{ - return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &value); -} - -static struct rocker_tlv * -rocker_tlv_nest_start(struct rocker_desc_info *desc_info, int attrtype) -{ - struct rocker_tlv *start = rocker_tlv_start(desc_info); - - if (rocker_tlv_put(desc_info, attrtype, 0, NULL) < 0) - return NULL; - - return start; -} - -static void rocker_tlv_nest_end(struct rocker_desc_info *desc_info, - struct rocker_tlv *start) -{ - start->len = (char *) rocker_tlv_start(desc_info) - (char *) start; -} - -static void rocker_tlv_nest_cancel(struct rocker_desc_info *desc_info, - const struct rocker_tlv *start) -{ - desc_info->tlv_size = (const char *) start - desc_info->data; -} - -/****************************************** - * DMA rings and descriptors manipulations - ******************************************/ - -static u32 __pos_inc(u32 pos, size_t limit) -{ - return ++pos == limit ? 0 : pos; -} - -static int rocker_desc_err(const struct rocker_desc_info *desc_info) -{ - int err = desc_info->desc->comp_err & ~ROCKER_DMA_DESC_COMP_ERR_GEN; - - switch (err) { - case ROCKER_OK: - return 0; - case -ROCKER_ENOENT: - return -ENOENT; - case -ROCKER_ENXIO: - return -ENXIO; - case -ROCKER_ENOMEM: - return -ENOMEM; - case -ROCKER_EEXIST: - return -EEXIST; - case -ROCKER_EINVAL: - return -EINVAL; - case -ROCKER_EMSGSIZE: - return -EMSGSIZE; - case -ROCKER_ENOTSUP: - return -EOPNOTSUPP; - case -ROCKER_ENOBUFS: - return -ENOBUFS; - } - - return -EINVAL; -} - -static void rocker_desc_gen_clear(const struct rocker_desc_info *desc_info) -{ - desc_info->desc->comp_err &= ~ROCKER_DMA_DESC_COMP_ERR_GEN; -} - -static bool rocker_desc_gen(const struct rocker_desc_info *desc_info) -{ - u32 comp_err = desc_info->desc->comp_err; - - return comp_err & ROCKER_DMA_DESC_COMP_ERR_GEN ? true : false; -} - -static void *rocker_desc_cookie_ptr_get(const struct rocker_desc_info *desc_info) -{ - return (void *)(uintptr_t)desc_info->desc->cookie; -} - -static void rocker_desc_cookie_ptr_set(const struct rocker_desc_info *desc_info, - void *ptr) -{ - desc_info->desc->cookie = (uintptr_t) ptr; -} - -static struct rocker_desc_info * -rocker_desc_head_get(const struct rocker_dma_ring_info *info) -{ - static struct rocker_desc_info *desc_info; - u32 head = __pos_inc(info->head, info->size); - - desc_info = &info->desc_info[info->head]; - if (head == info->tail) - return NULL; /* ring full */ - desc_info->tlv_size = 0; - return desc_info; -} - -static void rocker_desc_commit(const struct rocker_desc_info *desc_info) -{ - desc_info->desc->buf_size = desc_info->data_size; - desc_info->desc->tlv_size = desc_info->tlv_size; -} - -static void rocker_desc_head_set(const struct rocker *rocker, - struct rocker_dma_ring_info *info, - const struct rocker_desc_info *desc_info) -{ - u32 head = __pos_inc(info->head, info->size); - - BUG_ON(head == info->tail); - rocker_desc_commit(desc_info); - info->head = head; - rocker_write32(rocker, DMA_DESC_HEAD(info->type), head); -} - -static struct rocker_desc_info * -rocker_desc_tail_get(struct rocker_dma_ring_info *info) -{ - static struct rocker_desc_info *desc_info; - - if (info->tail == info->head) - return NULL; /* nothing to be done between head and tail */ - desc_info = &info->desc_info[info->tail]; - if (!rocker_desc_gen(desc_info)) - return NULL; /* gen bit not set, desc is not ready yet */ - info->tail = __pos_inc(info->tail, info->size); - desc_info->tlv_size = desc_info->desc->tlv_size; - return desc_info; -} - -static void rocker_dma_ring_credits_set(const struct rocker *rocker, - const struct rocker_dma_ring_info *info, - u32 credits) -{ - if (credits) - rocker_write32(rocker, DMA_DESC_CREDITS(info->type), credits); -} - -static unsigned long rocker_dma_ring_size_fix(size_t size) -{ - return max(ROCKER_DMA_SIZE_MIN, - min(roundup_pow_of_two(size), ROCKER_DMA_SIZE_MAX)); -} - -static int rocker_dma_ring_create(const struct rocker *rocker, - unsigned int type, - size_t size, - struct rocker_dma_ring_info *info) -{ - int i; - - BUG_ON(size != rocker_dma_ring_size_fix(size)); - info->size = size; - info->type = type; - info->head = 0; - info->tail = 0; - info->desc_info = kcalloc(info->size, sizeof(*info->desc_info), - GFP_KERNEL); - if (!info->desc_info) - return -ENOMEM; - - info->desc = pci_alloc_consistent(rocker->pdev, - info->size * sizeof(*info->desc), - &info->mapaddr); - if (!info->desc) { - kfree(info->desc_info); - return -ENOMEM; - } - - for (i = 0; i < info->size; i++) - info->desc_info[i].desc = &info->desc[i]; - - rocker_write32(rocker, DMA_DESC_CTRL(info->type), - ROCKER_DMA_DESC_CTRL_RESET); - rocker_write64(rocker, DMA_DESC_ADDR(info->type), info->mapaddr); - rocker_write32(rocker, DMA_DESC_SIZE(info->type), info->size); - - return 0; -} - -static void rocker_dma_ring_destroy(const struct rocker *rocker, - const struct rocker_dma_ring_info *info) -{ - rocker_write64(rocker, DMA_DESC_ADDR(info->type), 0); - - pci_free_consistent(rocker->pdev, - info->size * sizeof(struct rocker_desc), - info->desc, info->mapaddr); - kfree(info->desc_info); -} - -static void rocker_dma_ring_pass_to_producer(const struct rocker *rocker, - struct rocker_dma_ring_info *info) -{ - int i; - - BUG_ON(info->head || info->tail); - - /* When ring is consumer, we need to advance head for each desc. - * That tells hw that the desc is ready to be used by it. - */ - for (i = 0; i < info->size - 1; i++) - rocker_desc_head_set(rocker, info, &info->desc_info[i]); - rocker_desc_commit(&info->desc_info[i]); -} - -static int rocker_dma_ring_bufs_alloc(const struct rocker *rocker, - const struct rocker_dma_ring_info *info, - int direction, size_t buf_size) -{ - struct pci_dev *pdev = rocker->pdev; - int i; - int err; - - for (i = 0; i < info->size; i++) { - struct rocker_desc_info *desc_info = &info->desc_info[i]; - struct rocker_desc *desc = &info->desc[i]; - dma_addr_t dma_handle; - char *buf; - - buf = kzalloc(buf_size, GFP_KERNEL | GFP_DMA); - if (!buf) { - err = -ENOMEM; - goto rollback; - } - - dma_handle = pci_map_single(pdev, buf, buf_size, direction); - if (pci_dma_mapping_error(pdev, dma_handle)) { - kfree(buf); - err = -EIO; - goto rollback; - } - - desc_info->data = buf; - desc_info->data_size = buf_size; - dma_unmap_addr_set(desc_info, mapaddr, dma_handle); - - desc->buf_addr = dma_handle; - desc->buf_size = buf_size; - } - return 0; - -rollback: - for (i--; i >= 0; i--) { - const struct rocker_desc_info *desc_info = &info->desc_info[i]; - - pci_unmap_single(pdev, dma_unmap_addr(desc_info, mapaddr), - desc_info->data_size, direction); - kfree(desc_info->data); - } - return err; -} - -static void rocker_dma_ring_bufs_free(const struct rocker *rocker, - const struct rocker_dma_ring_info *info, - int direction) -{ - struct pci_dev *pdev = rocker->pdev; - int i; - - for (i = 0; i < info->size; i++) { - const struct rocker_desc_info *desc_info = &info->desc_info[i]; - struct rocker_desc *desc = &info->desc[i]; - - desc->buf_addr = 0; - desc->buf_size = 0; - pci_unmap_single(pdev, dma_unmap_addr(desc_info, mapaddr), - desc_info->data_size, direction); - kfree(desc_info->data); - } -} - -static int rocker_dma_rings_init(struct rocker *rocker) -{ - const struct pci_dev *pdev = rocker->pdev; - int err; - - err = rocker_dma_ring_create(rocker, ROCKER_DMA_CMD, - ROCKER_DMA_CMD_DEFAULT_SIZE, - &rocker->cmd_ring); - if (err) { - dev_err(&pdev->dev, "failed to create command dma ring\n"); - return err; - } - - spin_lock_init(&rocker->cmd_ring_lock); - - err = rocker_dma_ring_bufs_alloc(rocker, &rocker->cmd_ring, - PCI_DMA_BIDIRECTIONAL, PAGE_SIZE); - if (err) { - dev_err(&pdev->dev, "failed to alloc command dma ring buffers\n"); - goto err_dma_cmd_ring_bufs_alloc; - } - - err = rocker_dma_ring_create(rocker, ROCKER_DMA_EVENT, - ROCKER_DMA_EVENT_DEFAULT_SIZE, - &rocker->event_ring); - if (err) { - dev_err(&pdev->dev, "failed to create event dma ring\n"); - goto err_dma_event_ring_create; - } - - err = rocker_dma_ring_bufs_alloc(rocker, &rocker->event_ring, - PCI_DMA_FROMDEVICE, PAGE_SIZE); - if (err) { - dev_err(&pdev->dev, "failed to alloc event dma ring buffers\n"); - goto err_dma_event_ring_bufs_alloc; - } - rocker_dma_ring_pass_to_producer(rocker, &rocker->event_ring); - return 0; - -err_dma_event_ring_bufs_alloc: - rocker_dma_ring_destroy(rocker, &rocker->event_ring); -err_dma_event_ring_create: - rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring, - PCI_DMA_BIDIRECTIONAL); -err_dma_cmd_ring_bufs_alloc: - rocker_dma_ring_destroy(rocker, &rocker->cmd_ring); - return err; -} - -static void rocker_dma_rings_fini(struct rocker *rocker) -{ - rocker_dma_ring_bufs_free(rocker, &rocker->event_ring, - PCI_DMA_BIDIRECTIONAL); - rocker_dma_ring_destroy(rocker, &rocker->event_ring); - rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring, - PCI_DMA_BIDIRECTIONAL); - rocker_dma_ring_destroy(rocker, &rocker->cmd_ring); -} - -static int rocker_dma_rx_ring_skb_map(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - struct sk_buff *skb, size_t buf_len) -{ - const struct rocker *rocker = rocker_port->rocker; - struct pci_dev *pdev = rocker->pdev; - dma_addr_t dma_handle; - - dma_handle = pci_map_single(pdev, skb->data, buf_len, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(pdev, dma_handle)) - return -EIO; - if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_RX_FRAG_ADDR, dma_handle)) - goto tlv_put_failure; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_RX_FRAG_MAX_LEN, buf_len)) - goto tlv_put_failure; - return 0; - -tlv_put_failure: - pci_unmap_single(pdev, dma_handle, buf_len, PCI_DMA_FROMDEVICE); - desc_info->tlv_size = 0; - return -EMSGSIZE; -} - -static size_t rocker_port_rx_buf_len(const struct rocker_port *rocker_port) -{ - return rocker_port->dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; -} - -static int rocker_dma_rx_ring_skb_alloc(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info) -{ - struct net_device *dev = rocker_port->dev; - struct sk_buff *skb; - size_t buf_len = rocker_port_rx_buf_len(rocker_port); - int err; - - /* Ensure that hw will see tlv_size zero in case of an error. - * That tells hw to use another descriptor. - */ - rocker_desc_cookie_ptr_set(desc_info, NULL); - desc_info->tlv_size = 0; - - skb = netdev_alloc_skb_ip_align(dev, buf_len); - if (!skb) - return -ENOMEM; - err = rocker_dma_rx_ring_skb_map(rocker_port, desc_info, skb, buf_len); - if (err) { - dev_kfree_skb_any(skb); - return err; - } - rocker_desc_cookie_ptr_set(desc_info, skb); - return 0; -} - -static void rocker_dma_rx_ring_skb_unmap(const struct rocker *rocker, - const struct rocker_tlv **attrs) -{ - struct pci_dev *pdev = rocker->pdev; - dma_addr_t dma_handle; - size_t len; - - if (!attrs[ROCKER_TLV_RX_FRAG_ADDR] || - !attrs[ROCKER_TLV_RX_FRAG_MAX_LEN]) - return; - dma_handle = rocker_tlv_get_u64(attrs[ROCKER_TLV_RX_FRAG_ADDR]); - len = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FRAG_MAX_LEN]); - pci_unmap_single(pdev, dma_handle, len, PCI_DMA_FROMDEVICE); -} - -static void rocker_dma_rx_ring_skb_free(const struct rocker *rocker, - const struct rocker_desc_info *desc_info) -{ - const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1]; - struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info); - - if (!skb) - return; - rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info); - rocker_dma_rx_ring_skb_unmap(rocker, attrs); - dev_kfree_skb_any(skb); -} - -static int rocker_dma_rx_ring_skbs_alloc(const struct rocker_port *rocker_port) -{ - const struct rocker_dma_ring_info *rx_ring = &rocker_port->rx_ring; - const struct rocker *rocker = rocker_port->rocker; - int i; - int err; - - for (i = 0; i < rx_ring->size; i++) { - err = rocker_dma_rx_ring_skb_alloc(rocker_port, - &rx_ring->desc_info[i]); - if (err) - goto rollback; - } - return 0; - -rollback: - for (i--; i >= 0; i--) - rocker_dma_rx_ring_skb_free(rocker, &rx_ring->desc_info[i]); - return err; -} - -static void rocker_dma_rx_ring_skbs_free(const struct rocker_port *rocker_port) -{ - const struct rocker_dma_ring_info *rx_ring = &rocker_port->rx_ring; - const struct rocker *rocker = rocker_port->rocker; - int i; - - for (i = 0; i < rx_ring->size; i++) - rocker_dma_rx_ring_skb_free(rocker, &rx_ring->desc_info[i]); -} - -static int rocker_port_dma_rings_init(struct rocker_port *rocker_port) -{ - struct rocker *rocker = rocker_port->rocker; - int err; - - err = rocker_dma_ring_create(rocker, - ROCKER_DMA_TX(rocker_port->port_number), - ROCKER_DMA_TX_DEFAULT_SIZE, - &rocker_port->tx_ring); - if (err) { - netdev_err(rocker_port->dev, "failed to create tx dma ring\n"); - return err; - } - - err = rocker_dma_ring_bufs_alloc(rocker, &rocker_port->tx_ring, - PCI_DMA_TODEVICE, - ROCKER_DMA_TX_DESC_SIZE); - if (err) { - netdev_err(rocker_port->dev, "failed to alloc tx dma ring buffers\n"); - goto err_dma_tx_ring_bufs_alloc; - } - - err = rocker_dma_ring_create(rocker, - ROCKER_DMA_RX(rocker_port->port_number), - ROCKER_DMA_RX_DEFAULT_SIZE, - &rocker_port->rx_ring); - if (err) { - netdev_err(rocker_port->dev, "failed to create rx dma ring\n"); - goto err_dma_rx_ring_create; - } - - err = rocker_dma_ring_bufs_alloc(rocker, &rocker_port->rx_ring, - PCI_DMA_BIDIRECTIONAL, - ROCKER_DMA_RX_DESC_SIZE); - if (err) { - netdev_err(rocker_port->dev, "failed to alloc rx dma ring buffers\n"); - goto err_dma_rx_ring_bufs_alloc; - } - - err = rocker_dma_rx_ring_skbs_alloc(rocker_port); - if (err) { - netdev_err(rocker_port->dev, "failed to alloc rx dma ring skbs\n"); - goto err_dma_rx_ring_skbs_alloc; - } - rocker_dma_ring_pass_to_producer(rocker, &rocker_port->rx_ring); - - return 0; - -err_dma_rx_ring_skbs_alloc: - rocker_dma_ring_bufs_free(rocker, &rocker_port->rx_ring, - PCI_DMA_BIDIRECTIONAL); -err_dma_rx_ring_bufs_alloc: - rocker_dma_ring_destroy(rocker, &rocker_port->rx_ring); -err_dma_rx_ring_create: - rocker_dma_ring_bufs_free(rocker, &rocker_port->tx_ring, - PCI_DMA_TODEVICE); -err_dma_tx_ring_bufs_alloc: - rocker_dma_ring_destroy(rocker, &rocker_port->tx_ring); - return err; -} - -static void rocker_port_dma_rings_fini(struct rocker_port *rocker_port) -{ - struct rocker *rocker = rocker_port->rocker; - - rocker_dma_rx_ring_skbs_free(rocker_port); - rocker_dma_ring_bufs_free(rocker, &rocker_port->rx_ring, - PCI_DMA_BIDIRECTIONAL); - rocker_dma_ring_destroy(rocker, &rocker_port->rx_ring); - rocker_dma_ring_bufs_free(rocker, &rocker_port->tx_ring, - PCI_DMA_TODEVICE); - rocker_dma_ring_destroy(rocker, &rocker_port->tx_ring); -} - -static void rocker_port_set_enable(const struct rocker_port *rocker_port, - bool enable) -{ - u64 val = rocker_read64(rocker_port->rocker, PORT_PHYS_ENABLE); - - if (enable) - val |= 1ULL << rocker_port->pport; - else - val &= ~(1ULL << rocker_port->pport); - rocker_write64(rocker_port->rocker, PORT_PHYS_ENABLE, val); -} - -/******************************** - * Interrupt handler and helpers - ********************************/ - -static irqreturn_t rocker_cmd_irq_handler(int irq, void *dev_id) -{ - struct rocker *rocker = dev_id; - const struct rocker_desc_info *desc_info; - struct rocker_wait *wait; - u32 credits = 0; - - spin_lock(&rocker->cmd_ring_lock); - while ((desc_info = rocker_desc_tail_get(&rocker->cmd_ring))) { - wait = rocker_desc_cookie_ptr_get(desc_info); - if (wait->nowait) { - rocker_desc_gen_clear(desc_info); - rocker_wait_destroy(NULL, wait); - } else { - rocker_wait_wake_up(wait); - } - credits++; - } - spin_unlock(&rocker->cmd_ring_lock); - rocker_dma_ring_credits_set(rocker, &rocker->cmd_ring, credits); - - return IRQ_HANDLED; -} - -static void rocker_port_link_up(const struct rocker_port *rocker_port) -{ - netif_carrier_on(rocker_port->dev); - netdev_info(rocker_port->dev, "Link is up\n"); -} - -static void rocker_port_link_down(const struct rocker_port *rocker_port) -{ - netif_carrier_off(rocker_port->dev); - netdev_info(rocker_port->dev, "Link is down\n"); -} - -static int rocker_event_link_change(const struct rocker *rocker, - const struct rocker_tlv *info) -{ - const struct rocker_tlv *attrs[ROCKER_TLV_EVENT_LINK_CHANGED_MAX + 1]; - unsigned int port_number; - bool link_up; - struct rocker_port *rocker_port; - - rocker_tlv_parse_nested(attrs, ROCKER_TLV_EVENT_LINK_CHANGED_MAX, info); - if (!attrs[ROCKER_TLV_EVENT_LINK_CHANGED_PPORT] || - !attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP]) - return -EIO; - port_number = - rocker_tlv_get_u32(attrs[ROCKER_TLV_EVENT_LINK_CHANGED_PPORT]) - 1; - link_up = rocker_tlv_get_u8(attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP]); - - if (port_number >= rocker->port_count) - return -EINVAL; - - rocker_port = rocker->ports[port_number]; - if (netif_carrier_ok(rocker_port->dev) != link_up) { - if (link_up) - rocker_port_link_up(rocker_port); - else - rocker_port_link_down(rocker_port); - } - - return 0; -} - -static int rocker_port_fdb(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - const unsigned char *addr, - __be16 vlan_id, int flags); - -static int rocker_event_mac_vlan_seen(const struct rocker *rocker, - const struct rocker_tlv *info) -{ - const struct rocker_tlv *attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAX + 1]; - unsigned int port_number; - struct rocker_port *rocker_port; - const unsigned char *addr; - int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_LEARNED; - __be16 vlan_id; - - rocker_tlv_parse_nested(attrs, ROCKER_TLV_EVENT_MAC_VLAN_MAX, info); - if (!attrs[ROCKER_TLV_EVENT_MAC_VLAN_PPORT] || - !attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAC] || - !attrs[ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID]) - return -EIO; - port_number = - rocker_tlv_get_u32(attrs[ROCKER_TLV_EVENT_MAC_VLAN_PPORT]) - 1; - addr = rocker_tlv_data(attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAC]); - vlan_id = rocker_tlv_get_be16(attrs[ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID]); - - if (port_number >= rocker->port_count) - return -EINVAL; - - rocker_port = rocker->ports[port_number]; - - if (rocker_port->stp_state != BR_STATE_LEARNING && - rocker_port->stp_state != BR_STATE_FORWARDING) - return 0; - - return rocker_port_fdb(rocker_port, NULL, addr, vlan_id, flags); -} - -static int rocker_event_process(const struct rocker *rocker, - const struct rocker_desc_info *desc_info) -{ - const struct rocker_tlv *attrs[ROCKER_TLV_EVENT_MAX + 1]; - const struct rocker_tlv *info; - u16 type; - - rocker_tlv_parse_desc(attrs, ROCKER_TLV_EVENT_MAX, desc_info); - if (!attrs[ROCKER_TLV_EVENT_TYPE] || - !attrs[ROCKER_TLV_EVENT_INFO]) - return -EIO; - - type = rocker_tlv_get_u16(attrs[ROCKER_TLV_EVENT_TYPE]); - info = attrs[ROCKER_TLV_EVENT_INFO]; - - switch (type) { - case ROCKER_TLV_EVENT_TYPE_LINK_CHANGED: - return rocker_event_link_change(rocker, info); - case ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN: - return rocker_event_mac_vlan_seen(rocker, info); - } - - return -EOPNOTSUPP; -} - -static irqreturn_t rocker_event_irq_handler(int irq, void *dev_id) -{ - struct rocker *rocker = dev_id; - const struct pci_dev *pdev = rocker->pdev; - const struct rocker_desc_info *desc_info; - u32 credits = 0; - int err; - - while ((desc_info = rocker_desc_tail_get(&rocker->event_ring))) { - err = rocker_desc_err(desc_info); - if (err) { - dev_err(&pdev->dev, "event desc received with err %d\n", - err); - } else { - err = rocker_event_process(rocker, desc_info); - if (err) - dev_err(&pdev->dev, "event processing failed with err %d\n", - err); - } - rocker_desc_gen_clear(desc_info); - rocker_desc_head_set(rocker, &rocker->event_ring, desc_info); - credits++; - } - rocker_dma_ring_credits_set(rocker, &rocker->event_ring, credits); - - return IRQ_HANDLED; -} - -static irqreturn_t rocker_tx_irq_handler(int irq, void *dev_id) -{ - struct rocker_port *rocker_port = dev_id; - - napi_schedule(&rocker_port->napi_tx); - return IRQ_HANDLED; -} - -static irqreturn_t rocker_rx_irq_handler(int irq, void *dev_id) -{ - struct rocker_port *rocker_port = dev_id; - - napi_schedule(&rocker_port->napi_rx); - return IRQ_HANDLED; -} - -/******************** - * Command interface - ********************/ - -typedef int (*rocker_cmd_prep_cb_t)(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv); - -typedef int (*rocker_cmd_proc_cb_t)(const struct rocker_port *rocker_port, - const struct rocker_desc_info *desc_info, - void *priv); - -static int rocker_cmd_exec(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - rocker_cmd_prep_cb_t prepare, void *prepare_priv, - rocker_cmd_proc_cb_t process, void *process_priv) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_desc_info *desc_info; - struct rocker_wait *wait; - bool nowait = !!(flags & ROCKER_OP_FLAG_NOWAIT); - unsigned long lock_flags; - int err; - - wait = rocker_wait_create(rocker_port, trans, flags); - if (!wait) - return -ENOMEM; - wait->nowait = nowait; - - spin_lock_irqsave(&rocker->cmd_ring_lock, lock_flags); - - desc_info = rocker_desc_head_get(&rocker->cmd_ring); - if (!desc_info) { - spin_unlock_irqrestore(&rocker->cmd_ring_lock, lock_flags); - err = -EAGAIN; - goto out; - } - - err = prepare(rocker_port, desc_info, prepare_priv); - if (err) { - spin_unlock_irqrestore(&rocker->cmd_ring_lock, lock_flags); - goto out; - } - - rocker_desc_cookie_ptr_set(desc_info, wait); - - if (!switchdev_trans_ph_prepare(trans)) - rocker_desc_head_set(rocker, &rocker->cmd_ring, desc_info); - - spin_unlock_irqrestore(&rocker->cmd_ring_lock, lock_flags); - - if (nowait) - return 0; - - if (!switchdev_trans_ph_prepare(trans)) - if (!rocker_wait_event_timeout(wait, HZ / 10)) - return -EIO; - - err = rocker_desc_err(desc_info); - if (err) - return err; - - if (process) - err = process(rocker_port, desc_info, process_priv); - - rocker_desc_gen_clear(desc_info); -out: - rocker_wait_destroy(trans, wait); - return err; -} - -static int -rocker_cmd_get_port_settings_prep(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv) -{ - struct rocker_tlv *cmd_info; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, - ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS)) - return -EMSGSIZE; - cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); - if (!cmd_info) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, - rocker_port->pport)) - return -EMSGSIZE; - rocker_tlv_nest_end(desc_info, cmd_info); - return 0; -} - -static int -rocker_cmd_get_port_settings_ethtool_proc(const struct rocker_port *rocker_port, - const struct rocker_desc_info *desc_info, - void *priv) -{ - struct ethtool_cmd *ecmd = priv; - const struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1]; - const struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; - u32 speed; - u8 duplex; - u8 autoneg; - - rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info); - if (!attrs[ROCKER_TLV_CMD_INFO]) - return -EIO; - - rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, - attrs[ROCKER_TLV_CMD_INFO]); - if (!info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] || - !info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] || - !info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) - return -EIO; - - speed = rocker_tlv_get_u32(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]); - duplex = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]); - autoneg = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]); - - ecmd->transceiver = XCVR_INTERNAL; - ecmd->supported = SUPPORTED_TP; - ecmd->phy_address = 0xff; - ecmd->port = PORT_TP; - ethtool_cmd_speed_set(ecmd, speed); - ecmd->duplex = duplex ? DUPLEX_FULL : DUPLEX_HALF; - ecmd->autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; - - return 0; -} - -static int -rocker_cmd_get_port_settings_macaddr_proc(const struct rocker_port *rocker_port, - const struct rocker_desc_info *desc_info, - void *priv) -{ - unsigned char *macaddr = priv; - const struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1]; - const struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; - const struct rocker_tlv *attr; - - rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info); - if (!attrs[ROCKER_TLV_CMD_INFO]) - return -EIO; - - rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, - attrs[ROCKER_TLV_CMD_INFO]); - attr = info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]; - if (!attr) - return -EIO; - - if (rocker_tlv_len(attr) != ETH_ALEN) - return -EINVAL; - - ether_addr_copy(macaddr, rocker_tlv_data(attr)); - return 0; -} - -struct port_name { - char *buf; - size_t len; -}; - -static int -rocker_cmd_get_port_settings_phys_name_proc(const struct rocker_port *rocker_port, - const struct rocker_desc_info *desc_info, - void *priv) -{ - const struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; - const struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1]; - struct port_name *name = priv; - const struct rocker_tlv *attr; - size_t i, j, len; - const char *str; - - rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info); - if (!attrs[ROCKER_TLV_CMD_INFO]) - return -EIO; - - rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, - attrs[ROCKER_TLV_CMD_INFO]); - attr = info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME]; - if (!attr) - return -EIO; - - len = min_t(size_t, rocker_tlv_len(attr), name->len); - str = rocker_tlv_data(attr); - - /* make sure name only contains alphanumeric characters */ - for (i = j = 0; i < len; ++i) { - if (isalnum(str[i])) { - name->buf[j] = str[i]; - j++; - } - } - - if (j == 0) - return -EIO; - - name->buf[j] = '\0'; - - return 0; -} - -static int -rocker_cmd_set_port_settings_ethtool_prep(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv) -{ - struct ethtool_cmd *ecmd = priv; - struct rocker_tlv *cmd_info; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, - ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS)) - return -EMSGSIZE; - cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); - if (!cmd_info) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, - rocker_port->pport)) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, - ethtool_cmd_speed(ecmd))) - return -EMSGSIZE; - if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, - ecmd->duplex)) - return -EMSGSIZE; - if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, - ecmd->autoneg)) - return -EMSGSIZE; - rocker_tlv_nest_end(desc_info, cmd_info); - return 0; -} - -static int -rocker_cmd_set_port_settings_macaddr_prep(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv) -{ - const unsigned char *macaddr = priv; - struct rocker_tlv *cmd_info; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, - ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS)) - return -EMSGSIZE; - cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); - if (!cmd_info) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, - rocker_port->pport)) - return -EMSGSIZE; - if (rocker_tlv_put(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, - ETH_ALEN, macaddr)) - return -EMSGSIZE; - rocker_tlv_nest_end(desc_info, cmd_info); - return 0; -} - -static int -rocker_cmd_set_port_settings_mtu_prep(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv) -{ - int mtu = *(int *)priv; - struct rocker_tlv *cmd_info; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, - ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS)) - return -EMSGSIZE; - cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); - if (!cmd_info) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, - rocker_port->pport)) - return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MTU, - mtu)) - return -EMSGSIZE; - rocker_tlv_nest_end(desc_info, cmd_info); - return 0; -} - -static int -rocker_cmd_set_port_learning_prep(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv) -{ - struct rocker_tlv *cmd_info; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, - ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS)) - return -EMSGSIZE; - cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); - if (!cmd_info) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, - rocker_port->pport)) - return -EMSGSIZE; - if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, - !!(rocker_port->brport_flags & BR_LEARNING))) - return -EMSGSIZE; - rocker_tlv_nest_end(desc_info, cmd_info); - return 0; -} - -static int rocker_cmd_get_port_settings_ethtool(struct rocker_port *rocker_port, - struct ethtool_cmd *ecmd) -{ - return rocker_cmd_exec(rocker_port, NULL, 0, - rocker_cmd_get_port_settings_prep, NULL, - rocker_cmd_get_port_settings_ethtool_proc, - ecmd); -} - -static int rocker_cmd_get_port_settings_macaddr(struct rocker_port *rocker_port, - unsigned char *macaddr) -{ - return rocker_cmd_exec(rocker_port, NULL, 0, - rocker_cmd_get_port_settings_prep, NULL, - rocker_cmd_get_port_settings_macaddr_proc, - macaddr); -} - -static int rocker_cmd_set_port_settings_ethtool(struct rocker_port *rocker_port, - struct ethtool_cmd *ecmd) -{ - return rocker_cmd_exec(rocker_port, NULL, 0, - rocker_cmd_set_port_settings_ethtool_prep, - ecmd, NULL, NULL); -} - -static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port, - unsigned char *macaddr) -{ - return rocker_cmd_exec(rocker_port, NULL, 0, - rocker_cmd_set_port_settings_macaddr_prep, - macaddr, NULL, NULL); -} - -static int rocker_cmd_set_port_settings_mtu(struct rocker_port *rocker_port, - int mtu) -{ - return rocker_cmd_exec(rocker_port, NULL, 0, - rocker_cmd_set_port_settings_mtu_prep, - &mtu, NULL, NULL); -} - -static int rocker_port_set_learning(struct rocker_port *rocker_port, - struct switchdev_trans *trans) -{ - return rocker_cmd_exec(rocker_port, trans, 0, - rocker_cmd_set_port_learning_prep, - NULL, NULL, NULL); -} - -static int -rocker_cmd_flow_tbl_add_ig_port(struct rocker_desc_info *desc_info, - const struct rocker_flow_tbl_entry *entry) -{ - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT, - entry->key.ig_port.in_pport)) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT_MASK, - entry->key.ig_port.in_pport_mask)) - return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, - entry->key.ig_port.goto_tbl)) - return -EMSGSIZE; - - return 0; -} - -static int -rocker_cmd_flow_tbl_add_vlan(struct rocker_desc_info *desc_info, - const struct rocker_flow_tbl_entry *entry) -{ - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT, - entry->key.vlan.in_pport)) - return -EMSGSIZE; - if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->key.vlan.vlan_id)) - return -EMSGSIZE; - if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, - entry->key.vlan.vlan_id_mask)) - return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, - entry->key.vlan.goto_tbl)) - return -EMSGSIZE; - if (entry->key.vlan.untagged && - rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_NEW_VLAN_ID, - entry->key.vlan.new_vlan_id)) - return -EMSGSIZE; - - return 0; -} - -static int -rocker_cmd_flow_tbl_add_term_mac(struct rocker_desc_info *desc_info, - const struct rocker_flow_tbl_entry *entry) -{ - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT, - entry->key.term_mac.in_pport)) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT_MASK, - entry->key.term_mac.in_pport_mask)) - return -EMSGSIZE; - if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, - entry->key.term_mac.eth_type)) - return -EMSGSIZE; - if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, - ETH_ALEN, entry->key.term_mac.eth_dst)) - return -EMSGSIZE; - if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK, - ETH_ALEN, entry->key.term_mac.eth_dst_mask)) - return -EMSGSIZE; - if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->key.term_mac.vlan_id)) - return -EMSGSIZE; - if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, - entry->key.term_mac.vlan_id_mask)) - return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, - entry->key.term_mac.goto_tbl)) - return -EMSGSIZE; - if (entry->key.term_mac.copy_to_cpu && - rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, - entry->key.term_mac.copy_to_cpu)) - return -EMSGSIZE; - - return 0; -} - -static int -rocker_cmd_flow_tbl_add_ucast_routing(struct rocker_desc_info *desc_info, - const struct rocker_flow_tbl_entry *entry) -{ - if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, - entry->key.ucast_routing.eth_type)) - return -EMSGSIZE; - if (rocker_tlv_put_be32(desc_info, ROCKER_TLV_OF_DPA_DST_IP, - entry->key.ucast_routing.dst4)) - return -EMSGSIZE; - if (rocker_tlv_put_be32(desc_info, ROCKER_TLV_OF_DPA_DST_IP_MASK, - entry->key.ucast_routing.dst4_mask)) - return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, - entry->key.ucast_routing.goto_tbl)) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID, - entry->key.ucast_routing.group_id)) - return -EMSGSIZE; - - return 0; -} - -static int -rocker_cmd_flow_tbl_add_bridge(struct rocker_desc_info *desc_info, - const struct rocker_flow_tbl_entry *entry) -{ - if (entry->key.bridge.has_eth_dst && - rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, - ETH_ALEN, entry->key.bridge.eth_dst)) - return -EMSGSIZE; - if (entry->key.bridge.has_eth_dst_mask && - rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK, - ETH_ALEN, entry->key.bridge.eth_dst_mask)) - return -EMSGSIZE; - if (entry->key.bridge.vlan_id && - rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->key.bridge.vlan_id)) - return -EMSGSIZE; - if (entry->key.bridge.tunnel_id && - rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_TUNNEL_ID, - entry->key.bridge.tunnel_id)) - return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, - entry->key.bridge.goto_tbl)) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID, - entry->key.bridge.group_id)) - return -EMSGSIZE; - if (entry->key.bridge.copy_to_cpu && - rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, - entry->key.bridge.copy_to_cpu)) - return -EMSGSIZE; - - return 0; -} - -static int -rocker_cmd_flow_tbl_add_acl(struct rocker_desc_info *desc_info, - const struct rocker_flow_tbl_entry *entry) -{ - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT, - entry->key.acl.in_pport)) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT_MASK, - entry->key.acl.in_pport_mask)) - return -EMSGSIZE; - if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC, - ETH_ALEN, entry->key.acl.eth_src)) - return -EMSGSIZE; - if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC_MASK, - ETH_ALEN, entry->key.acl.eth_src_mask)) - return -EMSGSIZE; - if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, - ETH_ALEN, entry->key.acl.eth_dst)) - return -EMSGSIZE; - if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK, - ETH_ALEN, entry->key.acl.eth_dst_mask)) - return -EMSGSIZE; - if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, - entry->key.acl.eth_type)) - return -EMSGSIZE; - if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->key.acl.vlan_id)) - return -EMSGSIZE; - if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, - entry->key.acl.vlan_id_mask)) - return -EMSGSIZE; - - switch (ntohs(entry->key.acl.eth_type)) { - case ETH_P_IP: - case ETH_P_IPV6: - if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_IP_PROTO, - entry->key.acl.ip_proto)) - return -EMSGSIZE; - if (rocker_tlv_put_u8(desc_info, - ROCKER_TLV_OF_DPA_IP_PROTO_MASK, - entry->key.acl.ip_proto_mask)) - return -EMSGSIZE; - if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_IP_DSCP, - entry->key.acl.ip_tos & 0x3f)) - return -EMSGSIZE; - if (rocker_tlv_put_u8(desc_info, - ROCKER_TLV_OF_DPA_IP_DSCP_MASK, - entry->key.acl.ip_tos_mask & 0x3f)) - return -EMSGSIZE; - if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_IP_ECN, - (entry->key.acl.ip_tos & 0xc0) >> 6)) - return -EMSGSIZE; - if (rocker_tlv_put_u8(desc_info, - ROCKER_TLV_OF_DPA_IP_ECN_MASK, - (entry->key.acl.ip_tos_mask & 0xc0) >> 6)) - return -EMSGSIZE; - break; - } - - if (entry->key.acl.group_id != ROCKER_GROUP_NONE && - rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID, - entry->key.acl.group_id)) - return -EMSGSIZE; - - return 0; -} - -static int rocker_cmd_flow_tbl_add(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv) -{ - const struct rocker_flow_tbl_entry *entry = priv; - struct rocker_tlv *cmd_info; - int err = 0; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd)) - return -EMSGSIZE; - cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); - if (!cmd_info) - return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_TABLE_ID, - entry->key.tbl_id)) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_PRIORITY, - entry->key.priority)) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_HARDTIME, 0)) - return -EMSGSIZE; - if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_OF_DPA_COOKIE, - entry->cookie)) - return -EMSGSIZE; - - switch (entry->key.tbl_id) { - case ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT: - err = rocker_cmd_flow_tbl_add_ig_port(desc_info, entry); - break; - case ROCKER_OF_DPA_TABLE_ID_VLAN: - err = rocker_cmd_flow_tbl_add_vlan(desc_info, entry); - break; - case ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC: - err = rocker_cmd_flow_tbl_add_term_mac(desc_info, entry); - break; - case ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING: - err = rocker_cmd_flow_tbl_add_ucast_routing(desc_info, entry); - break; - case ROCKER_OF_DPA_TABLE_ID_BRIDGING: - err = rocker_cmd_flow_tbl_add_bridge(desc_info, entry); - break; - case ROCKER_OF_DPA_TABLE_ID_ACL_POLICY: - err = rocker_cmd_flow_tbl_add_acl(desc_info, entry); - break; - default: - err = -ENOTSUPP; - break; - } - - if (err) - return err; - - rocker_tlv_nest_end(desc_info, cmd_info); - - return 0; -} - -static int rocker_cmd_flow_tbl_del(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv) -{ - const struct rocker_flow_tbl_entry *entry = priv; - struct rocker_tlv *cmd_info; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd)) - return -EMSGSIZE; - cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); - if (!cmd_info) - return -EMSGSIZE; - if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_OF_DPA_COOKIE, - entry->cookie)) - return -EMSGSIZE; - rocker_tlv_nest_end(desc_info, cmd_info); - - return 0; -} - -static int -rocker_cmd_group_tbl_add_l2_interface(struct rocker_desc_info *desc_info, - struct rocker_group_tbl_entry *entry) -{ - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_OUT_PPORT, - ROCKER_GROUP_PORT_GET(entry->group_id))) - return -EMSGSIZE; - if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_POP_VLAN, - entry->l2_interface.pop_vlan)) - return -EMSGSIZE; - - return 0; -} - -static int -rocker_cmd_group_tbl_add_l2_rewrite(struct rocker_desc_info *desc_info, - const struct rocker_group_tbl_entry *entry) -{ - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, - entry->l2_rewrite.group_id)) - return -EMSGSIZE; - if (!is_zero_ether_addr(entry->l2_rewrite.eth_src) && - rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC, - ETH_ALEN, entry->l2_rewrite.eth_src)) - return -EMSGSIZE; - if (!is_zero_ether_addr(entry->l2_rewrite.eth_dst) && - rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, - ETH_ALEN, entry->l2_rewrite.eth_dst)) - return -EMSGSIZE; - if (entry->l2_rewrite.vlan_id && - rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->l2_rewrite.vlan_id)) - return -EMSGSIZE; - - return 0; -} - -static int -rocker_cmd_group_tbl_add_group_ids(struct rocker_desc_info *desc_info, - const struct rocker_group_tbl_entry *entry) -{ - int i; - struct rocker_tlv *group_ids; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GROUP_COUNT, - entry->group_count)) - return -EMSGSIZE; - - group_ids = rocker_tlv_nest_start(desc_info, - ROCKER_TLV_OF_DPA_GROUP_IDS); - if (!group_ids) - return -EMSGSIZE; - - for (i = 0; i < entry->group_count; i++) - /* Note TLV array is 1-based */ - if (rocker_tlv_put_u32(desc_info, i + 1, entry->group_ids[i])) - return -EMSGSIZE; - - rocker_tlv_nest_end(desc_info, group_ids); - - return 0; -} - -static int -rocker_cmd_group_tbl_add_l3_unicast(struct rocker_desc_info *desc_info, - const struct rocker_group_tbl_entry *entry) -{ - if (!is_zero_ether_addr(entry->l3_unicast.eth_src) && - rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC, - ETH_ALEN, entry->l3_unicast.eth_src)) - return -EMSGSIZE; - if (!is_zero_ether_addr(entry->l3_unicast.eth_dst) && - rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, - ETH_ALEN, entry->l3_unicast.eth_dst)) - return -EMSGSIZE; - if (entry->l3_unicast.vlan_id && - rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->l3_unicast.vlan_id)) - return -EMSGSIZE; - if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_TTL_CHECK, - entry->l3_unicast.ttl_check)) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, - entry->l3_unicast.group_id)) - return -EMSGSIZE; - - return 0; -} - -static int rocker_cmd_group_tbl_add(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv) -{ - struct rocker_group_tbl_entry *entry = priv; - struct rocker_tlv *cmd_info; - int err = 0; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd)) - return -EMSGSIZE; - cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); - if (!cmd_info) - return -EMSGSIZE; - - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID, - entry->group_id)) - return -EMSGSIZE; - - switch (ROCKER_GROUP_TYPE_GET(entry->group_id)) { - case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: - err = rocker_cmd_group_tbl_add_l2_interface(desc_info, entry); - break; - case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: - err = rocker_cmd_group_tbl_add_l2_rewrite(desc_info, entry); - break; - case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: - case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: - err = rocker_cmd_group_tbl_add_group_ids(desc_info, entry); - break; - case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST: - err = rocker_cmd_group_tbl_add_l3_unicast(desc_info, entry); - break; - default: - err = -ENOTSUPP; - break; - } - - if (err) - return err; - - rocker_tlv_nest_end(desc_info, cmd_info); - - return 0; -} - -static int rocker_cmd_group_tbl_del(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv) -{ - const struct rocker_group_tbl_entry *entry = priv; - struct rocker_tlv *cmd_info; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd)) - return -EMSGSIZE; - cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); - if (!cmd_info) - return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID, - entry->group_id)) - return -EMSGSIZE; - rocker_tlv_nest_end(desc_info, cmd_info); - - return 0; -} - -/*************************************************** - * Flow, group, FDB, internal VLAN and neigh tables - ***************************************************/ - -static int rocker_init_tbls(struct rocker *rocker) -{ - hash_init(rocker->flow_tbl); - spin_lock_init(&rocker->flow_tbl_lock); - - hash_init(rocker->group_tbl); - spin_lock_init(&rocker->group_tbl_lock); - - hash_init(rocker->fdb_tbl); - spin_lock_init(&rocker->fdb_tbl_lock); - - hash_init(rocker->internal_vlan_tbl); - spin_lock_init(&rocker->internal_vlan_tbl_lock); - - hash_init(rocker->neigh_tbl); - spin_lock_init(&rocker->neigh_tbl_lock); - - return 0; -} - -static void rocker_free_tbls(struct rocker *rocker) -{ - unsigned long flags; - struct rocker_flow_tbl_entry *flow_entry; - struct rocker_group_tbl_entry *group_entry; - struct rocker_fdb_tbl_entry *fdb_entry; - struct rocker_internal_vlan_tbl_entry *internal_vlan_entry; - struct rocker_neigh_tbl_entry *neigh_entry; - struct hlist_node *tmp; - int bkt; - - spin_lock_irqsave(&rocker->flow_tbl_lock, flags); - hash_for_each_safe(rocker->flow_tbl, bkt, tmp, flow_entry, entry) - hash_del(&flow_entry->entry); - spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags); - - spin_lock_irqsave(&rocker->group_tbl_lock, flags); - hash_for_each_safe(rocker->group_tbl, bkt, tmp, group_entry, entry) - hash_del(&group_entry->entry); - spin_unlock_irqrestore(&rocker->group_tbl_lock, flags); - - spin_lock_irqsave(&rocker->fdb_tbl_lock, flags); - hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, fdb_entry, entry) - hash_del(&fdb_entry->entry); - spin_unlock_irqrestore(&rocker->fdb_tbl_lock, flags); - - spin_lock_irqsave(&rocker->internal_vlan_tbl_lock, flags); - hash_for_each_safe(rocker->internal_vlan_tbl, bkt, - tmp, internal_vlan_entry, entry) - hash_del(&internal_vlan_entry->entry); - spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, flags); - - spin_lock_irqsave(&rocker->neigh_tbl_lock, flags); - hash_for_each_safe(rocker->neigh_tbl, bkt, tmp, neigh_entry, entry) - hash_del(&neigh_entry->entry); - spin_unlock_irqrestore(&rocker->neigh_tbl_lock, flags); -} - -static struct rocker_flow_tbl_entry * -rocker_flow_tbl_find(const struct rocker *rocker, - const struct rocker_flow_tbl_entry *match) -{ - struct rocker_flow_tbl_entry *found; - size_t key_len = match->key_len ? match->key_len : sizeof(found->key); - - hash_for_each_possible(rocker->flow_tbl, found, - entry, match->key_crc32) { - if (memcmp(&found->key, &match->key, key_len) == 0) - return found; - } - - return NULL; -} - -static int rocker_flow_tbl_add(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - struct rocker_flow_tbl_entry *match) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_flow_tbl_entry *found; - size_t key_len = match->key_len ? match->key_len : sizeof(found->key); - unsigned long lock_flags; - - match->key_crc32 = crc32(~0, &match->key, key_len); - - spin_lock_irqsave(&rocker->flow_tbl_lock, lock_flags); - - found = rocker_flow_tbl_find(rocker, match); - - if (found) { - match->cookie = found->cookie; - if (!switchdev_trans_ph_prepare(trans)) - hash_del(&found->entry); - rocker_port_kfree(trans, found); - found = match; - found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD; - } else { - found = match; - found->cookie = rocker->flow_tbl_next_cookie++; - found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD; - } - - if (!switchdev_trans_ph_prepare(trans)) - hash_add(rocker->flow_tbl, &found->entry, found->key_crc32); - - spin_unlock_irqrestore(&rocker->flow_tbl_lock, lock_flags); - - return rocker_cmd_exec(rocker_port, trans, flags, - rocker_cmd_flow_tbl_add, found, NULL, NULL); -} - -static int rocker_flow_tbl_del(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - struct rocker_flow_tbl_entry *match) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_flow_tbl_entry *found; - size_t key_len = match->key_len ? match->key_len : sizeof(found->key); - unsigned long lock_flags; - int err = 0; - - match->key_crc32 = crc32(~0, &match->key, key_len); - - spin_lock_irqsave(&rocker->flow_tbl_lock, lock_flags); - - found = rocker_flow_tbl_find(rocker, match); - - if (found) { - if (!switchdev_trans_ph_prepare(trans)) - hash_del(&found->entry); - found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL; - } - - spin_unlock_irqrestore(&rocker->flow_tbl_lock, lock_flags); - - rocker_port_kfree(trans, match); - - if (found) { - err = rocker_cmd_exec(rocker_port, trans, flags, - rocker_cmd_flow_tbl_del, - found, NULL, NULL); - rocker_port_kfree(trans, found); - } - - return err; -} - -static int rocker_flow_tbl_do(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - struct rocker_flow_tbl_entry *entry) -{ - if (flags & ROCKER_OP_FLAG_REMOVE) - return rocker_flow_tbl_del(rocker_port, trans, flags, entry); - else - return rocker_flow_tbl_add(rocker_port, trans, flags, entry); -} - -static int rocker_flow_tbl_ig_port(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - u32 in_pport, u32 in_pport_mask, - enum rocker_of_dpa_table_id goto_tbl) -{ - struct rocker_flow_tbl_entry *entry; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - entry->key.priority = ROCKER_PRIORITY_IG_PORT; - entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT; - entry->key.ig_port.in_pport = in_pport; - entry->key.ig_port.in_pport_mask = in_pport_mask; - entry->key.ig_port.goto_tbl = goto_tbl; - - return rocker_flow_tbl_do(rocker_port, trans, flags, entry); -} - -static int rocker_flow_tbl_vlan(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - u32 in_pport, __be16 vlan_id, - __be16 vlan_id_mask, - enum rocker_of_dpa_table_id goto_tbl, - bool untagged, __be16 new_vlan_id) -{ - struct rocker_flow_tbl_entry *entry; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - entry->key.priority = ROCKER_PRIORITY_VLAN; - entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN; - entry->key.vlan.in_pport = in_pport; - entry->key.vlan.vlan_id = vlan_id; - entry->key.vlan.vlan_id_mask = vlan_id_mask; - entry->key.vlan.goto_tbl = goto_tbl; - - entry->key.vlan.untagged = untagged; - entry->key.vlan.new_vlan_id = new_vlan_id; - - return rocker_flow_tbl_do(rocker_port, trans, flags, entry); -} - -static int rocker_flow_tbl_term_mac(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - u32 in_pport, u32 in_pport_mask, - __be16 eth_type, const u8 *eth_dst, - const u8 *eth_dst_mask, __be16 vlan_id, - __be16 vlan_id_mask, bool copy_to_cpu, - int flags) -{ - struct rocker_flow_tbl_entry *entry; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - if (is_multicast_ether_addr(eth_dst)) { - entry->key.priority = ROCKER_PRIORITY_TERM_MAC_MCAST; - entry->key.term_mac.goto_tbl = - ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING; - } else { - entry->key.priority = ROCKER_PRIORITY_TERM_MAC_UCAST; - entry->key.term_mac.goto_tbl = - ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; - } - - entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; - entry->key.term_mac.in_pport = in_pport; - entry->key.term_mac.in_pport_mask = in_pport_mask; - entry->key.term_mac.eth_type = eth_type; - ether_addr_copy(entry->key.term_mac.eth_dst, eth_dst); - ether_addr_copy(entry->key.term_mac.eth_dst_mask, eth_dst_mask); - entry->key.term_mac.vlan_id = vlan_id; - entry->key.term_mac.vlan_id_mask = vlan_id_mask; - entry->key.term_mac.copy_to_cpu = copy_to_cpu; - - return rocker_flow_tbl_do(rocker_port, trans, flags, entry); -} - -static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - const u8 *eth_dst, const u8 *eth_dst_mask, - __be16 vlan_id, u32 tunnel_id, - enum rocker_of_dpa_table_id goto_tbl, - u32 group_id, bool copy_to_cpu) -{ - struct rocker_flow_tbl_entry *entry; - u32 priority; - bool vlan_bridging = !!vlan_id; - bool dflt = !eth_dst || (eth_dst && eth_dst_mask); - bool wild = false; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING; - - if (eth_dst) { - entry->key.bridge.has_eth_dst = 1; - ether_addr_copy(entry->key.bridge.eth_dst, eth_dst); - } - if (eth_dst_mask) { - entry->key.bridge.has_eth_dst_mask = 1; - ether_addr_copy(entry->key.bridge.eth_dst_mask, eth_dst_mask); - if (!ether_addr_equal(eth_dst_mask, ff_mac)) - wild = true; - } - - priority = ROCKER_PRIORITY_UNKNOWN; - if (vlan_bridging && dflt && wild) - priority = ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_WILD; - else if (vlan_bridging && dflt && !wild) - priority = ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_EXACT; - else if (vlan_bridging && !dflt) - priority = ROCKER_PRIORITY_BRIDGING_VLAN; - else if (!vlan_bridging && dflt && wild) - priority = ROCKER_PRIORITY_BRIDGING_TENANT_DFLT_WILD; - else if (!vlan_bridging && dflt && !wild) - priority = ROCKER_PRIORITY_BRIDGING_TENANT_DFLT_EXACT; - else if (!vlan_bridging && !dflt) - priority = ROCKER_PRIORITY_BRIDGING_TENANT; - - entry->key.priority = priority; - entry->key.bridge.vlan_id = vlan_id; - entry->key.bridge.tunnel_id = tunnel_id; - entry->key.bridge.goto_tbl = goto_tbl; - entry->key.bridge.group_id = group_id; - entry->key.bridge.copy_to_cpu = copy_to_cpu; - - return rocker_flow_tbl_do(rocker_port, trans, flags, entry); -} - -static int rocker_flow_tbl_ucast4_routing(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - __be16 eth_type, __be32 dst, - __be32 dst_mask, u32 priority, - enum rocker_of_dpa_table_id goto_tbl, - u32 group_id, int flags) -{ - struct rocker_flow_tbl_entry *entry; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; - entry->key.priority = priority; - entry->key.ucast_routing.eth_type = eth_type; - entry->key.ucast_routing.dst4 = dst; - entry->key.ucast_routing.dst4_mask = dst_mask; - entry->key.ucast_routing.goto_tbl = goto_tbl; - entry->key.ucast_routing.group_id = group_id; - entry->key_len = offsetof(struct rocker_flow_tbl_key, - ucast_routing.group_id); - - return rocker_flow_tbl_do(rocker_port, trans, flags, entry); -} - -static int rocker_flow_tbl_acl(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - u32 in_pport, u32 in_pport_mask, - const u8 *eth_src, const u8 *eth_src_mask, - const u8 *eth_dst, const u8 *eth_dst_mask, - __be16 eth_type, __be16 vlan_id, - __be16 vlan_id_mask, u8 ip_proto, - u8 ip_proto_mask, u8 ip_tos, u8 ip_tos_mask, - u32 group_id) -{ - u32 priority; - struct rocker_flow_tbl_entry *entry; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - priority = ROCKER_PRIORITY_ACL_NORMAL; - if (eth_dst && eth_dst_mask) { - if (ether_addr_equal(eth_dst_mask, mcast_mac)) - priority = ROCKER_PRIORITY_ACL_DFLT; - else if (is_link_local_ether_addr(eth_dst)) - priority = ROCKER_PRIORITY_ACL_CTRL; - } - - entry->key.priority = priority; - entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; - entry->key.acl.in_pport = in_pport; - entry->key.acl.in_pport_mask = in_pport_mask; - - if (eth_src) - ether_addr_copy(entry->key.acl.eth_src, eth_src); - if (eth_src_mask) - ether_addr_copy(entry->key.acl.eth_src_mask, eth_src_mask); - if (eth_dst) - ether_addr_copy(entry->key.acl.eth_dst, eth_dst); - if (eth_dst_mask) - ether_addr_copy(entry->key.acl.eth_dst_mask, eth_dst_mask); - - entry->key.acl.eth_type = eth_type; - entry->key.acl.vlan_id = vlan_id; - entry->key.acl.vlan_id_mask = vlan_id_mask; - entry->key.acl.ip_proto = ip_proto; - entry->key.acl.ip_proto_mask = ip_proto_mask; - entry->key.acl.ip_tos = ip_tos; - entry->key.acl.ip_tos_mask = ip_tos_mask; - entry->key.acl.group_id = group_id; - - return rocker_flow_tbl_do(rocker_port, trans, flags, entry); -} - -static struct rocker_group_tbl_entry * -rocker_group_tbl_find(const struct rocker *rocker, - const struct rocker_group_tbl_entry *match) -{ - struct rocker_group_tbl_entry *found; - - hash_for_each_possible(rocker->group_tbl, found, - entry, match->group_id) { - if (found->group_id == match->group_id) - return found; - } - - return NULL; -} - -static void rocker_group_tbl_entry_free(struct switchdev_trans *trans, - struct rocker_group_tbl_entry *entry) -{ - switch (ROCKER_GROUP_TYPE_GET(entry->group_id)) { - case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: - case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: - rocker_port_kfree(trans, entry->group_ids); - break; - default: - break; - } - rocker_port_kfree(trans, entry); -} - -static int rocker_group_tbl_add(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - struct rocker_group_tbl_entry *match) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_group_tbl_entry *found; - unsigned long lock_flags; - - spin_lock_irqsave(&rocker->group_tbl_lock, lock_flags); - - found = rocker_group_tbl_find(rocker, match); - - if (found) { - if (!switchdev_trans_ph_prepare(trans)) - hash_del(&found->entry); - rocker_group_tbl_entry_free(trans, found); - found = match; - found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD; - } else { - found = match; - found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD; - } - - if (!switchdev_trans_ph_prepare(trans)) - hash_add(rocker->group_tbl, &found->entry, found->group_id); - - spin_unlock_irqrestore(&rocker->group_tbl_lock, lock_flags); - - return rocker_cmd_exec(rocker_port, trans, flags, - rocker_cmd_group_tbl_add, found, NULL, NULL); -} - -static int rocker_group_tbl_del(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - struct rocker_group_tbl_entry *match) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_group_tbl_entry *found; - unsigned long lock_flags; - int err = 0; - - spin_lock_irqsave(&rocker->group_tbl_lock, lock_flags); - - found = rocker_group_tbl_find(rocker, match); - - if (found) { - if (!switchdev_trans_ph_prepare(trans)) - hash_del(&found->entry); - found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL; - } - - spin_unlock_irqrestore(&rocker->group_tbl_lock, lock_flags); - - rocker_group_tbl_entry_free(trans, match); - - if (found) { - err = rocker_cmd_exec(rocker_port, trans, flags, - rocker_cmd_group_tbl_del, - found, NULL, NULL); - rocker_group_tbl_entry_free(trans, found); - } - - return err; -} - -static int rocker_group_tbl_do(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - struct rocker_group_tbl_entry *entry) -{ - if (flags & ROCKER_OP_FLAG_REMOVE) - return rocker_group_tbl_del(rocker_port, trans, flags, entry); - else - return rocker_group_tbl_add(rocker_port, trans, flags, entry); -} - -static int rocker_group_l2_interface(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - __be16 vlan_id, u32 out_pport, - int pop_vlan) -{ - struct rocker_group_tbl_entry *entry; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - entry->group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); - entry->l2_interface.pop_vlan = pop_vlan; - - return rocker_group_tbl_do(rocker_port, trans, flags, entry); -} - -static int rocker_group_l2_fan_out(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - int flags, u8 group_count, - const u32 *group_ids, u32 group_id) -{ - struct rocker_group_tbl_entry *entry; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - entry->group_id = group_id; - entry->group_count = group_count; - - entry->group_ids = rocker_port_kcalloc(rocker_port, trans, flags, - group_count, sizeof(u32)); - if (!entry->group_ids) { - rocker_port_kfree(trans, entry); - return -ENOMEM; - } - memcpy(entry->group_ids, group_ids, group_count * sizeof(u32)); - - return rocker_group_tbl_do(rocker_port, trans, flags, entry); -} - -static int rocker_group_l2_flood(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - __be16 vlan_id, u8 group_count, - const u32 *group_ids, u32 group_id) -{ - return rocker_group_l2_fan_out(rocker_port, trans, flags, - group_count, group_ids, - group_id); -} - -static int rocker_group_l3_unicast(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - u32 index, const u8 *src_mac, const u8 *dst_mac, - __be16 vlan_id, bool ttl_check, u32 pport) -{ - struct rocker_group_tbl_entry *entry; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - entry->group_id = ROCKER_GROUP_L3_UNICAST(index); - if (src_mac) - ether_addr_copy(entry->l3_unicast.eth_src, src_mac); - if (dst_mac) - ether_addr_copy(entry->l3_unicast.eth_dst, dst_mac); - entry->l3_unicast.vlan_id = vlan_id; - entry->l3_unicast.ttl_check = ttl_check; - entry->l3_unicast.group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, pport); - - return rocker_group_tbl_do(rocker_port, trans, flags, entry); -} - -static struct rocker_neigh_tbl_entry * -rocker_neigh_tbl_find(const struct rocker *rocker, __be32 ip_addr) -{ - struct rocker_neigh_tbl_entry *found; - - hash_for_each_possible(rocker->neigh_tbl, found, - entry, be32_to_cpu(ip_addr)) - if (found->ip_addr == ip_addr) - return found; - - return NULL; -} - -static void _rocker_neigh_add(struct rocker *rocker, - struct switchdev_trans *trans, - struct rocker_neigh_tbl_entry *entry) -{ - if (!switchdev_trans_ph_commit(trans)) - entry->index = rocker->neigh_tbl_next_index++; - if (switchdev_trans_ph_prepare(trans)) - return; - entry->ref_count++; - hash_add(rocker->neigh_tbl, &entry->entry, - be32_to_cpu(entry->ip_addr)); -} - -static void _rocker_neigh_del(struct switchdev_trans *trans, - struct rocker_neigh_tbl_entry *entry) -{ - if (switchdev_trans_ph_prepare(trans)) - return; - if (--entry->ref_count == 0) { - hash_del(&entry->entry); - rocker_port_kfree(trans, entry); - } -} - -static void _rocker_neigh_update(struct rocker_neigh_tbl_entry *entry, - struct switchdev_trans *trans, - const u8 *eth_dst, bool ttl_check) -{ - if (eth_dst) { - ether_addr_copy(entry->eth_dst, eth_dst); - entry->ttl_check = ttl_check; - } else if (!switchdev_trans_ph_prepare(trans)) { - entry->ref_count++; - } -} - -static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - int flags, __be32 ip_addr, const u8 *eth_dst) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_neigh_tbl_entry *entry; - struct rocker_neigh_tbl_entry *found; - unsigned long lock_flags; - __be16 eth_type = htons(ETH_P_IP); - enum rocker_of_dpa_table_id goto_tbl = - ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; - u32 group_id; - u32 priority = 0; - bool adding = !(flags & ROCKER_OP_FLAG_REMOVE); - bool updating; - bool removing; - int err = 0; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - spin_lock_irqsave(&rocker->neigh_tbl_lock, lock_flags); - - found = rocker_neigh_tbl_find(rocker, ip_addr); - - updating = found && adding; - removing = found && !adding; - adding = !found && adding; - - if (adding) { - entry->ip_addr = ip_addr; - entry->dev = rocker_port->dev; - ether_addr_copy(entry->eth_dst, eth_dst); - entry->ttl_check = true; - _rocker_neigh_add(rocker, trans, entry); - } else if (removing) { - memcpy(entry, found, sizeof(*entry)); - _rocker_neigh_del(trans, found); - } else if (updating) { - _rocker_neigh_update(found, trans, eth_dst, true); - memcpy(entry, found, sizeof(*entry)); - } else { - err = -ENOENT; - } - - spin_unlock_irqrestore(&rocker->neigh_tbl_lock, lock_flags); - - if (err) - goto err_out; - - /* For each active neighbor, we have an L3 unicast group and - * a /32 route to the neighbor, which uses the L3 unicast - * group. The L3 unicast group can also be referred to by - * other routes' nexthops. - */ - - err = rocker_group_l3_unicast(rocker_port, trans, flags, - entry->index, - rocker_port->dev->dev_addr, - entry->eth_dst, - rocker_port->internal_vlan_id, - entry->ttl_check, - rocker_port->pport); - if (err) { - netdev_err(rocker_port->dev, - "Error (%d) L3 unicast group index %d\n", - err, entry->index); - goto err_out; - } - - if (adding || removing) { - group_id = ROCKER_GROUP_L3_UNICAST(entry->index); - err = rocker_flow_tbl_ucast4_routing(rocker_port, trans, - eth_type, ip_addr, - inet_make_mask(32), - priority, goto_tbl, - group_id, flags); - - if (err) - netdev_err(rocker_port->dev, - "Error (%d) /32 unicast route %pI4 group 0x%08x\n", - err, &entry->ip_addr, group_id); - } - -err_out: - if (!adding) - rocker_port_kfree(trans, entry); - - return err; -} - -static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - __be32 ip_addr) -{ - struct net_device *dev = rocker_port->dev; - struct neighbour *n = __ipv4_neigh_lookup(dev, (__force u32)ip_addr); - int err = 0; - - if (!n) { - n = neigh_create(&arp_tbl, &ip_addr, dev); - if (IS_ERR(n)) - return IS_ERR(n); - } - - /* If the neigh is already resolved, then go ahead and - * install the entry, otherwise start the ARP process to - * resolve the neigh. - */ - - if (n->nud_state & NUD_VALID) - err = rocker_port_ipv4_neigh(rocker_port, trans, 0, - ip_addr, n->ha); - else - neigh_event_send(n, NULL); - - neigh_release(n); - return err; -} - -static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - __be32 ip_addr, u32 *index) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_neigh_tbl_entry *entry; - struct rocker_neigh_tbl_entry *found; - unsigned long lock_flags; - bool adding = !(flags & ROCKER_OP_FLAG_REMOVE); - bool updating; - bool removing; - bool resolved = true; - int err = 0; - - entry = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*entry)); - if (!entry) - return -ENOMEM; - - spin_lock_irqsave(&rocker->neigh_tbl_lock, lock_flags); - - found = rocker_neigh_tbl_find(rocker, ip_addr); - if (found) - *index = found->index; - - updating = found && adding; - removing = found && !adding; - adding = !found && adding; - - if (adding) { - entry->ip_addr = ip_addr; - entry->dev = rocker_port->dev; - _rocker_neigh_add(rocker, trans, entry); - *index = entry->index; - resolved = false; - } else if (removing) { - _rocker_neigh_del(trans, found); - } else if (updating) { - _rocker_neigh_update(found, trans, NULL, false); - resolved = !is_zero_ether_addr(found->eth_dst); - } else { - err = -ENOENT; - } - - spin_unlock_irqrestore(&rocker->neigh_tbl_lock, lock_flags); - - if (!adding) - rocker_port_kfree(trans, entry); - - if (err) - return err; - - /* Resolved means neigh ip_addr is resolved to neigh mac. */ - - if (!resolved) - err = rocker_port_ipv4_resolve(rocker_port, trans, ip_addr); - - return err; -} - -static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - int flags, __be16 vlan_id) -{ - struct rocker_port *p; - const struct rocker *rocker = rocker_port->rocker; - u32 group_id = ROCKER_GROUP_L2_FLOOD(vlan_id, 0); - u32 *group_ids; - u8 group_count = 0; - int err = 0; - int i; - - group_ids = rocker_port_kcalloc(rocker_port, trans, flags, - rocker->port_count, sizeof(u32)); - if (!group_ids) - return -ENOMEM; - - /* Adjust the flood group for this VLAN. The flood group - * references an L2 interface group for each port in this - * VLAN. - */ - - for (i = 0; i < rocker->port_count; i++) { - p = rocker->ports[i]; - if (!p) - continue; - if (!rocker_port_is_bridged(p)) - continue; - if (test_bit(ntohs(vlan_id), p->vlan_bitmap)) { - group_ids[group_count++] = - ROCKER_GROUP_L2_INTERFACE(vlan_id, p->pport); - } - } - - /* If there are no bridged ports in this VLAN, we're done */ - if (group_count == 0) - goto no_ports_in_vlan; - - err = rocker_group_l2_flood(rocker_port, trans, flags, vlan_id, - group_count, group_ids, group_id); - if (err) - netdev_err(rocker_port->dev, - "Error (%d) port VLAN l2 flood group\n", err); - -no_ports_in_vlan: - rocker_port_kfree(trans, group_ids); - return err; -} - -static int rocker_port_vlan_l2_groups(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - __be16 vlan_id, bool pop_vlan) -{ - const struct rocker *rocker = rocker_port->rocker; - struct rocker_port *p; - bool adding = !(flags & ROCKER_OP_FLAG_REMOVE); - u32 out_pport; - int ref = 0; - int err; - int i; - - /* An L2 interface group for this port in this VLAN, but - * only when port STP state is LEARNING|FORWARDING. - */ - - if (rocker_port->stp_state == BR_STATE_LEARNING || - rocker_port->stp_state == BR_STATE_FORWARDING) { - out_pport = rocker_port->pport; - err = rocker_group_l2_interface(rocker_port, trans, flags, - vlan_id, out_pport, pop_vlan); - if (err) { - netdev_err(rocker_port->dev, - "Error (%d) port VLAN l2 group for pport %d\n", - err, out_pport); - return err; - } - } - - /* An L2 interface group for this VLAN to CPU port. - * Add when first port joins this VLAN and destroy when - * last port leaves this VLAN. - */ - - for (i = 0; i < rocker->port_count; i++) { - p = rocker->ports[i]; - if (p && test_bit(ntohs(vlan_id), p->vlan_bitmap)) - ref++; - } - - if ((!adding || ref != 1) && (adding || ref != 0)) - return 0; - - out_pport = 0; - err = rocker_group_l2_interface(rocker_port, trans, flags, - vlan_id, out_pport, pop_vlan); - if (err) { - netdev_err(rocker_port->dev, - "Error (%d) port VLAN l2 group for CPU port\n", err); - return err; - } - - return 0; -} - -static struct rocker_ctrl { - const u8 *eth_dst; - const u8 *eth_dst_mask; - __be16 eth_type; - bool acl; - bool bridge; - bool term; - bool copy_to_cpu; -} rocker_ctrls[] = { - [ROCKER_CTRL_LINK_LOCAL_MCAST] = { - /* pass link local multicast pkts up to CPU for filtering */ - .eth_dst = ll_mac, - .eth_dst_mask = ll_mask, - .acl = true, - }, - [ROCKER_CTRL_LOCAL_ARP] = { - /* pass local ARP pkts up to CPU */ - .eth_dst = zero_mac, - .eth_dst_mask = zero_mac, - .eth_type = htons(ETH_P_ARP), - .acl = true, - }, - [ROCKER_CTRL_IPV4_MCAST] = { - /* pass IPv4 mcast pkts up to CPU, RFC 1112 */ - .eth_dst = ipv4_mcast, - .eth_dst_mask = ipv4_mask, - .eth_type = htons(ETH_P_IP), - .term = true, - .copy_to_cpu = true, - }, - [ROCKER_CTRL_IPV6_MCAST] = { - /* pass IPv6 mcast pkts up to CPU, RFC 2464 */ - .eth_dst = ipv6_mcast, - .eth_dst_mask = ipv6_mask, - .eth_type = htons(ETH_P_IPV6), - .term = true, - .copy_to_cpu = true, - }, - [ROCKER_CTRL_DFLT_BRIDGING] = { - /* flood any pkts on vlan */ - .bridge = true, - .copy_to_cpu = true, - }, - [ROCKER_CTRL_DFLT_OVS] = { - /* pass all pkts up to CPU */ - .eth_dst = zero_mac, - .eth_dst_mask = zero_mac, - .acl = true, - }, -}; - -static int rocker_port_ctrl_vlan_acl(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - const struct rocker_ctrl *ctrl, __be16 vlan_id) -{ - u32 in_pport = rocker_port->pport; - u32 in_pport_mask = 0xffffffff; - u32 out_pport = 0; - const u8 *eth_src = NULL; - const u8 *eth_src_mask = NULL; - __be16 vlan_id_mask = htons(0xffff); - u8 ip_proto = 0; - u8 ip_proto_mask = 0; - u8 ip_tos = 0; - u8 ip_tos_mask = 0; - u32 group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); - int err; - - err = rocker_flow_tbl_acl(rocker_port, trans, flags, - in_pport, in_pport_mask, - eth_src, eth_src_mask, - ctrl->eth_dst, ctrl->eth_dst_mask, - ctrl->eth_type, - vlan_id, vlan_id_mask, - ip_proto, ip_proto_mask, - ip_tos, ip_tos_mask, - group_id); - - if (err) - netdev_err(rocker_port->dev, "Error (%d) ctrl ACL\n", err); - - return err; -} - -static int rocker_port_ctrl_vlan_bridge(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - int flags, - const struct rocker_ctrl *ctrl, - __be16 vlan_id) -{ - enum rocker_of_dpa_table_id goto_tbl = - ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; - u32 group_id = ROCKER_GROUP_L2_FLOOD(vlan_id, 0); - u32 tunnel_id = 0; - int err; - - if (!rocker_port_is_bridged(rocker_port)) - return 0; - - err = rocker_flow_tbl_bridge(rocker_port, trans, flags, - ctrl->eth_dst, ctrl->eth_dst_mask, - vlan_id, tunnel_id, - goto_tbl, group_id, ctrl->copy_to_cpu); - - if (err) - netdev_err(rocker_port->dev, "Error (%d) ctrl FLOOD\n", err); - - return err; -} - -static int rocker_port_ctrl_vlan_term(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - const struct rocker_ctrl *ctrl, __be16 vlan_id) -{ - u32 in_pport_mask = 0xffffffff; - __be16 vlan_id_mask = htons(0xffff); - int err; - - if (ntohs(vlan_id) == 0) - vlan_id = rocker_port->internal_vlan_id; - - err = rocker_flow_tbl_term_mac(rocker_port, trans, - rocker_port->pport, in_pport_mask, - ctrl->eth_type, ctrl->eth_dst, - ctrl->eth_dst_mask, vlan_id, - vlan_id_mask, ctrl->copy_to_cpu, - flags); - - if (err) - netdev_err(rocker_port->dev, "Error (%d) ctrl term\n", err); - - return err; -} - -static int rocker_port_ctrl_vlan(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - const struct rocker_ctrl *ctrl, __be16 vlan_id) -{ - if (ctrl->acl) - return rocker_port_ctrl_vlan_acl(rocker_port, trans, flags, - ctrl, vlan_id); - if (ctrl->bridge) - return rocker_port_ctrl_vlan_bridge(rocker_port, trans, flags, - ctrl, vlan_id); - - if (ctrl->term) - return rocker_port_ctrl_vlan_term(rocker_port, trans, flags, - ctrl, vlan_id); - - return -EOPNOTSUPP; -} - -static int rocker_port_ctrl_vlan_add(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - __be16 vlan_id) -{ - int err = 0; - int i; - - for (i = 0; i < ROCKER_CTRL_MAX; i++) { - if (rocker_port->ctrls[i]) { - err = rocker_port_ctrl_vlan(rocker_port, trans, flags, - &rocker_ctrls[i], vlan_id); - if (err) - return err; - } - } - - return err; -} - -static int rocker_port_ctrl(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - const struct rocker_ctrl *ctrl) -{ - u16 vid; - int err = 0; - - for (vid = 1; vid < VLAN_N_VID; vid++) { - if (!test_bit(vid, rocker_port->vlan_bitmap)) - continue; - err = rocker_port_ctrl_vlan(rocker_port, trans, flags, - ctrl, htons(vid)); - if (err) - break; - } - - return err; -} - -static int rocker_port_vlan(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, u16 vid) -{ - enum rocker_of_dpa_table_id goto_tbl = - ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; - u32 in_pport = rocker_port->pport; - __be16 vlan_id = htons(vid); - __be16 vlan_id_mask = htons(0xffff); - __be16 internal_vlan_id; - bool untagged; - bool adding = !(flags & ROCKER_OP_FLAG_REMOVE); - int err; - - internal_vlan_id = rocker_port_vid_to_vlan(rocker_port, vid, &untagged); - - if (adding && test_bit(ntohs(internal_vlan_id), - rocker_port->vlan_bitmap)) - return 0; /* already added */ - else if (!adding && !test_bit(ntohs(internal_vlan_id), - rocker_port->vlan_bitmap)) - return 0; /* already removed */ - - change_bit(ntohs(internal_vlan_id), rocker_port->vlan_bitmap); - - if (adding) { - err = rocker_port_ctrl_vlan_add(rocker_port, trans, flags, - internal_vlan_id); - if (err) { - netdev_err(rocker_port->dev, - "Error (%d) port ctrl vlan add\n", err); - goto err_out; - } - } - - err = rocker_port_vlan_l2_groups(rocker_port, trans, flags, - internal_vlan_id, untagged); - if (err) { - netdev_err(rocker_port->dev, - "Error (%d) port VLAN l2 groups\n", err); - goto err_out; - } - - err = rocker_port_vlan_flood_group(rocker_port, trans, flags, - internal_vlan_id); - if (err) { - netdev_err(rocker_port->dev, - "Error (%d) port VLAN l2 flood group\n", err); - goto err_out; - } - - err = rocker_flow_tbl_vlan(rocker_port, trans, flags, - in_pport, vlan_id, vlan_id_mask, - goto_tbl, untagged, internal_vlan_id); - if (err) - netdev_err(rocker_port->dev, - "Error (%d) port VLAN table\n", err); - -err_out: - if (switchdev_trans_ph_prepare(trans)) - change_bit(ntohs(internal_vlan_id), rocker_port->vlan_bitmap); - - return err; -} - -static int rocker_port_ig_tbl(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags) -{ - enum rocker_of_dpa_table_id goto_tbl; - u32 in_pport; - u32 in_pport_mask; - int err; - - /* Normal Ethernet Frames. Matches pkts from any local physical - * ports. Goto VLAN tbl. - */ - - in_pport = 0; - in_pport_mask = 0xffff0000; - goto_tbl = ROCKER_OF_DPA_TABLE_ID_VLAN; - - err = rocker_flow_tbl_ig_port(rocker_port, trans, flags, - in_pport, in_pport_mask, - goto_tbl); - if (err) - netdev_err(rocker_port->dev, - "Error (%d) ingress port table entry\n", err); - - return err; -} - -struct rocker_fdb_learn_work { - struct work_struct work; - struct rocker_port *rocker_port; - struct switchdev_trans *trans; - int flags; - u8 addr[ETH_ALEN]; - u16 vid; -}; - -static void rocker_port_fdb_learn_work(struct work_struct *work) -{ - const struct rocker_fdb_learn_work *lw = - container_of(work, struct rocker_fdb_learn_work, work); - bool removing = (lw->flags & ROCKER_OP_FLAG_REMOVE); - bool learned = (lw->flags & ROCKER_OP_FLAG_LEARNED); - struct switchdev_notifier_fdb_info info; - - info.addr = lw->addr; - info.vid = lw->vid; - - rtnl_lock(); - if (learned && removing) - call_switchdev_notifiers(SWITCHDEV_FDB_DEL, - lw->rocker_port->dev, &info.info); - else if (learned && !removing) - call_switchdev_notifiers(SWITCHDEV_FDB_ADD, - lw->rocker_port->dev, &info.info); - rtnl_unlock(); - - rocker_port_kfree(lw->trans, work); -} - -static int rocker_port_fdb_learn(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - const u8 *addr, __be16 vlan_id) -{ - struct rocker_fdb_learn_work *lw; - enum rocker_of_dpa_table_id goto_tbl = - ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; - u32 out_pport = rocker_port->pport; - u32 tunnel_id = 0; - u32 group_id = ROCKER_GROUP_NONE; - bool syncing = !!(rocker_port->brport_flags & BR_LEARNING_SYNC); - bool copy_to_cpu = false; - int err; - - if (rocker_port_is_bridged(rocker_port)) - group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); - - if (!(flags & ROCKER_OP_FLAG_REFRESH)) { - err = rocker_flow_tbl_bridge(rocker_port, trans, flags, addr, - NULL, vlan_id, tunnel_id, goto_tbl, - group_id, copy_to_cpu); - if (err) - return err; - } - - if (!syncing) - return 0; - - if (!rocker_port_is_bridged(rocker_port)) - return 0; - - lw = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*lw)); - if (!lw) - return -ENOMEM; - - INIT_WORK(&lw->work, rocker_port_fdb_learn_work); - - lw->rocker_port = rocker_port; - lw->trans = trans; - lw->flags = flags; - ether_addr_copy(lw->addr, addr); - lw->vid = rocker_port_vlan_to_vid(rocker_port, vlan_id); - - if (switchdev_trans_ph_prepare(trans)) - rocker_port_kfree(trans, lw); - else - schedule_work(&lw->work); - - return 0; -} - -static struct rocker_fdb_tbl_entry * -rocker_fdb_tbl_find(const struct rocker *rocker, - const struct rocker_fdb_tbl_entry *match) -{ - struct rocker_fdb_tbl_entry *found; - - hash_for_each_possible(rocker->fdb_tbl, found, entry, match->key_crc32) - if (memcmp(&found->key, &match->key, sizeof(found->key)) == 0) - return found; - - return NULL; -} - -static int rocker_port_fdb(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - const unsigned char *addr, - __be16 vlan_id, int flags) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_fdb_tbl_entry *fdb; - struct rocker_fdb_tbl_entry *found; - bool removing = (flags & ROCKER_OP_FLAG_REMOVE); - unsigned long lock_flags; - - fdb = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*fdb)); - if (!fdb) - return -ENOMEM; - - fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED); - fdb->touched = jiffies; - fdb->key.rocker_port = rocker_port; - ether_addr_copy(fdb->key.addr, addr); - fdb->key.vlan_id = vlan_id; - fdb->key_crc32 = crc32(~0, &fdb->key, sizeof(fdb->key)); - - spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); - - found = rocker_fdb_tbl_find(rocker, fdb); - - if (found) { - found->touched = jiffies; - if (removing) { - rocker_port_kfree(trans, fdb); - if (!switchdev_trans_ph_prepare(trans)) - hash_del(&found->entry); - } - } else if (!removing) { - if (!switchdev_trans_ph_prepare(trans)) - hash_add(rocker->fdb_tbl, &fdb->entry, - fdb->key_crc32); - } - - spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); - - /* Check if adding and already exists, or removing and can't find */ - if (!found != !removing) { - rocker_port_kfree(trans, fdb); - if (!found && removing) - return 0; - /* Refreshing existing to update aging timers */ - flags |= ROCKER_OP_FLAG_REFRESH; - } - - return rocker_port_fdb_learn(rocker_port, trans, flags, addr, vlan_id); -} - -static int rocker_port_fdb_flush(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_fdb_tbl_entry *found; - unsigned long lock_flags; - struct hlist_node *tmp; - int bkt; - int err = 0; - - if (rocker_port->stp_state == BR_STATE_LEARNING || - rocker_port->stp_state == BR_STATE_FORWARDING) - return 0; - - flags |= ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE; - - spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); - - hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { - if (found->key.rocker_port != rocker_port) - continue; - if (!found->learned) - continue; - err = rocker_port_fdb_learn(rocker_port, trans, flags, - found->key.addr, - found->key.vlan_id); - if (err) - goto err_out; - if (!switchdev_trans_ph_prepare(trans)) - hash_del(&found->entry); - } - -err_out: - spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); - - return err; -} - -static void rocker_fdb_cleanup(unsigned long data) -{ - struct rocker *rocker = (struct rocker *)data; - struct rocker_port *rocker_port; - struct rocker_fdb_tbl_entry *entry; - struct hlist_node *tmp; - unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME; - unsigned long expires; - unsigned long lock_flags; - int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE | - ROCKER_OP_FLAG_LEARNED; - int bkt; - - spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); - - hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, entry, entry) { - if (!entry->learned) - continue; - rocker_port = entry->key.rocker_port; - expires = entry->touched + rocker_port->ageing_time; - if (time_before_eq(expires, jiffies)) { - rocker_port_fdb_learn(rocker_port, NULL, - flags, entry->key.addr, - entry->key.vlan_id); - hash_del(&entry->entry); - } else if (time_before(expires, next_timer)) { - next_timer = expires; - } - } - - spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); - - mod_timer(&rocker->fdb_cleanup_timer, round_jiffies_up(next_timer)); -} - -static int rocker_port_router_mac(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - __be16 vlan_id) -{ - u32 in_pport_mask = 0xffffffff; - __be16 eth_type; - const u8 *dst_mac_mask = ff_mac; - __be16 vlan_id_mask = htons(0xffff); - bool copy_to_cpu = false; - int err; - - if (ntohs(vlan_id) == 0) - vlan_id = rocker_port->internal_vlan_id; - - eth_type = htons(ETH_P_IP); - err = rocker_flow_tbl_term_mac(rocker_port, trans, - rocker_port->pport, in_pport_mask, - eth_type, rocker_port->dev->dev_addr, - dst_mac_mask, vlan_id, vlan_id_mask, - copy_to_cpu, flags); - if (err) - return err; - - eth_type = htons(ETH_P_IPV6); - err = rocker_flow_tbl_term_mac(rocker_port, trans, - rocker_port->pport, in_pport_mask, - eth_type, rocker_port->dev->dev_addr, - dst_mac_mask, vlan_id, vlan_id_mask, - copy_to_cpu, flags); - - return err; -} - -static int rocker_port_fwding(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags) -{ - bool pop_vlan; - u32 out_pport; - __be16 vlan_id; - u16 vid; - int err; - - /* Port will be forwarding-enabled if its STP state is LEARNING - * or FORWARDING. Traffic from CPU can still egress, regardless of - * port STP state. Use L2 interface group on port VLANs as a way - * to toggle port forwarding: if forwarding is disabled, L2 - * interface group will not exist. - */ - - if (rocker_port->stp_state != BR_STATE_LEARNING && - rocker_port->stp_state != BR_STATE_FORWARDING) - flags |= ROCKER_OP_FLAG_REMOVE; - - out_pport = rocker_port->pport; - for (vid = 1; vid < VLAN_N_VID; vid++) { - if (!test_bit(vid, rocker_port->vlan_bitmap)) - continue; - vlan_id = htons(vid); - pop_vlan = rocker_vlan_id_is_internal(vlan_id); - err = rocker_group_l2_interface(rocker_port, trans, flags, - vlan_id, out_pport, pop_vlan); - if (err) { - netdev_err(rocker_port->dev, - "Error (%d) port VLAN l2 group for pport %d\n", - err, out_pport); - return err; - } - } - - return 0; -} - -static int rocker_port_stp_update(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags, - u8 state) -{ - bool want[ROCKER_CTRL_MAX] = { 0, }; - bool prev_ctrls[ROCKER_CTRL_MAX]; - u8 uninitialized_var(prev_state); - int err; - int i; - - if (switchdev_trans_ph_prepare(trans)) { - memcpy(prev_ctrls, rocker_port->ctrls, sizeof(prev_ctrls)); - prev_state = rocker_port->stp_state; - } - - if (rocker_port->stp_state == state) - return 0; - - rocker_port->stp_state = state; - - switch (state) { - case BR_STATE_DISABLED: - /* port is completely disabled */ - break; - case BR_STATE_LISTENING: - case BR_STATE_BLOCKING: - want[ROCKER_CTRL_LINK_LOCAL_MCAST] = true; - break; - case BR_STATE_LEARNING: - case BR_STATE_FORWARDING: - if (!rocker_port_is_ovsed(rocker_port)) - want[ROCKER_CTRL_LINK_LOCAL_MCAST] = true; - want[ROCKER_CTRL_IPV4_MCAST] = true; - want[ROCKER_CTRL_IPV6_MCAST] = true; - if (rocker_port_is_bridged(rocker_port)) - want[ROCKER_CTRL_DFLT_BRIDGING] = true; - else if (rocker_port_is_ovsed(rocker_port)) - want[ROCKER_CTRL_DFLT_OVS] = true; - else - want[ROCKER_CTRL_LOCAL_ARP] = true; - break; - } - - for (i = 0; i < ROCKER_CTRL_MAX; i++) { - if (want[i] != rocker_port->ctrls[i]) { - int ctrl_flags = flags | - (want[i] ? 0 : ROCKER_OP_FLAG_REMOVE); - err = rocker_port_ctrl(rocker_port, trans, ctrl_flags, - &rocker_ctrls[i]); - if (err) - goto err_out; - rocker_port->ctrls[i] = want[i]; - } - } - - err = rocker_port_fdb_flush(rocker_port, trans, flags); - if (err) - goto err_out; - - err = rocker_port_fwding(rocker_port, trans, flags); - -err_out: - if (switchdev_trans_ph_prepare(trans)) { - memcpy(rocker_port->ctrls, prev_ctrls, sizeof(prev_ctrls)); - rocker_port->stp_state = prev_state; - } - - return err; -} - -static int rocker_port_fwd_enable(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags) -{ - if (rocker_port_is_bridged(rocker_port)) - /* bridge STP will enable port */ - return 0; - - /* port is not bridged, so simulate going to FORWARDING state */ - return rocker_port_stp_update(rocker_port, trans, flags, - BR_STATE_FORWARDING); -} - -static int rocker_port_fwd_disable(struct rocker_port *rocker_port, - struct switchdev_trans *trans, int flags) -{ - if (rocker_port_is_bridged(rocker_port)) - /* bridge STP will disable port */ - return 0; - - /* port is not bridged, so simulate going to DISABLED state */ - return rocker_port_stp_update(rocker_port, trans, flags, - BR_STATE_DISABLED); -} - -static struct rocker_internal_vlan_tbl_entry * -rocker_internal_vlan_tbl_find(const struct rocker *rocker, int ifindex) -{ - struct rocker_internal_vlan_tbl_entry *found; - - hash_for_each_possible(rocker->internal_vlan_tbl, found, - entry, ifindex) { - if (found->ifindex == ifindex) - return found; - } - - return NULL; -} - -static __be16 rocker_port_internal_vlan_id_get(struct rocker_port *rocker_port, - int ifindex) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_internal_vlan_tbl_entry *entry; - struct rocker_internal_vlan_tbl_entry *found; - unsigned long lock_flags; - int i; - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return 0; - - entry->ifindex = ifindex; - - spin_lock_irqsave(&rocker->internal_vlan_tbl_lock, lock_flags); - - found = rocker_internal_vlan_tbl_find(rocker, ifindex); - if (found) { - kfree(entry); - goto found; - } - - found = entry; - hash_add(rocker->internal_vlan_tbl, &found->entry, found->ifindex); - - for (i = 0; i < ROCKER_N_INTERNAL_VLANS; i++) { - if (test_and_set_bit(i, rocker->internal_vlan_bitmap)) - continue; - found->vlan_id = htons(ROCKER_INTERNAL_VLAN_ID_BASE + i); - goto found; - } - - netdev_err(rocker_port->dev, "Out of internal VLAN IDs\n"); - -found: - found->ref_count++; - spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, lock_flags); - - return found->vlan_id; -} - -static void -rocker_port_internal_vlan_id_put(const struct rocker_port *rocker_port, - int ifindex) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_internal_vlan_tbl_entry *found; - unsigned long lock_flags; - unsigned long bit; - - spin_lock_irqsave(&rocker->internal_vlan_tbl_lock, lock_flags); - - found = rocker_internal_vlan_tbl_find(rocker, ifindex); - if (!found) { - netdev_err(rocker_port->dev, - "ifindex (%d) not found in internal VLAN tbl\n", - ifindex); - goto not_found; - } - - if (--found->ref_count <= 0) { - bit = ntohs(found->vlan_id) - ROCKER_INTERNAL_VLAN_ID_BASE; - clear_bit(bit, rocker->internal_vlan_bitmap); - hash_del(&found->entry); - kfree(found); - } - -not_found: - spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, lock_flags); -} - -static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, - struct switchdev_trans *trans, __be32 dst, - int dst_len, const struct fib_info *fi, - u32 tb_id, int flags) -{ - const struct fib_nh *nh; - __be16 eth_type = htons(ETH_P_IP); - __be32 dst_mask = inet_make_mask(dst_len); - __be16 internal_vlan_id = rocker_port->internal_vlan_id; - u32 priority = fi->fib_priority; - enum rocker_of_dpa_table_id goto_tbl = - ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; - u32 group_id; - bool nh_on_port; - bool has_gw; - u32 index; - int err; - - /* XXX support ECMP */ - - nh = fi->fib_nh; - nh_on_port = (fi->fib_dev == rocker_port->dev); - has_gw = !!nh->nh_gw; - - if (has_gw && nh_on_port) { - err = rocker_port_ipv4_nh(rocker_port, trans, flags, - nh->nh_gw, &index); - if (err) - return err; - - group_id = ROCKER_GROUP_L3_UNICAST(index); - } else { - /* Send to CPU for processing */ - group_id = ROCKER_GROUP_L2_INTERFACE(internal_vlan_id, 0); - } - - err = rocker_flow_tbl_ucast4_routing(rocker_port, trans, eth_type, dst, - dst_mask, priority, goto_tbl, - group_id, flags); - if (err) - netdev_err(rocker_port->dev, "Error (%d) IPv4 route %pI4\n", - err, &dst); - - return err; -} - -/***************** - * Net device ops - *****************/ - -static int rocker_port_open(struct net_device *dev) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - int err; - - err = rocker_port_dma_rings_init(rocker_port); - if (err) - return err; - - err = request_irq(rocker_msix_tx_vector(rocker_port), - rocker_tx_irq_handler, 0, - rocker_driver_name, rocker_port); - if (err) { - netdev_err(rocker_port->dev, "cannot assign tx irq\n"); - goto err_request_tx_irq; - } - - err = request_irq(rocker_msix_rx_vector(rocker_port), - rocker_rx_irq_handler, 0, - rocker_driver_name, rocker_port); - if (err) { - netdev_err(rocker_port->dev, "cannot assign rx irq\n"); - goto err_request_rx_irq; - } - - err = rocker_port_fwd_enable(rocker_port, NULL, 0); - if (err) - goto err_fwd_enable; - - napi_enable(&rocker_port->napi_tx); - napi_enable(&rocker_port->napi_rx); - if (!dev->proto_down) - rocker_port_set_enable(rocker_port, true); - netif_start_queue(dev); - return 0; - -err_fwd_enable: - free_irq(rocker_msix_rx_vector(rocker_port), rocker_port); -err_request_rx_irq: - free_irq(rocker_msix_tx_vector(rocker_port), rocker_port); -err_request_tx_irq: - rocker_port_dma_rings_fini(rocker_port); - return err; -} - -static int rocker_port_stop(struct net_device *dev) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - - netif_stop_queue(dev); - rocker_port_set_enable(rocker_port, false); - napi_disable(&rocker_port->napi_rx); - napi_disable(&rocker_port->napi_tx); - rocker_port_fwd_disable(rocker_port, NULL, - ROCKER_OP_FLAG_NOWAIT); - free_irq(rocker_msix_rx_vector(rocker_port), rocker_port); - free_irq(rocker_msix_tx_vector(rocker_port), rocker_port); - rocker_port_dma_rings_fini(rocker_port); - - return 0; -} - -static void rocker_tx_desc_frags_unmap(const struct rocker_port *rocker_port, - const struct rocker_desc_info *desc_info) -{ - const struct rocker *rocker = rocker_port->rocker; - struct pci_dev *pdev = rocker->pdev; - const struct rocker_tlv *attrs[ROCKER_TLV_TX_MAX + 1]; - struct rocker_tlv *attr; - int rem; - - rocker_tlv_parse_desc(attrs, ROCKER_TLV_TX_MAX, desc_info); - if (!attrs[ROCKER_TLV_TX_FRAGS]) - return; - rocker_tlv_for_each_nested(attr, attrs[ROCKER_TLV_TX_FRAGS], rem) { - const struct rocker_tlv *frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_MAX + 1]; - dma_addr_t dma_handle; - size_t len; - - if (rocker_tlv_type(attr) != ROCKER_TLV_TX_FRAG) - continue; - rocker_tlv_parse_nested(frag_attrs, ROCKER_TLV_TX_FRAG_ATTR_MAX, - attr); - if (!frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] || - !frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) - continue; - dma_handle = rocker_tlv_get_u64(frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]); - len = rocker_tlv_get_u16(frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_LEN]); - pci_unmap_single(pdev, dma_handle, len, DMA_TO_DEVICE); - } -} - -static int rocker_tx_desc_frag_map_put(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - char *buf, size_t buf_len) -{ - const struct rocker *rocker = rocker_port->rocker; - struct pci_dev *pdev = rocker->pdev; - dma_addr_t dma_handle; - struct rocker_tlv *frag; - - dma_handle = pci_map_single(pdev, buf, buf_len, DMA_TO_DEVICE); - if (unlikely(pci_dma_mapping_error(pdev, dma_handle))) { - if (net_ratelimit()) - netdev_err(rocker_port->dev, "failed to dma map tx frag\n"); - return -EIO; - } - frag = rocker_tlv_nest_start(desc_info, ROCKER_TLV_TX_FRAG); - if (!frag) - goto unmap_frag; - if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_TX_FRAG_ATTR_ADDR, - dma_handle)) - goto nest_cancel; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_TX_FRAG_ATTR_LEN, - buf_len)) - goto nest_cancel; - rocker_tlv_nest_end(desc_info, frag); - return 0; - -nest_cancel: - rocker_tlv_nest_cancel(desc_info, frag); -unmap_frag: - pci_unmap_single(pdev, dma_handle, buf_len, DMA_TO_DEVICE); - return -EMSGSIZE; -} - -static netdev_tx_t rocker_port_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - struct rocker *rocker = rocker_port->rocker; - struct rocker_desc_info *desc_info; - struct rocker_tlv *frags; - int i; - int err; - - desc_info = rocker_desc_head_get(&rocker_port->tx_ring); - if (unlikely(!desc_info)) { - if (net_ratelimit()) - netdev_err(dev, "tx ring full when queue awake\n"); - return NETDEV_TX_BUSY; - } - - rocker_desc_cookie_ptr_set(desc_info, skb); - - frags = rocker_tlv_nest_start(desc_info, ROCKER_TLV_TX_FRAGS); - if (!frags) - goto out; - err = rocker_tx_desc_frag_map_put(rocker_port, desc_info, - skb->data, skb_headlen(skb)); - if (err) - goto nest_cancel; - if (skb_shinfo(skb)->nr_frags > ROCKER_TX_FRAGS_MAX) { - err = skb_linearize(skb); - if (err) - goto unmap_frags; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - err = rocker_tx_desc_frag_map_put(rocker_port, desc_info, - skb_frag_address(frag), - skb_frag_size(frag)); - if (err) - goto unmap_frags; - } - rocker_tlv_nest_end(desc_info, frags); - - rocker_desc_gen_clear(desc_info); - rocker_desc_head_set(rocker, &rocker_port->tx_ring, desc_info); - - desc_info = rocker_desc_head_get(&rocker_port->tx_ring); - if (!desc_info) - netif_stop_queue(dev); - - return NETDEV_TX_OK; - -unmap_frags: - rocker_tx_desc_frags_unmap(rocker_port, desc_info); -nest_cancel: - rocker_tlv_nest_cancel(desc_info, frags); -out: - dev_kfree_skb(skb); - dev->stats.tx_dropped++; - - return NETDEV_TX_OK; -} - -static int rocker_port_set_mac_address(struct net_device *dev, void *p) -{ - struct sockaddr *addr = p; - struct rocker_port *rocker_port = netdev_priv(dev); - int err; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - - err = rocker_cmd_set_port_settings_macaddr(rocker_port, addr->sa_data); - if (err) - return err; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - return 0; -} - -static int rocker_port_change_mtu(struct net_device *dev, int new_mtu) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - int running = netif_running(dev); - int err; - -#define ROCKER_PORT_MIN_MTU 68 -#define ROCKER_PORT_MAX_MTU 9000 - - if (new_mtu < ROCKER_PORT_MIN_MTU || new_mtu > ROCKER_PORT_MAX_MTU) - return -EINVAL; - - if (running) - rocker_port_stop(dev); - - netdev_info(dev, "MTU change from %d to %d\n", dev->mtu, new_mtu); - dev->mtu = new_mtu; - - err = rocker_cmd_set_port_settings_mtu(rocker_port, new_mtu); - if (err) - return err; - - if (running) - err = rocker_port_open(dev); - - return err; -} - -static int rocker_port_get_phys_port_name(struct net_device *dev, - char *buf, size_t len) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - struct port_name name = { .buf = buf, .len = len }; - int err; - - err = rocker_cmd_exec(rocker_port, NULL, 0, - rocker_cmd_get_port_settings_prep, NULL, - rocker_cmd_get_port_settings_phys_name_proc, - &name); - - return err ? -EOPNOTSUPP : 0; -} - -static int rocker_port_change_proto_down(struct net_device *dev, - bool proto_down) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - - if (rocker_port->dev->flags & IFF_UP) - rocker_port_set_enable(rocker_port, !proto_down); - rocker_port->dev->proto_down = proto_down; - return 0; -} - -static void rocker_port_neigh_destroy(struct neighbour *n) -{ - struct rocker_port *rocker_port = netdev_priv(n->dev); - int flags = ROCKER_OP_FLAG_REMOVE | ROCKER_OP_FLAG_NOWAIT; - __be32 ip_addr = *(__be32 *)n->primary_key; - - rocker_port_ipv4_neigh(rocker_port, NULL, - flags, ip_addr, n->ha); -} - -static const struct net_device_ops rocker_port_netdev_ops = { - .ndo_open = rocker_port_open, - .ndo_stop = rocker_port_stop, - .ndo_start_xmit = rocker_port_xmit, - .ndo_set_mac_address = rocker_port_set_mac_address, - .ndo_change_mtu = rocker_port_change_mtu, - .ndo_bridge_getlink = switchdev_port_bridge_getlink, - .ndo_bridge_setlink = switchdev_port_bridge_setlink, - .ndo_bridge_dellink = switchdev_port_bridge_dellink, - .ndo_fdb_add = switchdev_port_fdb_add, - .ndo_fdb_del = switchdev_port_fdb_del, - .ndo_fdb_dump = switchdev_port_fdb_dump, - .ndo_get_phys_port_name = rocker_port_get_phys_port_name, - .ndo_change_proto_down = rocker_port_change_proto_down, - .ndo_neigh_destroy = rocker_port_neigh_destroy, -}; - -/******************** - * swdev interface - ********************/ - -static int rocker_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - const struct rocker_port *rocker_port = netdev_priv(dev); - const struct rocker *rocker = rocker_port->rocker; - - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(rocker->hw.id); - memcpy(&attr->u.ppid.id, &rocker->hw.id, attr->u.ppid.id_len); - break; - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - attr->u.brport_flags = rocker_port->brport_flags; - break; - default: - return -EOPNOTSUPP; - } - - return 0; -} - -static int rocker_port_brport_flags_set(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - unsigned long brport_flags) -{ - unsigned long orig_flags; - int err = 0; - - orig_flags = rocker_port->brport_flags; - rocker_port->brport_flags = brport_flags; - if ((orig_flags ^ rocker_port->brport_flags) & BR_LEARNING) - err = rocker_port_set_learning(rocker_port, trans); - - if (switchdev_trans_ph_prepare(trans)) - rocker_port->brport_flags = orig_flags; - - return err; -} - -static int rocker_port_bridge_ageing_time(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - u32 ageing_time) -{ - if (!switchdev_trans_ph_prepare(trans)) { - rocker_port->ageing_time = clock_t_to_jiffies(ageing_time); - mod_timer(&rocker_port->rocker->fdb_cleanup_timer, jiffies); - } - - return 0; -} - -static int rocker_port_attr_set(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - int err = 0; - - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - err = rocker_port_stp_update(rocker_port, trans, 0, - attr->u.stp_state); - break; - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - err = rocker_port_brport_flags_set(rocker_port, trans, - attr->u.brport_flags); - break; - case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: - err = rocker_port_bridge_ageing_time(rocker_port, trans, - attr->u.ageing_time); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static int rocker_port_vlan_add(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - u16 vid, u16 flags) -{ - int err; - - /* XXX deal with flags for PVID and untagged */ - - err = rocker_port_vlan(rocker_port, trans, 0, vid); - if (err) - return err; - - err = rocker_port_router_mac(rocker_port, trans, 0, htons(vid)); - if (err) - rocker_port_vlan(rocker_port, trans, - ROCKER_OP_FLAG_REMOVE, vid); - - return err; -} - -static int rocker_port_vlans_add(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - const struct switchdev_obj_port_vlan *vlan) -{ - u16 vid; - int err; - - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - err = rocker_port_vlan_add(rocker_port, trans, - vid, vlan->flags); - if (err) - return err; - } - - return 0; -} - -static int rocker_port_fdb_add(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - const struct switchdev_obj_port_fdb *fdb) -{ - __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, fdb->vid, NULL); - int flags = 0; - - if (!rocker_port_is_bridged(rocker_port)) - return -EINVAL; - - return rocker_port_fdb(rocker_port, trans, fdb->addr, vlan_id, flags); -} - -static int rocker_port_obj_add(struct net_device *dev, - const struct switchdev_obj *obj, - struct switchdev_trans *trans) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - const struct switchdev_obj_ipv4_fib *fib4; - int err = 0; - - switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = rocker_port_vlans_add(rocker_port, trans, - SWITCHDEV_OBJ_PORT_VLAN(obj)); - break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - fib4 = SWITCHDEV_OBJ_IPV4_FIB(obj); - err = rocker_port_fib_ipv4(rocker_port, trans, - htonl(fib4->dst), fib4->dst_len, - &fib4->fi, fib4->tb_id, 0); - break; - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = rocker_port_fdb_add(rocker_port, trans, - SWITCHDEV_OBJ_PORT_FDB(obj)); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static int rocker_port_vlan_del(struct rocker_port *rocker_port, - u16 vid, u16 flags) -{ - int err; - - err = rocker_port_router_mac(rocker_port, NULL, - ROCKER_OP_FLAG_REMOVE, htons(vid)); - if (err) - return err; - - return rocker_port_vlan(rocker_port, NULL, - ROCKER_OP_FLAG_REMOVE, vid); -} - -static int rocker_port_vlans_del(struct rocker_port *rocker_port, - const struct switchdev_obj_port_vlan *vlan) -{ - u16 vid; - int err; - - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - err = rocker_port_vlan_del(rocker_port, vid, vlan->flags); - if (err) - return err; - } - - return 0; -} - -static int rocker_port_fdb_del(struct rocker_port *rocker_port, - struct switchdev_trans *trans, - const struct switchdev_obj_port_fdb *fdb) -{ - __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, fdb->vid, NULL); - int flags = ROCKER_OP_FLAG_REMOVE; - - if (!rocker_port_is_bridged(rocker_port)) - return -EINVAL; - - return rocker_port_fdb(rocker_port, trans, fdb->addr, vlan_id, flags); -} - -static int rocker_port_obj_del(struct net_device *dev, - const struct switchdev_obj *obj) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - const struct switchdev_obj_ipv4_fib *fib4; - int err = 0; - - switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = rocker_port_vlans_del(rocker_port, - SWITCHDEV_OBJ_PORT_VLAN(obj)); - break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - fib4 = SWITCHDEV_OBJ_IPV4_FIB(obj); - err = rocker_port_fib_ipv4(rocker_port, NULL, - htonl(fib4->dst), fib4->dst_len, - &fib4->fi, fib4->tb_id, - ROCKER_OP_FLAG_REMOVE); - break; - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = rocker_port_fdb_del(rocker_port, NULL, - SWITCHDEV_OBJ_PORT_FDB(obj)); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, - struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb) -{ - struct rocker *rocker = rocker_port->rocker; - struct rocker_fdb_tbl_entry *found; - struct hlist_node *tmp; - unsigned long lock_flags; - int bkt; - int err = 0; - - spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); - hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { - if (found->key.rocker_port != rocker_port) - continue; - ether_addr_copy(fdb->addr, found->key.addr); - fdb->ndm_state = NUD_REACHABLE; - fdb->vid = rocker_port_vlan_to_vid(rocker_port, - found->key.vlan_id); - err = cb(&fdb->obj); - if (err) - break; - } - spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); - - return err; -} - -static int rocker_port_vlan_dump(const struct rocker_port *rocker_port, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb) -{ - u16 vid; - int err = 0; - - for (vid = 1; vid < VLAN_N_VID; vid++) { - if (!test_bit(vid, rocker_port->vlan_bitmap)) - continue; - vlan->flags = 0; - if (rocker_vlan_id_is_internal(htons(vid))) - vlan->flags |= BRIDGE_VLAN_INFO_PVID; - vlan->vid_begin = vlan->vid_end = vid; - err = cb(&vlan->obj); - if (err) - break; - } - - return err; -} - -static int rocker_port_obj_dump(struct net_device *dev, - struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb) -{ - const struct rocker_port *rocker_port = netdev_priv(dev); - int err = 0; - - switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = rocker_port_fdb_dump(rocker_port, - SWITCHDEV_OBJ_PORT_FDB(obj), cb); - break; - case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = rocker_port_vlan_dump(rocker_port, - SWITCHDEV_OBJ_PORT_VLAN(obj), cb); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static const struct switchdev_ops rocker_port_switchdev_ops = { - .switchdev_port_attr_get = rocker_port_attr_get, - .switchdev_port_attr_set = rocker_port_attr_set, - .switchdev_port_obj_add = rocker_port_obj_add, - .switchdev_port_obj_del = rocker_port_obj_del, - .switchdev_port_obj_dump = rocker_port_obj_dump, -}; - -/******************** - * ethtool interface - ********************/ - -static int rocker_port_get_settings(struct net_device *dev, - struct ethtool_cmd *ecmd) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - - return rocker_cmd_get_port_settings_ethtool(rocker_port, ecmd); -} - -static int rocker_port_set_settings(struct net_device *dev, - struct ethtool_cmd *ecmd) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - - return rocker_cmd_set_port_settings_ethtool(rocker_port, ecmd); -} - -static void rocker_port_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *drvinfo) -{ - strlcpy(drvinfo->driver, rocker_driver_name, sizeof(drvinfo->driver)); - strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); -} - -static struct rocker_port_stats { - char str[ETH_GSTRING_LEN]; - int type; -} rocker_port_stats[] = { - { "rx_packets", ROCKER_TLV_CMD_PORT_STATS_RX_PKTS, }, - { "rx_bytes", ROCKER_TLV_CMD_PORT_STATS_RX_BYTES, }, - { "rx_dropped", ROCKER_TLV_CMD_PORT_STATS_RX_DROPPED, }, - { "rx_errors", ROCKER_TLV_CMD_PORT_STATS_RX_ERRORS, }, - - { "tx_packets", ROCKER_TLV_CMD_PORT_STATS_TX_PKTS, }, - { "tx_bytes", ROCKER_TLV_CMD_PORT_STATS_TX_BYTES, }, - { "tx_dropped", ROCKER_TLV_CMD_PORT_STATS_TX_DROPPED, }, - { "tx_errors", ROCKER_TLV_CMD_PORT_STATS_TX_ERRORS, }, -}; - -#define ROCKER_PORT_STATS_LEN ARRAY_SIZE(rocker_port_stats) - -static void rocker_port_get_strings(struct net_device *netdev, u32 stringset, - u8 *data) -{ - u8 *p = data; - int i; - - switch (stringset) { - case ETH_SS_STATS: - for (i = 0; i < ARRAY_SIZE(rocker_port_stats); i++) { - memcpy(p, rocker_port_stats[i].str, ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } - break; - } -} - -static int -rocker_cmd_get_port_stats_prep(const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info, - void *priv) -{ - struct rocker_tlv *cmd_stats; - - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, - ROCKER_TLV_CMD_TYPE_GET_PORT_STATS)) - return -EMSGSIZE; - - cmd_stats = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); - if (!cmd_stats) - return -EMSGSIZE; - - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_STATS_PPORT, - rocker_port->pport)) - return -EMSGSIZE; - - rocker_tlv_nest_end(desc_info, cmd_stats); - - return 0; -} - -static int -rocker_cmd_get_port_stats_ethtool_proc(const struct rocker_port *rocker_port, - const struct rocker_desc_info *desc_info, - void *priv) -{ - const struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1]; - const struct rocker_tlv *stats_attrs[ROCKER_TLV_CMD_PORT_STATS_MAX + 1]; - const struct rocker_tlv *pattr; - u32 pport; - u64 *data = priv; - int i; - - rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info); - - if (!attrs[ROCKER_TLV_CMD_INFO]) - return -EIO; - - rocker_tlv_parse_nested(stats_attrs, ROCKER_TLV_CMD_PORT_STATS_MAX, - attrs[ROCKER_TLV_CMD_INFO]); - - if (!stats_attrs[ROCKER_TLV_CMD_PORT_STATS_PPORT]) - return -EIO; - - pport = rocker_tlv_get_u32(stats_attrs[ROCKER_TLV_CMD_PORT_STATS_PPORT]); - if (pport != rocker_port->pport) - return -EIO; - - for (i = 0; i < ARRAY_SIZE(rocker_port_stats); i++) { - pattr = stats_attrs[rocker_port_stats[i].type]; - if (!pattr) - continue; - - data[i] = rocker_tlv_get_u64(pattr); - } - - return 0; -} - -static int rocker_cmd_get_port_stats_ethtool(struct rocker_port *rocker_port, - void *priv) -{ - return rocker_cmd_exec(rocker_port, NULL, 0, - rocker_cmd_get_port_stats_prep, NULL, - rocker_cmd_get_port_stats_ethtool_proc, - priv); -} - -static void rocker_port_get_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - - if (rocker_cmd_get_port_stats_ethtool(rocker_port, data) != 0) { - int i; - - for (i = 0; i < ARRAY_SIZE(rocker_port_stats); ++i) - data[i] = 0; - } -} - -static int rocker_port_get_sset_count(struct net_device *netdev, int sset) -{ - switch (sset) { - case ETH_SS_STATS: - return ROCKER_PORT_STATS_LEN; - default: - return -EOPNOTSUPP; - } -} - -static const struct ethtool_ops rocker_port_ethtool_ops = { - .get_settings = rocker_port_get_settings, - .set_settings = rocker_port_set_settings, - .get_drvinfo = rocker_port_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_strings = rocker_port_get_strings, - .get_ethtool_stats = rocker_port_get_stats, - .get_sset_count = rocker_port_get_sset_count, -}; - -/***************** - * NAPI interface - *****************/ - -static struct rocker_port *rocker_port_napi_tx_get(struct napi_struct *napi) -{ - return container_of(napi, struct rocker_port, napi_tx); -} - -static int rocker_port_poll_tx(struct napi_struct *napi, int budget) -{ - struct rocker_port *rocker_port = rocker_port_napi_tx_get(napi); - const struct rocker *rocker = rocker_port->rocker; - const struct rocker_desc_info *desc_info; - u32 credits = 0; - int err; - - /* Cleanup tx descriptors */ - while ((desc_info = rocker_desc_tail_get(&rocker_port->tx_ring))) { - struct sk_buff *skb; - - err = rocker_desc_err(desc_info); - if (err && net_ratelimit()) - netdev_err(rocker_port->dev, "tx desc received with err %d\n", - err); - rocker_tx_desc_frags_unmap(rocker_port, desc_info); - - skb = rocker_desc_cookie_ptr_get(desc_info); - if (err == 0) { - rocker_port->dev->stats.tx_packets++; - rocker_port->dev->stats.tx_bytes += skb->len; - } else { - rocker_port->dev->stats.tx_errors++; - } - - dev_kfree_skb_any(skb); - credits++; - } - - if (credits && netif_queue_stopped(rocker_port->dev)) - netif_wake_queue(rocker_port->dev); - - napi_complete(napi); - rocker_dma_ring_credits_set(rocker, &rocker_port->tx_ring, credits); - - return 0; -} - -static int rocker_port_rx_proc(const struct rocker *rocker, - const struct rocker_port *rocker_port, - struct rocker_desc_info *desc_info) -{ - const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1]; - struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info); - size_t rx_len; - u16 rx_flags = 0; - - if (!skb) - return -ENOENT; - - rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info); - if (!attrs[ROCKER_TLV_RX_FRAG_LEN]) - return -EINVAL; - if (attrs[ROCKER_TLV_RX_FLAGS]) - rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]); - - rocker_dma_rx_ring_skb_unmap(rocker, attrs); - - rx_len = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FRAG_LEN]); - skb_put(skb, rx_len); - skb->protocol = eth_type_trans(skb, rocker_port->dev); - - if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) - skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; - - rocker_port->dev->stats.rx_packets++; - rocker_port->dev->stats.rx_bytes += skb->len; - - netif_receive_skb(skb); - - return rocker_dma_rx_ring_skb_alloc(rocker_port, desc_info); -} - -static struct rocker_port *rocker_port_napi_rx_get(struct napi_struct *napi) -{ - return container_of(napi, struct rocker_port, napi_rx); -} - -static int rocker_port_poll_rx(struct napi_struct *napi, int budget) -{ - struct rocker_port *rocker_port = rocker_port_napi_rx_get(napi); - const struct rocker *rocker = rocker_port->rocker; - struct rocker_desc_info *desc_info; - u32 credits = 0; - int err; - - /* Process rx descriptors */ - while (credits < budget && - (desc_info = rocker_desc_tail_get(&rocker_port->rx_ring))) { - err = rocker_desc_err(desc_info); - if (err) { - if (net_ratelimit()) - netdev_err(rocker_port->dev, "rx desc received with err %d\n", - err); - } else { - err = rocker_port_rx_proc(rocker, rocker_port, - desc_info); - if (err && net_ratelimit()) - netdev_err(rocker_port->dev, "rx processing failed with err %d\n", - err); - } - if (err) - rocker_port->dev->stats.rx_errors++; - - rocker_desc_gen_clear(desc_info); - rocker_desc_head_set(rocker, &rocker_port->rx_ring, desc_info); - credits++; - } - - if (credits < budget) - napi_complete(napi); - - rocker_dma_ring_credits_set(rocker, &rocker_port->rx_ring, credits); - - return credits; -} - -/***************** - * PCI driver ops - *****************/ - -static void rocker_carrier_init(const struct rocker_port *rocker_port) -{ - const struct rocker *rocker = rocker_port->rocker; - u64 link_status = rocker_read64(rocker, PORT_PHYS_LINK_STATUS); - bool link_up; - - link_up = link_status & (1 << rocker_port->pport); - if (link_up) - netif_carrier_on(rocker_port->dev); - else - netif_carrier_off(rocker_port->dev); -} - -static void rocker_remove_ports(const struct rocker *rocker) -{ - struct rocker_port *rocker_port; - int i; - - for (i = 0; i < rocker->port_count; i++) { - rocker_port = rocker->ports[i]; - if (!rocker_port) - continue; - rocker_port_ig_tbl(rocker_port, NULL, ROCKER_OP_FLAG_REMOVE); - unregister_netdev(rocker_port->dev); - free_netdev(rocker_port->dev); - } - kfree(rocker->ports); -} - -static void rocker_port_dev_addr_init(struct rocker_port *rocker_port) -{ - const struct rocker *rocker = rocker_port->rocker; - const struct pci_dev *pdev = rocker->pdev; - int err; - - err = rocker_cmd_get_port_settings_macaddr(rocker_port, - rocker_port->dev->dev_addr); - if (err) { - dev_warn(&pdev->dev, "failed to get mac address, using random\n"); - eth_hw_addr_random(rocker_port->dev); - } -} - -static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) -{ - const struct pci_dev *pdev = rocker->pdev; - struct rocker_port *rocker_port; - struct net_device *dev; - u16 untagged_vid = 0; - int err; - - dev = alloc_etherdev(sizeof(struct rocker_port)); - if (!dev) - return -ENOMEM; - rocker_port = netdev_priv(dev); - rocker_port->dev = dev; - rocker_port->rocker = rocker; - rocker_port->port_number = port_number; - rocker_port->pport = port_number + 1; - rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC; - rocker_port->ageing_time = BR_DEFAULT_AGEING_TIME; - - rocker_port_dev_addr_init(rocker_port); - dev->netdev_ops = &rocker_port_netdev_ops; - dev->ethtool_ops = &rocker_port_ethtool_ops; - dev->switchdev_ops = &rocker_port_switchdev_ops; - netif_tx_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, - NAPI_POLL_WEIGHT); - netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx, - NAPI_POLL_WEIGHT); - rocker_carrier_init(rocker_port); - - dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_SG; - - err = register_netdev(dev); - if (err) { - dev_err(&pdev->dev, "register_netdev failed\n"); - goto err_register_netdev; - } - rocker->ports[port_number] = rocker_port; - - switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false); - - rocker_port_set_learning(rocker_port, NULL); - - err = rocker_port_ig_tbl(rocker_port, NULL, 0); - if (err) { - netdev_err(rocker_port->dev, "install ig port table failed\n"); - goto err_port_ig_tbl; - } - - rocker_port->internal_vlan_id = - rocker_port_internal_vlan_id_get(rocker_port, dev->ifindex); - - err = rocker_port_vlan_add(rocker_port, NULL, untagged_vid, 0); - if (err) { - netdev_err(rocker_port->dev, "install untagged VLAN failed\n"); - goto err_untagged_vlan; - } - - return 0; - -err_untagged_vlan: - rocker_port_ig_tbl(rocker_port, NULL, ROCKER_OP_FLAG_REMOVE); -err_port_ig_tbl: - rocker->ports[port_number] = NULL; - unregister_netdev(dev); -err_register_netdev: - free_netdev(dev); - return err; -} - -static int rocker_probe_ports(struct rocker *rocker) -{ - int i; - size_t alloc_size; - int err; - - alloc_size = sizeof(struct rocker_port *) * rocker->port_count; - rocker->ports = kzalloc(alloc_size, GFP_KERNEL); - if (!rocker->ports) - return -ENOMEM; - for (i = 0; i < rocker->port_count; i++) { - err = rocker_probe_port(rocker, i); - if (err) - goto remove_ports; - } - return 0; - -remove_ports: - rocker_remove_ports(rocker); - return err; -} - -static int rocker_msix_init(struct rocker *rocker) -{ - struct pci_dev *pdev = rocker->pdev; - int msix_entries; - int i; - int err; - - msix_entries = pci_msix_vec_count(pdev); - if (msix_entries < 0) - return msix_entries; - - if (msix_entries != ROCKER_MSIX_VEC_COUNT(rocker->port_count)) - return -EINVAL; - - rocker->msix_entries = kmalloc_array(msix_entries, - sizeof(struct msix_entry), - GFP_KERNEL); - if (!rocker->msix_entries) - return -ENOMEM; - - for (i = 0; i < msix_entries; i++) - rocker->msix_entries[i].entry = i; - - err = pci_enable_msix_exact(pdev, rocker->msix_entries, msix_entries); - if (err < 0) - goto err_enable_msix; - - return 0; - -err_enable_msix: - kfree(rocker->msix_entries); - return err; -} - -static void rocker_msix_fini(const struct rocker *rocker) -{ - pci_disable_msix(rocker->pdev); - kfree(rocker->msix_entries); -} - -static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - struct rocker *rocker; - int err; - - rocker = kzalloc(sizeof(*rocker), GFP_KERNEL); - if (!rocker) - return -ENOMEM; - - err = pci_enable_device(pdev); - if (err) { - dev_err(&pdev->dev, "pci_enable_device failed\n"); - goto err_pci_enable_device; - } - - err = pci_request_regions(pdev, rocker_driver_name); - if (err) { - dev_err(&pdev->dev, "pci_request_regions failed\n"); - goto err_pci_request_regions; - } - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (!err) { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) { - dev_err(&pdev->dev, "pci_set_consistent_dma_mask failed\n"); - goto err_pci_set_dma_mask; - } - } else { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "pci_set_dma_mask failed\n"); - goto err_pci_set_dma_mask; - } - } - - if (pci_resource_len(pdev, 0) < ROCKER_PCI_BAR0_SIZE) { - dev_err(&pdev->dev, "invalid PCI region size\n"); - err = -EINVAL; - goto err_pci_resource_len_check; - } - - rocker->hw_addr = ioremap(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - if (!rocker->hw_addr) { - dev_err(&pdev->dev, "ioremap failed\n"); - err = -EIO; - goto err_ioremap; - } - pci_set_master(pdev); - - rocker->pdev = pdev; - pci_set_drvdata(pdev, rocker); - - rocker->port_count = rocker_read32(rocker, PORT_PHYS_COUNT); - - err = rocker_msix_init(rocker); - if (err) { - dev_err(&pdev->dev, "MSI-X init failed\n"); - goto err_msix_init; - } - - err = rocker_basic_hw_test(rocker); - if (err) { - dev_err(&pdev->dev, "basic hw test failed\n"); - goto err_basic_hw_test; - } - - rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); - - err = rocker_dma_rings_init(rocker); - if (err) - goto err_dma_rings_init; - - err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), - rocker_cmd_irq_handler, 0, - rocker_driver_name, rocker); - if (err) { - dev_err(&pdev->dev, "cannot assign cmd irq\n"); - goto err_request_cmd_irq; - } - - err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), - rocker_event_irq_handler, 0, - rocker_driver_name, rocker); - if (err) { - dev_err(&pdev->dev, "cannot assign event irq\n"); - goto err_request_event_irq; - } - - rocker->hw.id = rocker_read64(rocker, SWITCH_ID); - - err = rocker_init_tbls(rocker); - if (err) { - dev_err(&pdev->dev, "cannot init rocker tables\n"); - goto err_init_tbls; - } - - setup_timer(&rocker->fdb_cleanup_timer, rocker_fdb_cleanup, - (unsigned long) rocker); - mod_timer(&rocker->fdb_cleanup_timer, jiffies); - - err = rocker_probe_ports(rocker); - if (err) { - dev_err(&pdev->dev, "failed to probe ports\n"); - goto err_probe_ports; - } - - dev_info(&pdev->dev, "Rocker switch with id %*phN\n", - (int)sizeof(rocker->hw.id), &rocker->hw.id); - - return 0; - -err_probe_ports: - del_timer_sync(&rocker->fdb_cleanup_timer); - rocker_free_tbls(rocker); -err_init_tbls: - free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); -err_request_event_irq: - free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker); -err_request_cmd_irq: - rocker_dma_rings_fini(rocker); -err_dma_rings_init: -err_basic_hw_test: - rocker_msix_fini(rocker); -err_msix_init: - iounmap(rocker->hw_addr); -err_ioremap: -err_pci_resource_len_check: -err_pci_set_dma_mask: - pci_release_regions(pdev); -err_pci_request_regions: - pci_disable_device(pdev); -err_pci_enable_device: - kfree(rocker); - return err; -} - -static void rocker_remove(struct pci_dev *pdev) -{ - struct rocker *rocker = pci_get_drvdata(pdev); - - del_timer_sync(&rocker->fdb_cleanup_timer); - rocker_free_tbls(rocker); - rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); - rocker_remove_ports(rocker); - free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); - free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker); - rocker_dma_rings_fini(rocker); - rocker_msix_fini(rocker); - iounmap(rocker->hw_addr); - pci_release_regions(rocker->pdev); - pci_disable_device(rocker->pdev); - kfree(rocker); -} - -static struct pci_driver rocker_pci_driver = { - .name = rocker_driver_name, - .id_table = rocker_pci_id_table, - .probe = rocker_probe, - .remove = rocker_remove, -}; - -/************************************ - * Net device notifier event handler - ************************************/ - -static bool rocker_port_dev_check(const struct net_device *dev) -{ - return dev->netdev_ops == &rocker_port_netdev_ops; -} - -static int rocker_port_bridge_join(struct rocker_port *rocker_port, - struct net_device *bridge) -{ - u16 untagged_vid = 0; - int err; - - /* Port is joining bridge, so the internal VLAN for the - * port is going to change to the bridge internal VLAN. - * Let's remove untagged VLAN (vid=0) from port and - * re-add once internal VLAN has changed. - */ - - err = rocker_port_vlan_del(rocker_port, untagged_vid, 0); - if (err) - return err; - - rocker_port_internal_vlan_id_put(rocker_port, - rocker_port->dev->ifindex); - rocker_port->internal_vlan_id = - rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex); - - rocker_port->bridge_dev = bridge; - switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true); - - return rocker_port_vlan_add(rocker_port, NULL, untagged_vid, 0); -} - -static int rocker_port_bridge_leave(struct rocker_port *rocker_port) -{ - u16 untagged_vid = 0; - int err; - - err = rocker_port_vlan_del(rocker_port, untagged_vid, 0); - if (err) - return err; - - rocker_port_internal_vlan_id_put(rocker_port, - rocker_port->bridge_dev->ifindex); - rocker_port->internal_vlan_id = - rocker_port_internal_vlan_id_get(rocker_port, - rocker_port->dev->ifindex); - - switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev, - false); - rocker_port->bridge_dev = NULL; - - err = rocker_port_vlan_add(rocker_port, NULL, untagged_vid, 0); - if (err) - return err; - - if (rocker_port->dev->flags & IFF_UP) - err = rocker_port_fwd_enable(rocker_port, NULL, 0); - - return err; -} - - -static int rocker_port_ovs_changed(struct rocker_port *rocker_port, - struct net_device *master) -{ - int err; - - rocker_port->bridge_dev = master; - - err = rocker_port_fwd_disable(rocker_port, NULL, 0); - if (err) - return err; - err = rocker_port_fwd_enable(rocker_port, NULL, 0); - - return err; -} - -static int rocker_port_master_linked(struct rocker_port *rocker_port, - struct net_device *master) -{ - int err = 0; - - if (netif_is_bridge_master(master)) - err = rocker_port_bridge_join(rocker_port, master); - else if (netif_is_ovs_master(master)) - err = rocker_port_ovs_changed(rocker_port, master); - return err; -} - -static int rocker_port_master_unlinked(struct rocker_port *rocker_port) -{ - int err = 0; - - if (rocker_port_is_bridged(rocker_port)) - err = rocker_port_bridge_leave(rocker_port); - else if (rocker_port_is_ovsed(rocker_port)) - err = rocker_port_ovs_changed(rocker_port, NULL); - return err; -} - -static int rocker_netdevice_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info; - struct rocker_port *rocker_port; - int err; - - if (!rocker_port_dev_check(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_CHANGEUPPER: - info = ptr; - if (!info->master) - goto out; - rocker_port = netdev_priv(dev); - if (info->linking) { - err = rocker_port_master_linked(rocker_port, - info->upper_dev); - if (err) - netdev_warn(dev, "failed to reflect master linked (err %d)\n", - err); - } else { - err = rocker_port_master_unlinked(rocker_port); - if (err) - netdev_warn(dev, "failed to reflect master unlinked (err %d)\n", - err); - } - break; - } -out: - return NOTIFY_DONE; -} - -static struct notifier_block rocker_netdevice_nb __read_mostly = { - .notifier_call = rocker_netdevice_event, -}; - -/************************************ - * Net event notifier event handler - ************************************/ - -static int rocker_neigh_update(struct net_device *dev, struct neighbour *n) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - int flags = (n->nud_state & NUD_VALID ? 0 : ROCKER_OP_FLAG_REMOVE) | - ROCKER_OP_FLAG_NOWAIT; - __be32 ip_addr = *(__be32 *)n->primary_key; - - return rocker_port_ipv4_neigh(rocker_port, NULL, flags, ip_addr, n->ha); -} - -static int rocker_netevent_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev; - struct neighbour *n = ptr; - int err; - - switch (event) { - case NETEVENT_NEIGH_UPDATE: - if (n->tbl != &arp_tbl) - return NOTIFY_DONE; - dev = n->dev; - if (!rocker_port_dev_check(dev)) - return NOTIFY_DONE; - err = rocker_neigh_update(dev, n); - if (err) - netdev_warn(dev, - "failed to handle neigh update (err %d)\n", - err); - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block rocker_netevent_nb __read_mostly = { - .notifier_call = rocker_netevent_event, -}; - -/*********************** - * Module init and exit - ***********************/ - -static int __init rocker_module_init(void) -{ - int err; - - register_netdevice_notifier(&rocker_netdevice_nb); - register_netevent_notifier(&rocker_netevent_nb); - err = pci_register_driver(&rocker_pci_driver); - if (err) - goto err_pci_register_driver; - return 0; - -err_pci_register_driver: - unregister_netevent_notifier(&rocker_netevent_nb); - unregister_netdevice_notifier(&rocker_netdevice_nb); - return err; -} - -static void __exit rocker_module_exit(void) -{ - unregister_netevent_notifier(&rocker_netevent_nb); - unregister_netdevice_notifier(&rocker_netdevice_nb); - pci_unregister_driver(&rocker_pci_driver); -} - -module_init(rocker_module_init); -module_exit(rocker_module_exit); - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); -MODULE_AUTHOR("Scott Feldman <sfeldma@gmail.com>"); -MODULE_DESCRIPTION("Rocker switch device driver"); -MODULE_DEVICE_TABLE(pci, rocker_pci_id_table); diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 12490b2f6504..1ab995f7146b 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -1,6 +1,6 @@ /* * drivers/net/ethernet/rocker/rocker.h - Rocker switch device driver - * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -12,456 +12,137 @@ #ifndef _ROCKER_H #define _ROCKER_H +#include <linux/kernel.h> #include <linux/types.h> - -/* Return codes */ -enum { - ROCKER_OK = 0, - ROCKER_ENOENT = 2, - ROCKER_ENXIO = 6, - ROCKER_ENOMEM = 12, - ROCKER_EEXIST = 17, - ROCKER_EINVAL = 22, - ROCKER_EMSGSIZE = 90, - ROCKER_ENOTSUP = 95, - ROCKER_ENOBUFS = 105, -}; - -#define ROCKER_FP_PORTS_MAX 62 - -#define PCI_VENDOR_ID_REDHAT 0x1b36 -#define PCI_DEVICE_ID_REDHAT_ROCKER 0x0006 - -#define ROCKER_PCI_BAR0_SIZE 0x2000 - -/* MSI-X vectors */ -enum { - ROCKER_MSIX_VEC_CMD, - ROCKER_MSIX_VEC_EVENT, - ROCKER_MSIX_VEC_TEST, - ROCKER_MSIX_VEC_RESERVED0, - __ROCKER_MSIX_VEC_TX, - __ROCKER_MSIX_VEC_RX, -#define ROCKER_MSIX_VEC_TX(port) \ - (__ROCKER_MSIX_VEC_TX + ((port) * 2)) -#define ROCKER_MSIX_VEC_RX(port) \ - (__ROCKER_MSIX_VEC_RX + ((port) * 2)) -#define ROCKER_MSIX_VEC_COUNT(portcnt) \ - (ROCKER_MSIX_VEC_RX((portcnt - 1)) + 1) -}; - -/* Rocker bogus registers */ -#define ROCKER_BOGUS_REG0 0x0000 -#define ROCKER_BOGUS_REG1 0x0004 -#define ROCKER_BOGUS_REG2 0x0008 -#define ROCKER_BOGUS_REG3 0x000c - -/* Rocker test registers */ -#define ROCKER_TEST_REG 0x0010 -#define ROCKER_TEST_REG64 0x0018 /* 8-byte */ -#define ROCKER_TEST_IRQ 0x0020 -#define ROCKER_TEST_DMA_ADDR 0x0028 /* 8-byte */ -#define ROCKER_TEST_DMA_SIZE 0x0030 -#define ROCKER_TEST_DMA_CTRL 0x0034 - -/* Rocker test register ctrl */ -#define ROCKER_TEST_DMA_CTRL_CLEAR BIT(0) -#define ROCKER_TEST_DMA_CTRL_FILL BIT(1) -#define ROCKER_TEST_DMA_CTRL_INVERT BIT(2) - -/* Rocker DMA ring register offsets */ -#define ROCKER_DMA_DESC_ADDR(x) (0x1000 + (x) * 32) /* 8-byte */ -#define ROCKER_DMA_DESC_SIZE(x) (0x1008 + (x) * 32) -#define ROCKER_DMA_DESC_HEAD(x) (0x100c + (x) * 32) -#define ROCKER_DMA_DESC_TAIL(x) (0x1010 + (x) * 32) -#define ROCKER_DMA_DESC_CTRL(x) (0x1014 + (x) * 32) -#define ROCKER_DMA_DESC_CREDITS(x) (0x1018 + (x) * 32) -#define ROCKER_DMA_DESC_RES1(x) (0x101c + (x) * 32) - -/* Rocker dma ctrl register bits */ -#define ROCKER_DMA_DESC_CTRL_RESET BIT(0) - -/* Rocker DMA ring types */ -enum rocker_dma_type { - ROCKER_DMA_CMD, - ROCKER_DMA_EVENT, - __ROCKER_DMA_TX, - __ROCKER_DMA_RX, -#define ROCKER_DMA_TX(port) (__ROCKER_DMA_TX + (port) * 2) -#define ROCKER_DMA_RX(port) (__ROCKER_DMA_RX + (port) * 2) -}; - -/* Rocker DMA ring size limits and default sizes */ -#define ROCKER_DMA_SIZE_MIN 2ul -#define ROCKER_DMA_SIZE_MAX 65536ul -#define ROCKER_DMA_CMD_DEFAULT_SIZE 32ul -#define ROCKER_DMA_EVENT_DEFAULT_SIZE 32ul -#define ROCKER_DMA_TX_DEFAULT_SIZE 64ul -#define ROCKER_DMA_TX_DESC_SIZE 256 -#define ROCKER_DMA_RX_DEFAULT_SIZE 64ul -#define ROCKER_DMA_RX_DESC_SIZE 256 - -/* Rocker DMA descriptor struct */ -struct rocker_desc { - u64 buf_addr; - u64 cookie; - u16 buf_size; - u16 tlv_size; - u16 resv[5]; - u16 comp_err; -}; - -#define ROCKER_DMA_DESC_COMP_ERR_GEN BIT(15) - -/* Rocker DMA TLV struct */ -struct rocker_tlv { - u32 type; - u16 len; -}; - -/* TLVs */ -enum { - ROCKER_TLV_CMD_UNSPEC, - ROCKER_TLV_CMD_TYPE, /* u16 */ - ROCKER_TLV_CMD_INFO, /* nest */ - - __ROCKER_TLV_CMD_MAX, - ROCKER_TLV_CMD_MAX = __ROCKER_TLV_CMD_MAX - 1, -}; - -enum { - ROCKER_TLV_CMD_TYPE_UNSPEC, - ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS, - ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS, - ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD, - ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD, - ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL, - ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS, - ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD, - ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD, - ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL, - ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS, - - ROCKER_TLV_CMD_TYPE_CLEAR_PORT_STATS, - ROCKER_TLV_CMD_TYPE_GET_PORT_STATS, - - __ROCKER_TLV_CMD_TYPE_MAX, - ROCKER_TLV_CMD_TYPE_MAX = __ROCKER_TLV_CMD_TYPE_MAX - 1, -}; - -enum { - ROCKER_TLV_CMD_PORT_SETTINGS_UNSPEC, - ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, /* u32 */ - ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, /* u32 */ - ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, /* u8 */ - ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, /* u8 */ - ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, /* binary */ - ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */ - ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, /* u8 */ - ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME, /* binary */ - ROCKER_TLV_CMD_PORT_SETTINGS_MTU, /* u16 */ - - __ROCKER_TLV_CMD_PORT_SETTINGS_MAX, - ROCKER_TLV_CMD_PORT_SETTINGS_MAX = - __ROCKER_TLV_CMD_PORT_SETTINGS_MAX - 1, -}; - -enum { - ROCKER_TLV_CMD_PORT_STATS_UNSPEC, - ROCKER_TLV_CMD_PORT_STATS_PPORT, /* u32 */ - - ROCKER_TLV_CMD_PORT_STATS_RX_PKTS, /* u64 */ - ROCKER_TLV_CMD_PORT_STATS_RX_BYTES, /* u64 */ - ROCKER_TLV_CMD_PORT_STATS_RX_DROPPED, /* u64 */ - ROCKER_TLV_CMD_PORT_STATS_RX_ERRORS, /* u64 */ - - ROCKER_TLV_CMD_PORT_STATS_TX_PKTS, /* u64 */ - ROCKER_TLV_CMD_PORT_STATS_TX_BYTES, /* u64 */ - ROCKER_TLV_CMD_PORT_STATS_TX_DROPPED, /* u64 */ - ROCKER_TLV_CMD_PORT_STATS_TX_ERRORS, /* u64 */ - - __ROCKER_TLV_CMD_PORT_STATS_MAX, - ROCKER_TLV_CMD_PORT_STATS_MAX = __ROCKER_TLV_CMD_PORT_STATS_MAX - 1, -}; - -enum rocker_port_mode { - ROCKER_PORT_MODE_OF_DPA, -}; - -enum { - ROCKER_TLV_EVENT_UNSPEC, - ROCKER_TLV_EVENT_TYPE, /* u16 */ - ROCKER_TLV_EVENT_INFO, /* nest */ - - __ROCKER_TLV_EVENT_MAX, - ROCKER_TLV_EVENT_MAX = __ROCKER_TLV_EVENT_MAX - 1, -}; - -enum { - ROCKER_TLV_EVENT_TYPE_UNSPEC, - ROCKER_TLV_EVENT_TYPE_LINK_CHANGED, - ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN, - - __ROCKER_TLV_EVENT_TYPE_MAX, - ROCKER_TLV_EVENT_TYPE_MAX = __ROCKER_TLV_EVENT_TYPE_MAX - 1, -}; - -enum { - ROCKER_TLV_EVENT_LINK_CHANGED_UNSPEC, - ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, /* u32 */ - ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, /* u8 */ - - __ROCKER_TLV_EVENT_LINK_CHANGED_MAX, - ROCKER_TLV_EVENT_LINK_CHANGED_MAX = - __ROCKER_TLV_EVENT_LINK_CHANGED_MAX - 1, -}; - -enum { - ROCKER_TLV_EVENT_MAC_VLAN_UNSPEC, - ROCKER_TLV_EVENT_MAC_VLAN_PPORT, /* u32 */ - ROCKER_TLV_EVENT_MAC_VLAN_MAC, /* binary */ - ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, /* __be16 */ - - __ROCKER_TLV_EVENT_MAC_VLAN_MAX, - ROCKER_TLV_EVENT_MAC_VLAN_MAX = __ROCKER_TLV_EVENT_MAC_VLAN_MAX - 1, -}; - -enum { - ROCKER_TLV_RX_UNSPEC, - ROCKER_TLV_RX_FLAGS, /* u16, see ROCKER_RX_FLAGS_ */ - ROCKER_TLV_RX_CSUM, /* u16 */ - ROCKER_TLV_RX_FRAG_ADDR, /* u64 */ - ROCKER_TLV_RX_FRAG_MAX_LEN, /* u16 */ - ROCKER_TLV_RX_FRAG_LEN, /* u16 */ - - __ROCKER_TLV_RX_MAX, - ROCKER_TLV_RX_MAX = __ROCKER_TLV_RX_MAX - 1, -}; - -#define ROCKER_RX_FLAGS_IPV4 BIT(0) -#define ROCKER_RX_FLAGS_IPV6 BIT(1) -#define ROCKER_RX_FLAGS_CSUM_CALC BIT(2) -#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD BIT(3) -#define ROCKER_RX_FLAGS_IP_FRAG BIT(4) -#define ROCKER_RX_FLAGS_TCP BIT(5) -#define ROCKER_RX_FLAGS_UDP BIT(6) -#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7) -#define ROCKER_RX_FLAGS_FWD_OFFLOAD BIT(8) - -enum { - ROCKER_TLV_TX_UNSPEC, - ROCKER_TLV_TX_OFFLOAD, /* u8, see ROCKER_TX_OFFLOAD_ */ - ROCKER_TLV_TX_L3_CSUM_OFF, /* u16 */ - ROCKER_TLV_TX_TSO_MSS, /* u16 */ - ROCKER_TLV_TX_TSO_HDR_LEN, /* u16 */ - ROCKER_TLV_TX_FRAGS, /* array */ - - __ROCKER_TLV_TX_MAX, - ROCKER_TLV_TX_MAX = __ROCKER_TLV_TX_MAX - 1, -}; - -#define ROCKER_TX_OFFLOAD_NONE 0 -#define ROCKER_TX_OFFLOAD_IP_CSUM 1 -#define ROCKER_TX_OFFLOAD_TCP_UDP_CSUM 2 -#define ROCKER_TX_OFFLOAD_L3_CSUM 3 -#define ROCKER_TX_OFFLOAD_TSO 4 - -#define ROCKER_TX_FRAGS_MAX 16 - -enum { - ROCKER_TLV_TX_FRAG_UNSPEC, - ROCKER_TLV_TX_FRAG, /* nest */ - - __ROCKER_TLV_TX_FRAG_MAX, - ROCKER_TLV_TX_FRAG_MAX = __ROCKER_TLV_TX_FRAG_MAX - 1, -}; - -enum { - ROCKER_TLV_TX_FRAG_ATTR_UNSPEC, - ROCKER_TLV_TX_FRAG_ATTR_ADDR, /* u64 */ - ROCKER_TLV_TX_FRAG_ATTR_LEN, /* u16 */ - - __ROCKER_TLV_TX_FRAG_ATTR_MAX, - ROCKER_TLV_TX_FRAG_ATTR_MAX = __ROCKER_TLV_TX_FRAG_ATTR_MAX - 1, -}; - -/* cmd info nested for OF-DPA msgs */ -enum { - ROCKER_TLV_OF_DPA_UNSPEC, - ROCKER_TLV_OF_DPA_TABLE_ID, /* u16 */ - ROCKER_TLV_OF_DPA_PRIORITY, /* u32 */ - ROCKER_TLV_OF_DPA_HARDTIME, /* u32 */ - ROCKER_TLV_OF_DPA_IDLETIME, /* u32 */ - ROCKER_TLV_OF_DPA_COOKIE, /* u64 */ - ROCKER_TLV_OF_DPA_IN_PPORT, /* u32 */ - ROCKER_TLV_OF_DPA_IN_PPORT_MASK, /* u32 */ - ROCKER_TLV_OF_DPA_OUT_PPORT, /* u32 */ - ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, /* u16 */ - ROCKER_TLV_OF_DPA_GROUP_ID, /* u32 */ - ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, /* u32 */ - ROCKER_TLV_OF_DPA_GROUP_COUNT, /* u16 */ - ROCKER_TLV_OF_DPA_GROUP_IDS, /* u32 array */ - ROCKER_TLV_OF_DPA_VLAN_ID, /* __be16 */ - ROCKER_TLV_OF_DPA_VLAN_ID_MASK, /* __be16 */ - ROCKER_TLV_OF_DPA_VLAN_PCP, /* __be16 */ - ROCKER_TLV_OF_DPA_VLAN_PCP_MASK, /* __be16 */ - ROCKER_TLV_OF_DPA_VLAN_PCP_ACTION, /* u8 */ - ROCKER_TLV_OF_DPA_NEW_VLAN_ID, /* __be16 */ - ROCKER_TLV_OF_DPA_NEW_VLAN_PCP, /* u8 */ - ROCKER_TLV_OF_DPA_TUNNEL_ID, /* u32 */ - ROCKER_TLV_OF_DPA_TUNNEL_LPORT, /* u32 */ - ROCKER_TLV_OF_DPA_ETHERTYPE, /* __be16 */ - ROCKER_TLV_OF_DPA_DST_MAC, /* binary */ - ROCKER_TLV_OF_DPA_DST_MAC_MASK, /* binary */ - ROCKER_TLV_OF_DPA_SRC_MAC, /* binary */ - ROCKER_TLV_OF_DPA_SRC_MAC_MASK, /* binary */ - ROCKER_TLV_OF_DPA_IP_PROTO, /* u8 */ - ROCKER_TLV_OF_DPA_IP_PROTO_MASK, /* u8 */ - ROCKER_TLV_OF_DPA_IP_DSCP, /* u8 */ - ROCKER_TLV_OF_DPA_IP_DSCP_MASK, /* u8 */ - ROCKER_TLV_OF_DPA_IP_DSCP_ACTION, /* u8 */ - ROCKER_TLV_OF_DPA_NEW_IP_DSCP, /* u8 */ - ROCKER_TLV_OF_DPA_IP_ECN, /* u8 */ - ROCKER_TLV_OF_DPA_IP_ECN_MASK, /* u8 */ - ROCKER_TLV_OF_DPA_DST_IP, /* __be32 */ - ROCKER_TLV_OF_DPA_DST_IP_MASK, /* __be32 */ - ROCKER_TLV_OF_DPA_SRC_IP, /* __be32 */ - ROCKER_TLV_OF_DPA_SRC_IP_MASK, /* __be32 */ - ROCKER_TLV_OF_DPA_DST_IPV6, /* binary */ - ROCKER_TLV_OF_DPA_DST_IPV6_MASK, /* binary */ - ROCKER_TLV_OF_DPA_SRC_IPV6, /* binary */ - ROCKER_TLV_OF_DPA_SRC_IPV6_MASK, /* binary */ - ROCKER_TLV_OF_DPA_SRC_ARP_IP, /* __be32 */ - ROCKER_TLV_OF_DPA_SRC_ARP_IP_MASK, /* __be32 */ - ROCKER_TLV_OF_DPA_L4_DST_PORT, /* __be16 */ - ROCKER_TLV_OF_DPA_L4_DST_PORT_MASK, /* __be16 */ - ROCKER_TLV_OF_DPA_L4_SRC_PORT, /* __be16 */ - ROCKER_TLV_OF_DPA_L4_SRC_PORT_MASK, /* __be16 */ - ROCKER_TLV_OF_DPA_ICMP_TYPE, /* u8 */ - ROCKER_TLV_OF_DPA_ICMP_TYPE_MASK, /* u8 */ - ROCKER_TLV_OF_DPA_ICMP_CODE, /* u8 */ - ROCKER_TLV_OF_DPA_ICMP_CODE_MASK, /* u8 */ - ROCKER_TLV_OF_DPA_IPV6_LABEL, /* __be32 */ - ROCKER_TLV_OF_DPA_IPV6_LABEL_MASK, /* __be32 */ - ROCKER_TLV_OF_DPA_QUEUE_ID_ACTION, /* u8 */ - ROCKER_TLV_OF_DPA_NEW_QUEUE_ID, /* u8 */ - ROCKER_TLV_OF_DPA_CLEAR_ACTIONS, /* u32 */ - ROCKER_TLV_OF_DPA_POP_VLAN, /* u8 */ - ROCKER_TLV_OF_DPA_TTL_CHECK, /* u8 */ - ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, /* u8 */ - - __ROCKER_TLV_OF_DPA_MAX, - ROCKER_TLV_OF_DPA_MAX = __ROCKER_TLV_OF_DPA_MAX - 1, -}; - -/* OF-DPA table IDs */ - -enum rocker_of_dpa_table_id { - ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT = 0, - ROCKER_OF_DPA_TABLE_ID_VLAN = 10, - ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC = 20, - ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING = 30, - ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING = 40, - ROCKER_OF_DPA_TABLE_ID_BRIDGING = 50, - ROCKER_OF_DPA_TABLE_ID_ACL_POLICY = 60, -}; - -/* OF-DPA flow stats */ -enum { - ROCKER_TLV_OF_DPA_FLOW_STAT_UNSPEC, - ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION, /* u32 */ - ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS, /* u64 */ - ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS, /* u64 */ - - __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX, - ROCKER_TLV_OF_DPA_FLOW_STAT_MAX = __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX - 1, -}; - -/* OF-DPA group types */ -enum rocker_of_dpa_group_type { - ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE = 0, - ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE, - ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST, - ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST, - ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD, - ROCKER_OF_DPA_GROUP_TYPE_L3_INTERFACE, - ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST, - ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP, - ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY, -}; - -/* OF-DPA group L2 overlay types */ -enum rocker_of_dpa_overlay_type { - ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_UCAST = 0, - ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_MCAST, - ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_UCAST, - ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_MCAST, -}; - -/* OF-DPA group ID encoding */ -#define ROCKER_GROUP_TYPE_SHIFT 28 -#define ROCKER_GROUP_TYPE_MASK 0xf0000000 -#define ROCKER_GROUP_VLAN_SHIFT 16 -#define ROCKER_GROUP_VLAN_MASK 0x0fff0000 -#define ROCKER_GROUP_PORT_SHIFT 0 -#define ROCKER_GROUP_PORT_MASK 0x0000ffff -#define ROCKER_GROUP_TUNNEL_ID_SHIFT 12 -#define ROCKER_GROUP_TUNNEL_ID_MASK 0x0ffff000 -#define ROCKER_GROUP_SUBTYPE_SHIFT 10 -#define ROCKER_GROUP_SUBTYPE_MASK 0x00000c00 -#define ROCKER_GROUP_INDEX_SHIFT 0 -#define ROCKER_GROUP_INDEX_MASK 0x0000ffff -#define ROCKER_GROUP_INDEX_LONG_SHIFT 0 -#define ROCKER_GROUP_INDEX_LONG_MASK 0x0fffffff - -#define ROCKER_GROUP_TYPE_GET(group_id) \ - (((group_id) & ROCKER_GROUP_TYPE_MASK) >> ROCKER_GROUP_TYPE_SHIFT) -#define ROCKER_GROUP_TYPE_SET(type) \ - (((type) << ROCKER_GROUP_TYPE_SHIFT) & ROCKER_GROUP_TYPE_MASK) -#define ROCKER_GROUP_VLAN_GET(group_id) \ - (((group_id) & ROCKER_GROUP_VLAN_ID_MASK) >> ROCKER_GROUP_VLAN_ID_SHIFT) -#define ROCKER_GROUP_VLAN_SET(vlan_id) \ - (((vlan_id) << ROCKER_GROUP_VLAN_SHIFT) & ROCKER_GROUP_VLAN_MASK) -#define ROCKER_GROUP_PORT_GET(group_id) \ - (((group_id) & ROCKER_GROUP_PORT_MASK) >> ROCKER_GROUP_PORT_SHIFT) -#define ROCKER_GROUP_PORT_SET(port) \ - (((port) << ROCKER_GROUP_PORT_SHIFT) & ROCKER_GROUP_PORT_MASK) -#define ROCKER_GROUP_INDEX_GET(group_id) \ - (((group_id) & ROCKER_GROUP_INDEX_MASK) >> ROCKER_GROUP_INDEX_SHIFT) -#define ROCKER_GROUP_INDEX_SET(index) \ - (((index) << ROCKER_GROUP_INDEX_SHIFT) & ROCKER_GROUP_INDEX_MASK) -#define ROCKER_GROUP_INDEX_LONG_GET(group_id) \ - (((group_id) & ROCKER_GROUP_INDEX_LONG_MASK) >> \ - ROCKER_GROUP_INDEX_LONG_SHIFT) -#define ROCKER_GROUP_INDEX_LONG_SET(index) \ - (((index) << ROCKER_GROUP_INDEX_LONG_SHIFT) & \ - ROCKER_GROUP_INDEX_LONG_MASK) - -#define ROCKER_GROUP_NONE 0 -#define ROCKER_GROUP_L2_INTERFACE(vlan_id, port) \ - (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) |\ - ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_PORT_SET(port)) -#define ROCKER_GROUP_L2_REWRITE(index) \ - (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE) |\ - ROCKER_GROUP_INDEX_LONG_SET(index)) -#define ROCKER_GROUP_L2_MCAST(vlan_id, index) \ - (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) |\ - ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index)) -#define ROCKER_GROUP_L2_FLOOD(vlan_id, index) \ - (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) |\ - ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index)) -#define ROCKER_GROUP_L3_UNICAST(index) \ - (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST) |\ - ROCKER_GROUP_INDEX_LONG_SET(index)) - -/* Rocker general purpose registers */ -#define ROCKER_CONTROL 0x0300 -#define ROCKER_PORT_PHYS_COUNT 0x0304 -#define ROCKER_PORT_PHYS_LINK_STATUS 0x0310 /* 8-byte */ -#define ROCKER_PORT_PHYS_ENABLE 0x0318 /* 8-byte */ -#define ROCKER_SWITCH_ID 0x0320 /* 8-byte */ - -/* Rocker control bits */ -#define ROCKER_CONTROL_RESET BIT(0) +#include <linux/netdevice.h> +#include <net/neighbour.h> +#include <net/switchdev.h> + +#include "rocker_hw.h" + +struct rocker_desc_info { + char *data; /* mapped */ + size_t data_size; + size_t tlv_size; + struct rocker_desc *desc; + dma_addr_t mapaddr; +}; + +struct rocker_dma_ring_info { + size_t size; + u32 head; + u32 tail; + struct rocker_desc *desc; /* mapped */ + dma_addr_t mapaddr; + struct rocker_desc_info *desc_info; + unsigned int type; +}; + +struct rocker; + +struct rocker_port { + struct net_device *dev; + struct rocker *rocker; + void *wpriv; + unsigned int port_number; + u32 pport; + struct napi_struct napi_tx; + struct napi_struct napi_rx; + struct rocker_dma_ring_info tx_ring; + struct rocker_dma_ring_info rx_ring; +}; + +struct rocker_world_ops; + +struct rocker { + struct pci_dev *pdev; + u8 __iomem *hw_addr; + struct msix_entry *msix_entries; + unsigned int port_count; + struct rocker_port **ports; + struct { + u64 id; + } hw; + spinlock_t cmd_ring_lock; /* for cmd ring accesses */ + struct rocker_dma_ring_info cmd_ring; + struct rocker_dma_ring_info event_ring; + struct rocker_world_ops *wops; + void *wpriv; +}; + +typedef int (*rocker_cmd_prep_cb_t)(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv); + +typedef int (*rocker_cmd_proc_cb_t)(const struct rocker_port *rocker_port, + const struct rocker_desc_info *desc_info, + void *priv); + +int rocker_cmd_exec(struct rocker_port *rocker_port, bool nowait, + rocker_cmd_prep_cb_t prepare, void *prepare_priv, + rocker_cmd_proc_cb_t process, void *process_priv); + +int rocker_port_set_learning(struct rocker_port *rocker_port, + bool learning); + +struct rocker_world_ops { + const char *kind; + size_t priv_size; + size_t port_priv_size; + u8 mode; + int (*init)(struct rocker *rocker); + void (*fini)(struct rocker *rocker); + int (*port_pre_init)(struct rocker_port *rocker_port); + int (*port_init)(struct rocker_port *rocker_port); + void (*port_fini)(struct rocker_port *rocker_port); + void (*port_post_fini)(struct rocker_port *rocker_port); + int (*port_open)(struct rocker_port *rocker_port); + void (*port_stop)(struct rocker_port *rocker_port); + int (*port_attr_stp_state_set)(struct rocker_port *rocker_port, + u8 state, + struct switchdev_trans *trans); + int (*port_attr_bridge_flags_set)(struct rocker_port *rocker_port, + unsigned long brport_flags, + struct switchdev_trans *trans); + int (*port_attr_bridge_flags_get)(const struct rocker_port *rocker_port, + unsigned long *p_brport_flags); + int (*port_attr_bridge_ageing_time_set)(struct rocker_port *rocker_port, + u32 ageing_time, + struct switchdev_trans *trans); + int (*port_obj_vlan_add)(struct rocker_port *rocker_port, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans); + int (*port_obj_vlan_del)(struct rocker_port *rocker_port, + const struct switchdev_obj_port_vlan *vlan); + int (*port_obj_vlan_dump)(const struct rocker_port *rocker_port, + struct switchdev_obj_port_vlan *vlan, + switchdev_obj_dump_cb_t *cb); + int (*port_obj_fib4_add)(struct rocker_port *rocker_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans); + int (*port_obj_fib4_del)(struct rocker_port *rocker_port, + const struct switchdev_obj_ipv4_fib *fib4); + int (*port_obj_fdb_add)(struct rocker_port *rocker_port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans); + int (*port_obj_fdb_del)(struct rocker_port *rocker_port, + const struct switchdev_obj_port_fdb *fdb); + int (*port_obj_fdb_dump)(const struct rocker_port *rocker_port, + struct switchdev_obj_port_fdb *fdb, + switchdev_obj_dump_cb_t *cb); + int (*port_master_linked)(struct rocker_port *rocker_port, + struct net_device *master); + int (*port_master_unlinked)(struct rocker_port *rocker_port, + struct net_device *master); + int (*port_neigh_update)(struct rocker_port *rocker_port, + struct neighbour *n); + int (*port_neigh_destroy)(struct rocker_port *rocker_port, + struct neighbour *n); + int (*port_ev_mac_vlan_seen)(struct rocker_port *rocker_port, + const unsigned char *addr, + __be16 vlan_id); +}; + +extern struct rocker_world_ops rocker_ofdpa_ops; #endif diff --git a/drivers/net/ethernet/rocker/rocker_hw.h b/drivers/net/ethernet/rocker/rocker_hw.h new file mode 100644 index 000000000000..2adfe88859f2 --- /dev/null +++ b/drivers/net/ethernet/rocker/rocker_hw.h @@ -0,0 +1,467 @@ +/* + * drivers/net/ethernet/rocker/rocker_hw.h - Rocker switch device driver + * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _ROCKER_HW_H +#define _ROCKER_HW_H + +#include <linux/types.h> + +/* Return codes */ +enum { + ROCKER_OK = 0, + ROCKER_ENOENT = 2, + ROCKER_ENXIO = 6, + ROCKER_ENOMEM = 12, + ROCKER_EEXIST = 17, + ROCKER_EINVAL = 22, + ROCKER_EMSGSIZE = 90, + ROCKER_ENOTSUP = 95, + ROCKER_ENOBUFS = 105, +}; + +#define ROCKER_FP_PORTS_MAX 62 + +#define PCI_VENDOR_ID_REDHAT 0x1b36 +#define PCI_DEVICE_ID_REDHAT_ROCKER 0x0006 + +#define ROCKER_PCI_BAR0_SIZE 0x2000 + +/* MSI-X vectors */ +enum { + ROCKER_MSIX_VEC_CMD, + ROCKER_MSIX_VEC_EVENT, + ROCKER_MSIX_VEC_TEST, + ROCKER_MSIX_VEC_RESERVED0, + __ROCKER_MSIX_VEC_TX, + __ROCKER_MSIX_VEC_RX, +#define ROCKER_MSIX_VEC_TX(port) \ + (__ROCKER_MSIX_VEC_TX + ((port) * 2)) +#define ROCKER_MSIX_VEC_RX(port) \ + (__ROCKER_MSIX_VEC_RX + ((port) * 2)) +#define ROCKER_MSIX_VEC_COUNT(portcnt) \ + (ROCKER_MSIX_VEC_RX((portcnt - 1)) + 1) +}; + +/* Rocker bogus registers */ +#define ROCKER_BOGUS_REG0 0x0000 +#define ROCKER_BOGUS_REG1 0x0004 +#define ROCKER_BOGUS_REG2 0x0008 +#define ROCKER_BOGUS_REG3 0x000c + +/* Rocker test registers */ +#define ROCKER_TEST_REG 0x0010 +#define ROCKER_TEST_REG64 0x0018 /* 8-byte */ +#define ROCKER_TEST_IRQ 0x0020 +#define ROCKER_TEST_DMA_ADDR 0x0028 /* 8-byte */ +#define ROCKER_TEST_DMA_SIZE 0x0030 +#define ROCKER_TEST_DMA_CTRL 0x0034 + +/* Rocker test register ctrl */ +#define ROCKER_TEST_DMA_CTRL_CLEAR BIT(0) +#define ROCKER_TEST_DMA_CTRL_FILL BIT(1) +#define ROCKER_TEST_DMA_CTRL_INVERT BIT(2) + +/* Rocker DMA ring register offsets */ +#define ROCKER_DMA_DESC_ADDR(x) (0x1000 + (x) * 32) /* 8-byte */ +#define ROCKER_DMA_DESC_SIZE(x) (0x1008 + (x) * 32) +#define ROCKER_DMA_DESC_HEAD(x) (0x100c + (x) * 32) +#define ROCKER_DMA_DESC_TAIL(x) (0x1010 + (x) * 32) +#define ROCKER_DMA_DESC_CTRL(x) (0x1014 + (x) * 32) +#define ROCKER_DMA_DESC_CREDITS(x) (0x1018 + (x) * 32) +#define ROCKER_DMA_DESC_RES1(x) (0x101c + (x) * 32) + +/* Rocker dma ctrl register bits */ +#define ROCKER_DMA_DESC_CTRL_RESET BIT(0) + +/* Rocker DMA ring types */ +enum rocker_dma_type { + ROCKER_DMA_CMD, + ROCKER_DMA_EVENT, + __ROCKER_DMA_TX, + __ROCKER_DMA_RX, +#define ROCKER_DMA_TX(port) (__ROCKER_DMA_TX + (port) * 2) +#define ROCKER_DMA_RX(port) (__ROCKER_DMA_RX + (port) * 2) +}; + +/* Rocker DMA ring size limits and default sizes */ +#define ROCKER_DMA_SIZE_MIN 2ul +#define ROCKER_DMA_SIZE_MAX 65536ul +#define ROCKER_DMA_CMD_DEFAULT_SIZE 32ul +#define ROCKER_DMA_EVENT_DEFAULT_SIZE 32ul +#define ROCKER_DMA_TX_DEFAULT_SIZE 64ul +#define ROCKER_DMA_TX_DESC_SIZE 256 +#define ROCKER_DMA_RX_DEFAULT_SIZE 64ul +#define ROCKER_DMA_RX_DESC_SIZE 256 + +/* Rocker DMA descriptor struct */ +struct rocker_desc { + u64 buf_addr; + u64 cookie; + u16 buf_size; + u16 tlv_size; + u16 resv[5]; + u16 comp_err; +}; + +#define ROCKER_DMA_DESC_COMP_ERR_GEN BIT(15) + +/* Rocker DMA TLV struct */ +struct rocker_tlv { + u32 type; + u16 len; +}; + +/* TLVs */ +enum { + ROCKER_TLV_CMD_UNSPEC, + ROCKER_TLV_CMD_TYPE, /* u16 */ + ROCKER_TLV_CMD_INFO, /* nest */ + + __ROCKER_TLV_CMD_MAX, + ROCKER_TLV_CMD_MAX = __ROCKER_TLV_CMD_MAX - 1, +}; + +enum { + ROCKER_TLV_CMD_TYPE_UNSPEC, + ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS, + ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS, + + ROCKER_TLV_CMD_TYPE_CLEAR_PORT_STATS, + ROCKER_TLV_CMD_TYPE_GET_PORT_STATS, + + __ROCKER_TLV_CMD_TYPE_MAX, + ROCKER_TLV_CMD_TYPE_MAX = __ROCKER_TLV_CMD_TYPE_MAX - 1, +}; + +enum { + ROCKER_TLV_CMD_PORT_SETTINGS_UNSPEC, + ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, /* u32 */ + ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, /* u32 */ + ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, /* binary */ + ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME, /* binary */ + ROCKER_TLV_CMD_PORT_SETTINGS_MTU, /* u16 */ + + __ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + ROCKER_TLV_CMD_PORT_SETTINGS_MAX = + __ROCKER_TLV_CMD_PORT_SETTINGS_MAX - 1, +}; + +enum { + ROCKER_TLV_CMD_PORT_STATS_UNSPEC, + ROCKER_TLV_CMD_PORT_STATS_PPORT, /* u32 */ + + ROCKER_TLV_CMD_PORT_STATS_RX_PKTS, /* u64 */ + ROCKER_TLV_CMD_PORT_STATS_RX_BYTES, /* u64 */ + ROCKER_TLV_CMD_PORT_STATS_RX_DROPPED, /* u64 */ + ROCKER_TLV_CMD_PORT_STATS_RX_ERRORS, /* u64 */ + + ROCKER_TLV_CMD_PORT_STATS_TX_PKTS, /* u64 */ + ROCKER_TLV_CMD_PORT_STATS_TX_BYTES, /* u64 */ + ROCKER_TLV_CMD_PORT_STATS_TX_DROPPED, /* u64 */ + ROCKER_TLV_CMD_PORT_STATS_TX_ERRORS, /* u64 */ + + __ROCKER_TLV_CMD_PORT_STATS_MAX, + ROCKER_TLV_CMD_PORT_STATS_MAX = __ROCKER_TLV_CMD_PORT_STATS_MAX - 1, +}; + +enum rocker_port_mode { + ROCKER_PORT_MODE_OF_DPA, +}; + +enum { + ROCKER_TLV_EVENT_UNSPEC, + ROCKER_TLV_EVENT_TYPE, /* u16 */ + ROCKER_TLV_EVENT_INFO, /* nest */ + + __ROCKER_TLV_EVENT_MAX, + ROCKER_TLV_EVENT_MAX = __ROCKER_TLV_EVENT_MAX - 1, +}; + +enum { + ROCKER_TLV_EVENT_TYPE_UNSPEC, + ROCKER_TLV_EVENT_TYPE_LINK_CHANGED, + ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN, + + __ROCKER_TLV_EVENT_TYPE_MAX, + ROCKER_TLV_EVENT_TYPE_MAX = __ROCKER_TLV_EVENT_TYPE_MAX - 1, +}; + +enum { + ROCKER_TLV_EVENT_LINK_CHANGED_UNSPEC, + ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, /* u32 */ + ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, /* u8 */ + + __ROCKER_TLV_EVENT_LINK_CHANGED_MAX, + ROCKER_TLV_EVENT_LINK_CHANGED_MAX = + __ROCKER_TLV_EVENT_LINK_CHANGED_MAX - 1, +}; + +enum { + ROCKER_TLV_EVENT_MAC_VLAN_UNSPEC, + ROCKER_TLV_EVENT_MAC_VLAN_PPORT, /* u32 */ + ROCKER_TLV_EVENT_MAC_VLAN_MAC, /* binary */ + ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, /* __be16 */ + + __ROCKER_TLV_EVENT_MAC_VLAN_MAX, + ROCKER_TLV_EVENT_MAC_VLAN_MAX = __ROCKER_TLV_EVENT_MAC_VLAN_MAX - 1, +}; + +enum { + ROCKER_TLV_RX_UNSPEC, + ROCKER_TLV_RX_FLAGS, /* u16, see ROCKER_RX_FLAGS_ */ + ROCKER_TLV_RX_CSUM, /* u16 */ + ROCKER_TLV_RX_FRAG_ADDR, /* u64 */ + ROCKER_TLV_RX_FRAG_MAX_LEN, /* u16 */ + ROCKER_TLV_RX_FRAG_LEN, /* u16 */ + + __ROCKER_TLV_RX_MAX, + ROCKER_TLV_RX_MAX = __ROCKER_TLV_RX_MAX - 1, +}; + +#define ROCKER_RX_FLAGS_IPV4 BIT(0) +#define ROCKER_RX_FLAGS_IPV6 BIT(1) +#define ROCKER_RX_FLAGS_CSUM_CALC BIT(2) +#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD BIT(3) +#define ROCKER_RX_FLAGS_IP_FRAG BIT(4) +#define ROCKER_RX_FLAGS_TCP BIT(5) +#define ROCKER_RX_FLAGS_UDP BIT(6) +#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7) +#define ROCKER_RX_FLAGS_FWD_OFFLOAD BIT(8) + +enum { + ROCKER_TLV_TX_UNSPEC, + ROCKER_TLV_TX_OFFLOAD, /* u8, see ROCKER_TX_OFFLOAD_ */ + ROCKER_TLV_TX_L3_CSUM_OFF, /* u16 */ + ROCKER_TLV_TX_TSO_MSS, /* u16 */ + ROCKER_TLV_TX_TSO_HDR_LEN, /* u16 */ + ROCKER_TLV_TX_FRAGS, /* array */ + + __ROCKER_TLV_TX_MAX, + ROCKER_TLV_TX_MAX = __ROCKER_TLV_TX_MAX - 1, +}; + +#define ROCKER_TX_OFFLOAD_NONE 0 +#define ROCKER_TX_OFFLOAD_IP_CSUM 1 +#define ROCKER_TX_OFFLOAD_TCP_UDP_CSUM 2 +#define ROCKER_TX_OFFLOAD_L3_CSUM 3 +#define ROCKER_TX_OFFLOAD_TSO 4 + +#define ROCKER_TX_FRAGS_MAX 16 + +enum { + ROCKER_TLV_TX_FRAG_UNSPEC, + ROCKER_TLV_TX_FRAG, /* nest */ + + __ROCKER_TLV_TX_FRAG_MAX, + ROCKER_TLV_TX_FRAG_MAX = __ROCKER_TLV_TX_FRAG_MAX - 1, +}; + +enum { + ROCKER_TLV_TX_FRAG_ATTR_UNSPEC, + ROCKER_TLV_TX_FRAG_ATTR_ADDR, /* u64 */ + ROCKER_TLV_TX_FRAG_ATTR_LEN, /* u16 */ + + __ROCKER_TLV_TX_FRAG_ATTR_MAX, + ROCKER_TLV_TX_FRAG_ATTR_MAX = __ROCKER_TLV_TX_FRAG_ATTR_MAX - 1, +}; + +/* cmd info nested for OF-DPA msgs */ +enum { + ROCKER_TLV_OF_DPA_UNSPEC, + ROCKER_TLV_OF_DPA_TABLE_ID, /* u16 */ + ROCKER_TLV_OF_DPA_PRIORITY, /* u32 */ + ROCKER_TLV_OF_DPA_HARDTIME, /* u32 */ + ROCKER_TLV_OF_DPA_IDLETIME, /* u32 */ + ROCKER_TLV_OF_DPA_COOKIE, /* u64 */ + ROCKER_TLV_OF_DPA_IN_PPORT, /* u32 */ + ROCKER_TLV_OF_DPA_IN_PPORT_MASK, /* u32 */ + ROCKER_TLV_OF_DPA_OUT_PPORT, /* u32 */ + ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, /* u16 */ + ROCKER_TLV_OF_DPA_GROUP_ID, /* u32 */ + ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, /* u32 */ + ROCKER_TLV_OF_DPA_GROUP_COUNT, /* u16 */ + ROCKER_TLV_OF_DPA_GROUP_IDS, /* u32 array */ + ROCKER_TLV_OF_DPA_VLAN_ID, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_ID_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_VLAN_ID, /* __be16 */ + ROCKER_TLV_OF_DPA_NEW_VLAN_PCP, /* u8 */ + ROCKER_TLV_OF_DPA_TUNNEL_ID, /* u32 */ + ROCKER_TLV_OF_DPA_TUNNEL_LPORT, /* u32 */ + ROCKER_TLV_OF_DPA_ETHERTYPE, /* __be16 */ + ROCKER_TLV_OF_DPA_DST_MAC, /* binary */ + ROCKER_TLV_OF_DPA_DST_MAC_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_MAC, /* binary */ + ROCKER_TLV_OF_DPA_SRC_MAC_MASK, /* binary */ + ROCKER_TLV_OF_DPA_IP_PROTO, /* u8 */ + ROCKER_TLV_OF_DPA_IP_PROTO_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_IP_DSCP, /* u8 */ + ROCKER_TLV_OF_DPA_IP_ECN, /* u8 */ + ROCKER_TLV_OF_DPA_IP_ECN_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_DST_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_DST_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_DST_IPV6, /* binary */ + ROCKER_TLV_OF_DPA_DST_IPV6_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_IPV6, /* binary */ + ROCKER_TLV_OF_DPA_SRC_IPV6_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_ARP_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_ARP_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_L4_DST_PORT, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_DST_PORT_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_SRC_PORT, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_SRC_PORT_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_ICMP_TYPE, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_TYPE_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_CODE, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_CODE_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IPV6_LABEL, /* __be32 */ + ROCKER_TLV_OF_DPA_IPV6_LABEL_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_QUEUE_ID_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_QUEUE_ID, /* u8 */ + ROCKER_TLV_OF_DPA_CLEAR_ACTIONS, /* u32 */ + ROCKER_TLV_OF_DPA_POP_VLAN, /* u8 */ + ROCKER_TLV_OF_DPA_TTL_CHECK, /* u8 */ + ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, /* u8 */ + + __ROCKER_TLV_OF_DPA_MAX, + ROCKER_TLV_OF_DPA_MAX = __ROCKER_TLV_OF_DPA_MAX - 1, +}; + +/* OF-DPA table IDs */ + +enum rocker_of_dpa_table_id { + ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT = 0, + ROCKER_OF_DPA_TABLE_ID_VLAN = 10, + ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC = 20, + ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING = 30, + ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING = 40, + ROCKER_OF_DPA_TABLE_ID_BRIDGING = 50, + ROCKER_OF_DPA_TABLE_ID_ACL_POLICY = 60, +}; + +/* OF-DPA flow stats */ +enum { + ROCKER_TLV_OF_DPA_FLOW_STAT_UNSPEC, + ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION, /* u32 */ + ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS, /* u64 */ + ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS, /* u64 */ + + __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX, + ROCKER_TLV_OF_DPA_FLOW_STAT_MAX = __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX - 1, +}; + +/* OF-DPA group types */ +enum rocker_of_dpa_group_type { + ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE = 0, + ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE, + ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST, + ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST, + ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD, + ROCKER_OF_DPA_GROUP_TYPE_L3_INTERFACE, + ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST, + ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP, + ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY, +}; + +/* OF-DPA group L2 overlay types */ +enum rocker_of_dpa_overlay_type { + ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_UCAST = 0, + ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_MCAST, + ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_UCAST, + ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_MCAST, +}; + +/* OF-DPA group ID encoding */ +#define ROCKER_GROUP_TYPE_SHIFT 28 +#define ROCKER_GROUP_TYPE_MASK 0xf0000000 +#define ROCKER_GROUP_VLAN_SHIFT 16 +#define ROCKER_GROUP_VLAN_MASK 0x0fff0000 +#define ROCKER_GROUP_PORT_SHIFT 0 +#define ROCKER_GROUP_PORT_MASK 0x0000ffff +#define ROCKER_GROUP_TUNNEL_ID_SHIFT 12 +#define ROCKER_GROUP_TUNNEL_ID_MASK 0x0ffff000 +#define ROCKER_GROUP_SUBTYPE_SHIFT 10 +#define ROCKER_GROUP_SUBTYPE_MASK 0x00000c00 +#define ROCKER_GROUP_INDEX_SHIFT 0 +#define ROCKER_GROUP_INDEX_MASK 0x0000ffff +#define ROCKER_GROUP_INDEX_LONG_SHIFT 0 +#define ROCKER_GROUP_INDEX_LONG_MASK 0x0fffffff + +#define ROCKER_GROUP_TYPE_GET(group_id) \ + (((group_id) & ROCKER_GROUP_TYPE_MASK) >> ROCKER_GROUP_TYPE_SHIFT) +#define ROCKER_GROUP_TYPE_SET(type) \ + (((type) << ROCKER_GROUP_TYPE_SHIFT) & ROCKER_GROUP_TYPE_MASK) +#define ROCKER_GROUP_VLAN_GET(group_id) \ + (((group_id) & ROCKER_GROUP_VLAN_ID_MASK) >> ROCKER_GROUP_VLAN_ID_SHIFT) +#define ROCKER_GROUP_VLAN_SET(vlan_id) \ + (((vlan_id) << ROCKER_GROUP_VLAN_SHIFT) & ROCKER_GROUP_VLAN_MASK) +#define ROCKER_GROUP_PORT_GET(group_id) \ + (((group_id) & ROCKER_GROUP_PORT_MASK) >> ROCKER_GROUP_PORT_SHIFT) +#define ROCKER_GROUP_PORT_SET(port) \ + (((port) << ROCKER_GROUP_PORT_SHIFT) & ROCKER_GROUP_PORT_MASK) +#define ROCKER_GROUP_INDEX_GET(group_id) \ + (((group_id) & ROCKER_GROUP_INDEX_MASK) >> ROCKER_GROUP_INDEX_SHIFT) +#define ROCKER_GROUP_INDEX_SET(index) \ + (((index) << ROCKER_GROUP_INDEX_SHIFT) & ROCKER_GROUP_INDEX_MASK) +#define ROCKER_GROUP_INDEX_LONG_GET(group_id) \ + (((group_id) & ROCKER_GROUP_INDEX_LONG_MASK) >> \ + ROCKER_GROUP_INDEX_LONG_SHIFT) +#define ROCKER_GROUP_INDEX_LONG_SET(index) \ + (((index) << ROCKER_GROUP_INDEX_LONG_SHIFT) & \ + ROCKER_GROUP_INDEX_LONG_MASK) + +#define ROCKER_GROUP_NONE 0 +#define ROCKER_GROUP_L2_INTERFACE(vlan_id, port) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_PORT_SET(port)) +#define ROCKER_GROUP_L2_REWRITE(index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE) |\ + ROCKER_GROUP_INDEX_LONG_SET(index)) +#define ROCKER_GROUP_L2_MCAST(vlan_id, index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index)) +#define ROCKER_GROUP_L2_FLOOD(vlan_id, index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index)) +#define ROCKER_GROUP_L3_UNICAST(index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST) |\ + ROCKER_GROUP_INDEX_LONG_SET(index)) + +/* Rocker general purpose registers */ +#define ROCKER_CONTROL 0x0300 +#define ROCKER_PORT_PHYS_COUNT 0x0304 +#define ROCKER_PORT_PHYS_LINK_STATUS 0x0310 /* 8-byte */ +#define ROCKER_PORT_PHYS_ENABLE 0x0318 /* 8-byte */ +#define ROCKER_SWITCH_ID 0x0320 /* 8-byte */ + +/* Rocker control bits */ +#define ROCKER_CONTROL_RESET BIT(0) + +#endif diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c new file mode 100644 index 000000000000..acafbf870182 --- /dev/null +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -0,0 +1,2910 @@ +/* + * drivers/net/ethernet/rocker/rocker.c - Rocker switch device driver + * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/spinlock.h> +#include <linux/sort.h> +#include <linux/random.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/if_bridge.h> +#include <linux/bitops.h> +#include <linux/ctype.h> +#include <net/switchdev.h> +#include <net/rtnetlink.h> +#include <net/netevent.h> +#include <net/arp.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <generated/utsrelease.h> + +#include "rocker_hw.h" +#include "rocker.h" +#include "rocker_tlv.h" + +static const char rocker_driver_name[] = "rocker"; + +static const struct pci_device_id rocker_pci_id_table[] = { + {PCI_VDEVICE(REDHAT, PCI_DEVICE_ID_REDHAT_ROCKER), 0}, + {0, } +}; + +struct rocker_wait { + wait_queue_head_t wait; + bool done; + bool nowait; +}; + +static void rocker_wait_reset(struct rocker_wait *wait) +{ + wait->done = false; + wait->nowait = false; +} + +static void rocker_wait_init(struct rocker_wait *wait) +{ + init_waitqueue_head(&wait->wait); + rocker_wait_reset(wait); +} + +static struct rocker_wait *rocker_wait_create(void) +{ + struct rocker_wait *wait; + + wait = kzalloc(sizeof(*wait), GFP_KERNEL); + if (!wait) + return NULL; + return wait; +} + +static void rocker_wait_destroy(struct rocker_wait *wait) +{ + kfree(wait); +} + +static bool rocker_wait_event_timeout(struct rocker_wait *wait, + unsigned long timeout) +{ + wait_event_timeout(wait->wait, wait->done, HZ / 10); + if (!wait->done) + return false; + return true; +} + +static void rocker_wait_wake_up(struct rocker_wait *wait) +{ + wait->done = true; + wake_up(&wait->wait); +} + +static u32 rocker_msix_vector(const struct rocker *rocker, unsigned int vector) +{ + return rocker->msix_entries[vector].vector; +} + +static u32 rocker_msix_tx_vector(const struct rocker_port *rocker_port) +{ + return rocker_msix_vector(rocker_port->rocker, + ROCKER_MSIX_VEC_TX(rocker_port->port_number)); +} + +static u32 rocker_msix_rx_vector(const struct rocker_port *rocker_port) +{ + return rocker_msix_vector(rocker_port->rocker, + ROCKER_MSIX_VEC_RX(rocker_port->port_number)); +} + +#define rocker_write32(rocker, reg, val) \ + writel((val), (rocker)->hw_addr + (ROCKER_ ## reg)) +#define rocker_read32(rocker, reg) \ + readl((rocker)->hw_addr + (ROCKER_ ## reg)) +#define rocker_write64(rocker, reg, val) \ + writeq((val), (rocker)->hw_addr + (ROCKER_ ## reg)) +#define rocker_read64(rocker, reg) \ + readq((rocker)->hw_addr + (ROCKER_ ## reg)) + +/***************************** + * HW basic testing functions + *****************************/ + +static int rocker_reg_test(const struct rocker *rocker) +{ + const struct pci_dev *pdev = rocker->pdev; + u64 test_reg; + u64 rnd; + + rnd = prandom_u32(); + rnd >>= 1; + rocker_write32(rocker, TEST_REG, rnd); + test_reg = rocker_read32(rocker, TEST_REG); + if (test_reg != rnd * 2) { + dev_err(&pdev->dev, "unexpected 32bit register value %08llx, expected %08llx\n", + test_reg, rnd * 2); + return -EIO; + } + + rnd = prandom_u32(); + rnd <<= 31; + rnd |= prandom_u32(); + rocker_write64(rocker, TEST_REG64, rnd); + test_reg = rocker_read64(rocker, TEST_REG64); + if (test_reg != rnd * 2) { + dev_err(&pdev->dev, "unexpected 64bit register value %16llx, expected %16llx\n", + test_reg, rnd * 2); + return -EIO; + } + + return 0; +} + +static int rocker_dma_test_one(const struct rocker *rocker, + struct rocker_wait *wait, u32 test_type, + dma_addr_t dma_handle, const unsigned char *buf, + const unsigned char *expect, size_t size) +{ + const struct pci_dev *pdev = rocker->pdev; + int i; + + rocker_wait_reset(wait); + rocker_write32(rocker, TEST_DMA_CTRL, test_type); + + if (!rocker_wait_event_timeout(wait, HZ / 10)) { + dev_err(&pdev->dev, "no interrupt received within a timeout\n"); + return -EIO; + } + + for (i = 0; i < size; i++) { + if (buf[i] != expect[i]) { + dev_err(&pdev->dev, "unexpected memory content %02x at byte %x\n, %02x expected", + buf[i], i, expect[i]); + return -EIO; + } + } + return 0; +} + +#define ROCKER_TEST_DMA_BUF_SIZE (PAGE_SIZE * 4) +#define ROCKER_TEST_DMA_FILL_PATTERN 0x96 + +static int rocker_dma_test_offset(const struct rocker *rocker, + struct rocker_wait *wait, int offset) +{ + struct pci_dev *pdev = rocker->pdev; + unsigned char *alloc; + unsigned char *buf; + unsigned char *expect; + dma_addr_t dma_handle; + int i; + int err; + + alloc = kzalloc(ROCKER_TEST_DMA_BUF_SIZE * 2 + offset, + GFP_KERNEL | GFP_DMA); + if (!alloc) + return -ENOMEM; + buf = alloc + offset; + expect = buf + ROCKER_TEST_DMA_BUF_SIZE; + + dma_handle = pci_map_single(pdev, buf, ROCKER_TEST_DMA_BUF_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(pdev, dma_handle)) { + err = -EIO; + goto free_alloc; + } + + rocker_write64(rocker, TEST_DMA_ADDR, dma_handle); + rocker_write32(rocker, TEST_DMA_SIZE, ROCKER_TEST_DMA_BUF_SIZE); + + memset(expect, ROCKER_TEST_DMA_FILL_PATTERN, ROCKER_TEST_DMA_BUF_SIZE); + err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_FILL, + dma_handle, buf, expect, + ROCKER_TEST_DMA_BUF_SIZE); + if (err) + goto unmap; + + memset(expect, 0, ROCKER_TEST_DMA_BUF_SIZE); + err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_CLEAR, + dma_handle, buf, expect, + ROCKER_TEST_DMA_BUF_SIZE); + if (err) + goto unmap; + + prandom_bytes(buf, ROCKER_TEST_DMA_BUF_SIZE); + for (i = 0; i < ROCKER_TEST_DMA_BUF_SIZE; i++) + expect[i] = ~buf[i]; + err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_INVERT, + dma_handle, buf, expect, + ROCKER_TEST_DMA_BUF_SIZE); + if (err) + goto unmap; + +unmap: + pci_unmap_single(pdev, dma_handle, ROCKER_TEST_DMA_BUF_SIZE, + PCI_DMA_BIDIRECTIONAL); +free_alloc: + kfree(alloc); + + return err; +} + +static int rocker_dma_test(const struct rocker *rocker, + struct rocker_wait *wait) +{ + int i; + int err; + + for (i = 0; i < 8; i++) { + err = rocker_dma_test_offset(rocker, wait, i); + if (err) + return err; + } + return 0; +} + +static irqreturn_t rocker_test_irq_handler(int irq, void *dev_id) +{ + struct rocker_wait *wait = dev_id; + + rocker_wait_wake_up(wait); + + return IRQ_HANDLED; +} + +static int rocker_basic_hw_test(const struct rocker *rocker) +{ + const struct pci_dev *pdev = rocker->pdev; + struct rocker_wait wait; + int err; + + err = rocker_reg_test(rocker); + if (err) { + dev_err(&pdev->dev, "reg test failed\n"); + return err; + } + + err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_TEST), + rocker_test_irq_handler, 0, + rocker_driver_name, &wait); + if (err) { + dev_err(&pdev->dev, "cannot assign test irq\n"); + return err; + } + + rocker_wait_init(&wait); + rocker_write32(rocker, TEST_IRQ, ROCKER_MSIX_VEC_TEST); + + if (!rocker_wait_event_timeout(&wait, HZ / 10)) { + dev_err(&pdev->dev, "no interrupt received within a timeout\n"); + err = -EIO; + goto free_irq; + } + + err = rocker_dma_test(rocker, &wait); + if (err) + dev_err(&pdev->dev, "dma test failed\n"); + +free_irq: + free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_TEST), &wait); + return err; +} + +/****************************************** + * DMA rings and descriptors manipulations + ******************************************/ + +static u32 __pos_inc(u32 pos, size_t limit) +{ + return ++pos == limit ? 0 : pos; +} + +static int rocker_desc_err(const struct rocker_desc_info *desc_info) +{ + int err = desc_info->desc->comp_err & ~ROCKER_DMA_DESC_COMP_ERR_GEN; + + switch (err) { + case ROCKER_OK: + return 0; + case -ROCKER_ENOENT: + return -ENOENT; + case -ROCKER_ENXIO: + return -ENXIO; + case -ROCKER_ENOMEM: + return -ENOMEM; + case -ROCKER_EEXIST: + return -EEXIST; + case -ROCKER_EINVAL: + return -EINVAL; + case -ROCKER_EMSGSIZE: + return -EMSGSIZE; + case -ROCKER_ENOTSUP: + return -EOPNOTSUPP; + case -ROCKER_ENOBUFS: + return -ENOBUFS; + } + + return -EINVAL; +} + +static void rocker_desc_gen_clear(const struct rocker_desc_info *desc_info) +{ + desc_info->desc->comp_err &= ~ROCKER_DMA_DESC_COMP_ERR_GEN; +} + +static bool rocker_desc_gen(const struct rocker_desc_info *desc_info) +{ + u32 comp_err = desc_info->desc->comp_err; + + return comp_err & ROCKER_DMA_DESC_COMP_ERR_GEN ? true : false; +} + +static void * +rocker_desc_cookie_ptr_get(const struct rocker_desc_info *desc_info) +{ + return (void *)(uintptr_t)desc_info->desc->cookie; +} + +static void rocker_desc_cookie_ptr_set(const struct rocker_desc_info *desc_info, + void *ptr) +{ + desc_info->desc->cookie = (uintptr_t) ptr; +} + +static struct rocker_desc_info * +rocker_desc_head_get(const struct rocker_dma_ring_info *info) +{ + static struct rocker_desc_info *desc_info; + u32 head = __pos_inc(info->head, info->size); + + desc_info = &info->desc_info[info->head]; + if (head == info->tail) + return NULL; /* ring full */ + desc_info->tlv_size = 0; + return desc_info; +} + +static void rocker_desc_commit(const struct rocker_desc_info *desc_info) +{ + desc_info->desc->buf_size = desc_info->data_size; + desc_info->desc->tlv_size = desc_info->tlv_size; +} + +static void rocker_desc_head_set(const struct rocker *rocker, + struct rocker_dma_ring_info *info, + const struct rocker_desc_info *desc_info) +{ + u32 head = __pos_inc(info->head, info->size); + + BUG_ON(head == info->tail); + rocker_desc_commit(desc_info); + info->head = head; + rocker_write32(rocker, DMA_DESC_HEAD(info->type), head); +} + +static struct rocker_desc_info * +rocker_desc_tail_get(struct rocker_dma_ring_info *info) +{ + static struct rocker_desc_info *desc_info; + + if (info->tail == info->head) + return NULL; /* nothing to be done between head and tail */ + desc_info = &info->desc_info[info->tail]; + if (!rocker_desc_gen(desc_info)) + return NULL; /* gen bit not set, desc is not ready yet */ + info->tail = __pos_inc(info->tail, info->size); + desc_info->tlv_size = desc_info->desc->tlv_size; + return desc_info; +} + +static void rocker_dma_ring_credits_set(const struct rocker *rocker, + const struct rocker_dma_ring_info *info, + u32 credits) +{ + if (credits) + rocker_write32(rocker, DMA_DESC_CREDITS(info->type), credits); +} + +static unsigned long rocker_dma_ring_size_fix(size_t size) +{ + return max(ROCKER_DMA_SIZE_MIN, + min(roundup_pow_of_two(size), ROCKER_DMA_SIZE_MAX)); +} + +static int rocker_dma_ring_create(const struct rocker *rocker, + unsigned int type, + size_t size, + struct rocker_dma_ring_info *info) +{ + int i; + + BUG_ON(size != rocker_dma_ring_size_fix(size)); + info->size = size; + info->type = type; + info->head = 0; + info->tail = 0; + info->desc_info = kcalloc(info->size, sizeof(*info->desc_info), + GFP_KERNEL); + if (!info->desc_info) + return -ENOMEM; + + info->desc = pci_alloc_consistent(rocker->pdev, + info->size * sizeof(*info->desc), + &info->mapaddr); + if (!info->desc) { + kfree(info->desc_info); + return -ENOMEM; + } + + for (i = 0; i < info->size; i++) + info->desc_info[i].desc = &info->desc[i]; + + rocker_write32(rocker, DMA_DESC_CTRL(info->type), + ROCKER_DMA_DESC_CTRL_RESET); + rocker_write64(rocker, DMA_DESC_ADDR(info->type), info->mapaddr); + rocker_write32(rocker, DMA_DESC_SIZE(info->type), info->size); + + return 0; +} + +static void rocker_dma_ring_destroy(const struct rocker *rocker, + const struct rocker_dma_ring_info *info) +{ + rocker_write64(rocker, DMA_DESC_ADDR(info->type), 0); + + pci_free_consistent(rocker->pdev, + info->size * sizeof(struct rocker_desc), + info->desc, info->mapaddr); + kfree(info->desc_info); +} + +static void rocker_dma_ring_pass_to_producer(const struct rocker *rocker, + struct rocker_dma_ring_info *info) +{ + int i; + + BUG_ON(info->head || info->tail); + + /* When ring is consumer, we need to advance head for each desc. + * That tells hw that the desc is ready to be used by it. + */ + for (i = 0; i < info->size - 1; i++) + rocker_desc_head_set(rocker, info, &info->desc_info[i]); + rocker_desc_commit(&info->desc_info[i]); +} + +static int rocker_dma_ring_bufs_alloc(const struct rocker *rocker, + const struct rocker_dma_ring_info *info, + int direction, size_t buf_size) +{ + struct pci_dev *pdev = rocker->pdev; + int i; + int err; + + for (i = 0; i < info->size; i++) { + struct rocker_desc_info *desc_info = &info->desc_info[i]; + struct rocker_desc *desc = &info->desc[i]; + dma_addr_t dma_handle; + char *buf; + + buf = kzalloc(buf_size, GFP_KERNEL | GFP_DMA); + if (!buf) { + err = -ENOMEM; + goto rollback; + } + + dma_handle = pci_map_single(pdev, buf, buf_size, direction); + if (pci_dma_mapping_error(pdev, dma_handle)) { + kfree(buf); + err = -EIO; + goto rollback; + } + + desc_info->data = buf; + desc_info->data_size = buf_size; + dma_unmap_addr_set(desc_info, mapaddr, dma_handle); + + desc->buf_addr = dma_handle; + desc->buf_size = buf_size; + } + return 0; + +rollback: + for (i--; i >= 0; i--) { + const struct rocker_desc_info *desc_info = &info->desc_info[i]; + + pci_unmap_single(pdev, dma_unmap_addr(desc_info, mapaddr), + desc_info->data_size, direction); + kfree(desc_info->data); + } + return err; +} + +static void rocker_dma_ring_bufs_free(const struct rocker *rocker, + const struct rocker_dma_ring_info *info, + int direction) +{ + struct pci_dev *pdev = rocker->pdev; + int i; + + for (i = 0; i < info->size; i++) { + const struct rocker_desc_info *desc_info = &info->desc_info[i]; + struct rocker_desc *desc = &info->desc[i]; + + desc->buf_addr = 0; + desc->buf_size = 0; + pci_unmap_single(pdev, dma_unmap_addr(desc_info, mapaddr), + desc_info->data_size, direction); + kfree(desc_info->data); + } +} + +static int rocker_dma_cmd_ring_wait_alloc(struct rocker_desc_info *desc_info) +{ + struct rocker_wait *wait; + + wait = rocker_wait_create(); + if (!wait) + return -ENOMEM; + rocker_desc_cookie_ptr_set(desc_info, wait); + return 0; +} + +static void +rocker_dma_cmd_ring_wait_free(const struct rocker_desc_info *desc_info) +{ + struct rocker_wait *wait = rocker_desc_cookie_ptr_get(desc_info); + + rocker_wait_destroy(wait); +} + +static int rocker_dma_cmd_ring_waits_alloc(const struct rocker *rocker) +{ + const struct rocker_dma_ring_info *cmd_ring = &rocker->cmd_ring; + int i; + int err; + + for (i = 0; i < cmd_ring->size; i++) { + err = rocker_dma_cmd_ring_wait_alloc(&cmd_ring->desc_info[i]); + if (err) + goto rollback; + } + return 0; + +rollback: + for (i--; i >= 0; i--) + rocker_dma_cmd_ring_wait_free(&cmd_ring->desc_info[i]); + return err; +} + +static void rocker_dma_cmd_ring_waits_free(const struct rocker *rocker) +{ + const struct rocker_dma_ring_info *cmd_ring = &rocker->cmd_ring; + int i; + + for (i = 0; i < cmd_ring->size; i++) + rocker_dma_cmd_ring_wait_free(&cmd_ring->desc_info[i]); +} + +static int rocker_dma_rings_init(struct rocker *rocker) +{ + const struct pci_dev *pdev = rocker->pdev; + int err; + + err = rocker_dma_ring_create(rocker, ROCKER_DMA_CMD, + ROCKER_DMA_CMD_DEFAULT_SIZE, + &rocker->cmd_ring); + if (err) { + dev_err(&pdev->dev, "failed to create command dma ring\n"); + return err; + } + + spin_lock_init(&rocker->cmd_ring_lock); + + err = rocker_dma_ring_bufs_alloc(rocker, &rocker->cmd_ring, + PCI_DMA_BIDIRECTIONAL, PAGE_SIZE); + if (err) { + dev_err(&pdev->dev, "failed to alloc command dma ring buffers\n"); + goto err_dma_cmd_ring_bufs_alloc; + } + + err = rocker_dma_cmd_ring_waits_alloc(rocker); + if (err) { + dev_err(&pdev->dev, "failed to alloc command dma ring waits\n"); + goto err_dma_cmd_ring_waits_alloc; + } + + err = rocker_dma_ring_create(rocker, ROCKER_DMA_EVENT, + ROCKER_DMA_EVENT_DEFAULT_SIZE, + &rocker->event_ring); + if (err) { + dev_err(&pdev->dev, "failed to create event dma ring\n"); + goto err_dma_event_ring_create; + } + + err = rocker_dma_ring_bufs_alloc(rocker, &rocker->event_ring, + PCI_DMA_FROMDEVICE, PAGE_SIZE); + if (err) { + dev_err(&pdev->dev, "failed to alloc event dma ring buffers\n"); + goto err_dma_event_ring_bufs_alloc; + } + rocker_dma_ring_pass_to_producer(rocker, &rocker->event_ring); + return 0; + +err_dma_event_ring_bufs_alloc: + rocker_dma_ring_destroy(rocker, &rocker->event_ring); +err_dma_event_ring_create: + rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring, + PCI_DMA_BIDIRECTIONAL); +err_dma_cmd_ring_waits_alloc: + rocker_dma_cmd_ring_waits_free(rocker); +err_dma_cmd_ring_bufs_alloc: + rocker_dma_ring_destroy(rocker, &rocker->cmd_ring); + return err; +} + +static void rocker_dma_rings_fini(struct rocker *rocker) +{ + rocker_dma_ring_bufs_free(rocker, &rocker->event_ring, + PCI_DMA_BIDIRECTIONAL); + rocker_dma_ring_destroy(rocker, &rocker->event_ring); + rocker_dma_cmd_ring_waits_free(rocker); + rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring, + PCI_DMA_BIDIRECTIONAL); + rocker_dma_ring_destroy(rocker, &rocker->cmd_ring); +} + +static int rocker_dma_rx_ring_skb_map(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + struct sk_buff *skb, size_t buf_len) +{ + const struct rocker *rocker = rocker_port->rocker; + struct pci_dev *pdev = rocker->pdev; + dma_addr_t dma_handle; + + dma_handle = pci_map_single(pdev, skb->data, buf_len, + PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(pdev, dma_handle)) + return -EIO; + if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_RX_FRAG_ADDR, dma_handle)) + goto tlv_put_failure; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_RX_FRAG_MAX_LEN, buf_len)) + goto tlv_put_failure; + return 0; + +tlv_put_failure: + pci_unmap_single(pdev, dma_handle, buf_len, PCI_DMA_FROMDEVICE); + desc_info->tlv_size = 0; + return -EMSGSIZE; +} + +static size_t rocker_port_rx_buf_len(const struct rocker_port *rocker_port) +{ + return rocker_port->dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; +} + +static int rocker_dma_rx_ring_skb_alloc(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info) +{ + struct net_device *dev = rocker_port->dev; + struct sk_buff *skb; + size_t buf_len = rocker_port_rx_buf_len(rocker_port); + int err; + + /* Ensure that hw will see tlv_size zero in case of an error. + * That tells hw to use another descriptor. + */ + rocker_desc_cookie_ptr_set(desc_info, NULL); + desc_info->tlv_size = 0; + + skb = netdev_alloc_skb_ip_align(dev, buf_len); + if (!skb) + return -ENOMEM; + err = rocker_dma_rx_ring_skb_map(rocker_port, desc_info, skb, buf_len); + if (err) { + dev_kfree_skb_any(skb); + return err; + } + rocker_desc_cookie_ptr_set(desc_info, skb); + return 0; +} + +static void rocker_dma_rx_ring_skb_unmap(const struct rocker *rocker, + const struct rocker_tlv **attrs) +{ + struct pci_dev *pdev = rocker->pdev; + dma_addr_t dma_handle; + size_t len; + + if (!attrs[ROCKER_TLV_RX_FRAG_ADDR] || + !attrs[ROCKER_TLV_RX_FRAG_MAX_LEN]) + return; + dma_handle = rocker_tlv_get_u64(attrs[ROCKER_TLV_RX_FRAG_ADDR]); + len = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FRAG_MAX_LEN]); + pci_unmap_single(pdev, dma_handle, len, PCI_DMA_FROMDEVICE); +} + +static void rocker_dma_rx_ring_skb_free(const struct rocker *rocker, + const struct rocker_desc_info *desc_info) +{ + const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1]; + struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info); + + if (!skb) + return; + rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info); + rocker_dma_rx_ring_skb_unmap(rocker, attrs); + dev_kfree_skb_any(skb); +} + +static int rocker_dma_rx_ring_skbs_alloc(const struct rocker_port *rocker_port) +{ + const struct rocker_dma_ring_info *rx_ring = &rocker_port->rx_ring; + const struct rocker *rocker = rocker_port->rocker; + int i; + int err; + + for (i = 0; i < rx_ring->size; i++) { + err = rocker_dma_rx_ring_skb_alloc(rocker_port, + &rx_ring->desc_info[i]); + if (err) + goto rollback; + } + return 0; + +rollback: + for (i--; i >= 0; i--) + rocker_dma_rx_ring_skb_free(rocker, &rx_ring->desc_info[i]); + return err; +} + +static void rocker_dma_rx_ring_skbs_free(const struct rocker_port *rocker_port) +{ + const struct rocker_dma_ring_info *rx_ring = &rocker_port->rx_ring; + const struct rocker *rocker = rocker_port->rocker; + int i; + + for (i = 0; i < rx_ring->size; i++) + rocker_dma_rx_ring_skb_free(rocker, &rx_ring->desc_info[i]); +} + +static int rocker_port_dma_rings_init(struct rocker_port *rocker_port) +{ + struct rocker *rocker = rocker_port->rocker; + int err; + + err = rocker_dma_ring_create(rocker, + ROCKER_DMA_TX(rocker_port->port_number), + ROCKER_DMA_TX_DEFAULT_SIZE, + &rocker_port->tx_ring); + if (err) { + netdev_err(rocker_port->dev, "failed to create tx dma ring\n"); + return err; + } + + err = rocker_dma_ring_bufs_alloc(rocker, &rocker_port->tx_ring, + PCI_DMA_TODEVICE, + ROCKER_DMA_TX_DESC_SIZE); + if (err) { + netdev_err(rocker_port->dev, "failed to alloc tx dma ring buffers\n"); + goto err_dma_tx_ring_bufs_alloc; + } + + err = rocker_dma_ring_create(rocker, + ROCKER_DMA_RX(rocker_port->port_number), + ROCKER_DMA_RX_DEFAULT_SIZE, + &rocker_port->rx_ring); + if (err) { + netdev_err(rocker_port->dev, "failed to create rx dma ring\n"); + goto err_dma_rx_ring_create; + } + + err = rocker_dma_ring_bufs_alloc(rocker, &rocker_port->rx_ring, + PCI_DMA_BIDIRECTIONAL, + ROCKER_DMA_RX_DESC_SIZE); + if (err) { + netdev_err(rocker_port->dev, "failed to alloc rx dma ring buffers\n"); + goto err_dma_rx_ring_bufs_alloc; + } + + err = rocker_dma_rx_ring_skbs_alloc(rocker_port); + if (err) { + netdev_err(rocker_port->dev, "failed to alloc rx dma ring skbs\n"); + goto err_dma_rx_ring_skbs_alloc; + } + rocker_dma_ring_pass_to_producer(rocker, &rocker_port->rx_ring); + + return 0; + +err_dma_rx_ring_skbs_alloc: + rocker_dma_ring_bufs_free(rocker, &rocker_port->rx_ring, + PCI_DMA_BIDIRECTIONAL); +err_dma_rx_ring_bufs_alloc: + rocker_dma_ring_destroy(rocker, &rocker_port->rx_ring); +err_dma_rx_ring_create: + rocker_dma_ring_bufs_free(rocker, &rocker_port->tx_ring, + PCI_DMA_TODEVICE); +err_dma_tx_ring_bufs_alloc: + rocker_dma_ring_destroy(rocker, &rocker_port->tx_ring); + return err; +} + +static void rocker_port_dma_rings_fini(struct rocker_port *rocker_port) +{ + struct rocker *rocker = rocker_port->rocker; + + rocker_dma_rx_ring_skbs_free(rocker_port); + rocker_dma_ring_bufs_free(rocker, &rocker_port->rx_ring, + PCI_DMA_BIDIRECTIONAL); + rocker_dma_ring_destroy(rocker, &rocker_port->rx_ring); + rocker_dma_ring_bufs_free(rocker, &rocker_port->tx_ring, + PCI_DMA_TODEVICE); + rocker_dma_ring_destroy(rocker, &rocker_port->tx_ring); +} + +static void rocker_port_set_enable(const struct rocker_port *rocker_port, + bool enable) +{ + u64 val = rocker_read64(rocker_port->rocker, PORT_PHYS_ENABLE); + + if (enable) + val |= 1ULL << rocker_port->pport; + else + val &= ~(1ULL << rocker_port->pport); + rocker_write64(rocker_port->rocker, PORT_PHYS_ENABLE, val); +} + +/******************************** + * Interrupt handler and helpers + ********************************/ + +static irqreturn_t rocker_cmd_irq_handler(int irq, void *dev_id) +{ + struct rocker *rocker = dev_id; + const struct rocker_desc_info *desc_info; + struct rocker_wait *wait; + u32 credits = 0; + + spin_lock(&rocker->cmd_ring_lock); + while ((desc_info = rocker_desc_tail_get(&rocker->cmd_ring))) { + wait = rocker_desc_cookie_ptr_get(desc_info); + if (wait->nowait) { + rocker_desc_gen_clear(desc_info); + } else { + rocker_wait_wake_up(wait); + } + credits++; + } + spin_unlock(&rocker->cmd_ring_lock); + rocker_dma_ring_credits_set(rocker, &rocker->cmd_ring, credits); + + return IRQ_HANDLED; +} + +static void rocker_port_link_up(const struct rocker_port *rocker_port) +{ + netif_carrier_on(rocker_port->dev); + netdev_info(rocker_port->dev, "Link is up\n"); +} + +static void rocker_port_link_down(const struct rocker_port *rocker_port) +{ + netif_carrier_off(rocker_port->dev); + netdev_info(rocker_port->dev, "Link is down\n"); +} + +static int rocker_event_link_change(const struct rocker *rocker, + const struct rocker_tlv *info) +{ + const struct rocker_tlv *attrs[ROCKER_TLV_EVENT_LINK_CHANGED_MAX + 1]; + unsigned int port_number; + bool link_up; + struct rocker_port *rocker_port; + + rocker_tlv_parse_nested(attrs, ROCKER_TLV_EVENT_LINK_CHANGED_MAX, info); + if (!attrs[ROCKER_TLV_EVENT_LINK_CHANGED_PPORT] || + !attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP]) + return -EIO; + port_number = + rocker_tlv_get_u32(attrs[ROCKER_TLV_EVENT_LINK_CHANGED_PPORT]) - 1; + link_up = rocker_tlv_get_u8(attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP]); + + if (port_number >= rocker->port_count) + return -EINVAL; + + rocker_port = rocker->ports[port_number]; + if (netif_carrier_ok(rocker_port->dev) != link_up) { + if (link_up) + rocker_port_link_up(rocker_port); + else + rocker_port_link_down(rocker_port); + } + + return 0; +} + +static int rocker_world_port_ev_mac_vlan_seen(struct rocker_port *rocker_port, + const unsigned char *addr, + __be16 vlan_id); + +static int rocker_event_mac_vlan_seen(const struct rocker *rocker, + const struct rocker_tlv *info) +{ + const struct rocker_tlv *attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAX + 1]; + unsigned int port_number; + struct rocker_port *rocker_port; + const unsigned char *addr; + __be16 vlan_id; + + rocker_tlv_parse_nested(attrs, ROCKER_TLV_EVENT_MAC_VLAN_MAX, info); + if (!attrs[ROCKER_TLV_EVENT_MAC_VLAN_PPORT] || + !attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAC] || + !attrs[ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID]) + return -EIO; + port_number = + rocker_tlv_get_u32(attrs[ROCKER_TLV_EVENT_MAC_VLAN_PPORT]) - 1; + addr = rocker_tlv_data(attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAC]); + vlan_id = rocker_tlv_get_be16(attrs[ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID]); + + if (port_number >= rocker->port_count) + return -EINVAL; + + rocker_port = rocker->ports[port_number]; + return rocker_world_port_ev_mac_vlan_seen(rocker_port, addr, vlan_id); +} + +static int rocker_event_process(const struct rocker *rocker, + const struct rocker_desc_info *desc_info) +{ + const struct rocker_tlv *attrs[ROCKER_TLV_EVENT_MAX + 1]; + const struct rocker_tlv *info; + u16 type; + + rocker_tlv_parse_desc(attrs, ROCKER_TLV_EVENT_MAX, desc_info); + if (!attrs[ROCKER_TLV_EVENT_TYPE] || + !attrs[ROCKER_TLV_EVENT_INFO]) + return -EIO; + + type = rocker_tlv_get_u16(attrs[ROCKER_TLV_EVENT_TYPE]); + info = attrs[ROCKER_TLV_EVENT_INFO]; + + switch (type) { + case ROCKER_TLV_EVENT_TYPE_LINK_CHANGED: + return rocker_event_link_change(rocker, info); + case ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN: + return rocker_event_mac_vlan_seen(rocker, info); + } + + return -EOPNOTSUPP; +} + +static irqreturn_t rocker_event_irq_handler(int irq, void *dev_id) +{ + struct rocker *rocker = dev_id; + const struct pci_dev *pdev = rocker->pdev; + const struct rocker_desc_info *desc_info; + u32 credits = 0; + int err; + + while ((desc_info = rocker_desc_tail_get(&rocker->event_ring))) { + err = rocker_desc_err(desc_info); + if (err) { + dev_err(&pdev->dev, "event desc received with err %d\n", + err); + } else { + err = rocker_event_process(rocker, desc_info); + if (err) + dev_err(&pdev->dev, "event processing failed with err %d\n", + err); + } + rocker_desc_gen_clear(desc_info); + rocker_desc_head_set(rocker, &rocker->event_ring, desc_info); + credits++; + } + rocker_dma_ring_credits_set(rocker, &rocker->event_ring, credits); + + return IRQ_HANDLED; +} + +static irqreturn_t rocker_tx_irq_handler(int irq, void *dev_id) +{ + struct rocker_port *rocker_port = dev_id; + + napi_schedule(&rocker_port->napi_tx); + return IRQ_HANDLED; +} + +static irqreturn_t rocker_rx_irq_handler(int irq, void *dev_id) +{ + struct rocker_port *rocker_port = dev_id; + + napi_schedule(&rocker_port->napi_rx); + return IRQ_HANDLED; +} + +/******************** + * Command interface + ********************/ + +int rocker_cmd_exec(struct rocker_port *rocker_port, bool nowait, + rocker_cmd_prep_cb_t prepare, void *prepare_priv, + rocker_cmd_proc_cb_t process, void *process_priv) +{ + struct rocker *rocker = rocker_port->rocker; + struct rocker_desc_info *desc_info; + struct rocker_wait *wait; + unsigned long lock_flags; + int err; + + spin_lock_irqsave(&rocker->cmd_ring_lock, lock_flags); + + desc_info = rocker_desc_head_get(&rocker->cmd_ring); + if (!desc_info) { + spin_unlock_irqrestore(&rocker->cmd_ring_lock, lock_flags); + return -EAGAIN; + } + + wait = rocker_desc_cookie_ptr_get(desc_info); + rocker_wait_init(wait); + wait->nowait = nowait; + + err = prepare(rocker_port, desc_info, prepare_priv); + if (err) { + spin_unlock_irqrestore(&rocker->cmd_ring_lock, lock_flags); + return err; + } + + rocker_desc_head_set(rocker, &rocker->cmd_ring, desc_info); + + spin_unlock_irqrestore(&rocker->cmd_ring_lock, lock_flags); + + if (nowait) + return 0; + + if (!rocker_wait_event_timeout(wait, HZ / 10)) + return -EIO; + + err = rocker_desc_err(desc_info); + if (err) + return err; + + if (process) + err = process(rocker_port, desc_info, process_priv); + + rocker_desc_gen_clear(desc_info); + return err; +} + +static int +rocker_cmd_get_port_settings_prep(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + struct rocker_tlv *cmd_info; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, + ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, + rocker_port->pport)) + return -EMSGSIZE; + rocker_tlv_nest_end(desc_info, cmd_info); + return 0; +} + +static int +rocker_cmd_get_port_settings_ethtool_proc(const struct rocker_port *rocker_port, + const struct rocker_desc_info *desc_info, + void *priv) +{ + struct ethtool_cmd *ecmd = priv; + const struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1]; + const struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; + u32 speed; + u8 duplex; + u8 autoneg; + + rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info); + if (!attrs[ROCKER_TLV_CMD_INFO]) + return -EIO; + + rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + attrs[ROCKER_TLV_CMD_INFO]); + if (!info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] || + !info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] || + !info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) + return -EIO; + + speed = rocker_tlv_get_u32(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]); + duplex = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]); + autoneg = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]); + + ecmd->transceiver = XCVR_INTERNAL; + ecmd->supported = SUPPORTED_TP; + ecmd->phy_address = 0xff; + ecmd->port = PORT_TP; + ethtool_cmd_speed_set(ecmd, speed); + ecmd->duplex = duplex ? DUPLEX_FULL : DUPLEX_HALF; + ecmd->autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; + + return 0; +} + +static int +rocker_cmd_get_port_settings_macaddr_proc(const struct rocker_port *rocker_port, + const struct rocker_desc_info *desc_info, + void *priv) +{ + unsigned char *macaddr = priv; + const struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1]; + const struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; + const struct rocker_tlv *attr; + + rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info); + if (!attrs[ROCKER_TLV_CMD_INFO]) + return -EIO; + + rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + attrs[ROCKER_TLV_CMD_INFO]); + attr = info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]; + if (!attr) + return -EIO; + + if (rocker_tlv_len(attr) != ETH_ALEN) + return -EINVAL; + + ether_addr_copy(macaddr, rocker_tlv_data(attr)); + return 0; +} + +static int +rocker_cmd_get_port_settings_mode_proc(const struct rocker_port *rocker_port, + const struct rocker_desc_info *desc_info, + void *priv) +{ + u8 *p_mode = priv; + const struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1]; + const struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; + const struct rocker_tlv *attr; + + rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info); + if (!attrs[ROCKER_TLV_CMD_INFO]) + return -EIO; + + rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + attrs[ROCKER_TLV_CMD_INFO]); + attr = info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]; + if (!attr) + return -EIO; + + *p_mode = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]); + return 0; +} + +struct port_name { + char *buf; + size_t len; +}; + +static int +rocker_cmd_get_port_settings_phys_name_proc(const struct rocker_port *rocker_port, + const struct rocker_desc_info *desc_info, + void *priv) +{ + const struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; + const struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1]; + struct port_name *name = priv; + const struct rocker_tlv *attr; + size_t i, j, len; + const char *str; + + rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info); + if (!attrs[ROCKER_TLV_CMD_INFO]) + return -EIO; + + rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + attrs[ROCKER_TLV_CMD_INFO]); + attr = info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME]; + if (!attr) + return -EIO; + + len = min_t(size_t, rocker_tlv_len(attr), name->len); + str = rocker_tlv_data(attr); + + /* make sure name only contains alphanumeric characters */ + for (i = j = 0; i < len; ++i) { + if (isalnum(str[i])) { + name->buf[j] = str[i]; + j++; + } + } + + if (j == 0) + return -EIO; + + name->buf[j] = '\0'; + + return 0; +} + +static int +rocker_cmd_set_port_settings_ethtool_prep(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + struct ethtool_cmd *ecmd = priv; + struct rocker_tlv *cmd_info; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, + ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, + rocker_port->pport)) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, + ethtool_cmd_speed(ecmd))) + return -EMSGSIZE; + if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, + ecmd->duplex)) + return -EMSGSIZE; + if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, + ecmd->autoneg)) + return -EMSGSIZE; + rocker_tlv_nest_end(desc_info, cmd_info); + return 0; +} + +static int +rocker_cmd_set_port_settings_macaddr_prep(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + const unsigned char *macaddr = priv; + struct rocker_tlv *cmd_info; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, + ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, + rocker_port->pport)) + return -EMSGSIZE; + if (rocker_tlv_put(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, + ETH_ALEN, macaddr)) + return -EMSGSIZE; + rocker_tlv_nest_end(desc_info, cmd_info); + return 0; +} + +static int +rocker_cmd_set_port_settings_mtu_prep(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + int mtu = *(int *)priv; + struct rocker_tlv *cmd_info; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, + ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, + rocker_port->pport)) + return -EMSGSIZE; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MTU, + mtu)) + return -EMSGSIZE; + rocker_tlv_nest_end(desc_info, cmd_info); + return 0; +} + +static int +rocker_cmd_set_port_learning_prep(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + bool learning = *(bool *)priv; + struct rocker_tlv *cmd_info; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, + ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, + rocker_port->pport)) + return -EMSGSIZE; + if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, + learning)) + return -EMSGSIZE; + rocker_tlv_nest_end(desc_info, cmd_info); + return 0; +} + +static int rocker_cmd_get_port_settings_ethtool(struct rocker_port *rocker_port, + struct ethtool_cmd *ecmd) +{ + return rocker_cmd_exec(rocker_port, false, + rocker_cmd_get_port_settings_prep, NULL, + rocker_cmd_get_port_settings_ethtool_proc, + ecmd); +} + +static int rocker_cmd_get_port_settings_macaddr(struct rocker_port *rocker_port, + unsigned char *macaddr) +{ + return rocker_cmd_exec(rocker_port, false, + rocker_cmd_get_port_settings_prep, NULL, + rocker_cmd_get_port_settings_macaddr_proc, + macaddr); +} + +static int rocker_cmd_get_port_settings_mode(struct rocker_port *rocker_port, + u8 *p_mode) +{ + return rocker_cmd_exec(rocker_port, false, + rocker_cmd_get_port_settings_prep, NULL, + rocker_cmd_get_port_settings_mode_proc, p_mode); +} + +static int rocker_cmd_set_port_settings_ethtool(struct rocker_port *rocker_port, + struct ethtool_cmd *ecmd) +{ + return rocker_cmd_exec(rocker_port, false, + rocker_cmd_set_port_settings_ethtool_prep, + ecmd, NULL, NULL); +} + +static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port, + unsigned char *macaddr) +{ + return rocker_cmd_exec(rocker_port, false, + rocker_cmd_set_port_settings_macaddr_prep, + macaddr, NULL, NULL); +} + +static int rocker_cmd_set_port_settings_mtu(struct rocker_port *rocker_port, + int mtu) +{ + return rocker_cmd_exec(rocker_port, false, + rocker_cmd_set_port_settings_mtu_prep, + &mtu, NULL, NULL); +} + +int rocker_port_set_learning(struct rocker_port *rocker_port, + bool learning) +{ + return rocker_cmd_exec(rocker_port, false, + rocker_cmd_set_port_learning_prep, + &learning, NULL, NULL); +} + +/********************** + * Worlds manipulation + **********************/ + +static struct rocker_world_ops *rocker_world_ops[] = { + &rocker_ofdpa_ops, +}; + +#define ROCKER_WORLD_OPS_LEN ARRAY_SIZE(rocker_world_ops) + +static struct rocker_world_ops *rocker_world_ops_find(u8 mode) +{ + int i; + + for (i = 0; i < ROCKER_WORLD_OPS_LEN; i++) + if (rocker_world_ops[i]->mode == mode) + return rocker_world_ops[i]; + return NULL; +} + +static int rocker_world_init(struct rocker *rocker, u8 mode) +{ + struct rocker_world_ops *wops; + int err; + + wops = rocker_world_ops_find(mode); + if (!wops) { + dev_err(&rocker->pdev->dev, "port mode \"%d\" is not supported\n", + mode); + return -EINVAL; + } + rocker->wops = wops; + rocker->wpriv = kzalloc(wops->priv_size, GFP_KERNEL); + if (!rocker->wpriv) + return -ENOMEM; + if (!wops->init) + return 0; + err = wops->init(rocker); + if (err) + kfree(rocker->wpriv); + return err; +} + +static void rocker_world_fini(struct rocker *rocker) +{ + struct rocker_world_ops *wops = rocker->wops; + + if (!wops || !wops->fini) + return; + wops->fini(rocker); + kfree(rocker->wpriv); +} + +static int rocker_world_check_init(struct rocker_port *rocker_port) +{ + struct rocker *rocker = rocker_port->rocker; + u8 mode; + int err; + + err = rocker_cmd_get_port_settings_mode(rocker_port, &mode); + if (err) { + dev_err(&rocker->pdev->dev, "failed to get port mode\n"); + return err; + } + if (rocker->wops) { + if (rocker->wops->mode != mode) { + dev_err(&rocker->pdev->dev, "hardware has ports in different worlds, which is not supported\n"); + return err; + } + return 0; + } + return rocker_world_init(rocker, mode); +} + +static int rocker_world_port_pre_init(struct rocker_port *rocker_port) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + int err; + + rocker_port->wpriv = kzalloc(wops->port_priv_size, GFP_KERNEL); + if (!rocker_port->wpriv) + return -ENOMEM; + if (!wops->port_pre_init) + return 0; + err = wops->port_pre_init(rocker_port); + if (err) + kfree(rocker_port->wpriv); + return 0; +} + +static int rocker_world_port_init(struct rocker_port *rocker_port) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_init) + return 0; + return wops->port_init(rocker_port); +} + +static void rocker_world_port_fini(struct rocker_port *rocker_port) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_fini) + return; + wops->port_fini(rocker_port); +} + +static void rocker_world_port_post_fini(struct rocker_port *rocker_port) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_post_fini) + return; + wops->port_post_fini(rocker_port); + kfree(rocker_port->wpriv); +} + +static int rocker_world_port_open(struct rocker_port *rocker_port) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_open) + return 0; + return wops->port_open(rocker_port); +} + +static void rocker_world_port_stop(struct rocker_port *rocker_port) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_stop) + return; + wops->port_stop(rocker_port); +} + +static int rocker_world_port_attr_stp_state_set(struct rocker_port *rocker_port, + u8 state, + struct switchdev_trans *trans) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_attr_stp_state_set) + return -EOPNOTSUPP; + return wops->port_attr_stp_state_set(rocker_port, state, trans); +} + +static int +rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port, + unsigned long brport_flags, + struct switchdev_trans *trans) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_attr_bridge_flags_set) + return -EOPNOTSUPP; + return wops->port_attr_bridge_flags_set(rocker_port, brport_flags, + trans); +} + +static int +rocker_world_port_attr_bridge_flags_get(const struct rocker_port *rocker_port, + unsigned long *p_brport_flags) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_attr_bridge_flags_get) + return -EOPNOTSUPP; + return wops->port_attr_bridge_flags_get(rocker_port, p_brport_flags); +} + +static int +rocker_world_port_attr_bridge_ageing_time_set(struct rocker_port *rocker_port, + u32 ageing_time, + struct switchdev_trans *trans) + +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_attr_bridge_ageing_time_set) + return -EOPNOTSUPP; + return wops->port_attr_bridge_ageing_time_set(rocker_port, ageing_time, + trans); +} + +static int +rocker_world_port_obj_vlan_add(struct rocker_port *rocker_port, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_obj_vlan_add) + return -EOPNOTSUPP; + return 0; + return wops->port_obj_vlan_add(rocker_port, vlan, trans); +} + +static int +rocker_world_port_obj_vlan_del(struct rocker_port *rocker_port, + const struct switchdev_obj_port_vlan *vlan) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_obj_vlan_del) + return -EOPNOTSUPP; + return wops->port_obj_vlan_del(rocker_port, vlan); +} + +static int +rocker_world_port_obj_vlan_dump(const struct rocker_port *rocker_port, + struct switchdev_obj_port_vlan *vlan, + switchdev_obj_dump_cb_t *cb) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_obj_vlan_dump) + return -EOPNOTSUPP; + return wops->port_obj_vlan_dump(rocker_port, vlan, cb); +} + +static int +rocker_world_port_obj_fib4_add(struct rocker_port *rocker_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_obj_fib4_add) + return -EOPNOTSUPP; + return wops->port_obj_fib4_add(rocker_port, fib4, trans); +} + +static int +rocker_world_port_obj_fib4_del(struct rocker_port *rocker_port, + const struct switchdev_obj_ipv4_fib *fib4) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_obj_fib4_del) + return -EOPNOTSUPP; + return wops->port_obj_fib4_del(rocker_port, fib4); +} + +static int +rocker_world_port_obj_fdb_add(struct rocker_port *rocker_port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_obj_fdb_add) + return -EOPNOTSUPP; + return wops->port_obj_fdb_add(rocker_port, fdb, trans); +} + +static int +rocker_world_port_obj_fdb_del(struct rocker_port *rocker_port, + const struct switchdev_obj_port_fdb *fdb) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_obj_fdb_del) + return -EOPNOTSUPP; + return wops->port_obj_fdb_del(rocker_port, fdb); +} + +static int +rocker_world_port_obj_fdb_dump(const struct rocker_port *rocker_port, + struct switchdev_obj_port_fdb *fdb, + switchdev_obj_dump_cb_t *cb) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_obj_fdb_dump) + return -EOPNOTSUPP; + return wops->port_obj_fdb_dump(rocker_port, fdb, cb); +} + +static int rocker_world_port_master_linked(struct rocker_port *rocker_port, + struct net_device *master) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_master_linked) + return -EOPNOTSUPP; + return wops->port_master_linked(rocker_port, master); +} + +static int rocker_world_port_master_unlinked(struct rocker_port *rocker_port, + struct net_device *master) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_master_unlinked) + return -EOPNOTSUPP; + return wops->port_master_unlinked(rocker_port, master); +} + +static int rocker_world_port_neigh_update(struct rocker_port *rocker_port, + struct neighbour *n) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_neigh_update) + return -EOPNOTSUPP; + return wops->port_neigh_update(rocker_port, n); +} + +static int rocker_world_port_neigh_destroy(struct rocker_port *rocker_port, + struct neighbour *n) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_neigh_destroy) + return -EOPNOTSUPP; + return wops->port_neigh_destroy(rocker_port, n); +} + +static int rocker_world_port_ev_mac_vlan_seen(struct rocker_port *rocker_port, + const unsigned char *addr, + __be16 vlan_id) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_ev_mac_vlan_seen) + return -EOPNOTSUPP; + return wops->port_ev_mac_vlan_seen(rocker_port, addr, vlan_id); +} + +/***************** + * Net device ops + *****************/ + +static int rocker_port_open(struct net_device *dev) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + int err; + + err = rocker_port_dma_rings_init(rocker_port); + if (err) + return err; + + err = request_irq(rocker_msix_tx_vector(rocker_port), + rocker_tx_irq_handler, 0, + rocker_driver_name, rocker_port); + if (err) { + netdev_err(rocker_port->dev, "cannot assign tx irq\n"); + goto err_request_tx_irq; + } + + err = request_irq(rocker_msix_rx_vector(rocker_port), + rocker_rx_irq_handler, 0, + rocker_driver_name, rocker_port); + if (err) { + netdev_err(rocker_port->dev, "cannot assign rx irq\n"); + goto err_request_rx_irq; + } + + err = rocker_world_port_open(rocker_port); + if (err) { + netdev_err(rocker_port->dev, "cannot open port in world\n"); + goto err_world_port_open; + } + + napi_enable(&rocker_port->napi_tx); + napi_enable(&rocker_port->napi_rx); + if (!dev->proto_down) + rocker_port_set_enable(rocker_port, true); + netif_start_queue(dev); + return 0; + +err_world_port_open: + free_irq(rocker_msix_rx_vector(rocker_port), rocker_port); +err_request_rx_irq: + free_irq(rocker_msix_tx_vector(rocker_port), rocker_port); +err_request_tx_irq: + rocker_port_dma_rings_fini(rocker_port); + return err; +} + +static int rocker_port_stop(struct net_device *dev) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + + netif_stop_queue(dev); + rocker_port_set_enable(rocker_port, false); + napi_disable(&rocker_port->napi_rx); + napi_disable(&rocker_port->napi_tx); + rocker_world_port_stop(rocker_port); + free_irq(rocker_msix_rx_vector(rocker_port), rocker_port); + free_irq(rocker_msix_tx_vector(rocker_port), rocker_port); + rocker_port_dma_rings_fini(rocker_port); + + return 0; +} + +static void rocker_tx_desc_frags_unmap(const struct rocker_port *rocker_port, + const struct rocker_desc_info *desc_info) +{ + const struct rocker *rocker = rocker_port->rocker; + struct pci_dev *pdev = rocker->pdev; + const struct rocker_tlv *attrs[ROCKER_TLV_TX_MAX + 1]; + struct rocker_tlv *attr; + int rem; + + rocker_tlv_parse_desc(attrs, ROCKER_TLV_TX_MAX, desc_info); + if (!attrs[ROCKER_TLV_TX_FRAGS]) + return; + rocker_tlv_for_each_nested(attr, attrs[ROCKER_TLV_TX_FRAGS], rem) { + const struct rocker_tlv *frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_MAX + 1]; + dma_addr_t dma_handle; + size_t len; + + if (rocker_tlv_type(attr) != ROCKER_TLV_TX_FRAG) + continue; + rocker_tlv_parse_nested(frag_attrs, ROCKER_TLV_TX_FRAG_ATTR_MAX, + attr); + if (!frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] || + !frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) + continue; + dma_handle = rocker_tlv_get_u64(frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]); + len = rocker_tlv_get_u16(frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_LEN]); + pci_unmap_single(pdev, dma_handle, len, DMA_TO_DEVICE); + } +} + +static int rocker_tx_desc_frag_map_put(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + char *buf, size_t buf_len) +{ + const struct rocker *rocker = rocker_port->rocker; + struct pci_dev *pdev = rocker->pdev; + dma_addr_t dma_handle; + struct rocker_tlv *frag; + + dma_handle = pci_map_single(pdev, buf, buf_len, DMA_TO_DEVICE); + if (unlikely(pci_dma_mapping_error(pdev, dma_handle))) { + if (net_ratelimit()) + netdev_err(rocker_port->dev, "failed to dma map tx frag\n"); + return -EIO; + } + frag = rocker_tlv_nest_start(desc_info, ROCKER_TLV_TX_FRAG); + if (!frag) + goto unmap_frag; + if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_TX_FRAG_ATTR_ADDR, + dma_handle)) + goto nest_cancel; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_TX_FRAG_ATTR_LEN, + buf_len)) + goto nest_cancel; + rocker_tlv_nest_end(desc_info, frag); + return 0; + +nest_cancel: + rocker_tlv_nest_cancel(desc_info, frag); +unmap_frag: + pci_unmap_single(pdev, dma_handle, buf_len, DMA_TO_DEVICE); + return -EMSGSIZE; +} + +static netdev_tx_t rocker_port_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + struct rocker *rocker = rocker_port->rocker; + struct rocker_desc_info *desc_info; + struct rocker_tlv *frags; + int i; + int err; + + desc_info = rocker_desc_head_get(&rocker_port->tx_ring); + if (unlikely(!desc_info)) { + if (net_ratelimit()) + netdev_err(dev, "tx ring full when queue awake\n"); + return NETDEV_TX_BUSY; + } + + rocker_desc_cookie_ptr_set(desc_info, skb); + + frags = rocker_tlv_nest_start(desc_info, ROCKER_TLV_TX_FRAGS); + if (!frags) + goto out; + err = rocker_tx_desc_frag_map_put(rocker_port, desc_info, + skb->data, skb_headlen(skb)); + if (err) + goto nest_cancel; + if (skb_shinfo(skb)->nr_frags > ROCKER_TX_FRAGS_MAX) { + err = skb_linearize(skb); + if (err) + goto unmap_frags; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + err = rocker_tx_desc_frag_map_put(rocker_port, desc_info, + skb_frag_address(frag), + skb_frag_size(frag)); + if (err) + goto unmap_frags; + } + rocker_tlv_nest_end(desc_info, frags); + + rocker_desc_gen_clear(desc_info); + rocker_desc_head_set(rocker, &rocker_port->tx_ring, desc_info); + + desc_info = rocker_desc_head_get(&rocker_port->tx_ring); + if (!desc_info) + netif_stop_queue(dev); + + return NETDEV_TX_OK; + +unmap_frags: + rocker_tx_desc_frags_unmap(rocker_port, desc_info); +nest_cancel: + rocker_tlv_nest_cancel(desc_info, frags); +out: + dev_kfree_skb(skb); + dev->stats.tx_dropped++; + + return NETDEV_TX_OK; +} + +static int rocker_port_set_mac_address(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + struct rocker_port *rocker_port = netdev_priv(dev); + int err; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + err = rocker_cmd_set_port_settings_macaddr(rocker_port, addr->sa_data); + if (err) + return err; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return 0; +} + +static int rocker_port_change_mtu(struct net_device *dev, int new_mtu) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + int running = netif_running(dev); + int err; + +#define ROCKER_PORT_MIN_MTU 68 +#define ROCKER_PORT_MAX_MTU 9000 + + if (new_mtu < ROCKER_PORT_MIN_MTU || new_mtu > ROCKER_PORT_MAX_MTU) + return -EINVAL; + + if (running) + rocker_port_stop(dev); + + netdev_info(dev, "MTU change from %d to %d\n", dev->mtu, new_mtu); + dev->mtu = new_mtu; + + err = rocker_cmd_set_port_settings_mtu(rocker_port, new_mtu); + if (err) + return err; + + if (running) + err = rocker_port_open(dev); + + return err; +} + +static int rocker_port_get_phys_port_name(struct net_device *dev, + char *buf, size_t len) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + struct port_name name = { .buf = buf, .len = len }; + int err; + + err = rocker_cmd_exec(rocker_port, false, + rocker_cmd_get_port_settings_prep, NULL, + rocker_cmd_get_port_settings_phys_name_proc, + &name); + + return err ? -EOPNOTSUPP : 0; +} + +static int rocker_port_change_proto_down(struct net_device *dev, + bool proto_down) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + + if (rocker_port->dev->flags & IFF_UP) + rocker_port_set_enable(rocker_port, !proto_down); + rocker_port->dev->proto_down = proto_down; + return 0; +} + +static void rocker_port_neigh_destroy(struct neighbour *n) +{ + struct rocker_port *rocker_port = netdev_priv(n->dev); + int err; + + err = rocker_world_port_neigh_destroy(rocker_port, n); + if (err) + netdev_warn(rocker_port->dev, "failed to handle neigh destroy (err %d)\n", + err); +} + +static const struct net_device_ops rocker_port_netdev_ops = { + .ndo_open = rocker_port_open, + .ndo_stop = rocker_port_stop, + .ndo_start_xmit = rocker_port_xmit, + .ndo_set_mac_address = rocker_port_set_mac_address, + .ndo_change_mtu = rocker_port_change_mtu, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, + .ndo_get_phys_port_name = rocker_port_get_phys_port_name, + .ndo_change_proto_down = rocker_port_change_proto_down, + .ndo_neigh_destroy = rocker_port_neigh_destroy, +}; + +/******************** + * swdev interface + ********************/ + +static int rocker_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ + const struct rocker_port *rocker_port = netdev_priv(dev); + const struct rocker *rocker = rocker_port->rocker; + int err = 0; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + attr->u.ppid.id_len = sizeof(rocker->hw.id); + memcpy(&attr->u.ppid.id, &rocker->hw.id, attr->u.ppid.id_len); + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + err = rocker_world_port_attr_bridge_flags_get(rocker_port, + &attr->u.brport_flags); + break; + default: + return -EOPNOTSUPP; + } + + return err; +} + +static int rocker_port_attr_set(struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + int err = 0; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + err = rocker_world_port_attr_stp_state_set(rocker_port, + attr->u.stp_state, + trans); + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + err = rocker_world_port_attr_bridge_flags_set(rocker_port, + attr->u.brport_flags, + trans); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + err = rocker_world_port_attr_bridge_ageing_time_set(rocker_port, + attr->u.ageing_time, + trans); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int rocker_port_obj_add(struct net_device *dev, + const struct switchdev_obj *obj, + struct switchdev_trans *trans) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + int err = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = rocker_world_port_obj_vlan_add(rocker_port, + SWITCHDEV_OBJ_PORT_VLAN(obj), + trans); + break; + case SWITCHDEV_OBJ_ID_IPV4_FIB: + err = rocker_world_port_obj_fib4_add(rocker_port, + SWITCHDEV_OBJ_IPV4_FIB(obj), + trans); + break; + case SWITCHDEV_OBJ_ID_PORT_FDB: + err = rocker_world_port_obj_fdb_add(rocker_port, + SWITCHDEV_OBJ_PORT_FDB(obj), + trans); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int rocker_port_obj_del(struct net_device *dev, + const struct switchdev_obj *obj) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + int err = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = rocker_world_port_obj_vlan_del(rocker_port, + SWITCHDEV_OBJ_PORT_VLAN(obj)); + break; + case SWITCHDEV_OBJ_ID_IPV4_FIB: + err = rocker_world_port_obj_fib4_del(rocker_port, + SWITCHDEV_OBJ_IPV4_FIB(obj)); + break; + case SWITCHDEV_OBJ_ID_PORT_FDB: + err = rocker_world_port_obj_fdb_del(rocker_port, + SWITCHDEV_OBJ_PORT_FDB(obj)); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int rocker_port_obj_dump(struct net_device *dev, + struct switchdev_obj *obj, + switchdev_obj_dump_cb_t *cb) +{ + const struct rocker_port *rocker_port = netdev_priv(dev); + int err = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_FDB: + err = rocker_world_port_obj_fdb_dump(rocker_port, + SWITCHDEV_OBJ_PORT_FDB(obj), + cb); + break; + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = rocker_world_port_obj_vlan_dump(rocker_port, + SWITCHDEV_OBJ_PORT_VLAN(obj), + cb); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static const struct switchdev_ops rocker_port_switchdev_ops = { + .switchdev_port_attr_get = rocker_port_attr_get, + .switchdev_port_attr_set = rocker_port_attr_set, + .switchdev_port_obj_add = rocker_port_obj_add, + .switchdev_port_obj_del = rocker_port_obj_del, + .switchdev_port_obj_dump = rocker_port_obj_dump, +}; + +/******************** + * ethtool interface + ********************/ + +static int rocker_port_get_settings(struct net_device *dev, + struct ethtool_cmd *ecmd) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + + return rocker_cmd_get_port_settings_ethtool(rocker_port, ecmd); +} + +static int rocker_port_set_settings(struct net_device *dev, + struct ethtool_cmd *ecmd) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + + return rocker_cmd_set_port_settings_ethtool(rocker_port, ecmd); +} + +static void rocker_port_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, rocker_driver_name, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); +} + +static struct rocker_port_stats { + char str[ETH_GSTRING_LEN]; + int type; +} rocker_port_stats[] = { + { "rx_packets", ROCKER_TLV_CMD_PORT_STATS_RX_PKTS, }, + { "rx_bytes", ROCKER_TLV_CMD_PORT_STATS_RX_BYTES, }, + { "rx_dropped", ROCKER_TLV_CMD_PORT_STATS_RX_DROPPED, }, + { "rx_errors", ROCKER_TLV_CMD_PORT_STATS_RX_ERRORS, }, + + { "tx_packets", ROCKER_TLV_CMD_PORT_STATS_TX_PKTS, }, + { "tx_bytes", ROCKER_TLV_CMD_PORT_STATS_TX_BYTES, }, + { "tx_dropped", ROCKER_TLV_CMD_PORT_STATS_TX_DROPPED, }, + { "tx_errors", ROCKER_TLV_CMD_PORT_STATS_TX_ERRORS, }, +}; + +#define ROCKER_PORT_STATS_LEN ARRAY_SIZE(rocker_port_stats) + +static void rocker_port_get_strings(struct net_device *netdev, u32 stringset, + u8 *data) +{ + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ARRAY_SIZE(rocker_port_stats); i++) { + memcpy(p, rocker_port_stats[i].str, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + break; + } +} + +static int +rocker_cmd_get_port_stats_prep(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + struct rocker_tlv *cmd_stats; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, + ROCKER_TLV_CMD_TYPE_GET_PORT_STATS)) + return -EMSGSIZE; + + cmd_stats = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_stats) + return -EMSGSIZE; + + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_STATS_PPORT, + rocker_port->pport)) + return -EMSGSIZE; + + rocker_tlv_nest_end(desc_info, cmd_stats); + + return 0; +} + +static int +rocker_cmd_get_port_stats_ethtool_proc(const struct rocker_port *rocker_port, + const struct rocker_desc_info *desc_info, + void *priv) +{ + const struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1]; + const struct rocker_tlv *stats_attrs[ROCKER_TLV_CMD_PORT_STATS_MAX + 1]; + const struct rocker_tlv *pattr; + u32 pport; + u64 *data = priv; + int i; + + rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info); + + if (!attrs[ROCKER_TLV_CMD_INFO]) + return -EIO; + + rocker_tlv_parse_nested(stats_attrs, ROCKER_TLV_CMD_PORT_STATS_MAX, + attrs[ROCKER_TLV_CMD_INFO]); + + if (!stats_attrs[ROCKER_TLV_CMD_PORT_STATS_PPORT]) + return -EIO; + + pport = rocker_tlv_get_u32(stats_attrs[ROCKER_TLV_CMD_PORT_STATS_PPORT]); + if (pport != rocker_port->pport) + return -EIO; + + for (i = 0; i < ARRAY_SIZE(rocker_port_stats); i++) { + pattr = stats_attrs[rocker_port_stats[i].type]; + if (!pattr) + continue; + + data[i] = rocker_tlv_get_u64(pattr); + } + + return 0; +} + +static int rocker_cmd_get_port_stats_ethtool(struct rocker_port *rocker_port, + void *priv) +{ + return rocker_cmd_exec(rocker_port, false, + rocker_cmd_get_port_stats_prep, NULL, + rocker_cmd_get_port_stats_ethtool_proc, + priv); +} + +static void rocker_port_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + + if (rocker_cmd_get_port_stats_ethtool(rocker_port, data) != 0) { + int i; + + for (i = 0; i < ARRAY_SIZE(rocker_port_stats); ++i) + data[i] = 0; + } +} + +static int rocker_port_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ROCKER_PORT_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + +static const struct ethtool_ops rocker_port_ethtool_ops = { + .get_settings = rocker_port_get_settings, + .set_settings = rocker_port_set_settings, + .get_drvinfo = rocker_port_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = rocker_port_get_strings, + .get_ethtool_stats = rocker_port_get_stats, + .get_sset_count = rocker_port_get_sset_count, +}; + +/***************** + * NAPI interface + *****************/ + +static struct rocker_port *rocker_port_napi_tx_get(struct napi_struct *napi) +{ + return container_of(napi, struct rocker_port, napi_tx); +} + +static int rocker_port_poll_tx(struct napi_struct *napi, int budget) +{ + struct rocker_port *rocker_port = rocker_port_napi_tx_get(napi); + const struct rocker *rocker = rocker_port->rocker; + const struct rocker_desc_info *desc_info; + u32 credits = 0; + int err; + + /* Cleanup tx descriptors */ + while ((desc_info = rocker_desc_tail_get(&rocker_port->tx_ring))) { + struct sk_buff *skb; + + err = rocker_desc_err(desc_info); + if (err && net_ratelimit()) + netdev_err(rocker_port->dev, "tx desc received with err %d\n", + err); + rocker_tx_desc_frags_unmap(rocker_port, desc_info); + + skb = rocker_desc_cookie_ptr_get(desc_info); + if (err == 0) { + rocker_port->dev->stats.tx_packets++; + rocker_port->dev->stats.tx_bytes += skb->len; + } else { + rocker_port->dev->stats.tx_errors++; + } + + dev_kfree_skb_any(skb); + credits++; + } + + if (credits && netif_queue_stopped(rocker_port->dev)) + netif_wake_queue(rocker_port->dev); + + napi_complete(napi); + rocker_dma_ring_credits_set(rocker, &rocker_port->tx_ring, credits); + + return 0; +} + +static int rocker_port_rx_proc(const struct rocker *rocker, + const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info) +{ + const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1]; + struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info); + size_t rx_len; + u16 rx_flags = 0; + + if (!skb) + return -ENOENT; + + rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info); + if (!attrs[ROCKER_TLV_RX_FRAG_LEN]) + return -EINVAL; + if (attrs[ROCKER_TLV_RX_FLAGS]) + rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]); + + rocker_dma_rx_ring_skb_unmap(rocker, attrs); + + rx_len = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FRAG_LEN]); + skb_put(skb, rx_len); + skb->protocol = eth_type_trans(skb, rocker_port->dev); + + if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) + skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; + + rocker_port->dev->stats.rx_packets++; + rocker_port->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return rocker_dma_rx_ring_skb_alloc(rocker_port, desc_info); +} + +static struct rocker_port *rocker_port_napi_rx_get(struct napi_struct *napi) +{ + return container_of(napi, struct rocker_port, napi_rx); +} + +static int rocker_port_poll_rx(struct napi_struct *napi, int budget) +{ + struct rocker_port *rocker_port = rocker_port_napi_rx_get(napi); + const struct rocker *rocker = rocker_port->rocker; + struct rocker_desc_info *desc_info; + u32 credits = 0; + int err; + + /* Process rx descriptors */ + while (credits < budget && + (desc_info = rocker_desc_tail_get(&rocker_port->rx_ring))) { + err = rocker_desc_err(desc_info); + if (err) { + if (net_ratelimit()) + netdev_err(rocker_port->dev, "rx desc received with err %d\n", + err); + } else { + err = rocker_port_rx_proc(rocker, rocker_port, + desc_info); + if (err && net_ratelimit()) + netdev_err(rocker_port->dev, "rx processing failed with err %d\n", + err); + } + if (err) + rocker_port->dev->stats.rx_errors++; + + rocker_desc_gen_clear(desc_info); + rocker_desc_head_set(rocker, &rocker_port->rx_ring, desc_info); + credits++; + } + + if (credits < budget) + napi_complete(napi); + + rocker_dma_ring_credits_set(rocker, &rocker_port->rx_ring, credits); + + return credits; +} + +/***************** + * PCI driver ops + *****************/ + +static void rocker_carrier_init(const struct rocker_port *rocker_port) +{ + const struct rocker *rocker = rocker_port->rocker; + u64 link_status = rocker_read64(rocker, PORT_PHYS_LINK_STATUS); + bool link_up; + + link_up = link_status & (1 << rocker_port->pport); + if (link_up) + netif_carrier_on(rocker_port->dev); + else + netif_carrier_off(rocker_port->dev); +} + +static void rocker_remove_ports(struct rocker *rocker) +{ + struct rocker_port *rocker_port; + int i; + + for (i = 0; i < rocker->port_count; i++) { + rocker_port = rocker->ports[i]; + if (!rocker_port) + continue; + rocker_world_port_fini(rocker_port); + unregister_netdev(rocker_port->dev); + rocker_world_port_post_fini(rocker_port); + free_netdev(rocker_port->dev); + } + rocker_world_fini(rocker); + kfree(rocker->ports); +} + +static void rocker_port_dev_addr_init(struct rocker_port *rocker_port) +{ + const struct rocker *rocker = rocker_port->rocker; + const struct pci_dev *pdev = rocker->pdev; + int err; + + err = rocker_cmd_get_port_settings_macaddr(rocker_port, + rocker_port->dev->dev_addr); + if (err) { + dev_warn(&pdev->dev, "failed to get mac address, using random\n"); + eth_hw_addr_random(rocker_port->dev); + } +} + +static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) +{ + const struct pci_dev *pdev = rocker->pdev; + struct rocker_port *rocker_port; + struct net_device *dev; + int err; + + dev = alloc_etherdev(sizeof(struct rocker_port)); + if (!dev) + return -ENOMEM; + rocker_port = netdev_priv(dev); + rocker_port->dev = dev; + rocker_port->rocker = rocker; + rocker_port->port_number = port_number; + rocker_port->pport = port_number + 1; + + err = rocker_world_check_init(rocker_port); + if (err) { + dev_err(&pdev->dev, "world init failed\n"); + goto err_world_check_init; + } + + rocker_port_dev_addr_init(rocker_port); + dev->netdev_ops = &rocker_port_netdev_ops; + dev->ethtool_ops = &rocker_port_ethtool_ops; + dev->switchdev_ops = &rocker_port_switchdev_ops; + netif_tx_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, + NAPI_POLL_WEIGHT); + netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx, + NAPI_POLL_WEIGHT); + rocker_carrier_init(rocker_port); + + dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_SG; + + err = rocker_world_port_pre_init(rocker_port); + if (err) { + dev_err(&pdev->dev, "port world pre-init failed\n"); + goto err_world_port_pre_init; + } + err = register_netdev(dev); + if (err) { + dev_err(&pdev->dev, "register_netdev failed\n"); + goto err_register_netdev; + } + rocker->ports[port_number] = rocker_port; + + err = rocker_world_port_init(rocker_port); + if (err) { + dev_err(&pdev->dev, "port world init failed\n"); + goto err_world_port_init; + } + + return 0; + +err_world_port_init: + rocker->ports[port_number] = NULL; + unregister_netdev(dev); +err_register_netdev: + rocker_world_port_post_fini(rocker_port); +err_world_port_pre_init: +err_world_check_init: + free_netdev(dev); + return err; +} + +static int rocker_probe_ports(struct rocker *rocker) +{ + int i; + size_t alloc_size; + int err; + + alloc_size = sizeof(struct rocker_port *) * rocker->port_count; + rocker->ports = kzalloc(alloc_size, GFP_KERNEL); + if (!rocker->ports) + return -ENOMEM; + for (i = 0; i < rocker->port_count; i++) { + err = rocker_probe_port(rocker, i); + if (err) + goto remove_ports; + } + return 0; + +remove_ports: + rocker_remove_ports(rocker); + return err; +} + +static int rocker_msix_init(struct rocker *rocker) +{ + struct pci_dev *pdev = rocker->pdev; + int msix_entries; + int i; + int err; + + msix_entries = pci_msix_vec_count(pdev); + if (msix_entries < 0) + return msix_entries; + + if (msix_entries != ROCKER_MSIX_VEC_COUNT(rocker->port_count)) + return -EINVAL; + + rocker->msix_entries = kmalloc_array(msix_entries, + sizeof(struct msix_entry), + GFP_KERNEL); + if (!rocker->msix_entries) + return -ENOMEM; + + for (i = 0; i < msix_entries; i++) + rocker->msix_entries[i].entry = i; + + err = pci_enable_msix_exact(pdev, rocker->msix_entries, msix_entries); + if (err < 0) + goto err_enable_msix; + + return 0; + +err_enable_msix: + kfree(rocker->msix_entries); + return err; +} + +static void rocker_msix_fini(const struct rocker *rocker) +{ + pci_disable_msix(rocker->pdev); + kfree(rocker->msix_entries); +} + +static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct rocker *rocker; + int err; + + rocker = kzalloc(sizeof(*rocker), GFP_KERNEL); + if (!rocker) + return -ENOMEM; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device failed\n"); + goto err_pci_enable_device; + } + + err = pci_request_regions(pdev, rocker_driver_name); + if (err) { + dev_err(&pdev->dev, "pci_request_regions failed\n"); + goto err_pci_request_regions; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (!err) { + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "pci_set_consistent_dma_mask failed\n"); + goto err_pci_set_dma_mask; + } + } else { + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "pci_set_dma_mask failed\n"); + goto err_pci_set_dma_mask; + } + } + + if (pci_resource_len(pdev, 0) < ROCKER_PCI_BAR0_SIZE) { + dev_err(&pdev->dev, "invalid PCI region size\n"); + err = -EINVAL; + goto err_pci_resource_len_check; + } + + rocker->hw_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!rocker->hw_addr) { + dev_err(&pdev->dev, "ioremap failed\n"); + err = -EIO; + goto err_ioremap; + } + pci_set_master(pdev); + + rocker->pdev = pdev; + pci_set_drvdata(pdev, rocker); + + rocker->port_count = rocker_read32(rocker, PORT_PHYS_COUNT); + + err = rocker_msix_init(rocker); + if (err) { + dev_err(&pdev->dev, "MSI-X init failed\n"); + goto err_msix_init; + } + + err = rocker_basic_hw_test(rocker); + if (err) { + dev_err(&pdev->dev, "basic hw test failed\n"); + goto err_basic_hw_test; + } + + rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); + + err = rocker_dma_rings_init(rocker); + if (err) + goto err_dma_rings_init; + + err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), + rocker_cmd_irq_handler, 0, + rocker_driver_name, rocker); + if (err) { + dev_err(&pdev->dev, "cannot assign cmd irq\n"); + goto err_request_cmd_irq; + } + + err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), + rocker_event_irq_handler, 0, + rocker_driver_name, rocker); + if (err) { + dev_err(&pdev->dev, "cannot assign event irq\n"); + goto err_request_event_irq; + } + + rocker->hw.id = rocker_read64(rocker, SWITCH_ID); + + err = rocker_probe_ports(rocker); + if (err) { + dev_err(&pdev->dev, "failed to probe ports\n"); + goto err_probe_ports; + } + + dev_info(&pdev->dev, "Rocker switch with id %*phN\n", + (int)sizeof(rocker->hw.id), &rocker->hw.id); + + return 0; + +err_probe_ports: + free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); +err_request_event_irq: + free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker); +err_request_cmd_irq: + rocker_dma_rings_fini(rocker); +err_dma_rings_init: +err_basic_hw_test: + rocker_msix_fini(rocker); +err_msix_init: + iounmap(rocker->hw_addr); +err_ioremap: +err_pci_resource_len_check: +err_pci_set_dma_mask: + pci_release_regions(pdev); +err_pci_request_regions: + pci_disable_device(pdev); +err_pci_enable_device: + kfree(rocker); + return err; +} + +static void rocker_remove(struct pci_dev *pdev) +{ + struct rocker *rocker = pci_get_drvdata(pdev); + + rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); + rocker_remove_ports(rocker); + free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); + free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker); + rocker_dma_rings_fini(rocker); + rocker_msix_fini(rocker); + iounmap(rocker->hw_addr); + pci_release_regions(rocker->pdev); + pci_disable_device(rocker->pdev); + kfree(rocker); +} + +static struct pci_driver rocker_pci_driver = { + .name = rocker_driver_name, + .id_table = rocker_pci_id_table, + .probe = rocker_probe, + .remove = rocker_remove, +}; + +/************************************ + * Net device notifier event handler + ************************************/ + +static bool rocker_port_dev_check(const struct net_device *dev) +{ + return dev->netdev_ops == &rocker_port_netdev_ops; +} + +static int rocker_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info; + struct rocker_port *rocker_port; + int err; + + if (!rocker_port_dev_check(dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGEUPPER: + info = ptr; + if (!info->master) + goto out; + rocker_port = netdev_priv(dev); + if (info->linking) { + err = rocker_world_port_master_linked(rocker_port, + info->upper_dev); + if (err) + netdev_warn(dev, "failed to reflect master linked (err %d)\n", + err); + } else { + err = rocker_world_port_master_unlinked(rocker_port, + info->upper_dev); + if (err) + netdev_warn(dev, "failed to reflect master unlinked (err %d)\n", + err); + } + } +out: + return NOTIFY_DONE; +} + +static struct notifier_block rocker_netdevice_nb __read_mostly = { + .notifier_call = rocker_netdevice_event, +}; + +/************************************ + * Net event notifier event handler + ************************************/ + +static int rocker_netevent_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct rocker_port *rocker_port; + struct net_device *dev; + struct neighbour *n = ptr; + int err; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + if (n->tbl != &arp_tbl) + return NOTIFY_DONE; + dev = n->dev; + if (!rocker_port_dev_check(dev)) + return NOTIFY_DONE; + rocker_port = netdev_priv(dev); + err = rocker_world_port_neigh_update(rocker_port, n); + if (err) + netdev_warn(dev, "failed to handle neigh update (err %d)\n", + err); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block rocker_netevent_nb __read_mostly = { + .notifier_call = rocker_netevent_event, +}; + +/*********************** + * Module init and exit + ***********************/ + +static int __init rocker_module_init(void) +{ + int err; + + register_netdevice_notifier(&rocker_netdevice_nb); + register_netevent_notifier(&rocker_netevent_nb); + err = pci_register_driver(&rocker_pci_driver); + if (err) + goto err_pci_register_driver; + return 0; + +err_pci_register_driver: + unregister_netevent_notifier(&rocker_netevent_nb); + unregister_netdevice_notifier(&rocker_netdevice_nb); + return err; +} + +static void __exit rocker_module_exit(void) +{ + unregister_netevent_notifier(&rocker_netevent_nb); + unregister_netdevice_notifier(&rocker_netdevice_nb); + pci_unregister_driver(&rocker_pci_driver); +} + +module_init(rocker_module_init); +module_exit(rocker_module_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); +MODULE_AUTHOR("Scott Feldman <sfeldma@gmail.com>"); +MODULE_DESCRIPTION("Rocker switch device driver"); +MODULE_DEVICE_TABLE(pci, rocker_pci_id_table); diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c new file mode 100644 index 000000000000..099008a53b03 --- /dev/null +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -0,0 +1,2952 @@ +/* + * drivers/net/ethernet/rocker/rocker_ofdpa.c - Rocker switch OF-DPA-like + * implementation + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/hashtable.h> +#include <linux/crc32.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/if_vlan.h> +#include <linux/if_bridge.h> +#include <net/neighbour.h> +#include <net/switchdev.h> +#include <net/ip_fib.h> +#include <net/arp.h> + +#include "rocker.h" +#include "rocker_tlv.h" + +struct ofdpa_flow_tbl_key { + u32 priority; + enum rocker_of_dpa_table_id tbl_id; + union { + struct { + u32 in_pport; + u32 in_pport_mask; + enum rocker_of_dpa_table_id goto_tbl; + } ig_port; + struct { + u32 in_pport; + __be16 vlan_id; + __be16 vlan_id_mask; + enum rocker_of_dpa_table_id goto_tbl; + bool untagged; + __be16 new_vlan_id; + } vlan; + struct { + u32 in_pport; + u32 in_pport_mask; + __be16 eth_type; + u8 eth_dst[ETH_ALEN]; + u8 eth_dst_mask[ETH_ALEN]; + __be16 vlan_id; + __be16 vlan_id_mask; + enum rocker_of_dpa_table_id goto_tbl; + bool copy_to_cpu; + } term_mac; + struct { + __be16 eth_type; + __be32 dst4; + __be32 dst4_mask; + enum rocker_of_dpa_table_id goto_tbl; + u32 group_id; + } ucast_routing; + struct { + u8 eth_dst[ETH_ALEN]; + u8 eth_dst_mask[ETH_ALEN]; + int has_eth_dst; + int has_eth_dst_mask; + __be16 vlan_id; + u32 tunnel_id; + enum rocker_of_dpa_table_id goto_tbl; + u32 group_id; + bool copy_to_cpu; + } bridge; + struct { + u32 in_pport; + u32 in_pport_mask; + u8 eth_src[ETH_ALEN]; + u8 eth_src_mask[ETH_ALEN]; + u8 eth_dst[ETH_ALEN]; + u8 eth_dst_mask[ETH_ALEN]; + __be16 eth_type; + __be16 vlan_id; + __be16 vlan_id_mask; + u8 ip_proto; + u8 ip_proto_mask; + u8 ip_tos; + u8 ip_tos_mask; + u32 group_id; + } acl; + }; +}; + +struct ofdpa_flow_tbl_entry { + struct hlist_node entry; + u32 cmd; + u64 cookie; + struct ofdpa_flow_tbl_key key; + size_t key_len; + u32 key_crc32; /* key */ +}; + +struct ofdpa_group_tbl_entry { + struct hlist_node entry; + u32 cmd; + u32 group_id; /* key */ + u16 group_count; + u32 *group_ids; + union { + struct { + u8 pop_vlan; + } l2_interface; + struct { + u8 eth_src[ETH_ALEN]; + u8 eth_dst[ETH_ALEN]; + __be16 vlan_id; + u32 group_id; + } l2_rewrite; + struct { + u8 eth_src[ETH_ALEN]; + u8 eth_dst[ETH_ALEN]; + __be16 vlan_id; + bool ttl_check; + u32 group_id; + } l3_unicast; + }; +}; + +struct ofdpa_fdb_tbl_entry { + struct hlist_node entry; + u32 key_crc32; /* key */ + bool learned; + unsigned long touched; + struct ofdpa_fdb_tbl_key { + struct ofdpa_port *ofdpa_port; + u8 addr[ETH_ALEN]; + __be16 vlan_id; + } key; +}; + +struct ofdpa_internal_vlan_tbl_entry { + struct hlist_node entry; + int ifindex; /* key */ + u32 ref_count; + __be16 vlan_id; +}; + +struct ofdpa_neigh_tbl_entry { + struct hlist_node entry; + __be32 ip_addr; /* key */ + struct net_device *dev; + u32 ref_count; + u32 index; + u8 eth_dst[ETH_ALEN]; + bool ttl_check; +}; + +enum { + OFDPA_CTRL_LINK_LOCAL_MCAST, + OFDPA_CTRL_LOCAL_ARP, + OFDPA_CTRL_IPV4_MCAST, + OFDPA_CTRL_IPV6_MCAST, + OFDPA_CTRL_DFLT_BRIDGING, + OFDPA_CTRL_DFLT_OVS, + OFDPA_CTRL_MAX, +}; + +#define OFDPA_INTERNAL_VLAN_ID_BASE 0x0f00 +#define OFDPA_N_INTERNAL_VLANS 255 +#define OFDPA_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID) +#define OFDPA_INTERNAL_VLAN_BITMAP_LEN BITS_TO_LONGS(OFDPA_N_INTERNAL_VLANS) +#define OFDPA_UNTAGGED_VID 0 + +struct ofdpa { + struct rocker *rocker; + DECLARE_HASHTABLE(flow_tbl, 16); + spinlock_t flow_tbl_lock; /* for flow tbl accesses */ + u64 flow_tbl_next_cookie; + DECLARE_HASHTABLE(group_tbl, 16); + spinlock_t group_tbl_lock; /* for group tbl accesses */ + struct timer_list fdb_cleanup_timer; + DECLARE_HASHTABLE(fdb_tbl, 16); + spinlock_t fdb_tbl_lock; /* for fdb tbl accesses */ + unsigned long internal_vlan_bitmap[OFDPA_INTERNAL_VLAN_BITMAP_LEN]; + DECLARE_HASHTABLE(internal_vlan_tbl, 8); + spinlock_t internal_vlan_tbl_lock; /* for vlan tbl accesses */ + DECLARE_HASHTABLE(neigh_tbl, 16); + spinlock_t neigh_tbl_lock; /* for neigh tbl accesses */ + u32 neigh_tbl_next_index; +}; + +struct ofdpa_port { + struct ofdpa *ofdpa; + struct rocker_port *rocker_port; + struct net_device *dev; + u32 pport; + struct net_device *bridge_dev; + __be16 internal_vlan_id; + int stp_state; + u32 brport_flags; + unsigned long ageing_time; + bool ctrls[OFDPA_CTRL_MAX]; + unsigned long vlan_bitmap[OFDPA_VLAN_BITMAP_LEN]; +}; + +static const u8 zero_mac[ETH_ALEN] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; +static const u8 ff_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +static const u8 ll_mac[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +static const u8 ll_mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0 }; +static const u8 mcast_mac[ETH_ALEN] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 }; +static const u8 ipv4_mcast[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 }; +static const u8 ipv4_mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0x80, 0x00, 0x00 }; +static const u8 ipv6_mcast[ETH_ALEN] = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 }; +static const u8 ipv6_mask[ETH_ALEN] = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }; + +/* Rocker priority levels for flow table entries. Higher + * priority match takes precedence over lower priority match. + */ + +enum { + OFDPA_PRIORITY_UNKNOWN = 0, + OFDPA_PRIORITY_IG_PORT = 1, + OFDPA_PRIORITY_VLAN = 1, + OFDPA_PRIORITY_TERM_MAC_UCAST = 0, + OFDPA_PRIORITY_TERM_MAC_MCAST = 1, + OFDPA_PRIORITY_BRIDGING_VLAN_DFLT_EXACT = 1, + OFDPA_PRIORITY_BRIDGING_VLAN_DFLT_WILD = 2, + OFDPA_PRIORITY_BRIDGING_VLAN = 3, + OFDPA_PRIORITY_BRIDGING_TENANT_DFLT_EXACT = 1, + OFDPA_PRIORITY_BRIDGING_TENANT_DFLT_WILD = 2, + OFDPA_PRIORITY_BRIDGING_TENANT = 3, + OFDPA_PRIORITY_ACL_CTRL = 3, + OFDPA_PRIORITY_ACL_NORMAL = 2, + OFDPA_PRIORITY_ACL_DFLT = 1, +}; + +static bool ofdpa_vlan_id_is_internal(__be16 vlan_id) +{ + u16 start = OFDPA_INTERNAL_VLAN_ID_BASE; + u16 end = 0xffe; + u16 _vlan_id = ntohs(vlan_id); + + return (_vlan_id >= start && _vlan_id <= end); +} + +static __be16 ofdpa_port_vid_to_vlan(const struct ofdpa_port *ofdpa_port, + u16 vid, bool *pop_vlan) +{ + __be16 vlan_id; + + if (pop_vlan) + *pop_vlan = false; + vlan_id = htons(vid); + if (!vlan_id) { + vlan_id = ofdpa_port->internal_vlan_id; + if (pop_vlan) + *pop_vlan = true; + } + + return vlan_id; +} + +static u16 ofdpa_port_vlan_to_vid(const struct ofdpa_port *ofdpa_port, + __be16 vlan_id) +{ + if (ofdpa_vlan_id_is_internal(vlan_id)) + return 0; + + return ntohs(vlan_id); +} + +static bool ofdpa_port_is_slave(const struct ofdpa_port *ofdpa_port, + const char *kind) +{ + return ofdpa_port->bridge_dev && + !strcmp(ofdpa_port->bridge_dev->rtnl_link_ops->kind, kind); +} + +static bool ofdpa_port_is_bridged(const struct ofdpa_port *ofdpa_port) +{ + return ofdpa_port_is_slave(ofdpa_port, "bridge"); +} + +static bool ofdpa_port_is_ovsed(const struct ofdpa_port *ofdpa_port) +{ + return ofdpa_port_is_slave(ofdpa_port, "openvswitch"); +} + +#define OFDPA_OP_FLAG_REMOVE BIT(0) +#define OFDPA_OP_FLAG_NOWAIT BIT(1) +#define OFDPA_OP_FLAG_LEARNED BIT(2) +#define OFDPA_OP_FLAG_REFRESH BIT(3) + +static bool ofdpa_flags_nowait(int flags) +{ + return flags & OFDPA_OP_FLAG_NOWAIT; +} + +static void *__ofdpa_mem_alloc(struct switchdev_trans *trans, int flags, + size_t size) +{ + struct switchdev_trans_item *elem = NULL; + gfp_t gfp_flags = (flags & OFDPA_OP_FLAG_NOWAIT) ? + GFP_ATOMIC : GFP_KERNEL; + + /* If in transaction prepare phase, allocate the memory + * and enqueue it on a transaction. If in transaction + * commit phase, dequeue the memory from the transaction + * rather than re-allocating the memory. The idea is the + * driver code paths for prepare and commit are identical + * so the memory allocated in the prepare phase is the + * memory used in the commit phase. + */ + + if (!trans) { + elem = kzalloc(size + sizeof(*elem), gfp_flags); + } else if (switchdev_trans_ph_prepare(trans)) { + elem = kzalloc(size + sizeof(*elem), gfp_flags); + if (!elem) + return NULL; + switchdev_trans_item_enqueue(trans, elem, kfree, elem); + } else { + elem = switchdev_trans_item_dequeue(trans); + } + + return elem ? elem + 1 : NULL; +} + +static void *ofdpa_kzalloc(struct switchdev_trans *trans, int flags, + size_t size) +{ + return __ofdpa_mem_alloc(trans, flags, size); +} + +static void *ofdpa_kcalloc(struct switchdev_trans *trans, int flags, + size_t n, size_t size) +{ + return __ofdpa_mem_alloc(trans, flags, n * size); +} + +static void ofdpa_kfree(struct switchdev_trans *trans, const void *mem) +{ + struct switchdev_trans_item *elem; + + /* Frees are ignored if in transaction prepare phase. The + * memory remains on the per-port list until freed in the + * commit phase. + */ + + if (switchdev_trans_ph_prepare(trans)) + return; + + elem = (struct switchdev_trans_item *) mem - 1; + kfree(elem); +} + +/************************************************************* + * Flow, group, FDB, internal VLAN and neigh command prepares + *************************************************************/ + +static int +ofdpa_cmd_flow_tbl_add_ig_port(struct rocker_desc_info *desc_info, + const struct ofdpa_flow_tbl_entry *entry) +{ + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT, + entry->key.ig_port.in_pport)) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT_MASK, + entry->key.ig_port.in_pport_mask)) + return -EMSGSIZE; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, + entry->key.ig_port.goto_tbl)) + return -EMSGSIZE; + + return 0; +} + +static int +ofdpa_cmd_flow_tbl_add_vlan(struct rocker_desc_info *desc_info, + const struct ofdpa_flow_tbl_entry *entry) +{ + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT, + entry->key.vlan.in_pport)) + return -EMSGSIZE; + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->key.vlan.vlan_id)) + return -EMSGSIZE; + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, + entry->key.vlan.vlan_id_mask)) + return -EMSGSIZE; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, + entry->key.vlan.goto_tbl)) + return -EMSGSIZE; + if (entry->key.vlan.untagged && + rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_NEW_VLAN_ID, + entry->key.vlan.new_vlan_id)) + return -EMSGSIZE; + + return 0; +} + +static int +ofdpa_cmd_flow_tbl_add_term_mac(struct rocker_desc_info *desc_info, + const struct ofdpa_flow_tbl_entry *entry) +{ + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT, + entry->key.term_mac.in_pport)) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT_MASK, + entry->key.term_mac.in_pport_mask)) + return -EMSGSIZE; + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, + entry->key.term_mac.eth_type)) + return -EMSGSIZE; + if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, + ETH_ALEN, entry->key.term_mac.eth_dst)) + return -EMSGSIZE; + if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK, + ETH_ALEN, entry->key.term_mac.eth_dst_mask)) + return -EMSGSIZE; + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->key.term_mac.vlan_id)) + return -EMSGSIZE; + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, + entry->key.term_mac.vlan_id_mask)) + return -EMSGSIZE; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, + entry->key.term_mac.goto_tbl)) + return -EMSGSIZE; + if (entry->key.term_mac.copy_to_cpu && + rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, + entry->key.term_mac.copy_to_cpu)) + return -EMSGSIZE; + + return 0; +} + +static int +ofdpa_cmd_flow_tbl_add_ucast_routing(struct rocker_desc_info *desc_info, + const struct ofdpa_flow_tbl_entry *entry) +{ + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, + entry->key.ucast_routing.eth_type)) + return -EMSGSIZE; + if (rocker_tlv_put_be32(desc_info, ROCKER_TLV_OF_DPA_DST_IP, + entry->key.ucast_routing.dst4)) + return -EMSGSIZE; + if (rocker_tlv_put_be32(desc_info, ROCKER_TLV_OF_DPA_DST_IP_MASK, + entry->key.ucast_routing.dst4_mask)) + return -EMSGSIZE; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, + entry->key.ucast_routing.goto_tbl)) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID, + entry->key.ucast_routing.group_id)) + return -EMSGSIZE; + + return 0; +} + +static int +ofdpa_cmd_flow_tbl_add_bridge(struct rocker_desc_info *desc_info, + const struct ofdpa_flow_tbl_entry *entry) +{ + if (entry->key.bridge.has_eth_dst && + rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, + ETH_ALEN, entry->key.bridge.eth_dst)) + return -EMSGSIZE; + if (entry->key.bridge.has_eth_dst_mask && + rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK, + ETH_ALEN, entry->key.bridge.eth_dst_mask)) + return -EMSGSIZE; + if (entry->key.bridge.vlan_id && + rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->key.bridge.vlan_id)) + return -EMSGSIZE; + if (entry->key.bridge.tunnel_id && + rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_TUNNEL_ID, + entry->key.bridge.tunnel_id)) + return -EMSGSIZE; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, + entry->key.bridge.goto_tbl)) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID, + entry->key.bridge.group_id)) + return -EMSGSIZE; + if (entry->key.bridge.copy_to_cpu && + rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, + entry->key.bridge.copy_to_cpu)) + return -EMSGSIZE; + + return 0; +} + +static int +ofdpa_cmd_flow_tbl_add_acl(struct rocker_desc_info *desc_info, + const struct ofdpa_flow_tbl_entry *entry) +{ + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT, + entry->key.acl.in_pport)) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_PPORT_MASK, + entry->key.acl.in_pport_mask)) + return -EMSGSIZE; + if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC, + ETH_ALEN, entry->key.acl.eth_src)) + return -EMSGSIZE; + if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC_MASK, + ETH_ALEN, entry->key.acl.eth_src_mask)) + return -EMSGSIZE; + if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, + ETH_ALEN, entry->key.acl.eth_dst)) + return -EMSGSIZE; + if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK, + ETH_ALEN, entry->key.acl.eth_dst_mask)) + return -EMSGSIZE; + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, + entry->key.acl.eth_type)) + return -EMSGSIZE; + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->key.acl.vlan_id)) + return -EMSGSIZE; + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, + entry->key.acl.vlan_id_mask)) + return -EMSGSIZE; + + switch (ntohs(entry->key.acl.eth_type)) { + case ETH_P_IP: + case ETH_P_IPV6: + if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_IP_PROTO, + entry->key.acl.ip_proto)) + return -EMSGSIZE; + if (rocker_tlv_put_u8(desc_info, + ROCKER_TLV_OF_DPA_IP_PROTO_MASK, + entry->key.acl.ip_proto_mask)) + return -EMSGSIZE; + if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_IP_DSCP, + entry->key.acl.ip_tos & 0x3f)) + return -EMSGSIZE; + if (rocker_tlv_put_u8(desc_info, + ROCKER_TLV_OF_DPA_IP_DSCP_MASK, + entry->key.acl.ip_tos_mask & 0x3f)) + return -EMSGSIZE; + if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_IP_ECN, + (entry->key.acl.ip_tos & 0xc0) >> 6)) + return -EMSGSIZE; + if (rocker_tlv_put_u8(desc_info, + ROCKER_TLV_OF_DPA_IP_ECN_MASK, + (entry->key.acl.ip_tos_mask & 0xc0) >> 6)) + return -EMSGSIZE; + break; + } + + if (entry->key.acl.group_id != ROCKER_GROUP_NONE && + rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID, + entry->key.acl.group_id)) + return -EMSGSIZE; + + return 0; +} + +static int ofdpa_cmd_flow_tbl_add(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + const struct ofdpa_flow_tbl_entry *entry = priv; + struct rocker_tlv *cmd_info; + int err = 0; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_TABLE_ID, + entry->key.tbl_id)) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_PRIORITY, + entry->key.priority)) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_HARDTIME, 0)) + return -EMSGSIZE; + if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_OF_DPA_COOKIE, + entry->cookie)) + return -EMSGSIZE; + + switch (entry->key.tbl_id) { + case ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT: + err = ofdpa_cmd_flow_tbl_add_ig_port(desc_info, entry); + break; + case ROCKER_OF_DPA_TABLE_ID_VLAN: + err = ofdpa_cmd_flow_tbl_add_vlan(desc_info, entry); + break; + case ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC: + err = ofdpa_cmd_flow_tbl_add_term_mac(desc_info, entry); + break; + case ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING: + err = ofdpa_cmd_flow_tbl_add_ucast_routing(desc_info, entry); + break; + case ROCKER_OF_DPA_TABLE_ID_BRIDGING: + err = ofdpa_cmd_flow_tbl_add_bridge(desc_info, entry); + break; + case ROCKER_OF_DPA_TABLE_ID_ACL_POLICY: + err = ofdpa_cmd_flow_tbl_add_acl(desc_info, entry); + break; + default: + err = -ENOTSUPP; + break; + } + + if (err) + return err; + + rocker_tlv_nest_end(desc_info, cmd_info); + + return 0; +} + +static int ofdpa_cmd_flow_tbl_del(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + const struct ofdpa_flow_tbl_entry *entry = priv; + struct rocker_tlv *cmd_info; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_OF_DPA_COOKIE, + entry->cookie)) + return -EMSGSIZE; + rocker_tlv_nest_end(desc_info, cmd_info); + + return 0; +} + +static int +ofdpa_cmd_group_tbl_add_l2_interface(struct rocker_desc_info *desc_info, + struct ofdpa_group_tbl_entry *entry) +{ + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_OUT_PPORT, + ROCKER_GROUP_PORT_GET(entry->group_id))) + return -EMSGSIZE; + if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_POP_VLAN, + entry->l2_interface.pop_vlan)) + return -EMSGSIZE; + + return 0; +} + +static int +ofdpa_cmd_group_tbl_add_l2_rewrite(struct rocker_desc_info *desc_info, + const struct ofdpa_group_tbl_entry *entry) +{ + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, + entry->l2_rewrite.group_id)) + return -EMSGSIZE; + if (!is_zero_ether_addr(entry->l2_rewrite.eth_src) && + rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC, + ETH_ALEN, entry->l2_rewrite.eth_src)) + return -EMSGSIZE; + if (!is_zero_ether_addr(entry->l2_rewrite.eth_dst) && + rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, + ETH_ALEN, entry->l2_rewrite.eth_dst)) + return -EMSGSIZE; + if (entry->l2_rewrite.vlan_id && + rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->l2_rewrite.vlan_id)) + return -EMSGSIZE; + + return 0; +} + +static int +ofdpa_cmd_group_tbl_add_group_ids(struct rocker_desc_info *desc_info, + const struct ofdpa_group_tbl_entry *entry) +{ + int i; + struct rocker_tlv *group_ids; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GROUP_COUNT, + entry->group_count)) + return -EMSGSIZE; + + group_ids = rocker_tlv_nest_start(desc_info, + ROCKER_TLV_OF_DPA_GROUP_IDS); + if (!group_ids) + return -EMSGSIZE; + + for (i = 0; i < entry->group_count; i++) + /* Note TLV array is 1-based */ + if (rocker_tlv_put_u32(desc_info, i + 1, entry->group_ids[i])) + return -EMSGSIZE; + + rocker_tlv_nest_end(desc_info, group_ids); + + return 0; +} + +static int +ofdpa_cmd_group_tbl_add_l3_unicast(struct rocker_desc_info *desc_info, + const struct ofdpa_group_tbl_entry *entry) +{ + if (!is_zero_ether_addr(entry->l3_unicast.eth_src) && + rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC, + ETH_ALEN, entry->l3_unicast.eth_src)) + return -EMSGSIZE; + if (!is_zero_ether_addr(entry->l3_unicast.eth_dst) && + rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, + ETH_ALEN, entry->l3_unicast.eth_dst)) + return -EMSGSIZE; + if (entry->l3_unicast.vlan_id && + rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->l3_unicast.vlan_id)) + return -EMSGSIZE; + if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_TTL_CHECK, + entry->l3_unicast.ttl_check)) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, + entry->l3_unicast.group_id)) + return -EMSGSIZE; + + return 0; +} + +static int ofdpa_cmd_group_tbl_add(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + struct ofdpa_group_tbl_entry *entry = priv; + struct rocker_tlv *cmd_info; + int err = 0; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID, + entry->group_id)) + return -EMSGSIZE; + + switch (ROCKER_GROUP_TYPE_GET(entry->group_id)) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: + err = ofdpa_cmd_group_tbl_add_l2_interface(desc_info, entry); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: + err = ofdpa_cmd_group_tbl_add_l2_rewrite(desc_info, entry); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: + case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: + err = ofdpa_cmd_group_tbl_add_group_ids(desc_info, entry); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST: + err = ofdpa_cmd_group_tbl_add_l3_unicast(desc_info, entry); + break; + default: + err = -ENOTSUPP; + break; + } + + if (err) + return err; + + rocker_tlv_nest_end(desc_info, cmd_info); + + return 0; +} + +static int ofdpa_cmd_group_tbl_del(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + const struct ofdpa_group_tbl_entry *entry = priv; + struct rocker_tlv *cmd_info; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID, + entry->group_id)) + return -EMSGSIZE; + rocker_tlv_nest_end(desc_info, cmd_info); + + return 0; +} + +/*************************************************** + * Flow, group, FDB, internal VLAN and neigh tables + ***************************************************/ + +static struct ofdpa_flow_tbl_entry * +ofdpa_flow_tbl_find(const struct ofdpa *ofdpa, + const struct ofdpa_flow_tbl_entry *match) +{ + struct ofdpa_flow_tbl_entry *found; + size_t key_len = match->key_len ? match->key_len : sizeof(found->key); + + hash_for_each_possible(ofdpa->flow_tbl, found, + entry, match->key_crc32) { + if (memcmp(&found->key, &match->key, key_len) == 0) + return found; + } + + return NULL; +} + +static int ofdpa_flow_tbl_add(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + struct ofdpa_flow_tbl_entry *match) +{ + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_flow_tbl_entry *found; + size_t key_len = match->key_len ? match->key_len : sizeof(found->key); + unsigned long lock_flags; + + match->key_crc32 = crc32(~0, &match->key, key_len); + + spin_lock_irqsave(&ofdpa->flow_tbl_lock, lock_flags); + + found = ofdpa_flow_tbl_find(ofdpa, match); + + if (found) { + match->cookie = found->cookie; + if (!switchdev_trans_ph_prepare(trans)) + hash_del(&found->entry); + ofdpa_kfree(trans, found); + found = match; + found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD; + } else { + found = match; + found->cookie = ofdpa->flow_tbl_next_cookie++; + found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD; + } + + if (!switchdev_trans_ph_prepare(trans)) + hash_add(ofdpa->flow_tbl, &found->entry, found->key_crc32); + + spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, lock_flags); + + if (!switchdev_trans_ph_prepare(trans)) + return rocker_cmd_exec(ofdpa_port->rocker_port, + ofdpa_flags_nowait(flags), + ofdpa_cmd_flow_tbl_add, + found, NULL, NULL); + return 0; +} + +static int ofdpa_flow_tbl_del(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + struct ofdpa_flow_tbl_entry *match) +{ + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_flow_tbl_entry *found; + size_t key_len = match->key_len ? match->key_len : sizeof(found->key); + unsigned long lock_flags; + int err = 0; + + match->key_crc32 = crc32(~0, &match->key, key_len); + + spin_lock_irqsave(&ofdpa->flow_tbl_lock, lock_flags); + + found = ofdpa_flow_tbl_find(ofdpa, match); + + if (found) { + if (!switchdev_trans_ph_prepare(trans)) + hash_del(&found->entry); + found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL; + } + + spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, lock_flags); + + ofdpa_kfree(trans, match); + + if (found) { + if (!switchdev_trans_ph_prepare(trans)) + err = rocker_cmd_exec(ofdpa_port->rocker_port, + ofdpa_flags_nowait(flags), + ofdpa_cmd_flow_tbl_del, + found, NULL, NULL); + ofdpa_kfree(trans, found); + } + + return err; +} + +static int ofdpa_flow_tbl_do(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + struct ofdpa_flow_tbl_entry *entry) +{ + if (flags & OFDPA_OP_FLAG_REMOVE) + return ofdpa_flow_tbl_del(ofdpa_port, trans, flags, entry); + else + return ofdpa_flow_tbl_add(ofdpa_port, trans, flags, entry); +} + +static int ofdpa_flow_tbl_ig_port(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + u32 in_pport, u32 in_pport_mask, + enum rocker_of_dpa_table_id goto_tbl) +{ + struct ofdpa_flow_tbl_entry *entry; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + entry->key.priority = OFDPA_PRIORITY_IG_PORT; + entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT; + entry->key.ig_port.in_pport = in_pport; + entry->key.ig_port.in_pport_mask = in_pport_mask; + entry->key.ig_port.goto_tbl = goto_tbl; + + return ofdpa_flow_tbl_do(ofdpa_port, trans, flags, entry); +} + +static int ofdpa_flow_tbl_vlan(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + u32 in_pport, __be16 vlan_id, + __be16 vlan_id_mask, + enum rocker_of_dpa_table_id goto_tbl, + bool untagged, __be16 new_vlan_id) +{ + struct ofdpa_flow_tbl_entry *entry; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + entry->key.priority = OFDPA_PRIORITY_VLAN; + entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN; + entry->key.vlan.in_pport = in_pport; + entry->key.vlan.vlan_id = vlan_id; + entry->key.vlan.vlan_id_mask = vlan_id_mask; + entry->key.vlan.goto_tbl = goto_tbl; + + entry->key.vlan.untagged = untagged; + entry->key.vlan.new_vlan_id = new_vlan_id; + + return ofdpa_flow_tbl_do(ofdpa_port, trans, flags, entry); +} + +static int ofdpa_flow_tbl_term_mac(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, + u32 in_pport, u32 in_pport_mask, + __be16 eth_type, const u8 *eth_dst, + const u8 *eth_dst_mask, __be16 vlan_id, + __be16 vlan_id_mask, bool copy_to_cpu, + int flags) +{ + struct ofdpa_flow_tbl_entry *entry; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + if (is_multicast_ether_addr(eth_dst)) { + entry->key.priority = OFDPA_PRIORITY_TERM_MAC_MCAST; + entry->key.term_mac.goto_tbl = + ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING; + } else { + entry->key.priority = OFDPA_PRIORITY_TERM_MAC_UCAST; + entry->key.term_mac.goto_tbl = + ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; + } + + entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; + entry->key.term_mac.in_pport = in_pport; + entry->key.term_mac.in_pport_mask = in_pport_mask; + entry->key.term_mac.eth_type = eth_type; + ether_addr_copy(entry->key.term_mac.eth_dst, eth_dst); + ether_addr_copy(entry->key.term_mac.eth_dst_mask, eth_dst_mask); + entry->key.term_mac.vlan_id = vlan_id; + entry->key.term_mac.vlan_id_mask = vlan_id_mask; + entry->key.term_mac.copy_to_cpu = copy_to_cpu; + + return ofdpa_flow_tbl_do(ofdpa_port, trans, flags, entry); +} + +static int ofdpa_flow_tbl_bridge(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + const u8 *eth_dst, const u8 *eth_dst_mask, + __be16 vlan_id, u32 tunnel_id, + enum rocker_of_dpa_table_id goto_tbl, + u32 group_id, bool copy_to_cpu) +{ + struct ofdpa_flow_tbl_entry *entry; + u32 priority; + bool vlan_bridging = !!vlan_id; + bool dflt = !eth_dst || (eth_dst && eth_dst_mask); + bool wild = false; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING; + + if (eth_dst) { + entry->key.bridge.has_eth_dst = 1; + ether_addr_copy(entry->key.bridge.eth_dst, eth_dst); + } + if (eth_dst_mask) { + entry->key.bridge.has_eth_dst_mask = 1; + ether_addr_copy(entry->key.bridge.eth_dst_mask, eth_dst_mask); + if (!ether_addr_equal(eth_dst_mask, ff_mac)) + wild = true; + } + + priority = OFDPA_PRIORITY_UNKNOWN; + if (vlan_bridging && dflt && wild) + priority = OFDPA_PRIORITY_BRIDGING_VLAN_DFLT_WILD; + else if (vlan_bridging && dflt && !wild) + priority = OFDPA_PRIORITY_BRIDGING_VLAN_DFLT_EXACT; + else if (vlan_bridging && !dflt) + priority = OFDPA_PRIORITY_BRIDGING_VLAN; + else if (!vlan_bridging && dflt && wild) + priority = OFDPA_PRIORITY_BRIDGING_TENANT_DFLT_WILD; + else if (!vlan_bridging && dflt && !wild) + priority = OFDPA_PRIORITY_BRIDGING_TENANT_DFLT_EXACT; + else if (!vlan_bridging && !dflt) + priority = OFDPA_PRIORITY_BRIDGING_TENANT; + + entry->key.priority = priority; + entry->key.bridge.vlan_id = vlan_id; + entry->key.bridge.tunnel_id = tunnel_id; + entry->key.bridge.goto_tbl = goto_tbl; + entry->key.bridge.group_id = group_id; + entry->key.bridge.copy_to_cpu = copy_to_cpu; + + return ofdpa_flow_tbl_do(ofdpa_port, trans, flags, entry); +} + +static int ofdpa_flow_tbl_ucast4_routing(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, + __be16 eth_type, __be32 dst, + __be32 dst_mask, u32 priority, + enum rocker_of_dpa_table_id goto_tbl, + u32 group_id, int flags) +{ + struct ofdpa_flow_tbl_entry *entry; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; + entry->key.priority = priority; + entry->key.ucast_routing.eth_type = eth_type; + entry->key.ucast_routing.dst4 = dst; + entry->key.ucast_routing.dst4_mask = dst_mask; + entry->key.ucast_routing.goto_tbl = goto_tbl; + entry->key.ucast_routing.group_id = group_id; + entry->key_len = offsetof(struct ofdpa_flow_tbl_key, + ucast_routing.group_id); + + return ofdpa_flow_tbl_do(ofdpa_port, trans, flags, entry); +} + +static int ofdpa_flow_tbl_acl(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + u32 in_pport, u32 in_pport_mask, + const u8 *eth_src, const u8 *eth_src_mask, + const u8 *eth_dst, const u8 *eth_dst_mask, + __be16 eth_type, __be16 vlan_id, + __be16 vlan_id_mask, u8 ip_proto, + u8 ip_proto_mask, u8 ip_tos, u8 ip_tos_mask, + u32 group_id) +{ + u32 priority; + struct ofdpa_flow_tbl_entry *entry; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + priority = OFDPA_PRIORITY_ACL_NORMAL; + if (eth_dst && eth_dst_mask) { + if (ether_addr_equal(eth_dst_mask, mcast_mac)) + priority = OFDPA_PRIORITY_ACL_DFLT; + else if (is_link_local_ether_addr(eth_dst)) + priority = OFDPA_PRIORITY_ACL_CTRL; + } + + entry->key.priority = priority; + entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + entry->key.acl.in_pport = in_pport; + entry->key.acl.in_pport_mask = in_pport_mask; + + if (eth_src) + ether_addr_copy(entry->key.acl.eth_src, eth_src); + if (eth_src_mask) + ether_addr_copy(entry->key.acl.eth_src_mask, eth_src_mask); + if (eth_dst) + ether_addr_copy(entry->key.acl.eth_dst, eth_dst); + if (eth_dst_mask) + ether_addr_copy(entry->key.acl.eth_dst_mask, eth_dst_mask); + + entry->key.acl.eth_type = eth_type; + entry->key.acl.vlan_id = vlan_id; + entry->key.acl.vlan_id_mask = vlan_id_mask; + entry->key.acl.ip_proto = ip_proto; + entry->key.acl.ip_proto_mask = ip_proto_mask; + entry->key.acl.ip_tos = ip_tos; + entry->key.acl.ip_tos_mask = ip_tos_mask; + entry->key.acl.group_id = group_id; + + return ofdpa_flow_tbl_do(ofdpa_port, trans, flags, entry); +} + +static struct ofdpa_group_tbl_entry * +ofdpa_group_tbl_find(const struct ofdpa *ofdpa, + const struct ofdpa_group_tbl_entry *match) +{ + struct ofdpa_group_tbl_entry *found; + + hash_for_each_possible(ofdpa->group_tbl, found, + entry, match->group_id) { + if (found->group_id == match->group_id) + return found; + } + + return NULL; +} + +static void ofdpa_group_tbl_entry_free(struct switchdev_trans *trans, + struct ofdpa_group_tbl_entry *entry) +{ + switch (ROCKER_GROUP_TYPE_GET(entry->group_id)) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: + case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: + ofdpa_kfree(trans, entry->group_ids); + break; + default: + break; + } + ofdpa_kfree(trans, entry); +} + +static int ofdpa_group_tbl_add(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + struct ofdpa_group_tbl_entry *match) +{ + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_group_tbl_entry *found; + unsigned long lock_flags; + + spin_lock_irqsave(&ofdpa->group_tbl_lock, lock_flags); + + found = ofdpa_group_tbl_find(ofdpa, match); + + if (found) { + if (!switchdev_trans_ph_prepare(trans)) + hash_del(&found->entry); + ofdpa_group_tbl_entry_free(trans, found); + found = match; + found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD; + } else { + found = match; + found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD; + } + + if (!switchdev_trans_ph_prepare(trans)) + hash_add(ofdpa->group_tbl, &found->entry, found->group_id); + + spin_unlock_irqrestore(&ofdpa->group_tbl_lock, lock_flags); + + if (!switchdev_trans_ph_prepare(trans)) + return rocker_cmd_exec(ofdpa_port->rocker_port, + ofdpa_flags_nowait(flags), + ofdpa_cmd_group_tbl_add, + found, NULL, NULL); + return 0; +} + +static int ofdpa_group_tbl_del(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + struct ofdpa_group_tbl_entry *match) +{ + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_group_tbl_entry *found; + unsigned long lock_flags; + int err = 0; + + spin_lock_irqsave(&ofdpa->group_tbl_lock, lock_flags); + + found = ofdpa_group_tbl_find(ofdpa, match); + + if (found) { + if (!switchdev_trans_ph_prepare(trans)) + hash_del(&found->entry); + found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL; + } + + spin_unlock_irqrestore(&ofdpa->group_tbl_lock, lock_flags); + + ofdpa_group_tbl_entry_free(trans, match); + + if (found) { + if (!switchdev_trans_ph_prepare(trans)) + err = rocker_cmd_exec(ofdpa_port->rocker_port, + ofdpa_flags_nowait(flags), + ofdpa_cmd_group_tbl_del, + found, NULL, NULL); + ofdpa_group_tbl_entry_free(trans, found); + } + + return err; +} + +static int ofdpa_group_tbl_do(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + struct ofdpa_group_tbl_entry *entry) +{ + if (flags & OFDPA_OP_FLAG_REMOVE) + return ofdpa_group_tbl_del(ofdpa_port, trans, flags, entry); + else + return ofdpa_group_tbl_add(ofdpa_port, trans, flags, entry); +} + +static int ofdpa_group_l2_interface(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + __be16 vlan_id, u32 out_pport, + int pop_vlan) +{ + struct ofdpa_group_tbl_entry *entry; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + entry->group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); + entry->l2_interface.pop_vlan = pop_vlan; + + return ofdpa_group_tbl_do(ofdpa_port, trans, flags, entry); +} + +static int ofdpa_group_l2_fan_out(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, + int flags, u8 group_count, + const u32 *group_ids, u32 group_id) +{ + struct ofdpa_group_tbl_entry *entry; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + entry->group_id = group_id; + entry->group_count = group_count; + + entry->group_ids = ofdpa_kcalloc(trans, flags, + group_count, sizeof(u32)); + if (!entry->group_ids) { + ofdpa_kfree(trans, entry); + return -ENOMEM; + } + memcpy(entry->group_ids, group_ids, group_count * sizeof(u32)); + + return ofdpa_group_tbl_do(ofdpa_port, trans, flags, entry); +} + +static int ofdpa_group_l2_flood(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + __be16 vlan_id, u8 group_count, + const u32 *group_ids, u32 group_id) +{ + return ofdpa_group_l2_fan_out(ofdpa_port, trans, flags, + group_count, group_ids, + group_id); +} + +static int ofdpa_group_l3_unicast(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + u32 index, const u8 *src_mac, const u8 *dst_mac, + __be16 vlan_id, bool ttl_check, u32 pport) +{ + struct ofdpa_group_tbl_entry *entry; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + entry->group_id = ROCKER_GROUP_L3_UNICAST(index); + if (src_mac) + ether_addr_copy(entry->l3_unicast.eth_src, src_mac); + if (dst_mac) + ether_addr_copy(entry->l3_unicast.eth_dst, dst_mac); + entry->l3_unicast.vlan_id = vlan_id; + entry->l3_unicast.ttl_check = ttl_check; + entry->l3_unicast.group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, pport); + + return ofdpa_group_tbl_do(ofdpa_port, trans, flags, entry); +} + +static struct ofdpa_neigh_tbl_entry * +ofdpa_neigh_tbl_find(const struct ofdpa *ofdpa, __be32 ip_addr) +{ + struct ofdpa_neigh_tbl_entry *found; + + hash_for_each_possible(ofdpa->neigh_tbl, found, + entry, be32_to_cpu(ip_addr)) + if (found->ip_addr == ip_addr) + return found; + + return NULL; +} + +static void ofdpa_neigh_add(struct ofdpa *ofdpa, + struct switchdev_trans *trans, + struct ofdpa_neigh_tbl_entry *entry) +{ + if (!switchdev_trans_ph_commit(trans)) + entry->index = ofdpa->neigh_tbl_next_index++; + if (switchdev_trans_ph_prepare(trans)) + return; + entry->ref_count++; + hash_add(ofdpa->neigh_tbl, &entry->entry, + be32_to_cpu(entry->ip_addr)); +} + +static void ofdpa_neigh_del(struct switchdev_trans *trans, + struct ofdpa_neigh_tbl_entry *entry) +{ + if (switchdev_trans_ph_prepare(trans)) + return; + if (--entry->ref_count == 0) { + hash_del(&entry->entry); + ofdpa_kfree(trans, entry); + } +} + +static void ofdpa_neigh_update(struct ofdpa_neigh_tbl_entry *entry, + struct switchdev_trans *trans, + const u8 *eth_dst, bool ttl_check) +{ + if (eth_dst) { + ether_addr_copy(entry->eth_dst, eth_dst); + entry->ttl_check = ttl_check; + } else if (!switchdev_trans_ph_prepare(trans)) { + entry->ref_count++; + } +} + +static int ofdpa_port_ipv4_neigh(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, + int flags, __be32 ip_addr, const u8 *eth_dst) +{ + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_neigh_tbl_entry *entry; + struct ofdpa_neigh_tbl_entry *found; + unsigned long lock_flags; + __be16 eth_type = htons(ETH_P_IP); + enum rocker_of_dpa_table_id goto_tbl = + ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + u32 group_id; + u32 priority = 0; + bool adding = !(flags & OFDPA_OP_FLAG_REMOVE); + bool updating; + bool removing; + int err = 0; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + spin_lock_irqsave(&ofdpa->neigh_tbl_lock, lock_flags); + + found = ofdpa_neigh_tbl_find(ofdpa, ip_addr); + + updating = found && adding; + removing = found && !adding; + adding = !found && adding; + + if (adding) { + entry->ip_addr = ip_addr; + entry->dev = ofdpa_port->dev; + ether_addr_copy(entry->eth_dst, eth_dst); + entry->ttl_check = true; + ofdpa_neigh_add(ofdpa, trans, entry); + } else if (removing) { + memcpy(entry, found, sizeof(*entry)); + ofdpa_neigh_del(trans, found); + } else if (updating) { + ofdpa_neigh_update(found, trans, eth_dst, true); + memcpy(entry, found, sizeof(*entry)); + } else { + err = -ENOENT; + } + + spin_unlock_irqrestore(&ofdpa->neigh_tbl_lock, lock_flags); + + if (err) + goto err_out; + + /* For each active neighbor, we have an L3 unicast group and + * a /32 route to the neighbor, which uses the L3 unicast + * group. The L3 unicast group can also be referred to by + * other routes' nexthops. + */ + + err = ofdpa_group_l3_unicast(ofdpa_port, trans, flags, + entry->index, + ofdpa_port->dev->dev_addr, + entry->eth_dst, + ofdpa_port->internal_vlan_id, + entry->ttl_check, + ofdpa_port->pport); + if (err) { + netdev_err(ofdpa_port->dev, "Error (%d) L3 unicast group index %d\n", + err, entry->index); + goto err_out; + } + + if (adding || removing) { + group_id = ROCKER_GROUP_L3_UNICAST(entry->index); + err = ofdpa_flow_tbl_ucast4_routing(ofdpa_port, trans, + eth_type, ip_addr, + inet_make_mask(32), + priority, goto_tbl, + group_id, flags); + + if (err) + netdev_err(ofdpa_port->dev, "Error (%d) /32 unicast route %pI4 group 0x%08x\n", + err, &entry->ip_addr, group_id); + } + +err_out: + if (!adding) + ofdpa_kfree(trans, entry); + + return err; +} + +static int ofdpa_port_ipv4_resolve(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, + __be32 ip_addr) +{ + struct net_device *dev = ofdpa_port->dev; + struct neighbour *n = __ipv4_neigh_lookup(dev, (__force u32)ip_addr); + int err = 0; + + if (!n) { + n = neigh_create(&arp_tbl, &ip_addr, dev); + if (IS_ERR(n)) + return IS_ERR(n); + } + + /* If the neigh is already resolved, then go ahead and + * install the entry, otherwise start the ARP process to + * resolve the neigh. + */ + + if (n->nud_state & NUD_VALID) + err = ofdpa_port_ipv4_neigh(ofdpa_port, trans, 0, + ip_addr, n->ha); + else + neigh_event_send(n, NULL); + + neigh_release(n); + return err; +} + +static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + __be32 ip_addr, u32 *index) +{ + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_neigh_tbl_entry *entry; + struct ofdpa_neigh_tbl_entry *found; + unsigned long lock_flags; + bool adding = !(flags & OFDPA_OP_FLAG_REMOVE); + bool updating; + bool removing; + bool resolved = true; + int err = 0; + + entry = ofdpa_kzalloc(trans, flags, sizeof(*entry)); + if (!entry) + return -ENOMEM; + + spin_lock_irqsave(&ofdpa->neigh_tbl_lock, lock_flags); + + found = ofdpa_neigh_tbl_find(ofdpa, ip_addr); + if (found) + *index = found->index; + + updating = found && adding; + removing = found && !adding; + adding = !found && adding; + + if (adding) { + entry->ip_addr = ip_addr; + entry->dev = ofdpa_port->dev; + ofdpa_neigh_add(ofdpa, trans, entry); + *index = entry->index; + resolved = false; + } else if (removing) { + ofdpa_neigh_del(trans, found); + } else if (updating) { + ofdpa_neigh_update(found, trans, NULL, false); + resolved = !is_zero_ether_addr(found->eth_dst); + } else { + err = -ENOENT; + } + + spin_unlock_irqrestore(&ofdpa->neigh_tbl_lock, lock_flags); + + if (!adding) + ofdpa_kfree(trans, entry); + + if (err) + return err; + + /* Resolved means neigh ip_addr is resolved to neigh mac. */ + + if (!resolved) + err = ofdpa_port_ipv4_resolve(ofdpa_port, trans, ip_addr); + + return err; +} + +static struct ofdpa_port *ofdpa_port_get(const struct ofdpa *ofdpa, + int port_index) +{ + struct rocker_port *rocker_port; + + rocker_port = ofdpa->rocker->ports[port_index]; + return rocker_port ? rocker_port->wpriv : NULL; +} + +static int ofdpa_port_vlan_flood_group(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, + int flags, __be16 vlan_id) +{ + struct ofdpa_port *p; + const struct ofdpa *ofdpa = ofdpa_port->ofdpa; + unsigned int port_count = ofdpa->rocker->port_count; + u32 group_id = ROCKER_GROUP_L2_FLOOD(vlan_id, 0); + u32 *group_ids; + u8 group_count = 0; + int err = 0; + int i; + + group_ids = ofdpa_kcalloc(trans, flags, port_count, sizeof(u32)); + if (!group_ids) + return -ENOMEM; + + /* Adjust the flood group for this VLAN. The flood group + * references an L2 interface group for each port in this + * VLAN. + */ + + for (i = 0; i < port_count; i++) { + p = ofdpa_port_get(ofdpa, i); + if (!p) + continue; + if (!ofdpa_port_is_bridged(p)) + continue; + if (test_bit(ntohs(vlan_id), p->vlan_bitmap)) { + group_ids[group_count++] = + ROCKER_GROUP_L2_INTERFACE(vlan_id, p->pport); + } + } + + /* If there are no bridged ports in this VLAN, we're done */ + if (group_count == 0) + goto no_ports_in_vlan; + + err = ofdpa_group_l2_flood(ofdpa_port, trans, flags, vlan_id, + group_count, group_ids, group_id); + if (err) + netdev_err(ofdpa_port->dev, "Error (%d) port VLAN l2 flood group\n", err); + +no_ports_in_vlan: + ofdpa_kfree(trans, group_ids); + return err; +} + +static int ofdpa_port_vlan_l2_groups(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + __be16 vlan_id, bool pop_vlan) +{ + const struct ofdpa *ofdpa = ofdpa_port->ofdpa; + unsigned int port_count = ofdpa->rocker->port_count; + struct ofdpa_port *p; + bool adding = !(flags & OFDPA_OP_FLAG_REMOVE); + u32 out_pport; + int ref = 0; + int err; + int i; + + /* An L2 interface group for this port in this VLAN, but + * only when port STP state is LEARNING|FORWARDING. + */ + + if (ofdpa_port->stp_state == BR_STATE_LEARNING || + ofdpa_port->stp_state == BR_STATE_FORWARDING) { + out_pport = ofdpa_port->pport; + err = ofdpa_group_l2_interface(ofdpa_port, trans, flags, + vlan_id, out_pport, pop_vlan); + if (err) { + netdev_err(ofdpa_port->dev, "Error (%d) port VLAN l2 group for pport %d\n", + err, out_pport); + return err; + } + } + + /* An L2 interface group for this VLAN to CPU port. + * Add when first port joins this VLAN and destroy when + * last port leaves this VLAN. + */ + + for (i = 0; i < port_count; i++) { + p = ofdpa_port_get(ofdpa, i); + if (p && test_bit(ntohs(vlan_id), p->vlan_bitmap)) + ref++; + } + + if ((!adding || ref != 1) && (adding || ref != 0)) + return 0; + + out_pport = 0; + err = ofdpa_group_l2_interface(ofdpa_port, trans, flags, + vlan_id, out_pport, pop_vlan); + if (err) { + netdev_err(ofdpa_port->dev, "Error (%d) port VLAN l2 group for CPU port\n", err); + return err; + } + + return 0; +} + +static struct ofdpa_ctrl { + const u8 *eth_dst; + const u8 *eth_dst_mask; + __be16 eth_type; + bool acl; + bool bridge; + bool term; + bool copy_to_cpu; +} ofdpa_ctrls[] = { + [OFDPA_CTRL_LINK_LOCAL_MCAST] = { + /* pass link local multicast pkts up to CPU for filtering */ + .eth_dst = ll_mac, + .eth_dst_mask = ll_mask, + .acl = true, + }, + [OFDPA_CTRL_LOCAL_ARP] = { + /* pass local ARP pkts up to CPU */ + .eth_dst = zero_mac, + .eth_dst_mask = zero_mac, + .eth_type = htons(ETH_P_ARP), + .acl = true, + }, + [OFDPA_CTRL_IPV4_MCAST] = { + /* pass IPv4 mcast pkts up to CPU, RFC 1112 */ + .eth_dst = ipv4_mcast, + .eth_dst_mask = ipv4_mask, + .eth_type = htons(ETH_P_IP), + .term = true, + .copy_to_cpu = true, + }, + [OFDPA_CTRL_IPV6_MCAST] = { + /* pass IPv6 mcast pkts up to CPU, RFC 2464 */ + .eth_dst = ipv6_mcast, + .eth_dst_mask = ipv6_mask, + .eth_type = htons(ETH_P_IPV6), + .term = true, + .copy_to_cpu = true, + }, + [OFDPA_CTRL_DFLT_BRIDGING] = { + /* flood any pkts on vlan */ + .bridge = true, + .copy_to_cpu = true, + }, + [OFDPA_CTRL_DFLT_OVS] = { + /* pass all pkts up to CPU */ + .eth_dst = zero_mac, + .eth_dst_mask = zero_mac, + .acl = true, + }, +}; + +static int ofdpa_port_ctrl_vlan_acl(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + const struct ofdpa_ctrl *ctrl, __be16 vlan_id) +{ + u32 in_pport = ofdpa_port->pport; + u32 in_pport_mask = 0xffffffff; + u32 out_pport = 0; + const u8 *eth_src = NULL; + const u8 *eth_src_mask = NULL; + __be16 vlan_id_mask = htons(0xffff); + u8 ip_proto = 0; + u8 ip_proto_mask = 0; + u8 ip_tos = 0; + u8 ip_tos_mask = 0; + u32 group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); + int err; + + err = ofdpa_flow_tbl_acl(ofdpa_port, trans, flags, + in_pport, in_pport_mask, + eth_src, eth_src_mask, + ctrl->eth_dst, ctrl->eth_dst_mask, + ctrl->eth_type, + vlan_id, vlan_id_mask, + ip_proto, ip_proto_mask, + ip_tos, ip_tos_mask, + group_id); + + if (err) + netdev_err(ofdpa_port->dev, "Error (%d) ctrl ACL\n", err); + + return err; +} + +static int ofdpa_port_ctrl_vlan_bridge(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, + int flags, + const struct ofdpa_ctrl *ctrl, + __be16 vlan_id) +{ + enum rocker_of_dpa_table_id goto_tbl = + ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + u32 group_id = ROCKER_GROUP_L2_FLOOD(vlan_id, 0); + u32 tunnel_id = 0; + int err; + + if (!ofdpa_port_is_bridged(ofdpa_port)) + return 0; + + err = ofdpa_flow_tbl_bridge(ofdpa_port, trans, flags, + ctrl->eth_dst, ctrl->eth_dst_mask, + vlan_id, tunnel_id, + goto_tbl, group_id, ctrl->copy_to_cpu); + + if (err) + netdev_err(ofdpa_port->dev, "Error (%d) ctrl FLOOD\n", err); + + return err; +} + +static int ofdpa_port_ctrl_vlan_term(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + const struct ofdpa_ctrl *ctrl, __be16 vlan_id) +{ + u32 in_pport_mask = 0xffffffff; + __be16 vlan_id_mask = htons(0xffff); + int err; + + if (ntohs(vlan_id) == 0) + vlan_id = ofdpa_port->internal_vlan_id; + + err = ofdpa_flow_tbl_term_mac(ofdpa_port, trans, + ofdpa_port->pport, in_pport_mask, + ctrl->eth_type, ctrl->eth_dst, + ctrl->eth_dst_mask, vlan_id, + vlan_id_mask, ctrl->copy_to_cpu, + flags); + + if (err) + netdev_err(ofdpa_port->dev, "Error (%d) ctrl term\n", err); + + return err; +} + +static int ofdpa_port_ctrl_vlan(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + const struct ofdpa_ctrl *ctrl, __be16 vlan_id) +{ + if (ctrl->acl) + return ofdpa_port_ctrl_vlan_acl(ofdpa_port, trans, flags, + ctrl, vlan_id); + if (ctrl->bridge) + return ofdpa_port_ctrl_vlan_bridge(ofdpa_port, trans, flags, + ctrl, vlan_id); + + if (ctrl->term) + return ofdpa_port_ctrl_vlan_term(ofdpa_port, trans, flags, + ctrl, vlan_id); + + return -EOPNOTSUPP; +} + +static int ofdpa_port_ctrl_vlan_add(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + __be16 vlan_id) +{ + int err = 0; + int i; + + for (i = 0; i < OFDPA_CTRL_MAX; i++) { + if (ofdpa_port->ctrls[i]) { + err = ofdpa_port_ctrl_vlan(ofdpa_port, trans, flags, + &ofdpa_ctrls[i], vlan_id); + if (err) + return err; + } + } + + return err; +} + +static int ofdpa_port_ctrl(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + const struct ofdpa_ctrl *ctrl) +{ + u16 vid; + int err = 0; + + for (vid = 1; vid < VLAN_N_VID; vid++) { + if (!test_bit(vid, ofdpa_port->vlan_bitmap)) + continue; + err = ofdpa_port_ctrl_vlan(ofdpa_port, trans, flags, + ctrl, htons(vid)); + if (err) + break; + } + + return err; +} + +static int ofdpa_port_vlan(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, u16 vid) +{ + enum rocker_of_dpa_table_id goto_tbl = + ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; + u32 in_pport = ofdpa_port->pport; + __be16 vlan_id = htons(vid); + __be16 vlan_id_mask = htons(0xffff); + __be16 internal_vlan_id; + bool untagged; + bool adding = !(flags & OFDPA_OP_FLAG_REMOVE); + int err; + + internal_vlan_id = ofdpa_port_vid_to_vlan(ofdpa_port, vid, &untagged); + + if (adding && + test_bit(ntohs(internal_vlan_id), ofdpa_port->vlan_bitmap)) + return 0; /* already added */ + else if (!adding && + !test_bit(ntohs(internal_vlan_id), ofdpa_port->vlan_bitmap)) + return 0; /* already removed */ + + change_bit(ntohs(internal_vlan_id), ofdpa_port->vlan_bitmap); + + if (adding) { + err = ofdpa_port_ctrl_vlan_add(ofdpa_port, trans, flags, + internal_vlan_id); + if (err) { + netdev_err(ofdpa_port->dev, "Error (%d) port ctrl vlan add\n", err); + goto err_out; + } + } + + err = ofdpa_port_vlan_l2_groups(ofdpa_port, trans, flags, + internal_vlan_id, untagged); + if (err) { + netdev_err(ofdpa_port->dev, "Error (%d) port VLAN l2 groups\n", err); + goto err_out; + } + + err = ofdpa_port_vlan_flood_group(ofdpa_port, trans, flags, + internal_vlan_id); + if (err) { + netdev_err(ofdpa_port->dev, "Error (%d) port VLAN l2 flood group\n", err); + goto err_out; + } + + err = ofdpa_flow_tbl_vlan(ofdpa_port, trans, flags, + in_pport, vlan_id, vlan_id_mask, + goto_tbl, untagged, internal_vlan_id); + if (err) + netdev_err(ofdpa_port->dev, "Error (%d) port VLAN table\n", err); + +err_out: + if (switchdev_trans_ph_prepare(trans)) + change_bit(ntohs(internal_vlan_id), ofdpa_port->vlan_bitmap); + + return err; +} + +static int ofdpa_port_ig_tbl(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags) +{ + enum rocker_of_dpa_table_id goto_tbl; + u32 in_pport; + u32 in_pport_mask; + int err; + + /* Normal Ethernet Frames. Matches pkts from any local physical + * ports. Goto VLAN tbl. + */ + + in_pport = 0; + in_pport_mask = 0xffff0000; + goto_tbl = ROCKER_OF_DPA_TABLE_ID_VLAN; + + err = ofdpa_flow_tbl_ig_port(ofdpa_port, trans, flags, + in_pport, in_pport_mask, + goto_tbl); + if (err) + netdev_err(ofdpa_port->dev, "Error (%d) ingress port table entry\n", err); + + return err; +} + +struct ofdpa_fdb_learn_work { + struct work_struct work; + struct ofdpa_port *ofdpa_port; + struct switchdev_trans *trans; + int flags; + u8 addr[ETH_ALEN]; + u16 vid; +}; + +static void ofdpa_port_fdb_learn_work(struct work_struct *work) +{ + const struct ofdpa_fdb_learn_work *lw = + container_of(work, struct ofdpa_fdb_learn_work, work); + bool removing = (lw->flags & OFDPA_OP_FLAG_REMOVE); + bool learned = (lw->flags & OFDPA_OP_FLAG_LEARNED); + struct switchdev_notifier_fdb_info info; + + info.addr = lw->addr; + info.vid = lw->vid; + + rtnl_lock(); + if (learned && removing) + call_switchdev_notifiers(SWITCHDEV_FDB_DEL, + lw->ofdpa_port->dev, &info.info); + else if (learned && !removing) + call_switchdev_notifiers(SWITCHDEV_FDB_ADD, + lw->ofdpa_port->dev, &info.info); + rtnl_unlock(); + + ofdpa_kfree(lw->trans, work); +} + +static int ofdpa_port_fdb_learn(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + const u8 *addr, __be16 vlan_id) +{ + struct ofdpa_fdb_learn_work *lw; + enum rocker_of_dpa_table_id goto_tbl = + ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + u32 out_pport = ofdpa_port->pport; + u32 tunnel_id = 0; + u32 group_id = ROCKER_GROUP_NONE; + bool syncing = !!(ofdpa_port->brport_flags & BR_LEARNING_SYNC); + bool copy_to_cpu = false; + int err; + + if (ofdpa_port_is_bridged(ofdpa_port)) + group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); + + if (!(flags & OFDPA_OP_FLAG_REFRESH)) { + err = ofdpa_flow_tbl_bridge(ofdpa_port, trans, flags, addr, + NULL, vlan_id, tunnel_id, goto_tbl, + group_id, copy_to_cpu); + if (err) + return err; + } + + if (!syncing) + return 0; + + if (!ofdpa_port_is_bridged(ofdpa_port)) + return 0; + + lw = ofdpa_kzalloc(trans, flags, sizeof(*lw)); + if (!lw) + return -ENOMEM; + + INIT_WORK(&lw->work, ofdpa_port_fdb_learn_work); + + lw->ofdpa_port = ofdpa_port; + lw->trans = trans; + lw->flags = flags; + ether_addr_copy(lw->addr, addr); + lw->vid = ofdpa_port_vlan_to_vid(ofdpa_port, vlan_id); + + if (switchdev_trans_ph_prepare(trans)) + ofdpa_kfree(trans, lw); + else + schedule_work(&lw->work); + + return 0; +} + +static struct ofdpa_fdb_tbl_entry * +ofdpa_fdb_tbl_find(const struct ofdpa *ofdpa, + const struct ofdpa_fdb_tbl_entry *match) +{ + struct ofdpa_fdb_tbl_entry *found; + + hash_for_each_possible(ofdpa->fdb_tbl, found, entry, match->key_crc32) + if (memcmp(&found->key, &match->key, sizeof(found->key)) == 0) + return found; + + return NULL; +} + +static int ofdpa_port_fdb(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, + const unsigned char *addr, + __be16 vlan_id, int flags) +{ + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_fdb_tbl_entry *fdb; + struct ofdpa_fdb_tbl_entry *found; + bool removing = (flags & OFDPA_OP_FLAG_REMOVE); + unsigned long lock_flags; + + fdb = ofdpa_kzalloc(trans, flags, sizeof(*fdb)); + if (!fdb) + return -ENOMEM; + + fdb->learned = (flags & OFDPA_OP_FLAG_LEARNED); + fdb->touched = jiffies; + fdb->key.ofdpa_port = ofdpa_port; + ether_addr_copy(fdb->key.addr, addr); + fdb->key.vlan_id = vlan_id; + fdb->key_crc32 = crc32(~0, &fdb->key, sizeof(fdb->key)); + + spin_lock_irqsave(&ofdpa->fdb_tbl_lock, lock_flags); + + found = ofdpa_fdb_tbl_find(ofdpa, fdb); + + if (found) { + found->touched = jiffies; + if (removing) { + ofdpa_kfree(trans, fdb); + if (!switchdev_trans_ph_prepare(trans)) + hash_del(&found->entry); + } + } else if (!removing) { + if (!switchdev_trans_ph_prepare(trans)) + hash_add(ofdpa->fdb_tbl, &fdb->entry, + fdb->key_crc32); + } + + spin_unlock_irqrestore(&ofdpa->fdb_tbl_lock, lock_flags); + + /* Check if adding and already exists, or removing and can't find */ + if (!found != !removing) { + ofdpa_kfree(trans, fdb); + if (!found && removing) + return 0; + /* Refreshing existing to update aging timers */ + flags |= OFDPA_OP_FLAG_REFRESH; + } + + return ofdpa_port_fdb_learn(ofdpa_port, trans, flags, addr, vlan_id); +} + +static int ofdpa_port_fdb_flush(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags) +{ + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_fdb_tbl_entry *found; + unsigned long lock_flags; + struct hlist_node *tmp; + int bkt; + int err = 0; + + if (ofdpa_port->stp_state == BR_STATE_LEARNING || + ofdpa_port->stp_state == BR_STATE_FORWARDING) + return 0; + + flags |= OFDPA_OP_FLAG_NOWAIT | OFDPA_OP_FLAG_REMOVE; + + spin_lock_irqsave(&ofdpa->fdb_tbl_lock, lock_flags); + + hash_for_each_safe(ofdpa->fdb_tbl, bkt, tmp, found, entry) { + if (found->key.ofdpa_port != ofdpa_port) + continue; + if (!found->learned) + continue; + err = ofdpa_port_fdb_learn(ofdpa_port, trans, flags, + found->key.addr, + found->key.vlan_id); + if (err) + goto err_out; + if (!switchdev_trans_ph_prepare(trans)) + hash_del(&found->entry); + } + +err_out: + spin_unlock_irqrestore(&ofdpa->fdb_tbl_lock, lock_flags); + + return err; +} + +static void ofdpa_fdb_cleanup(unsigned long data) +{ + struct ofdpa *ofdpa = (struct ofdpa *)data; + struct ofdpa_port *ofdpa_port; + struct ofdpa_fdb_tbl_entry *entry; + struct hlist_node *tmp; + unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME; + unsigned long expires; + unsigned long lock_flags; + int flags = OFDPA_OP_FLAG_NOWAIT | OFDPA_OP_FLAG_REMOVE | + OFDPA_OP_FLAG_LEARNED; + int bkt; + + spin_lock_irqsave(&ofdpa->fdb_tbl_lock, lock_flags); + + hash_for_each_safe(ofdpa->fdb_tbl, bkt, tmp, entry, entry) { + if (!entry->learned) + continue; + ofdpa_port = entry->key.ofdpa_port; + expires = entry->touched + ofdpa_port->ageing_time; + if (time_before_eq(expires, jiffies)) { + ofdpa_port_fdb_learn(ofdpa_port, NULL, + flags, entry->key.addr, + entry->key.vlan_id); + hash_del(&entry->entry); + } else if (time_before(expires, next_timer)) { + next_timer = expires; + } + } + + spin_unlock_irqrestore(&ofdpa->fdb_tbl_lock, lock_flags); + + mod_timer(&ofdpa->fdb_cleanup_timer, round_jiffies_up(next_timer)); +} + +static int ofdpa_port_router_mac(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags, + __be16 vlan_id) +{ + u32 in_pport_mask = 0xffffffff; + __be16 eth_type; + const u8 *dst_mac_mask = ff_mac; + __be16 vlan_id_mask = htons(0xffff); + bool copy_to_cpu = false; + int err; + + if (ntohs(vlan_id) == 0) + vlan_id = ofdpa_port->internal_vlan_id; + + eth_type = htons(ETH_P_IP); + err = ofdpa_flow_tbl_term_mac(ofdpa_port, trans, + ofdpa_port->pport, in_pport_mask, + eth_type, ofdpa_port->dev->dev_addr, + dst_mac_mask, vlan_id, vlan_id_mask, + copy_to_cpu, flags); + if (err) + return err; + + eth_type = htons(ETH_P_IPV6); + err = ofdpa_flow_tbl_term_mac(ofdpa_port, trans, + ofdpa_port->pport, in_pport_mask, + eth_type, ofdpa_port->dev->dev_addr, + dst_mac_mask, vlan_id, vlan_id_mask, + copy_to_cpu, flags); + + return err; +} + +static int ofdpa_port_fwding(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, int flags) +{ + bool pop_vlan; + u32 out_pport; + __be16 vlan_id; + u16 vid; + int err; + + /* Port will be forwarding-enabled if its STP state is LEARNING + * or FORWARDING. Traffic from CPU can still egress, regardless of + * port STP state. Use L2 interface group on port VLANs as a way + * to toggle port forwarding: if forwarding is disabled, L2 + * interface group will not exist. + */ + + if (ofdpa_port->stp_state != BR_STATE_LEARNING && + ofdpa_port->stp_state != BR_STATE_FORWARDING) + flags |= OFDPA_OP_FLAG_REMOVE; + + out_pport = ofdpa_port->pport; + for (vid = 1; vid < VLAN_N_VID; vid++) { + if (!test_bit(vid, ofdpa_port->vlan_bitmap)) + continue; + vlan_id = htons(vid); + pop_vlan = ofdpa_vlan_id_is_internal(vlan_id); + err = ofdpa_group_l2_interface(ofdpa_port, trans, flags, + vlan_id, out_pport, pop_vlan); + if (err) { + netdev_err(ofdpa_port->dev, "Error (%d) port VLAN l2 group for pport %d\n", + err, out_pport); + return err; + } + } + + return 0; +} + +static int ofdpa_port_stp_update(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, + int flags, u8 state) +{ + bool want[OFDPA_CTRL_MAX] = { 0, }; + bool prev_ctrls[OFDPA_CTRL_MAX]; + u8 uninitialized_var(prev_state); + int err; + int i; + + if (switchdev_trans_ph_prepare(trans)) { + memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls)); + prev_state = ofdpa_port->stp_state; + } + + if (ofdpa_port->stp_state == state) + return 0; + + ofdpa_port->stp_state = state; + + switch (state) { + case BR_STATE_DISABLED: + /* port is completely disabled */ + break; + case BR_STATE_LISTENING: + case BR_STATE_BLOCKING: + want[OFDPA_CTRL_LINK_LOCAL_MCAST] = true; + break; + case BR_STATE_LEARNING: + case BR_STATE_FORWARDING: + if (!ofdpa_port_is_ovsed(ofdpa_port)) + want[OFDPA_CTRL_LINK_LOCAL_MCAST] = true; + want[OFDPA_CTRL_IPV4_MCAST] = true; + want[OFDPA_CTRL_IPV6_MCAST] = true; + if (ofdpa_port_is_bridged(ofdpa_port)) + want[OFDPA_CTRL_DFLT_BRIDGING] = true; + else if (ofdpa_port_is_ovsed(ofdpa_port)) + want[OFDPA_CTRL_DFLT_OVS] = true; + else + want[OFDPA_CTRL_LOCAL_ARP] = true; + break; + } + + for (i = 0; i < OFDPA_CTRL_MAX; i++) { + if (want[i] != ofdpa_port->ctrls[i]) { + int ctrl_flags = flags | + (want[i] ? 0 : OFDPA_OP_FLAG_REMOVE); + err = ofdpa_port_ctrl(ofdpa_port, trans, ctrl_flags, + &ofdpa_ctrls[i]); + if (err) + goto err_out; + ofdpa_port->ctrls[i] = want[i]; + } + } + + err = ofdpa_port_fdb_flush(ofdpa_port, trans, flags); + if (err) + goto err_out; + + err = ofdpa_port_fwding(ofdpa_port, trans, flags); + +err_out: + if (switchdev_trans_ph_prepare(trans)) { + memcpy(ofdpa_port->ctrls, prev_ctrls, sizeof(prev_ctrls)); + ofdpa_port->stp_state = prev_state; + } + + return err; +} + +static int ofdpa_port_fwd_enable(struct ofdpa_port *ofdpa_port, int flags) +{ + if (ofdpa_port_is_bridged(ofdpa_port)) + /* bridge STP will enable port */ + return 0; + + /* port is not bridged, so simulate going to FORWARDING state */ + return ofdpa_port_stp_update(ofdpa_port, NULL, flags, + BR_STATE_FORWARDING); +} + +static int ofdpa_port_fwd_disable(struct ofdpa_port *ofdpa_port, int flags) +{ + if (ofdpa_port_is_bridged(ofdpa_port)) + /* bridge STP will disable port */ + return 0; + + /* port is not bridged, so simulate going to DISABLED state */ + return ofdpa_port_stp_update(ofdpa_port, NULL, flags, + BR_STATE_DISABLED); +} + +static int ofdpa_port_vlan_add(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, + u16 vid, u16 flags) +{ + int err; + + /* XXX deal with flags for PVID and untagged */ + + err = ofdpa_port_vlan(ofdpa_port, trans, 0, vid); + if (err) + return err; + + err = ofdpa_port_router_mac(ofdpa_port, trans, 0, htons(vid)); + if (err) + ofdpa_port_vlan(ofdpa_port, trans, + OFDPA_OP_FLAG_REMOVE, vid); + + return err; +} + +static int ofdpa_port_vlan_del(struct ofdpa_port *ofdpa_port, + u16 vid, u16 flags) +{ + int err; + + err = ofdpa_port_router_mac(ofdpa_port, NULL, + OFDPA_OP_FLAG_REMOVE, htons(vid)); + if (err) + return err; + + return ofdpa_port_vlan(ofdpa_port, NULL, + OFDPA_OP_FLAG_REMOVE, vid); +} + +static struct ofdpa_internal_vlan_tbl_entry * +ofdpa_internal_vlan_tbl_find(const struct ofdpa *ofdpa, int ifindex) +{ + struct ofdpa_internal_vlan_tbl_entry *found; + + hash_for_each_possible(ofdpa->internal_vlan_tbl, found, + entry, ifindex) { + if (found->ifindex == ifindex) + return found; + } + + return NULL; +} + +static __be16 ofdpa_port_internal_vlan_id_get(struct ofdpa_port *ofdpa_port, + int ifindex) +{ + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_internal_vlan_tbl_entry *entry; + struct ofdpa_internal_vlan_tbl_entry *found; + unsigned long lock_flags; + int i; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return 0; + + entry->ifindex = ifindex; + + spin_lock_irqsave(&ofdpa->internal_vlan_tbl_lock, lock_flags); + + found = ofdpa_internal_vlan_tbl_find(ofdpa, ifindex); + if (found) { + kfree(entry); + goto found; + } + + found = entry; + hash_add(ofdpa->internal_vlan_tbl, &found->entry, found->ifindex); + + for (i = 0; i < OFDPA_N_INTERNAL_VLANS; i++) { + if (test_and_set_bit(i, ofdpa->internal_vlan_bitmap)) + continue; + found->vlan_id = htons(OFDPA_INTERNAL_VLAN_ID_BASE + i); + goto found; + } + + netdev_err(ofdpa_port->dev, "Out of internal VLAN IDs\n"); + +found: + found->ref_count++; + spin_unlock_irqrestore(&ofdpa->internal_vlan_tbl_lock, lock_flags); + + return found->vlan_id; +} + +static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, + struct switchdev_trans *trans, __be32 dst, + int dst_len, const struct fib_info *fi, + u32 tb_id, int flags) +{ + const struct fib_nh *nh; + __be16 eth_type = htons(ETH_P_IP); + __be32 dst_mask = inet_make_mask(dst_len); + __be16 internal_vlan_id = ofdpa_port->internal_vlan_id; + u32 priority = fi->fib_priority; + enum rocker_of_dpa_table_id goto_tbl = + ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + u32 group_id; + bool nh_on_port; + bool has_gw; + u32 index; + int err; + + /* XXX support ECMP */ + + nh = fi->fib_nh; + nh_on_port = (fi->fib_dev == ofdpa_port->dev); + has_gw = !!nh->nh_gw; + + if (has_gw && nh_on_port) { + err = ofdpa_port_ipv4_nh(ofdpa_port, trans, flags, + nh->nh_gw, &index); + if (err) + return err; + + group_id = ROCKER_GROUP_L3_UNICAST(index); + } else { + /* Send to CPU for processing */ + group_id = ROCKER_GROUP_L2_INTERFACE(internal_vlan_id, 0); + } + + err = ofdpa_flow_tbl_ucast4_routing(ofdpa_port, trans, eth_type, dst, + dst_mask, priority, goto_tbl, + group_id, flags); + if (err) + netdev_err(ofdpa_port->dev, "Error (%d) IPv4 route %pI4\n", + err, &dst); + + return err; +} + +static void +ofdpa_port_internal_vlan_id_put(const struct ofdpa_port *ofdpa_port, + int ifindex) +{ + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_internal_vlan_tbl_entry *found; + unsigned long lock_flags; + unsigned long bit; + + spin_lock_irqsave(&ofdpa->internal_vlan_tbl_lock, lock_flags); + + found = ofdpa_internal_vlan_tbl_find(ofdpa, ifindex); + if (!found) { + netdev_err(ofdpa_port->dev, + "ifindex (%d) not found in internal VLAN tbl\n", + ifindex); + goto not_found; + } + + if (--found->ref_count <= 0) { + bit = ntohs(found->vlan_id) - OFDPA_INTERNAL_VLAN_ID_BASE; + clear_bit(bit, ofdpa->internal_vlan_bitmap); + hash_del(&found->entry); + kfree(found); + } + +not_found: + spin_unlock_irqrestore(&ofdpa->internal_vlan_tbl_lock, lock_flags); +} + +/********************************** + * Rocker world ops implementation + **********************************/ + +static int ofdpa_init(struct rocker *rocker) +{ + struct ofdpa *ofdpa = rocker->wpriv; + + ofdpa->rocker = rocker; + + hash_init(ofdpa->flow_tbl); + spin_lock_init(&ofdpa->flow_tbl_lock); + + hash_init(ofdpa->group_tbl); + spin_lock_init(&ofdpa->group_tbl_lock); + + hash_init(ofdpa->fdb_tbl); + spin_lock_init(&ofdpa->fdb_tbl_lock); + + hash_init(ofdpa->internal_vlan_tbl); + spin_lock_init(&ofdpa->internal_vlan_tbl_lock); + + hash_init(ofdpa->neigh_tbl); + spin_lock_init(&ofdpa->neigh_tbl_lock); + + setup_timer(&ofdpa->fdb_cleanup_timer, ofdpa_fdb_cleanup, + (unsigned long) ofdpa); + mod_timer(&ofdpa->fdb_cleanup_timer, jiffies); + + return 0; +} + +static void ofdpa_fini(struct rocker *rocker) +{ + struct ofdpa *ofdpa = rocker->wpriv; + + unsigned long flags; + struct ofdpa_flow_tbl_entry *flow_entry; + struct ofdpa_group_tbl_entry *group_entry; + struct ofdpa_fdb_tbl_entry *fdb_entry; + struct ofdpa_internal_vlan_tbl_entry *internal_vlan_entry; + struct ofdpa_neigh_tbl_entry *neigh_entry; + struct hlist_node *tmp; + int bkt; + + del_timer_sync(&ofdpa->fdb_cleanup_timer); + + spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags); + hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) + hash_del(&flow_entry->entry); + spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, flags); + + spin_lock_irqsave(&ofdpa->group_tbl_lock, flags); + hash_for_each_safe(ofdpa->group_tbl, bkt, tmp, group_entry, entry) + hash_del(&group_entry->entry); + spin_unlock_irqrestore(&ofdpa->group_tbl_lock, flags); + + spin_lock_irqsave(&ofdpa->fdb_tbl_lock, flags); + hash_for_each_safe(ofdpa->fdb_tbl, bkt, tmp, fdb_entry, entry) + hash_del(&fdb_entry->entry); + spin_unlock_irqrestore(&ofdpa->fdb_tbl_lock, flags); + + spin_lock_irqsave(&ofdpa->internal_vlan_tbl_lock, flags); + hash_for_each_safe(ofdpa->internal_vlan_tbl, bkt, + tmp, internal_vlan_entry, entry) + hash_del(&internal_vlan_entry->entry); + spin_unlock_irqrestore(&ofdpa->internal_vlan_tbl_lock, flags); + + spin_lock_irqsave(&ofdpa->neigh_tbl_lock, flags); + hash_for_each_safe(ofdpa->neigh_tbl, bkt, tmp, neigh_entry, entry) + hash_del(&neigh_entry->entry); + spin_unlock_irqrestore(&ofdpa->neigh_tbl_lock, flags); +} + +static int ofdpa_port_pre_init(struct rocker_port *rocker_port) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + + ofdpa_port->ofdpa = rocker_port->rocker->wpriv; + ofdpa_port->rocker_port = rocker_port; + ofdpa_port->dev = rocker_port->dev; + ofdpa_port->pport = rocker_port->pport; + ofdpa_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC; + ofdpa_port->ageing_time = BR_DEFAULT_AGEING_TIME; + return 0; +} + +static int ofdpa_port_init(struct rocker_port *rocker_port) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + int err; + + switchdev_port_fwd_mark_set(ofdpa_port->dev, NULL, false); + rocker_port_set_learning(rocker_port, + !!(ofdpa_port->brport_flags & BR_LEARNING)); + + err = ofdpa_port_ig_tbl(ofdpa_port, NULL, 0); + if (err) { + netdev_err(ofdpa_port->dev, "install ig port table failed\n"); + return err; + } + + ofdpa_port->internal_vlan_id = + ofdpa_port_internal_vlan_id_get(ofdpa_port, + ofdpa_port->dev->ifindex); + + err = ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); + if (err) { + netdev_err(ofdpa_port->dev, "install untagged VLAN failed\n"); + goto err_untagged_vlan; + } + return 0; + +err_untagged_vlan: + ofdpa_port_ig_tbl(ofdpa_port, NULL, OFDPA_OP_FLAG_REMOVE); + return err; +} + +static void ofdpa_port_fini(struct rocker_port *rocker_port) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + + ofdpa_port_ig_tbl(ofdpa_port, NULL, OFDPA_OP_FLAG_REMOVE); +} + +static int ofdpa_port_open(struct rocker_port *rocker_port) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + + return ofdpa_port_fwd_enable(ofdpa_port, 0); +} + +static void ofdpa_port_stop(struct rocker_port *rocker_port) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + + ofdpa_port_fwd_disable(ofdpa_port, OFDPA_OP_FLAG_NOWAIT); +} + +static int ofdpa_port_attr_stp_state_set(struct rocker_port *rocker_port, + u8 state, + struct switchdev_trans *trans) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + + return ofdpa_port_stp_update(ofdpa_port, trans, 0, state); +} + +static int ofdpa_port_attr_bridge_flags_set(struct rocker_port *rocker_port, + unsigned long brport_flags, + struct switchdev_trans *trans) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + unsigned long orig_flags; + int err = 0; + + orig_flags = ofdpa_port->brport_flags; + ofdpa_port->brport_flags = brport_flags; + if ((orig_flags ^ ofdpa_port->brport_flags) & BR_LEARNING && + !switchdev_trans_ph_prepare(trans)) + err = rocker_port_set_learning(ofdpa_port->rocker_port, + !!(ofdpa_port->brport_flags & BR_LEARNING)); + + if (switchdev_trans_ph_prepare(trans)) + ofdpa_port->brport_flags = orig_flags; + + return err; +} + +static int +ofdpa_port_attr_bridge_flags_get(const struct rocker_port *rocker_port, + unsigned long *p_brport_flags) +{ + const struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + + *p_brport_flags = ofdpa_port->brport_flags; + return 0; +} + +static int +ofdpa_port_attr_bridge_ageing_time_set(struct rocker_port *rocker_port, + u32 ageing_time, + struct switchdev_trans *trans) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + + if (!switchdev_trans_ph_prepare(trans)) { + ofdpa_port->ageing_time = clock_t_to_jiffies(ageing_time); + mod_timer(&ofdpa_port->ofdpa->fdb_cleanup_timer, jiffies); + } + + return 0; +} + +static int ofdpa_port_obj_vlan_add(struct rocker_port *rocker_port, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + u16 vid; + int err; + + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { + err = ofdpa_port_vlan_add(ofdpa_port, trans, vid, vlan->flags); + if (err) + return err; + } + + return 0; +} + +static int ofdpa_port_obj_vlan_del(struct rocker_port *rocker_port, + const struct switchdev_obj_port_vlan *vlan) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + u16 vid; + int err; + + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { + err = ofdpa_port_vlan_del(ofdpa_port, vid, vlan->flags); + if (err) + return err; + } + + return 0; +} + +static int ofdpa_port_obj_vlan_dump(const struct rocker_port *rocker_port, + struct switchdev_obj_port_vlan *vlan, + switchdev_obj_dump_cb_t *cb) +{ + const struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + u16 vid; + int err = 0; + + for (vid = 1; vid < VLAN_N_VID; vid++) { + if (!test_bit(vid, ofdpa_port->vlan_bitmap)) + continue; + vlan->flags = 0; + if (ofdpa_vlan_id_is_internal(htons(vid))) + vlan->flags |= BRIDGE_VLAN_INFO_PVID; + vlan->vid_begin = vlan->vid_end = vid; + err = cb(&vlan->obj); + if (err) + break; + } + + return err; +} + +static int ofdpa_port_obj_fib4_add(struct rocker_port *rocker_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + + return ofdpa_port_fib_ipv4(ofdpa_port, trans, + htonl(fib4->dst), fib4->dst_len, + &fib4->fi, fib4->tb_id, 0); +} + +static int ofdpa_port_obj_fib4_del(struct rocker_port *rocker_port, + const struct switchdev_obj_ipv4_fib *fib4) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + + return ofdpa_port_fib_ipv4(ofdpa_port, NULL, + htonl(fib4->dst), fib4->dst_len, + &fib4->fi, fib4->tb_id, + OFDPA_OP_FLAG_REMOVE); +} + +static int ofdpa_port_obj_fdb_add(struct rocker_port *rocker_port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + __be16 vlan_id = ofdpa_port_vid_to_vlan(ofdpa_port, fdb->vid, NULL); + + if (!ofdpa_port_is_bridged(ofdpa_port)) + return -EINVAL; + + return ofdpa_port_fdb(ofdpa_port, trans, fdb->addr, vlan_id, 0); +} + +static int ofdpa_port_obj_fdb_del(struct rocker_port *rocker_port, + const struct switchdev_obj_port_fdb *fdb) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + __be16 vlan_id = ofdpa_port_vid_to_vlan(ofdpa_port, fdb->vid, NULL); + int flags = OFDPA_OP_FLAG_REMOVE; + + if (!ofdpa_port_is_bridged(ofdpa_port)) + return -EINVAL; + + return ofdpa_port_fdb(ofdpa_port, NULL, fdb->addr, vlan_id, flags); +} + +static int ofdpa_port_obj_fdb_dump(const struct rocker_port *rocker_port, + struct switchdev_obj_port_fdb *fdb, + switchdev_obj_dump_cb_t *cb) +{ + const struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + struct ofdpa *ofdpa = ofdpa_port->ofdpa; + struct ofdpa_fdb_tbl_entry *found; + struct hlist_node *tmp; + unsigned long lock_flags; + int bkt; + int err = 0; + + spin_lock_irqsave(&ofdpa->fdb_tbl_lock, lock_flags); + hash_for_each_safe(ofdpa->fdb_tbl, bkt, tmp, found, entry) { + if (found->key.ofdpa_port != ofdpa_port) + continue; + ether_addr_copy(fdb->addr, found->key.addr); + fdb->ndm_state = NUD_REACHABLE; + fdb->vid = ofdpa_port_vlan_to_vid(ofdpa_port, + found->key.vlan_id); + err = cb(&fdb->obj); + if (err) + break; + } + spin_unlock_irqrestore(&ofdpa->fdb_tbl_lock, lock_flags); + + return err; +} + +static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port, + struct net_device *bridge) +{ + int err; + + /* Port is joining bridge, so the internal VLAN for the + * port is going to change to the bridge internal VLAN. + * Let's remove untagged VLAN (vid=0) from port and + * re-add once internal VLAN has changed. + */ + + err = ofdpa_port_vlan_del(ofdpa_port, OFDPA_UNTAGGED_VID, 0); + if (err) + return err; + + ofdpa_port_internal_vlan_id_put(ofdpa_port, + ofdpa_port->dev->ifindex); + ofdpa_port->internal_vlan_id = + ofdpa_port_internal_vlan_id_get(ofdpa_port, bridge->ifindex); + + ofdpa_port->bridge_dev = bridge; + switchdev_port_fwd_mark_set(ofdpa_port->dev, bridge, true); + + return ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); +} + +static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port) +{ + int err; + + err = ofdpa_port_vlan_del(ofdpa_port, OFDPA_UNTAGGED_VID, 0); + if (err) + return err; + + ofdpa_port_internal_vlan_id_put(ofdpa_port, + ofdpa_port->bridge_dev->ifindex); + ofdpa_port->internal_vlan_id = + ofdpa_port_internal_vlan_id_get(ofdpa_port, + ofdpa_port->dev->ifindex); + + switchdev_port_fwd_mark_set(ofdpa_port->dev, ofdpa_port->bridge_dev, + false); + ofdpa_port->bridge_dev = NULL; + + err = ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); + if (err) + return err; + + if (ofdpa_port->dev->flags & IFF_UP) + err = ofdpa_port_fwd_enable(ofdpa_port, 0); + + return err; +} + +static int ofdpa_port_ovs_changed(struct ofdpa_port *ofdpa_port, + struct net_device *master) +{ + int err; + + ofdpa_port->bridge_dev = master; + + err = ofdpa_port_fwd_disable(ofdpa_port, 0); + if (err) + return err; + err = ofdpa_port_fwd_enable(ofdpa_port, 0); + + return err; +} + +static int ofdpa_port_master_linked(struct rocker_port *rocker_port, + struct net_device *master) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + int err = 0; + + if (netif_is_bridge_master(master)) + err = ofdpa_port_bridge_join(ofdpa_port, master); + else if (netif_is_ovs_master(master)) + err = ofdpa_port_ovs_changed(ofdpa_port, master); + return err; +} + +static int ofdpa_port_master_unlinked(struct rocker_port *rocker_port, + struct net_device *master) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + int err = 0; + + if (ofdpa_port_is_bridged(ofdpa_port)) + err = ofdpa_port_bridge_leave(ofdpa_port); + else if (ofdpa_port_is_ovsed(ofdpa_port)) + err = ofdpa_port_ovs_changed(ofdpa_port, NULL); + return err; +} + +static int ofdpa_port_neigh_update(struct rocker_port *rocker_port, + struct neighbour *n) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + int flags = (n->nud_state & NUD_VALID ? 0 : OFDPA_OP_FLAG_REMOVE) | + OFDPA_OP_FLAG_NOWAIT; + __be32 ip_addr = *(__be32 *) n->primary_key; + + return ofdpa_port_ipv4_neigh(ofdpa_port, NULL, flags, ip_addr, n->ha); +} + +static int ofdpa_port_neigh_destroy(struct rocker_port *rocker_port, + struct neighbour *n) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + int flags = OFDPA_OP_FLAG_REMOVE | OFDPA_OP_FLAG_NOWAIT; + __be32 ip_addr = *(__be32 *) n->primary_key; + + return ofdpa_port_ipv4_neigh(ofdpa_port, NULL, flags, ip_addr, n->ha); +} + +static int ofdpa_port_ev_mac_vlan_seen(struct rocker_port *rocker_port, + const unsigned char *addr, + __be16 vlan_id) +{ + struct ofdpa_port *ofdpa_port = rocker_port->wpriv; + int flags = OFDPA_OP_FLAG_NOWAIT | OFDPA_OP_FLAG_LEARNED; + + if (ofdpa_port->stp_state != BR_STATE_LEARNING && + ofdpa_port->stp_state != BR_STATE_FORWARDING) + return 0; + + return ofdpa_port_fdb(ofdpa_port, NULL, addr, vlan_id, flags); +} + +struct rocker_world_ops rocker_ofdpa_ops = { + .kind = "ofdpa", + .priv_size = sizeof(struct ofdpa), + .port_priv_size = sizeof(struct ofdpa_port), + .mode = ROCKER_PORT_MODE_OF_DPA, + .init = ofdpa_init, + .fini = ofdpa_fini, + .port_pre_init = ofdpa_port_pre_init, + .port_init = ofdpa_port_init, + .port_fini = ofdpa_port_fini, + .port_open = ofdpa_port_open, + .port_stop = ofdpa_port_stop, + .port_attr_stp_state_set = ofdpa_port_attr_stp_state_set, + .port_attr_bridge_flags_set = ofdpa_port_attr_bridge_flags_set, + .port_attr_bridge_flags_get = ofdpa_port_attr_bridge_flags_get, + .port_attr_bridge_ageing_time_set = ofdpa_port_attr_bridge_ageing_time_set, + .port_obj_vlan_add = ofdpa_port_obj_vlan_add, + .port_obj_vlan_del = ofdpa_port_obj_vlan_del, + .port_obj_vlan_dump = ofdpa_port_obj_vlan_dump, + .port_obj_fib4_add = ofdpa_port_obj_fib4_add, + .port_obj_fib4_del = ofdpa_port_obj_fib4_del, + .port_obj_fdb_add = ofdpa_port_obj_fdb_add, + .port_obj_fdb_del = ofdpa_port_obj_fdb_del, + .port_obj_fdb_dump = ofdpa_port_obj_fdb_dump, + .port_master_linked = ofdpa_port_master_linked, + .port_master_unlinked = ofdpa_port_master_unlinked, + .port_neigh_update = ofdpa_port_neigh_update, + .port_neigh_destroy = ofdpa_port_neigh_destroy, + .port_ev_mac_vlan_seen = ofdpa_port_ev_mac_vlan_seen, +}; diff --git a/drivers/net/ethernet/rocker/rocker_tlv.c b/drivers/net/ethernet/rocker/rocker_tlv.c new file mode 100644 index 000000000000..8185118f3492 --- /dev/null +++ b/drivers/net/ethernet/rocker/rocker_tlv.c @@ -0,0 +1,53 @@ +/* + * drivers/net/ethernet/rocker/rocker_tlv.c - Rocker switch device driver + * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include "rocker_hw.h" +#include "rocker_tlv.h" + +void rocker_tlv_parse(const struct rocker_tlv **tb, int maxtype, + const char *buf, int buf_len) +{ + const struct rocker_tlv *tlv; + const struct rocker_tlv *head = (const struct rocker_tlv *) buf; + int rem; + + memset(tb, 0, sizeof(struct rocker_tlv *) * (maxtype + 1)); + + rocker_tlv_for_each(tlv, head, buf_len, rem) { + u32 type = rocker_tlv_type(tlv); + + if (type > 0 && type <= maxtype) + tb[type] = tlv; + } +} + +int rocker_tlv_put(struct rocker_desc_info *desc_info, + int attrtype, int attrlen, const void *data) +{ + int tail_room = desc_info->data_size - desc_info->tlv_size; + int total_size = rocker_tlv_total_size(attrlen); + struct rocker_tlv *tlv; + + if (unlikely(tail_room < total_size)) + return -EMSGSIZE; + + tlv = rocker_tlv_start(desc_info); + desc_info->tlv_size += total_size; + tlv->type = attrtype; + tlv->len = rocker_tlv_attr_size(attrlen); + memcpy(rocker_tlv_data(tlv), data, attrlen); + memset((char *) tlv + tlv->len, 0, rocker_tlv_padlen(attrlen)); + return 0; +} diff --git a/drivers/net/ethernet/rocker/rocker_tlv.h b/drivers/net/ethernet/rocker/rocker_tlv.h new file mode 100644 index 000000000000..a63ef82e7c72 --- /dev/null +++ b/drivers/net/ethernet/rocker/rocker_tlv.h @@ -0,0 +1,201 @@ +/* + * drivers/net/ethernet/rocker/rocker_tlv.h - Rocker switch device driver + * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _ROCKER_TLV_H +#define _ROCKER_TLV_H + +#include <linux/types.h> + +#include "rocker_hw.h" +#include "rocker.h" + +#define ROCKER_TLV_ALIGNTO 8U +#define ROCKER_TLV_ALIGN(len) \ + (((len) + ROCKER_TLV_ALIGNTO - 1) & ~(ROCKER_TLV_ALIGNTO - 1)) +#define ROCKER_TLV_HDRLEN ROCKER_TLV_ALIGN(sizeof(struct rocker_tlv)) + +/* <------- ROCKER_TLV_HDRLEN -------> <--- ROCKER_TLV_ALIGN(payload) ---> + * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (struct rocker_tlv) | ing | | ing | + * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+ + * <--------------------------- tlv->len --------------------------> + */ + +static inline struct rocker_tlv *rocker_tlv_next(const struct rocker_tlv *tlv, + int *remaining) +{ + int totlen = ROCKER_TLV_ALIGN(tlv->len); + + *remaining -= totlen; + return (struct rocker_tlv *) ((char *) tlv + totlen); +} + +static inline int rocker_tlv_ok(const struct rocker_tlv *tlv, int remaining) +{ + return remaining >= (int) ROCKER_TLV_HDRLEN && + tlv->len >= ROCKER_TLV_HDRLEN && + tlv->len <= remaining; +} + +#define rocker_tlv_for_each(pos, head, len, rem) \ + for (pos = head, rem = len; \ + rocker_tlv_ok(pos, rem); \ + pos = rocker_tlv_next(pos, &(rem))) + +#define rocker_tlv_for_each_nested(pos, tlv, rem) \ + rocker_tlv_for_each(pos, rocker_tlv_data(tlv), \ + rocker_tlv_len(tlv), rem) + +static inline int rocker_tlv_attr_size(int payload) +{ + return ROCKER_TLV_HDRLEN + payload; +} + +static inline int rocker_tlv_total_size(int payload) +{ + return ROCKER_TLV_ALIGN(rocker_tlv_attr_size(payload)); +} + +static inline int rocker_tlv_padlen(int payload) +{ + return rocker_tlv_total_size(payload) - rocker_tlv_attr_size(payload); +} + +static inline int rocker_tlv_type(const struct rocker_tlv *tlv) +{ + return tlv->type; +} + +static inline void *rocker_tlv_data(const struct rocker_tlv *tlv) +{ + return (char *) tlv + ROCKER_TLV_HDRLEN; +} + +static inline int rocker_tlv_len(const struct rocker_tlv *tlv) +{ + return tlv->len - ROCKER_TLV_HDRLEN; +} + +static inline u8 rocker_tlv_get_u8(const struct rocker_tlv *tlv) +{ + return *(u8 *) rocker_tlv_data(tlv); +} + +static inline u16 rocker_tlv_get_u16(const struct rocker_tlv *tlv) +{ + return *(u16 *) rocker_tlv_data(tlv); +} + +static inline __be16 rocker_tlv_get_be16(const struct rocker_tlv *tlv) +{ + return *(__be16 *) rocker_tlv_data(tlv); +} + +static inline u32 rocker_tlv_get_u32(const struct rocker_tlv *tlv) +{ + return *(u32 *) rocker_tlv_data(tlv); +} + +static inline u64 rocker_tlv_get_u64(const struct rocker_tlv *tlv) +{ + return *(u64 *) rocker_tlv_data(tlv); +} + +void rocker_tlv_parse(const struct rocker_tlv **tb, int maxtype, + const char *buf, int buf_len); + +static inline void rocker_tlv_parse_nested(const struct rocker_tlv **tb, + int maxtype, + const struct rocker_tlv *tlv) +{ + rocker_tlv_parse(tb, maxtype, rocker_tlv_data(tlv), + rocker_tlv_len(tlv)); +} + +static inline void +rocker_tlv_parse_desc(const struct rocker_tlv **tb, int maxtype, + const struct rocker_desc_info *desc_info) +{ + rocker_tlv_parse(tb, maxtype, desc_info->data, + desc_info->desc->tlv_size); +} + +static inline struct rocker_tlv * +rocker_tlv_start(struct rocker_desc_info *desc_info) +{ + return (struct rocker_tlv *) ((char *) desc_info->data + + desc_info->tlv_size); +} + +int rocker_tlv_put(struct rocker_desc_info *desc_info, + int attrtype, int attrlen, const void *data); + +static inline int rocker_tlv_put_u8(struct rocker_desc_info *desc_info, + int attrtype, u8 value) +{ + return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &value); +} + +static inline int rocker_tlv_put_u16(struct rocker_desc_info *desc_info, + int attrtype, u16 value) +{ + return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &value); +} + +static inline int rocker_tlv_put_be16(struct rocker_desc_info *desc_info, + int attrtype, __be16 value) +{ + return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &value); +} + +static inline int rocker_tlv_put_u32(struct rocker_desc_info *desc_info, + int attrtype, u32 value) +{ + return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &value); +} + +static inline int rocker_tlv_put_be32(struct rocker_desc_info *desc_info, + int attrtype, __be32 value) +{ + return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &value); +} + +static inline int rocker_tlv_put_u64(struct rocker_desc_info *desc_info, + int attrtype, u64 value) +{ + return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &value); +} + +static inline struct rocker_tlv * +rocker_tlv_nest_start(struct rocker_desc_info *desc_info, int attrtype) +{ + struct rocker_tlv *start = rocker_tlv_start(desc_info); + + if (rocker_tlv_put(desc_info, attrtype, 0, NULL) < 0) + return NULL; + + return start; +} + +static inline void rocker_tlv_nest_end(struct rocker_desc_info *desc_info, + struct rocker_tlv *start) +{ + start->len = (char *) rocker_tlv_start(desc_info) - (char *) start; +} + +static inline void rocker_tlv_nest_cancel(struct rocker_desc_info *desc_info, + const struct rocker_tlv *start) +{ + desc_info->tlv_size = (const char *) start - desc_info->data; +} + +#endif diff --git a/drivers/net/ethernet/samsung/sxgbe/Makefile b/drivers/net/ethernet/samsung/sxgbe/Makefile index dcc80b9d4370..31e968561d5c 100644 --- a/drivers/net/ethernet/samsung/sxgbe/Makefile +++ b/drivers/net/ethernet/samsung/sxgbe/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_SXGBE_ETH) += samsung-sxgbe.o samsung-sxgbe-objs:= sxgbe_platform.o sxgbe_main.o sxgbe_desc.o \ sxgbe_dma.o sxgbe_core.o sxgbe_mtl.o sxgbe_mdio.o \ - sxgbe_ethtool.o sxgbe_xpcs.o $(samsung-sxgbe-y) + sxgbe_ethtool.o $(samsung-sxgbe-y) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.c deleted file mode 100644 index 51c32194ba88..000000000000 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.c +++ /dev/null @@ -1,91 +0,0 @@ -/* 10G controller driver for Samsung SoCs - * - * Copyright (C) 2013 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Author: Siva Reddy Kallam <siva.kallam@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/bitops.h> -#include <linux/kernel.h> -#include <linux/netdevice.h> -#include <linux/phy.h> -#include "sxgbe_common.h" -#include "sxgbe_xpcs.h" - -static int sxgbe_xpcs_read(struct net_device *ndev, unsigned int reg) -{ - u32 value; - struct sxgbe_priv_data *priv = netdev_priv(ndev); - - value = readl(priv->ioaddr + XPCS_OFFSET + reg); - - return value; -} - -static int sxgbe_xpcs_write(struct net_device *ndev, int reg, int data) -{ - struct sxgbe_priv_data *priv = netdev_priv(ndev); - - writel(data, priv->ioaddr + XPCS_OFFSET + reg); - - return 0; -} - -int sxgbe_xpcs_init(struct net_device *ndev) -{ - u32 value; - - value = sxgbe_xpcs_read(ndev, SR_PCS_MMD_CONTROL1); - /* 10G XAUI mode */ - sxgbe_xpcs_write(ndev, SR_PCS_CONTROL2, XPCS_TYPE_SEL_X); - sxgbe_xpcs_write(ndev, VR_PCS_MMD_XAUI_MODE_CONTROL, XPCS_XAUI_MODE); - sxgbe_xpcs_write(ndev, VR_PCS_MMD_XAUI_MODE_CONTROL, value | BIT(13)); - sxgbe_xpcs_write(ndev, SR_PCS_MMD_CONTROL1, value | BIT(11)); - - do { - value = sxgbe_xpcs_read(ndev, VR_PCS_MMD_DIGITAL_STATUS); - } while ((value & XPCS_QSEQ_STATE_MPLLOFF) == XPCS_QSEQ_STATE_STABLE); - - value = sxgbe_xpcs_read(ndev, SR_PCS_MMD_CONTROL1); - sxgbe_xpcs_write(ndev, SR_PCS_MMD_CONTROL1, value & ~BIT(11)); - - do { - value = sxgbe_xpcs_read(ndev, VR_PCS_MMD_DIGITAL_STATUS); - } while ((value & XPCS_QSEQ_STATE_MPLLOFF) != XPCS_QSEQ_STATE_STABLE); - - return 0; -} - -int sxgbe_xpcs_init_1G(struct net_device *ndev) -{ - int value; - - /* 10GBASE-X PCS (1G) mode */ - sxgbe_xpcs_write(ndev, SR_PCS_CONTROL2, XPCS_TYPE_SEL_X); - sxgbe_xpcs_write(ndev, VR_PCS_MMD_XAUI_MODE_CONTROL, XPCS_XAUI_MODE); - value = sxgbe_xpcs_read(ndev, SR_PCS_MMD_CONTROL1); - sxgbe_xpcs_write(ndev, SR_PCS_MMD_CONTROL1, value & ~BIT(13)); - - value = sxgbe_xpcs_read(ndev, SR_MII_MMD_CONTROL); - sxgbe_xpcs_write(ndev, SR_MII_MMD_CONTROL, value | BIT(6)); - sxgbe_xpcs_write(ndev, SR_MII_MMD_CONTROL, value & ~BIT(13)); - value = sxgbe_xpcs_read(ndev, SR_PCS_MMD_CONTROL1); - sxgbe_xpcs_write(ndev, SR_PCS_MMD_CONTROL1, value | BIT(11)); - - do { - value = sxgbe_xpcs_read(ndev, VR_PCS_MMD_DIGITAL_STATUS); - } while ((value & XPCS_QSEQ_STATE_MPLLOFF) != XPCS_QSEQ_STATE_STABLE); - - value = sxgbe_xpcs_read(ndev, SR_PCS_MMD_CONTROL1); - sxgbe_xpcs_write(ndev, SR_PCS_MMD_CONTROL1, value & ~BIT(11)); - - /* Auto Negotiation cluase 37 enable */ - value = sxgbe_xpcs_read(ndev, SR_MII_MMD_CONTROL); - sxgbe_xpcs_write(ndev, SR_MII_MMD_CONTROL, value | BIT(12)); - - return 0; -} diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.h deleted file mode 100644 index 6b26a50724d3..000000000000 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.h +++ /dev/null @@ -1,38 +0,0 @@ -/* 10G controller driver for Samsung SoCs - * - * Copyright (C) 2013 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Author: Byungho An <bh74.an@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef __SXGBE_XPCS_H__ -#define __SXGBE_XPCS_H__ - -/* XPCS Registers */ -#define XPCS_OFFSET 0x1A060000 -#define SR_PCS_MMD_CONTROL1 0x030000 -#define SR_PCS_CONTROL2 0x030007 -#define VR_PCS_MMD_XAUI_MODE_CONTROL 0x038004 -#define VR_PCS_MMD_DIGITAL_STATUS 0x038010 -#define SR_MII_MMD_CONTROL 0x1F0000 -#define SR_MII_MMD_AN_ADV 0x1F0004 -#define SR_MII_MMD_AN_LINK_PARTNER_BA 0x1F0005 -#define VR_MII_MMD_AN_CONTROL 0x1F8001 -#define VR_MII_MMD_AN_INT_STATUS 0x1F8002 - -#define XPCS_QSEQ_STATE_STABLE 0x10 -#define XPCS_QSEQ_STATE_MPLLOFF 0x1c -#define XPCS_TYPE_SEL_R 0x00 -#define XPCS_TYPE_SEL_X 0x01 -#define XPCS_TYPE_SEL_W 0x02 -#define XPCS_XAUI_MODE 0x00 -#define XPCS_RXAUI_MODE 0x01 - -int sxgbe_xpcs_init(struct net_device *ndev); -int sxgbe_xpcs_init_1G(struct net_device *ndev); - -#endif /* __SXGBE_XPCS_H__ */ diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 10827476bc0b..5e3f93f04e62 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -32,7 +32,8 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); -int efx_setup_tc(struct net_device *net_dev, u8 num_tc); +int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); extern unsigned int efx_piobuf_size; extern bool efx_separate_tx_channels; diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 034797661f96..445ccdb6bc67 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -783,14 +783,26 @@ static int efx_ethtool_reset(struct net_device *net_dev, u32 *flags) static const u8 mac_addr_ig_mask[ETH_ALEN] __aligned(2) = {0x01, 0, 0, 0, 0, 0}; #define IP4_ADDR_FULL_MASK ((__force __be32)~0) +#define IP_PROTO_FULL_MASK 0xFF #define PORT_FULL_MASK ((__force __be16)~0) #define ETHER_TYPE_FULL_MASK ((__force __be16)~0) +static inline void ip6_fill_mask(__be32 *mask) +{ + mask[0] = mask[1] = mask[2] = mask[3] = ~(__be32)0; +} + static int efx_ethtool_get_class_rule(struct efx_nic *efx, struct ethtool_rx_flow_spec *rule) { struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec; struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec; + struct ethtool_usrip4_spec *uip_entry = &rule->h_u.usr_ip4_spec; + struct ethtool_usrip4_spec *uip_mask = &rule->m_u.usr_ip4_spec; + struct ethtool_tcpip6_spec *ip6_entry = &rule->h_u.tcp_ip6_spec; + struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec; + struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec; + struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec; struct ethhdr *mac_entry = &rule->h_u.ether_spec; struct ethhdr *mac_mask = &rule->m_u.ether_spec; struct efx_filter_spec spec; @@ -833,6 +845,35 @@ static int efx_ethtool_get_class_rule(struct efx_nic *efx, ip_entry->psrc = spec.rem_port; ip_mask->psrc = PORT_FULL_MASK; } + } else if ((spec.match_flags & EFX_FILTER_MATCH_ETHER_TYPE) && + spec.ether_type == htons(ETH_P_IPV6) && + (spec.match_flags & EFX_FILTER_MATCH_IP_PROTO) && + (spec.ip_proto == IPPROTO_TCP || spec.ip_proto == IPPROTO_UDP) && + !(spec.match_flags & + ~(EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_OUTER_VID | + EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_REM_HOST | + EFX_FILTER_MATCH_IP_PROTO | + EFX_FILTER_MATCH_LOC_PORT | EFX_FILTER_MATCH_REM_PORT))) { + rule->flow_type = ((spec.ip_proto == IPPROTO_TCP) ? + TCP_V6_FLOW : UDP_V6_FLOW); + if (spec.match_flags & EFX_FILTER_MATCH_LOC_HOST) { + memcpy(ip6_entry->ip6dst, spec.loc_host, + sizeof(ip6_entry->ip6dst)); + ip6_fill_mask(ip6_mask->ip6dst); + } + if (spec.match_flags & EFX_FILTER_MATCH_REM_HOST) { + memcpy(ip6_entry->ip6src, spec.rem_host, + sizeof(ip6_entry->ip6src)); + ip6_fill_mask(ip6_mask->ip6src); + } + if (spec.match_flags & EFX_FILTER_MATCH_LOC_PORT) { + ip6_entry->pdst = spec.loc_port; + ip6_mask->pdst = PORT_FULL_MASK; + } + if (spec.match_flags & EFX_FILTER_MATCH_REM_PORT) { + ip6_entry->psrc = spec.rem_port; + ip6_mask->psrc = PORT_FULL_MASK; + } } else if (!(spec.match_flags & ~(EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG | EFX_FILTER_MATCH_REM_MAC | EFX_FILTER_MATCH_ETHER_TYPE | @@ -855,6 +896,47 @@ static int efx_ethtool_get_class_rule(struct efx_nic *efx, mac_entry->h_proto = spec.ether_type; mac_mask->h_proto = ETHER_TYPE_FULL_MASK; } + } else if (spec.match_flags & EFX_FILTER_MATCH_ETHER_TYPE && + spec.ether_type == htons(ETH_P_IP) && + !(spec.match_flags & + ~(EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_OUTER_VID | + EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_REM_HOST | + EFX_FILTER_MATCH_IP_PROTO))) { + rule->flow_type = IPV4_USER_FLOW; + uip_entry->ip_ver = ETH_RX_NFC_IP4; + if (spec.match_flags & EFX_FILTER_MATCH_IP_PROTO) { + uip_mask->proto = IP_PROTO_FULL_MASK; + uip_entry->proto = spec.ip_proto; + } + if (spec.match_flags & EFX_FILTER_MATCH_LOC_HOST) { + uip_entry->ip4dst = spec.loc_host[0]; + uip_mask->ip4dst = IP4_ADDR_FULL_MASK; + } + if (spec.match_flags & EFX_FILTER_MATCH_REM_HOST) { + uip_entry->ip4src = spec.rem_host[0]; + uip_mask->ip4src = IP4_ADDR_FULL_MASK; + } + } else if (spec.match_flags & EFX_FILTER_MATCH_ETHER_TYPE && + spec.ether_type == htons(ETH_P_IPV6) && + !(spec.match_flags & + ~(EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_OUTER_VID | + EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_REM_HOST | + EFX_FILTER_MATCH_IP_PROTO))) { + rule->flow_type = IPV6_USER_FLOW; + if (spec.match_flags & EFX_FILTER_MATCH_IP_PROTO) { + uip6_mask->l4_proto = IP_PROTO_FULL_MASK; + uip6_entry->l4_proto = spec.ip_proto; + } + if (spec.match_flags & EFX_FILTER_MATCH_LOC_HOST) { + memcpy(uip6_entry->ip6dst, spec.loc_host, + sizeof(uip6_entry->ip6dst)); + ip6_fill_mask(uip6_mask->ip6dst); + } + if (spec.match_flags & EFX_FILTER_MATCH_REM_HOST) { + memcpy(uip6_entry->ip6src, spec.rem_host, + sizeof(uip6_entry->ip6src)); + ip6_fill_mask(uip6_mask->ip6src); + } } else { /* The above should handle all filters that we insert */ WARN_ON(1); @@ -946,11 +1028,27 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, } } +static inline bool ip6_mask_is_full(__be32 mask[4]) +{ + return !~(mask[0] & mask[1] & mask[2] & mask[3]); +} + +static inline bool ip6_mask_is_empty(__be32 mask[4]) +{ + return !(mask[0] | mask[1] | mask[2] | mask[3]); +} + static int efx_ethtool_set_class_rule(struct efx_nic *efx, struct ethtool_rx_flow_spec *rule) { struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec; struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec; + struct ethtool_usrip4_spec *uip_entry = &rule->h_u.usr_ip4_spec; + struct ethtool_usrip4_spec *uip_mask = &rule->m_u.usr_ip4_spec; + struct ethtool_tcpip6_spec *ip6_entry = &rule->h_u.tcp_ip6_spec; + struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec; + struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec; + struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec; struct ethhdr *mac_entry = &rule->h_u.ether_spec; struct ethhdr *mac_mask = &rule->m_u.ether_spec; struct efx_filter_spec spec; @@ -1012,6 +1110,92 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx, return -EINVAL; break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE | + EFX_FILTER_MATCH_IP_PROTO); + spec.ether_type = htons(ETH_P_IPV6); + spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V6_FLOW ? + IPPROTO_TCP : IPPROTO_UDP); + if (!ip6_mask_is_empty(ip6_mask->ip6dst)) { + if (!ip6_mask_is_full(ip6_mask->ip6dst)) + return -EINVAL; + spec.match_flags |= EFX_FILTER_MATCH_LOC_HOST; + memcpy(spec.loc_host, ip6_entry->ip6dst, sizeof(spec.loc_host)); + } + if (!ip6_mask_is_empty(ip6_mask->ip6src)) { + if (!ip6_mask_is_full(ip6_mask->ip6src)) + return -EINVAL; + spec.match_flags |= EFX_FILTER_MATCH_REM_HOST; + memcpy(spec.rem_host, ip6_entry->ip6src, sizeof(spec.rem_host)); + } + if (ip6_mask->pdst) { + if (ip6_mask->pdst != PORT_FULL_MASK) + return -EINVAL; + spec.match_flags |= EFX_FILTER_MATCH_LOC_PORT; + spec.loc_port = ip6_entry->pdst; + } + if (ip6_mask->psrc) { + if (ip6_mask->psrc != PORT_FULL_MASK) + return -EINVAL; + spec.match_flags |= EFX_FILTER_MATCH_REM_PORT; + spec.rem_port = ip6_entry->psrc; + } + if (ip6_mask->tclass) + return -EINVAL; + break; + + case IPV4_USER_FLOW: + if (uip_mask->l4_4_bytes || uip_mask->tos || uip_mask->ip_ver || + uip_entry->ip_ver != ETH_RX_NFC_IP4) + return -EINVAL; + spec.match_flags = EFX_FILTER_MATCH_ETHER_TYPE; + spec.ether_type = htons(ETH_P_IP); + if (uip_mask->ip4dst) { + if (uip_mask->ip4dst != IP4_ADDR_FULL_MASK) + return -EINVAL; + spec.match_flags |= EFX_FILTER_MATCH_LOC_HOST; + spec.loc_host[0] = uip_entry->ip4dst; + } + if (uip_mask->ip4src) { + if (uip_mask->ip4src != IP4_ADDR_FULL_MASK) + return -EINVAL; + spec.match_flags |= EFX_FILTER_MATCH_REM_HOST; + spec.rem_host[0] = uip_entry->ip4src; + } + if (uip_mask->proto) { + if (uip_mask->proto != IP_PROTO_FULL_MASK) + return -EINVAL; + spec.match_flags |= EFX_FILTER_MATCH_IP_PROTO; + spec.ip_proto = uip_entry->proto; + } + break; + + case IPV6_USER_FLOW: + if (uip6_mask->l4_4_bytes || uip6_mask->tclass) + return -EINVAL; + spec.match_flags = EFX_FILTER_MATCH_ETHER_TYPE; + spec.ether_type = htons(ETH_P_IPV6); + if (!ip6_mask_is_empty(uip6_mask->ip6dst)) { + if (!ip6_mask_is_full(uip6_mask->ip6dst)) + return -EINVAL; + spec.match_flags |= EFX_FILTER_MATCH_LOC_HOST; + memcpy(spec.loc_host, uip6_entry->ip6dst, sizeof(spec.loc_host)); + } + if (!ip6_mask_is_empty(uip6_mask->ip6src)) { + if (!ip6_mask_is_full(uip6_mask->ip6src)) + return -EINVAL; + spec.match_flags |= EFX_FILTER_MATCH_REM_HOST; + memcpy(spec.rem_host, uip6_entry->ip6src, sizeof(spec.rem_host)); + } + if (uip6_mask->l4_proto) { + if (uip6_mask->l4_proto != IP_PROTO_FULL_MASK) + return -EINVAL; + spec.match_flags |= EFX_FILTER_MATCH_IP_PROTO; + spec.ip_proto = uip6_entry->l4_proto; + } + break; + case ETHER_FLOW: if (!is_zero_ether_addr(mac_mask->h_dest)) { if (ether_addr_equal(mac_mask->h_dest, diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index f7a0ec1bca97..2cdb5718ed66 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -562,14 +562,20 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue) efx->n_tx_channels : 0)); } -int efx_setup_tc(struct net_device *net_dev, u8 num_tc) +int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto, + struct tc_to_netdev *ntc) { struct efx_nic *efx = netdev_priv(net_dev); struct efx_channel *channel; struct efx_tx_queue *tx_queue; - unsigned tc; + unsigned tc, num_tc; int rc; + if (handle != TC_H_ROOT || ntc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + num_tc = ntc->tc; + if (efx_nic_rev(efx) < EFX_REV_FALCON_B0 || num_tc > EFX_MAX_TX_TC) return -EINVAL; diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 23fa29877f5b..942a95db2061 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -17,6 +17,8 @@ #include <linux/mutex.h> #include <linux/highmem.h> #include <linux/if_vlan.h> +#define CREATE_TRACE_POINTS +#include <trace/events/sunvnet.h> #if IS_ENABLED(CONFIG_IPV6) #include <linux/icmpv6.h> @@ -540,6 +542,8 @@ static int vnet_walk_rx_one(struct vnet_port *port, err = vnet_rx_one(port, desc); if (err == -ECONNRESET) return err; + trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid, + index, desc->hdr.ack); desc->hdr.state = VIO_DESC_DONE; err = put_rx_desc(port, dr, desc, index); if (err < 0) @@ -587,9 +591,15 @@ static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr, ack_start = ack_end = vio_dring_prev(dr, start); if (send_ack) { port->napi_resume = false; + trace_vnet_tx_send_stopped_ack(port->vio._local_sid, + port->vio._peer_sid, + ack_end, *npkts); return vnet_send_ack(port, dr, ack_start, ack_end, VIO_DRING_STOPPED); } else { + trace_vnet_tx_defer_stopped_ack(port->vio._local_sid, + port->vio._peer_sid, + ack_end, *npkts); port->napi_resume = true; port->napi_stop_idx = ack_end; return 1; @@ -663,6 +673,8 @@ static int vnet_ack(struct vnet_port *port, void *msgbuf) /* sync for race conditions with vnet_start_xmit() and tell xmit it * is time to send a trigger. */ + trace_vnet_rx_stopped_ack(port->vio._local_sid, + port->vio._peer_sid, end); dr->cons = vio_dring_next(dr, end); desc = vio_dring_entry(dr, dr->cons); if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) { @@ -886,6 +898,9 @@ static int __vnet_tx_trigger(struct vnet_port *port, u32 start) int retries = 0; if (port->stop_rx) { + trace_vnet_tx_pending_stopped_ack(port->vio._local_sid, + port->vio._peer_sid, + port->stop_rx_idx, -1); err = vnet_send_ack(port, &port->vio.drings[VIO_DRIVER_RX_RING], port->stop_rx_idx, -1, @@ -908,6 +923,8 @@ static int __vnet_tx_trigger(struct vnet_port *port, u32 start) if (retries++ > VNET_MAX_RETRIES) break; } while (err == -EAGAIN); + trace_vnet_tx_trigger(port->vio._local_sid, + port->vio._peer_sid, start, err); return err; } @@ -1414,8 +1431,11 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) * producer to consumer announcement that work is available to the * consumer */ - if (!port->start_cons) - goto ldc_start_done; /* previous trigger suffices */ + if (!port->start_cons) { /* previous trigger suffices */ + trace_vnet_skip_tx_trigger(port->vio._local_sid, + port->vio._peer_sid, dr->cons); + goto ldc_start_done; + } err = __vnet_tx_trigger(port, dr->cons); if (unlikely(err < 0)) { diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 029841f98c32..ed0c30f590d4 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1852,22 +1852,26 @@ static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb, return 0; } -static int netcp_setup_tc(struct net_device *dev, u8 num_tc) +static int netcp_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { int i; /* setup tc must be called under rtnl lock */ ASSERT_RTNL(); + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + /* Sanity-check the number of traffic classes requested */ if ((dev->real_num_tx_queues <= 1) || - (dev->real_num_tx_queues < num_tc)) + (dev->real_num_tx_queues < tc->tc)) return -EINVAL; /* Configure traffic class to queue mappings */ - if (num_tc) { - netdev_set_num_tc(dev, num_tc); - for (i = 0; i < num_tc; i++) + if (tc->tc) { + netdev_set_num_tc(dev, tc->tc); + for (i = 0; i < tc->tc; i++) netdev_set_tc_queue(dev, i, 1, i); } else { netdev_reset_tc(dev); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 0bf7edd99573..bc5da357e16d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -72,10 +72,11 @@ struct geneve_dev { bool collect_md; struct gro_cells gro_cells; u32 flags; + struct dst_cache dst_cache; }; /* Geneve device flags */ -#define GENEVE_F_UDP_CSUM BIT(0) +#define GENEVE_F_UDP_ZERO_CSUM_TX BIT(0) #define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1) #define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2) @@ -109,6 +110,11 @@ static __be64 vni_to_tunnel_id(const __u8 *vni) #endif } +static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) +{ + return gs->sock->sk->sk_family; +} + static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, __be32 addr, u8 vni[]) { @@ -152,58 +158,60 @@ static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) return (struct genevehdr *)(udp_hdr(skb) + 1); } -/* geneve receive/decap routine */ -static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) +static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs, + struct sk_buff *skb) { - struct genevehdr *gnvh = geneve_hdr(skb); - struct metadata_dst *tun_dst = NULL; - struct geneve_dev *geneve = NULL; - struct pcpu_sw_netstats *stats; - struct iphdr *iph = NULL; + u8 *vni; __be32 addr; static u8 zero_vni[3]; - u8 *vni; - int err = 0; - sa_family_t sa_family; #if IS_ENABLED(CONFIG_IPV6) - struct ipv6hdr *ip6h = NULL; - struct in6_addr addr6; static struct in6_addr zero_addr6; #endif - sa_family = gs->sock->sk->sk_family; + if (geneve_get_sk_family(gs) == AF_INET) { + struct iphdr *iph; - if (sa_family == AF_INET) { iph = ip_hdr(skb); /* outer IP header... */ if (gs->collect_md) { vni = zero_vni; addr = 0; } else { - vni = gnvh->vni; - + vni = geneve_hdr(skb)->vni; addr = iph->saddr; } - geneve = geneve_lookup(gs, addr, vni); + return geneve_lookup(gs, addr, vni); #if IS_ENABLED(CONFIG_IPV6) - } else if (sa_family == AF_INET6) { + } else if (geneve_get_sk_family(gs) == AF_INET6) { + struct ipv6hdr *ip6h; + struct in6_addr addr6; + ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ if (gs->collect_md) { vni = zero_vni; addr6 = zero_addr6; } else { - vni = gnvh->vni; - + vni = geneve_hdr(skb)->vni; addr6 = ip6h->saddr; } - geneve = geneve6_lookup(gs, addr6, vni); + return geneve6_lookup(gs, addr6, vni); #endif } - if (!geneve) - goto drop; + return NULL; +} + +/* geneve receive/decap routine */ +static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, + struct sk_buff *skb) +{ + struct genevehdr *gnvh = geneve_hdr(skb); + struct metadata_dst *tun_dst = NULL; + struct pcpu_sw_netstats *stats; + int err = 0; + void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { __be16 flags; @@ -212,7 +220,7 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) (gnvh->oam ? TUNNEL_OAM : 0) | (gnvh->critical ? TUNNEL_CRIT_OPT : 0); - tun_dst = udp_tun_rx_dst(skb, sa_family, flags, + tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, vni_to_tunnel_id(gnvh->vni), gnvh->opt_len * 4); if (!tun_dst) @@ -229,7 +237,6 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) } skb_reset_mac_header(skb); - skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev))); skb->protocol = eth_type_trans(skb, geneve->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -240,25 +247,27 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) goto drop; + oiph = skb_network_header(skb); skb_reset_network_header(skb); - if (iph) - err = IP_ECN_decapsulate(iph, skb); + if (geneve_get_sk_family(gs) == AF_INET) + err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) - if (ip6h) - err = IP6_ECN_decapsulate(ip6h, skb); + else + err = IP6_ECN_decapsulate(oiph, skb); #endif if (unlikely(err)) { if (log_ecn_error) { - if (iph) + if (geneve_get_sk_family(gs) == AF_INET) net_info_ratelimited("non-ECT from %pI4 " "with TOS=%#x\n", - &iph->saddr, iph->tos); + &((struct iphdr *)oiph)->saddr, + ((struct iphdr *)oiph)->tos); #if IS_ENABLED(CONFIG_IPV6) - if (ip6h) + else net_info_ratelimited("non-ECT from %pI6\n", - &ip6h->saddr); + &((struct ipv6hdr *)oiph)->saddr); #endif } if (err > 1) { @@ -297,6 +306,13 @@ static int geneve_init(struct net_device *dev) return err; } + err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL); + if (err) { + free_percpu(dev->tstats); + gro_cells_destroy(&geneve->gro_cells); + return err; + } + return 0; } @@ -304,6 +320,7 @@ static void geneve_uninit(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); + dst_cache_destroy(&geneve->dst_cache); gro_cells_destroy(&geneve->gro_cells); free_percpu(dev->tstats); } @@ -312,6 +329,7 @@ static void geneve_uninit(struct net_device *dev) static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct genevehdr *geneveh; + struct geneve_dev *geneve; struct geneve_sock *gs; int opts_len; @@ -327,16 +345,21 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) goto error; - opts_len = geneveh->opt_len * 4; - if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, - htons(ETH_P_TEB))) - goto drop; - gs = rcu_dereference_sk_user_data(sk); if (!gs) goto drop; - geneve_rx(gs, skb); + geneve = geneve_lookup_skb(gs, skb); + if (!geneve) + goto drop; + + opts_len = geneveh->opt_len * 4; + if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, + htons(ETH_P_TEB), + !net_eq(geneve->net, dev_net(geneve->dev)))) + goto drop; + + geneve_rx(geneve, gs, skb); return 0; drop: @@ -383,7 +406,7 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs) struct net_device *dev; struct sock *sk = gs->sock->sk; struct net *net = sock_net(sk); - sa_family_t sa_family = sk->sk_family; + sa_family_t sa_family = geneve_get_sk_family(gs); __be16 port = inet_sk(sk)->inet_sport; int err; @@ -544,7 +567,7 @@ static void geneve_notify_del_rx_port(struct geneve_sock *gs) struct net_device *dev; struct sock *sk = gs->sock->sk; struct net *net = sock_net(sk); - sa_family_t sa_family = sk->sk_family; + sa_family_t sa_family = geneve_get_sk_family(gs); __be16 port = inet_sk(sk)->inet_sport; rcu_read_lock(); @@ -587,7 +610,7 @@ static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, list_for_each_entry(gs, &gn->sock_list, list) { if (inet_sk(gs->sock->sk)->inet_sport == dst_port && - inet_sk(gs->sock->sk)->sk.sk_family == family) { + geneve_get_sk_family(gs) == family) { return gs; } } @@ -680,7 +703,7 @@ static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb, struct genevehdr *gnvh; int min_headroom; int err; - bool udp_sum = !!(flags & GENEVE_F_UDP_CSUM); + bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM_TX); skb_scrub_packet(skb, xnet); @@ -753,7 +776,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, struct ip_tunnel_info *info) { struct geneve_dev *geneve = netdev_priv(dev); + struct dst_cache *dst_cache; struct rtable *rt = NULL; + bool use_cache = true; __u8 tos; memset(fl4, 0, sizeof(*fl4)); @@ -764,16 +789,26 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, fl4->daddr = info->key.u.ipv4.dst; fl4->saddr = info->key.u.ipv4.src; fl4->flowi4_tos = RT_TOS(info->key.tos); + dst_cache = &info->dst_cache; } else { tos = geneve->tos; if (tos == 1) { const struct iphdr *iip = ip_hdr(skb); tos = ip_tunnel_get_dsfield(iip, skb); + use_cache = false; } fl4->flowi4_tos = RT_TOS(tos); fl4->daddr = geneve->remote.sin.sin_addr.s_addr; + dst_cache = &geneve->dst_cache; + } + + use_cache = use_cache && !skb->mark; + if (use_cache) { + rt = dst_cache_get_ip4(dst_cache, &fl4->saddr); + if (rt) + return rt; } rt = ip_route_output_key(geneve->net, fl4); @@ -786,6 +821,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, ip_rt_put(rt); return ERR_PTR(-ELOOP); } + if (use_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr); return rt; } @@ -798,6 +835,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs6 = geneve->sock6; struct dst_entry *dst = NULL; + struct dst_cache *dst_cache; + bool use_cache = true; __u8 prio; memset(fl6, 0, sizeof(*fl6)); @@ -808,16 +847,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, fl6->daddr = info->key.u.ipv6.dst; fl6->saddr = info->key.u.ipv6.src; fl6->flowi6_tos = RT_TOS(info->key.tos); + dst_cache = &info->dst_cache; } else { prio = geneve->tos; if (prio == 1) { const struct iphdr *iip = ip_hdr(skb); prio = ip_tunnel_get_dsfield(iip, skb); + use_cache = false; } fl6->flowi6_tos = RT_TOS(prio); fl6->daddr = geneve->remote.sin6.sin6_addr; + dst_cache = &geneve->dst_cache; + } + + use_cache = use_cache && !skb->mark; + if (use_cache) { + dst = dst_cache_get_ip6(dst_cache, &fl6->saddr); + if (dst) + return dst; } if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { @@ -830,6 +879,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, return ERR_PTR(-ELOOP); } + if (use_cache) + dst_cache_set_ip6(dst_cache, dst, &fl6->saddr); return dst; } #endif @@ -893,9 +944,9 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, opts = ip_tunnel_info_opts(info); if (key->tun_flags & TUNNEL_CSUM) - flags |= GENEVE_F_UDP_CSUM; + flags &= ~GENEVE_F_UDP_ZERO_CSUM_TX; else - flags &= ~GENEVE_F_UDP_CSUM; + flags |= GENEVE_F_UDP_ZERO_CSUM_TX; err = geneve_build_skb(rt, skb, key->tun_flags, vni, info->options_len, opts, flags, xnet); @@ -921,7 +972,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, tos, ttl, df, sport, geneve->dst_port, !net_eq(geneve->net, dev_net(geneve->dev)), - !(flags & GENEVE_F_UDP_CSUM)); + !!(flags & GENEVE_F_UDP_ZERO_CSUM_TX)); return NETDEV_TX_OK; @@ -1301,6 +1352,8 @@ static int geneve_configure(struct net *net, struct net_device *dev, return -EPERM; } + dst_cache_reset(&geneve->dst_cache); + err = register_netdevice(dev); if (err) return err; @@ -1359,8 +1412,8 @@ static int geneve_newlink(struct net *net, struct net_device *dev, metadata = true; if (data[IFLA_GENEVE_UDP_CSUM] && - nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) - flags |= GENEVE_F_UDP_CSUM; + !nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) + flags |= GENEVE_F_UDP_ZERO_CSUM_TX; if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] && nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) @@ -1430,7 +1483,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) } if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, - !!(geneve->flags & GENEVE_F_UDP_CSUM)) || + !(geneve->flags & GENEVE_F_UDP_ZERO_CSUM_TX)) || nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) || nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index 636b65c66d49..7b916d5b14b9 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -80,6 +80,7 @@ #include <linux/hdlcdrv.h> #include <linux/baycom.h> #include <linux/jiffies.h> +#include <linux/time64.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -228,14 +229,15 @@ static inline unsigned int hweight8(unsigned int w) /* --------------------------------------------------------------------- */ -static __inline__ void ser12_rx(struct net_device *dev, struct baycom_state *bc, struct timeval *tv, unsigned char curs) +static __inline__ void ser12_rx(struct net_device *dev, struct baycom_state *bc, struct timespec64 *ts, unsigned char curs) { int timediff; int bdus8 = bc->baud_us >> 3; int bdus4 = bc->baud_us >> 2; int bdus2 = bc->baud_us >> 1; - timediff = 1000000 + tv->tv_usec - bc->modem.ser12.pll_time; + timediff = 1000000 + ts->tv_nsec / NSEC_PER_USEC - + bc->modem.ser12.pll_time; while (timediff >= 500000) timediff -= 1000000; while (timediff >= bdus2) { @@ -287,7 +289,7 @@ static irqreturn_t ser12_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; struct baycom_state *bc = netdev_priv(dev); - struct timeval tv; + struct timespec64 ts; unsigned char iir, msr; unsigned int txcount = 0; @@ -297,7 +299,7 @@ static irqreturn_t ser12_interrupt(int irq, void *dev_id) if ((iir = inb(IIR(dev->base_addr))) & 1) return IRQ_NONE; /* get current time */ - do_gettimeofday(&tv); + ktime_get_ts64(&ts); msr = inb(MSR(dev->base_addr)); /* delta DCD */ if ((msr & 8) && bc->opt_dcd) @@ -340,7 +342,7 @@ static irqreturn_t ser12_interrupt(int irq, void *dev_id) } iir = inb(IIR(dev->base_addr)); } while (!(iir & 1)); - ser12_rx(dev, bc, &tv, msr & 0x10); /* CTS */ + ser12_rx(dev, bc, &ts, msr & 0x10); /* CTS */ if (bc->modem.ptt && txcount) { if (bc->modem.ser12.txshreg <= 1) { bc->modem.ser12.txshreg = 0x10000 | hdlcdrv_getbits(&bc->hdrv); diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index c3d377770616..e4137c1b3df9 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -451,7 +451,7 @@ static const struct net_device_ops scc_netdev_ops = { static int __init setup_adapter(int card_base, int type, int n) { - int i, irq, chip; + int i, irq, chip, err; struct scc_info *info; struct net_device *dev; struct scc_priv *priv; @@ -463,14 +463,17 @@ static int __init setup_adapter(int card_base, int type, int n) /* Initialize what is necessary for write_scc and write_scc_data */ info = kzalloc(sizeof(struct scc_info), GFP_KERNEL | GFP_DMA); - if (!info) + if (!info) { + err = -ENOMEM; goto out; + } info->dev[0] = alloc_netdev(0, "", NET_NAME_UNKNOWN, dev_setup); if (!info->dev[0]) { printk(KERN_ERR "dmascc: " "could not allocate memory for %s at %#3x\n", hw[type].name, card_base); + err = -ENOMEM; goto out1; } @@ -479,6 +482,7 @@ static int __init setup_adapter(int card_base, int type, int n) printk(KERN_ERR "dmascc: " "could not allocate memory for %s at %#3x\n", hw[type].name, card_base); + err = -ENOMEM; goto out2; } spin_lock_init(&info->register_lock); @@ -549,6 +553,7 @@ static int __init setup_adapter(int card_base, int type, int n) printk(KERN_ERR "dmascc: could not find irq of %s at %#3x (irq=%d)\n", hw[type].name, card_base, irq); + err = -ENODEV; goto out3; } @@ -585,11 +590,13 @@ static int __init setup_adapter(int card_base, int type, int n) if (register_netdev(info->dev[0])) { printk(KERN_ERR "dmascc: could not register %s\n", info->dev[0]->name); + err = -ENODEV; goto out3; } if (register_netdev(info->dev[1])) { printk(KERN_ERR "dmascc: could not register %s\n", info->dev[1]->name); + err = -ENODEV; goto out4; } @@ -612,7 +619,7 @@ static int __init setup_adapter(int card_base, int type, int n) out1: kfree(info); out: - return -1; + return err; } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 98e34fee45c7..2b6595e24f43 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -43,6 +43,11 @@ #define RING_SIZE_MIN 64 #define LINKCHANGE_INT (2 * HZ) +#define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \ + NETIF_F_SG | \ + NETIF_F_TSO | \ + NETIF_F_TSO6 | \ + NETIF_F_HW_CSUM) static int ring_size = 128; module_param(ring_size, int, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); @@ -545,6 +550,8 @@ do_send: packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, skb, packet, &pb); + /* timestamp packet in software */ + skb_tx_timestamp(skb); ret = netvsc_send(net_device_ctx->device_ctx, packet, rndis_msg, &pb, skb); @@ -915,6 +922,7 @@ static const struct ethtool_ops ethtool_ops = { .get_link = ethtool_op_get_link, .get_channels = netvsc_get_channels, .set_channels = netvsc_set_channels, + .get_ts_info = ethtool_op_get_ts_info, }; static const struct net_device_ops device_ops = { @@ -1081,10 +1089,8 @@ static int netvsc_probe(struct hv_device *dev, net->netdev_ops = &device_ops; - net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_TSO; - net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_IP_CSUM | NETIF_F_TSO; + net->hw_features = NETVSC_HW_FEATURES; + net->features = NETVSC_HW_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; net->ethtool_ops = ðtool_ops; SET_NETDEV_DEV(net, &dev->device); diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 9542b7bac61a..695a5dc9ace3 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -84,19 +84,19 @@ struct ipvl_addr { #define ip4addr ipu.ip4 struct hlist_node hlnode; /* Hash-table linkage */ struct list_head anode; /* logical-interface linkage */ - struct rcu_head rcu; ipvl_hdr_type atype; + struct rcu_head rcu; }; struct ipvl_port { struct net_device *dev; struct hlist_head hlhead[IPVLAN_HASH_SIZE]; struct list_head ipvlans; - struct rcu_head rcu; + u16 mode; struct work_struct wq; struct sk_buff_head backlog; int count; - u16 mode; + struct rcu_head rcu; }; static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) @@ -114,8 +114,6 @@ static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d) return rtnl_dereference(d->rx_handler_data); } -void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev); -void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval); void ipvlan_init_secret(void); unsigned int ipvlan_mac_hash(const unsigned char *addr); rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb); @@ -125,7 +123,5 @@ void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr); struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6); bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); -struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, - const void *iaddr, bool is_v6); void ipvlan_ht_addr_del(struct ipvl_addr *addr); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 8c48bb2a94ea..d6d0524ee5fd 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -53,8 +53,8 @@ static u8 ipvlan_get_v4_hash(const void *iaddr) IPVLAN_HASH_MASK; } -struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, - const void *iaddr, bool is_v6) +static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, + const void *iaddr, bool is_v6) { struct ipvl_addr *addr; u8 hash; @@ -265,20 +265,25 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb, struct sk_buff *skb = *pskb; len = skb->len + ETH_HLEN; - if (unlikely(!(dev->flags & IFF_UP))) { - kfree_skb(skb); - goto out; - } + /* Only packets exchanged between two local slaves need to have + * device-up check as well as skb-share check. + */ + if (local) { + if (unlikely(!(dev->flags & IFF_UP))) { + kfree_skb(skb); + goto out; + } - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - goto out; + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; - *pskb = skb; + *pskb = skb; + } skb->dev = dev; - skb->pkt_type = PACKET_HOST; if (local) { + skb->pkt_type = PACKET_HOST; if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS) success = true; } else { @@ -342,7 +347,7 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, return addr; } -static int ipvlan_process_v4_outbound(struct sk_buff *skb) +static int ipvlan_process_v4_outbound(struct sk_buff *skb, bool xnet) { const struct iphdr *ip4h = ip_hdr(skb); struct net_device *dev = skb->dev; @@ -365,7 +370,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb) ip_rt_put(rt); goto err; } - skb_dst_drop(skb); + skb_scrub_packet(skb, xnet); skb_dst_set(skb, &rt->dst); err = ip_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(err))) @@ -380,7 +385,7 @@ out: return ret; } -static int ipvlan_process_v6_outbound(struct sk_buff *skb) +static int ipvlan_process_v6_outbound(struct sk_buff *skb, bool xnet) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct net_device *dev = skb->dev; @@ -403,7 +408,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) dst_release(dst); goto err; } - skb_dst_drop(skb); + skb_scrub_packet(skb, xnet); skb_dst_set(skb, dst); err = ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(err))) @@ -418,8 +423,7 @@ out: return ret; } -static int ipvlan_process_outbound(struct sk_buff *skb, - const struct ipvl_dev *ipvlan) +static int ipvlan_process_outbound(struct sk_buff *skb, bool xnet) { struct ethhdr *ethh = eth_hdr(skb); int ret = NET_XMIT_DROP; @@ -443,9 +447,9 @@ static int ipvlan_process_outbound(struct sk_buff *skb, } if (skb->protocol == htons(ETH_P_IPV6)) - ret = ipvlan_process_v6_outbound(skb); + ret = ipvlan_process_v6_outbound(skb, xnet); else if (skb->protocol == htons(ETH_P_IP)) - ret = ipvlan_process_v4_outbound(skb); + ret = ipvlan_process_v4_outbound(skb, xnet); else { pr_warn_ratelimited("Dropped outbound packet type=%x\n", ntohs(skb->protocol)); @@ -481,6 +485,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) void *lyr3h; struct ipvl_addr *addr; int addr_type; + bool xnet; lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); if (!lyr3h) @@ -491,8 +496,9 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) return ipvlan_rcv_frame(addr, &skb, true); out: + xnet = !net_eq(dev_net(skb->dev), dev_net(ipvlan->phy_dev)); skb->dev = ipvlan->phy_dev; - return ipvlan_process_outbound(skb, ipvlan); + return ipvlan_process_outbound(skb, xnet); } static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index f94392d07126..a7ca1c519a0d 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -9,12 +9,12 @@ #include "ipvlan.h" -void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) +static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) { ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; } -void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval) +static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) { struct ipvl_dev *ipvlan; @@ -442,6 +442,7 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, struct ipvl_port *port; struct net_device *phy_dev; int err; + u16 mode = IPVLAN_MODE_L3; if (!tb[IFLA_LINK]) return -EINVAL; @@ -460,14 +461,15 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, return err; } - port = ipvlan_port_get_rtnl(phy_dev); if (data && data[IFLA_IPVLAN_MODE]) - port->mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); + mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); + port = ipvlan_port_get_rtnl(phy_dev); ipvlan->phy_dev = phy_dev; ipvlan->dev = dev; ipvlan->port = port; ipvlan->sfeatures = IPVLAN_FEATURES; + ipvlan_adjust_mtu(ipvlan, phy_dev); INIT_LIST_HEAD(&ipvlan->addrs); /* TODO Probably put random address here to be presented to the @@ -488,6 +490,8 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, goto ipvlan_destroy_port; list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); + ipvlan_set_port_mode(port, mode); + netif_stacked_transfer_operstate(phy_dev, dev); return 0; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 94e688805dd2..426a2cc27ac8 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1069,7 +1069,7 @@ EXPORT_SYMBOL_GPL(macvlan_common_setup); static void macvlan_setup(struct net_device *dev) { macvlan_common_setup(dev); - dev->tx_queue_len = 0; + dev->priv_flags |= IFF_NO_QUEUE; } static int macvlan_port_create(struct net_device *dev) diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index db507e3bcab9..b881a7b1e4f6 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -24,7 +24,7 @@ #define MII_BCM7XXX_100TX_FALSE_CAR 0x13 #define MII_BCM7XXX_100TX_DISC 0x14 #define MII_BCM7XXX_AUX_MODE 0x1d -#define MII_BCM7XX_64CLK_MDIO BIT(12) +#define MII_BCM7XXX_64CLK_MDIO BIT(12) #define MII_BCM7XXX_TEST 0x1f #define MII_BCM7XXX_SHD_MODE_2 BIT(2) @@ -247,7 +247,7 @@ static int bcm7xxx_config_init(struct phy_device *phydev) int ret; /* Enable 64 clock MDIO */ - phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XX_64CLK_MDIO); + phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XXX_64CLK_MDIO); phy_read(phydev, MII_BCM7XXX_AUX_MODE); /* set shadow mode 2 */ @@ -317,6 +317,21 @@ static int bcm7xxx_suspend(struct phy_device *phydev) .resume = bcm7xxx_28nm_resume, \ } +#define BCM7XXX_40NM_EPHY(_oui, _name) \ +{ \ + .phy_id = (_oui), \ + .phy_id_mask = 0xfffffff0, \ + .name = _name, \ + .features = PHY_BASIC_FEATURES | \ + SUPPORTED_Pause | SUPPORTED_Asym_Pause, \ + .flags = PHY_IS_INTERNAL, \ + .config_init = bcm7xxx_config_init, \ + .config_aneg = genphy_config_aneg, \ + .read_status = genphy_read_status, \ + .suspend = bcm7xxx_suspend, \ + .resume = bcm7xxx_config_init, \ +} + static struct phy_driver bcm7xxx_driver[] = { BCM7XXX_28NM_GPHY(PHY_ID_BCM7250, "Broadcom BCM7250"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"), @@ -324,43 +339,10 @@ static struct phy_driver bcm7xxx_driver[] = { BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"), -{ - .phy_id = PHY_ID_BCM7425, - .phy_id_mask = 0xfffffff0, - .name = "Broadcom BCM7425", - .features = PHY_BASIC_FEATURES | - SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = PHY_IS_INTERNAL, - .config_init = bcm7xxx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .suspend = bcm7xxx_suspend, - .resume = bcm7xxx_config_init, -}, { - .phy_id = PHY_ID_BCM7429, - .phy_id_mask = 0xfffffff0, - .name = "Broadcom BCM7429", - .features = PHY_BASIC_FEATURES | - SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = PHY_IS_INTERNAL, - .config_init = bcm7xxx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .suspend = bcm7xxx_suspend, - .resume = bcm7xxx_config_init, -}, { - .phy_id = PHY_ID_BCM7435, - .phy_id_mask = 0xfffffff0, - .name = "Broadcom BCM7435", - .features = PHY_BASIC_FEATURES | - SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = PHY_IS_INTERNAL, - .config_init = bcm7xxx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .suspend = bcm7xxx_suspend, - .resume = bcm7xxx_config_init, -} }; + BCM7XXX_40NM_EPHY(PHY_ID_BCM7425, "Broadcom BCM7425"), + BCM7XXX_40NM_EPHY(PHY_ID_BCM7429, "Broadcom BCM7429"), + BCM7XXX_40NM_EPHY(PHY_ID_BCM7435, "Broadcom BCM7435"), +}; static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { { PHY_ID_BCM7250, 0xfffffff0, }, diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c index 5e14e629c597..03d54c4adc88 100644 --- a/drivers/net/phy/dp83848.c +++ b/drivers/net/phy/dp83848.c @@ -1,7 +1,7 @@ /* * Driver for the Texas Instruments DP83848 PHY * - * Copyright (C) 2015 Texas Instruments Inc. + * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,11 +16,13 @@ #include <linux/module.h> #include <linux/phy.h> -#define DP83848_PHY_ID 0x20005c90 +#define TI_DP83848C_PHY_ID 0x20005ca0 +#define NS_DP83848C_PHY_ID 0x20005c90 +#define TLK10X_PHY_ID 0x2000a210 /* Registers */ -#define DP83848_MICR 0x11 -#define DP83848_MISR 0x12 +#define DP83848_MICR 0x11 /* MII Interrupt Control Register */ +#define DP83848_MISR 0x12 /* MII Interrupt Status Register */ /* MICR Register Fields */ #define DP83848_MICR_INT_OE BIT(0) /* Interrupt Output Enable */ @@ -36,6 +38,12 @@ #define DP83848_MISR_ED_INT_EN BIT(6) /* Energy detect */ #define DP83848_MISR_LQM_INT_EN BIT(7) /* Link Quality Monitor */ +#define DP83848_INT_EN_MASK \ + (DP83848_MISR_ANC_INT_EN | \ + DP83848_MISR_DUP_INT_EN | \ + DP83848_MISR_SPD_INT_EN | \ + DP83848_MISR_LINK_INT_EN) + static int dp83848_ack_interrupt(struct phy_device *phydev) { int err = phy_read(phydev, DP83848_MISR); @@ -45,50 +53,58 @@ static int dp83848_ack_interrupt(struct phy_device *phydev) static int dp83848_config_intr(struct phy_device *phydev) { - int err; + int control, ret; + + control = phy_read(phydev, DP83848_MICR); + if (control < 0) + return control; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { - err = phy_write(phydev, DP83848_MICR, - DP83848_MICR_INT_OE | - DP83848_MICR_INTEN); - if (err < 0) - return err; - - return phy_write(phydev, DP83848_MISR, - DP83848_MISR_ANC_INT_EN | - DP83848_MISR_DUP_INT_EN | - DP83848_MISR_SPD_INT_EN | - DP83848_MISR_LINK_INT_EN); + control |= DP83848_MICR_INT_OE; + control |= DP83848_MICR_INTEN; + + ret = phy_write(phydev, DP83848_MISR, DP83848_INT_EN_MASK); + if (ret < 0) + return ret; + } else { + control &= ~DP83848_MICR_INTEN; } - return phy_write(phydev, DP83848_MICR, 0x0); + return phy_write(phydev, DP83848_MICR, control); } static struct mdio_device_id __maybe_unused dp83848_tbl[] = { - { DP83848_PHY_ID, 0xfffffff0 }, + { TI_DP83848C_PHY_ID, 0xfffffff0 }, + { NS_DP83848C_PHY_ID, 0xfffffff0 }, + { TLK10X_PHY_ID, 0xfffffff0 }, { } }; MODULE_DEVICE_TABLE(mdio, dp83848_tbl); +#define DP83848_PHY_DRIVER(_id, _name) \ + { \ + .phy_id = _id, \ + .phy_id_mask = 0xfffffff0, \ + .name = _name, \ + .features = PHY_BASIC_FEATURES, \ + .flags = PHY_HAS_INTERRUPT, \ + \ + .soft_reset = genphy_soft_reset, \ + .config_init = genphy_config_init, \ + .suspend = genphy_suspend, \ + .resume = genphy_resume, \ + .config_aneg = genphy_config_aneg, \ + .read_status = genphy_read_status, \ + \ + /* IRQ related */ \ + .ack_interrupt = dp83848_ack_interrupt, \ + .config_intr = dp83848_config_intr, \ + } + static struct phy_driver dp83848_driver[] = { - { - .phy_id = DP83848_PHY_ID, - .phy_id_mask = 0xfffffff0, - .name = "TI DP83848", - .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, - - .soft_reset = genphy_soft_reset, - .config_init = genphy_config_init, - .suspend = genphy_suspend, - .resume = genphy_resume, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - - /* IRQ related */ - .ack_interrupt = dp83848_ack_interrupt, - .config_intr = dp83848_config_intr, - }, + DP83848_PHY_DRIVER(TI_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"), + DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"), + DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"), }; module_phy_driver(dp83848_driver); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index ab1d0fcaf1d9..9fb9d80ae419 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -133,6 +133,11 @@ #define MII_88E3016_DISABLE_SCRAMBLER 0x0200 #define MII_88E3016_AUTO_MDIX_CROSSOVER 0x0030 +#define MII_88E1510_GEN_CTRL_REG_1 0x14 +#define MII_88E1510_GEN_CTRL_REG_1_MODE_MASK 0x7 +#define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII 0x1 /* SGMII to copper */ +#define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */ + MODULE_DESCRIPTION("Marvell PHY driver"); MODULE_AUTHOR("Andy Fleming"); MODULE_LICENSE("GPL"); @@ -438,6 +443,41 @@ static int m88e1318_config_aneg(struct phy_device *phydev) return m88e1121_config_aneg(phydev); } +static int m88e1510_config_init(struct phy_device *phydev) +{ + int err; + int temp; + + /* SGMII-to-Copper mode initialization */ + if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { + /* Select page 18 */ + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 18); + if (err < 0) + return err; + + /* In reg 20, write MODE[2:0] = 0x1 (SGMII to Copper) */ + temp = phy_read(phydev, MII_88E1510_GEN_CTRL_REG_1); + temp &= ~MII_88E1510_GEN_CTRL_REG_1_MODE_MASK; + temp |= MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII; + err = phy_write(phydev, MII_88E1510_GEN_CTRL_REG_1, temp); + if (err < 0) + return err; + + /* PHY reset is necessary after changing MODE[2:0] */ + temp |= MII_88E1510_GEN_CTRL_REG_1_RESET; + err = phy_write(phydev, MII_88E1510_GEN_CTRL_REG_1, temp); + if (err < 0) + return err; + + /* Reset page selection */ + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 0); + if (err < 0) + return err; + } + + return 0; +} + static int m88e1510_config_aneg(struct phy_device *phydev) { int err; @@ -1031,8 +1071,8 @@ static u64 marvell_get_stat(struct phy_device *phydev, int i) { struct marvell_hw_stat stat = marvell_hw_stats[i]; struct marvell_priv *priv = phydev->priv; - int err, oldpage; - u64 val; + int err, oldpage, val; + u64 ret; oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE); err = phy_write(phydev, MII_MARVELL_PHY_PAGE, @@ -1042,16 +1082,16 @@ static u64 marvell_get_stat(struct phy_device *phydev, int i) val = phy_read(phydev, stat.reg); if (val < 0) { - val = UINT64_MAX; + ret = UINT64_MAX; } else { val = val & ((1 << stat.bits) - 1); priv->stats[i] += val; - val = priv->stats[i]; + ret = priv->stats[i]; } phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage); - return val; + return ret; } static void marvell_get_stats(struct phy_device *phydev, @@ -1268,7 +1308,7 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, - .config_init = &marvell_config_init, + .config_init = &m88e1510_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 03833dbfca67..48219c83fb00 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -612,18 +612,19 @@ static u64 kszphy_get_stat(struct phy_device *phydev, int i) { struct kszphy_hw_stat stat = kszphy_hw_stats[i]; struct kszphy_priv *priv = phydev->priv; - u64 val; + int val; + u64 ret; val = phy_read(phydev, stat.reg); if (val < 0) { - val = UINT64_MAX; + ret = UINT64_MAX; } else { val = val & ((1 << stat.bits) - 1); priv->stats[i] += val; - val = priv->stats[i]; + ret = priv->stats[i]; } - return val; + return ret; } static void kszphy_get_stats(struct phy_device *phydev, diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index c72c42206850..b5d50d458728 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -18,6 +18,9 @@ #include <linux/module.h> #include <linux/delay.h> #include <linux/device.h> +#include <linux/gpio/consumer.h> +#include <linux/of.h> +#include <linux/of_gpio.h> #include <linux/spi/spi.h> @@ -74,6 +77,7 @@ #define KS8995_REGS_SIZE 0x80 #define KSZ8864_REGS_SIZE 0x100 +#define KSZ8795_REGS_SIZE 0x100 #define ID1_CHIPID_M 0xf #define ID1_CHIPID_S 4 @@ -82,15 +86,63 @@ #define ID1_START_SW 1 /* start the switch */ #define FAMILY_KS8995 0x95 +#define FAMILY_KSZ8795 0x87 #define CHIPID_M 0 +#define KS8995_CHIP_ID 0x00 +#define KSZ8864_CHIP_ID 0x01 +#define KSZ8795_CHIP_ID 0x09 #define KS8995_CMD_WRITE 0x02U #define KS8995_CMD_READ 0x03U #define KS8995_RESET_DELAY 10 /* usec */ +enum ks8995_chip_variant { + ks8995, + ksz8864, + ksz8795, + max_variant +}; + +struct ks8995_chip_params { + char *name; + int family_id; + int chip_id; + int regs_size; + int addr_width; + int addr_shift; +}; + +static const struct ks8995_chip_params ks8995_chip[] = { + [ks8995] = { + .name = "KS8995MA", + .family_id = FAMILY_KS8995, + .chip_id = KS8995_CHIP_ID, + .regs_size = KS8995_REGS_SIZE, + .addr_width = 8, + .addr_shift = 0, + }, + [ksz8864] = { + .name = "KSZ8864RMN", + .family_id = FAMILY_KS8995, + .chip_id = KSZ8864_CHIP_ID, + .regs_size = KSZ8864_REGS_SIZE, + .addr_width = 8, + .addr_shift = 0, + }, + [ksz8795] = { + .name = "KSZ8795CLX", + .family_id = FAMILY_KSZ8795, + .chip_id = KSZ8795_CHIP_ID, + .regs_size = KSZ8795_REGS_SIZE, + .addr_width = 12, + .addr_shift = 1, + }, +}; + struct ks8995_pdata { - /* not yet implemented */ + int reset_gpio; + enum of_gpio_flags reset_gpio_flags; }; struct ks8995_switch { @@ -98,7 +150,17 @@ struct ks8995_switch { struct mutex lock; struct ks8995_pdata *pdata; struct bin_attribute regs_attr; + const struct ks8995_chip_params *chip; + int revision_id; +}; + +static const struct spi_device_id ks8995_id[] = { + {"ks8995", ks8995}, + {"ksz8864", ksz8864}, + {"ksz8795", ksz8795}, + { } }; +MODULE_DEVICE_TABLE(spi, ks8995_id); static inline u8 get_chip_id(u8 val) { @@ -110,20 +172,44 @@ static inline u8 get_chip_rev(u8 val) return (val >> ID1_REVISION_S) & ID1_REVISION_M; } +/* create_spi_cmd - create a chip specific SPI command header + * @ks: pointer to switch instance + * @cmd: SPI command for switch + * @address: register address for command + * + * Different chip families use different bit pattern to address the switches + * registers: + * + * KS8995: 8bit command + 8bit address + * KSZ8795: 3bit command + 12bit address + 1bit TR (?) + */ +static inline __be16 create_spi_cmd(struct ks8995_switch *ks, int cmd, + unsigned address) +{ + u16 result = cmd; + + /* make room for address (incl. address shift) */ + result <<= ks->chip->addr_width + ks->chip->addr_shift; + /* add address */ + result |= address << ks->chip->addr_shift; + /* SPI protocol needs big endian */ + return cpu_to_be16(result); +} /* ------------------------------------------------------------------------ */ static int ks8995_read(struct ks8995_switch *ks, char *buf, unsigned offset, size_t count) { - u8 cmd[2]; + __be16 cmd; struct spi_transfer t[2]; struct spi_message m; int err; + cmd = create_spi_cmd(ks, KS8995_CMD_READ, offset); spi_message_init(&m); memset(&t, 0, sizeof(t)); - t[0].tx_buf = cmd; + t[0].tx_buf = &cmd; t[0].len = sizeof(cmd); spi_message_add_tail(&t[0], &m); @@ -131,9 +217,6 @@ static int ks8995_read(struct ks8995_switch *ks, char *buf, t[1].len = count; spi_message_add_tail(&t[1], &m); - cmd[0] = KS8995_CMD_READ; - cmd[1] = offset; - mutex_lock(&ks->lock); err = spi_sync(ks->spi, &m); mutex_unlock(&ks->lock); @@ -141,20 +224,20 @@ static int ks8995_read(struct ks8995_switch *ks, char *buf, return err ? err : count; } - static int ks8995_write(struct ks8995_switch *ks, char *buf, unsigned offset, size_t count) { - u8 cmd[2]; + __be16 cmd; struct spi_transfer t[2]; struct spi_message m; int err; + cmd = create_spi_cmd(ks, KS8995_CMD_WRITE, offset); spi_message_init(&m); memset(&t, 0, sizeof(t)); - t[0].tx_buf = cmd; + t[0].tx_buf = &cmd; t[0].len = sizeof(cmd); spi_message_add_tail(&t[0], &m); @@ -162,9 +245,6 @@ static int ks8995_write(struct ks8995_switch *ks, char *buf, t[1].len = count; spi_message_add_tail(&t[1], &m); - cmd[0] = KS8995_CMD_WRITE; - cmd[1] = offset; - mutex_lock(&ks->lock); err = spi_sync(ks->spi, &m); mutex_unlock(&ks->lock); @@ -233,6 +313,107 @@ static ssize_t ks8995_registers_write(struct file *filp, struct kobject *kobj, return ks8995_write(ks8995, buf, off, count); } +/* ks8995_get_revision - get chip revision + * @ks: pointer to switch instance + * + * Verify chip family and id and get chip revision. + */ +static int ks8995_get_revision(struct ks8995_switch *ks) +{ + int err; + u8 id0, id1, ksz8864_id; + + /* read family id */ + err = ks8995_read_reg(ks, KS8995_REG_ID0, &id0); + if (err) { + err = -EIO; + goto err_out; + } + + /* verify family id */ + if (id0 != ks->chip->family_id) { + dev_err(&ks->spi->dev, "chip family id mismatch: expected 0x%02x but 0x%02x read\n", + ks->chip->family_id, id0); + err = -ENODEV; + goto err_out; + } + + switch (ks->chip->family_id) { + case FAMILY_KS8995: + /* try reading chip id at CHIP ID1 */ + err = ks8995_read_reg(ks, KS8995_REG_ID1, &id1); + if (err) { + err = -EIO; + goto err_out; + } + + /* verify chip id */ + if ((get_chip_id(id1) == CHIPID_M) && + (get_chip_id(id1) == ks->chip->chip_id)) { + /* KS8995MA */ + ks->revision_id = get_chip_rev(id1); + } else if (get_chip_id(id1) != CHIPID_M) { + /* KSZ8864RMN */ + err = ks8995_read_reg(ks, KS8995_REG_ID1, &ksz8864_id); + if (err) { + err = -EIO; + goto err_out; + } + + if ((ksz8864_id & 0x80) && + (ks->chip->chip_id == KSZ8864_CHIP_ID)) { + ks->revision_id = get_chip_rev(id1); + } + + } else { + dev_err(&ks->spi->dev, "unsupported chip id for KS8995 family: 0x%02x\n", + id1); + err = -ENODEV; + } + break; + case FAMILY_KSZ8795: + /* try reading chip id at CHIP ID1 */ + err = ks8995_read_reg(ks, KS8995_REG_ID1, &id1); + if (err) { + err = -EIO; + goto err_out; + } + + if (get_chip_id(id1) == ks->chip->chip_id) { + ks->revision_id = get_chip_rev(id1); + } else { + dev_err(&ks->spi->dev, "unsupported chip id for KSZ8795 family: 0x%02x\n", + id1); + err = -ENODEV; + } + break; + default: + dev_err(&ks->spi->dev, "unsupported family id: 0x%02x\n", id0); + err = -ENODEV; + break; + } +err_out: + return err; +} + +/* ks8995_parse_dt - setup platform data from devicetree + * @ks: pointer to switch instance + * + * Parses supported DT properties and sets up platform data + * accordingly. + */ +static void ks8995_parse_dt(struct ks8995_switch *ks) +{ + struct device_node *np = ks->spi->dev.of_node; + struct ks8995_pdata *pdata = ks->pdata; + + if (!np) + return; + + pdata->reset_gpio = of_get_named_gpio_flags(np, "reset-gpios", 0, + &pdata->reset_gpio_flags); +} + static const struct bin_attribute ks8995_registers_attr = { .attr = { .name = "registers", @@ -244,24 +425,58 @@ static const struct bin_attribute ks8995_registers_attr = { }; /* ------------------------------------------------------------------------ */ - static int ks8995_probe(struct spi_device *spi) { - struct ks8995_switch *ks; - struct ks8995_pdata *pdata; - u8 ids[2]; - int err; + struct ks8995_switch *ks; + int err; + int variant = spi_get_device_id(spi)->driver_data; - /* Chip description */ - pdata = spi->dev.platform_data; + if (variant >= max_variant) { + dev_err(&spi->dev, "bad chip variant %d\n", variant); + return -ENODEV; + } ks = devm_kzalloc(&spi->dev, sizeof(*ks), GFP_KERNEL); if (!ks) return -ENOMEM; mutex_init(&ks->lock); - ks->pdata = pdata; ks->spi = spi_dev_get(spi); + ks->chip = &ks8995_chip[variant]; + + if (ks->spi->dev.of_node) { + ks->pdata = devm_kzalloc(&spi->dev, sizeof(*ks->pdata), + GFP_KERNEL); + if (!ks->pdata) + return -ENOMEM; + + ks->pdata->reset_gpio = -1; + + ks8995_parse_dt(ks); + } + + if (!ks->pdata) + ks->pdata = spi->dev.platform_data; + + /* de-assert switch reset */ + if (ks->pdata && gpio_is_valid(ks->pdata->reset_gpio)) { + unsigned long flags; + + flags = (ks->pdata->reset_gpio_flags == OF_GPIO_ACTIVE_LOW ? + GPIOF_ACTIVE_LOW : 0); + + err = devm_gpio_request_one(&spi->dev, + ks->pdata->reset_gpio, + flags, "switch-reset"); + if (err) { + dev_err(&spi->dev, + "failed to get reset-gpios: %d\n", err); + return -EIO; + } + + gpiod_set_value(gpio_to_desc(ks->pdata->reset_gpio), 0); + } + spi_set_drvdata(spi, ks); spi->mode = SPI_MODE_0; @@ -272,39 +487,12 @@ static int ks8995_probe(struct spi_device *spi) return err; } - err = ks8995_read(ks, ids, KS8995_REG_ID0, sizeof(ids)); - if (err < 0) { - dev_err(&spi->dev, "unable to read id registers, err=%d\n", - err); + err = ks8995_get_revision(ks); + if (err) return err; - } - - switch (ids[0]) { - case FAMILY_KS8995: - break; - default: - dev_err(&spi->dev, "unknown family id:%02x\n", ids[0]); - return -ENODEV; - } + ks->regs_attr.size = ks->chip->regs_size; memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr)); - if (get_chip_id(ids[1]) != CHIPID_M) { - u8 val; - - /* Check if this is a KSZ8864RMN */ - err = ks8995_read(ks, &val, KSZ8864_REG_ID1, sizeof(val)); - if (err < 0) { - dev_err(&spi->dev, - "unable to read chip id register, err=%d\n", - err); - return err; - } - if ((val & 0x80) == 0) { - dev_err(&spi->dev, "unknown chip:%02x,0\n", ids[1]); - return err; - } - ks->regs_attr.size = KSZ8864_REGS_SIZE; - } err = ks8995_reset(ks); if (err) @@ -317,14 +505,8 @@ static int ks8995_probe(struct spi_device *spi) return err; } - if (get_chip_id(ids[1]) == CHIPID_M) { - dev_info(&spi->dev, - "KS8995 device found, Chip ID:%x, Revision:%x\n", - get_chip_id(ids[1]), get_chip_rev(ids[1])); - } else { - dev_info(&spi->dev, "KSZ8864 device found, Revision:%x\n", - get_chip_rev(ids[1])); - } + dev_info(&spi->dev, "%s device found, Chip ID:%x, Revision:%x\n", + ks->chip->name, ks->chip->chip_id, ks->revision_id); return 0; } @@ -335,17 +517,21 @@ static int ks8995_remove(struct spi_device *spi) sysfs_remove_bin_file(&spi->dev.kobj, &ks->regs_attr); + /* assert reset */ + if (ks->pdata && gpio_is_valid(ks->pdata->reset_gpio)) + gpiod_set_value(gpio_to_desc(ks->pdata->reset_gpio), 1); + return 0; } /* ------------------------------------------------------------------------ */ - static struct spi_driver ks8995_driver = { .driver = { .name = "spi-ks8995", }, .probe = ks8995_probe, .remove = ks8995_remove, + .id_table = ks8995_id, }; module_spi_driver(ks8995_driver); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 718ceeab4dbc..00558e139584 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -758,6 +758,8 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb) u64_stats_update_end(&pcpu_stats->syncp); skb->dev = team->dev; + } else if (res == RX_HANDLER_EXACT) { + this_cpu_inc(team->pcpu_stats->rx_nohandler); } else { this_cpu_inc(team->pcpu_stats->rx_dropped); } @@ -1807,7 +1809,7 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) struct team *team = netdev_priv(dev); struct team_pcpu_stats *p; u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes; - u32 rx_dropped = 0, tx_dropped = 0; + u32 rx_dropped = 0, tx_dropped = 0, rx_nohandler = 0; unsigned int start; int i; @@ -1828,14 +1830,16 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_packets += tx_packets; stats->tx_bytes += tx_bytes; /* - * rx_dropped & tx_dropped are u32, updated - * without syncp protection. + * rx_dropped, tx_dropped & rx_nohandler are u32, + * updated without syncp protection. */ rx_dropped += p->rx_dropped; tx_dropped += p->tx_dropped; + rx_nohandler += p->rx_nohandler; } stats->rx_dropped = rx_dropped; stats->tx_dropped = tx_dropped; + stats->rx_nohandler = rx_nohandler; return stats; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 767ab11a6e9f..fb0eae42bf39 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -146,6 +146,10 @@ struct virtnet_info { virtio_net_ctrl_ack ctrl_status; u8 ctrl_promisc; u8 ctrl_allmulti; + + /* Ethtool settings */ + u8 duplex; + u32 speed; }; struct padded_vnet_hdr { @@ -1376,6 +1380,60 @@ static void virtnet_get_channels(struct net_device *dev, channels->other_count = 0; } +/* Check if the user is trying to change anything besides speed/duplex */ +static bool virtnet_validate_ethtool_cmd(const struct ethtool_cmd *cmd) +{ + struct ethtool_cmd diff1 = *cmd; + struct ethtool_cmd diff2 = {}; + + /* cmd is always set so we need to clear it, validate the port type + * and also without autonegotiation we can ignore advertising + */ + ethtool_cmd_speed_set(&diff1, 0); + diff2.port = PORT_OTHER; + diff1.advertising = 0; + diff1.duplex = 0; + diff1.cmd = 0; + + return !memcmp(&diff1, &diff2, sizeof(diff1)); +} + +static int virtnet_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct virtnet_info *vi = netdev_priv(dev); + u32 speed; + + speed = ethtool_cmd_speed(cmd); + /* don't allow custom speed and duplex */ + if (!ethtool_validate_speed(speed) || + !ethtool_validate_duplex(cmd->duplex) || + !virtnet_validate_ethtool_cmd(cmd)) + return -EINVAL; + vi->speed = speed; + vi->duplex = cmd->duplex; + + return 0; +} + +static int virtnet_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct virtnet_info *vi = netdev_priv(dev); + + ethtool_cmd_speed_set(cmd, vi->speed); + cmd->duplex = vi->duplex; + cmd->port = PORT_OTHER; + + return 0; +} + +static void virtnet_init_settings(struct net_device *dev) +{ + struct virtnet_info *vi = netdev_priv(dev); + + vi->speed = SPEED_UNKNOWN; + vi->duplex = DUPLEX_UNKNOWN; +} + static const struct ethtool_ops virtnet_ethtool_ops = { .get_drvinfo = virtnet_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1383,6 +1441,8 @@ static const struct ethtool_ops virtnet_ethtool_ops = { .set_channels = virtnet_set_channels, .get_channels = virtnet_get_channels, .get_ts_info = ethtool_op_get_ts_info, + .get_settings = virtnet_get_settings, + .set_settings = virtnet_set_settings, }; #define MIN_MTU 68 @@ -1855,6 +1915,8 @@ static int virtnet_probe(struct virtio_device *vdev) netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); + virtnet_init_settings(dev); + err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 66addb7a7911..9ce088bb28ab 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -32,7 +32,6 @@ #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> -#include <net/rtnetlink.h> #include <net/route.h> #include <net/addrconf.h> #include <net/l3mdev.h> @@ -877,6 +876,24 @@ static int vrf_fillinfo(struct sk_buff *skb, return nla_put_u32(skb, IFLA_VRF_TABLE, vrf->tb_id); } +static size_t vrf_get_slave_size(const struct net_device *bond_dev, + const struct net_device *slave_dev) +{ + return nla_total_size(sizeof(u32)); /* IFLA_VRF_PORT_TABLE */ +} + +static int vrf_fill_slave_info(struct sk_buff *skb, + const struct net_device *vrf_dev, + const struct net_device *slave_dev) +{ + struct net_vrf *vrf = netdev_priv(vrf_dev); + + if (nla_put_u32(skb, IFLA_VRF_PORT_TABLE, vrf->tb_id)) + return -EMSGSIZE; + + return 0; +} + static const struct nla_policy vrf_nl_policy[IFLA_VRF_MAX + 1] = { [IFLA_VRF_TABLE] = { .type = NLA_U32 }, }; @@ -890,6 +907,9 @@ static struct rtnl_link_ops vrf_link_ops __read_mostly = { .validate = vrf_validate, .fill_info = vrf_fillinfo, + .get_slave_size = vrf_get_slave_size, + .fill_slave_info = vrf_fill_slave_info, + .newlink = vrf_newlink, .dellink = vrf_dellink, .setup = vrf_setup, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e6944b29588e..909f7931c297 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -42,7 +42,7 @@ #include <net/netns/generic.h> #include <net/vxlan.h> #include <net/protocol.h> -#include <net/udp_tunnel.h> + #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/addrconf.h> @@ -197,9 +197,9 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, #endif /* Virtual Network hash table head */ -static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id) +static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni) { - return &vs->vni_list[hash_32(id, VNI_HASH_BITS)]; + return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)]; } /* Socket hash table head */ @@ -242,12 +242,16 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, return NULL; } -static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) +static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni) { struct vxlan_dev *vxlan; - hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) { - if (vxlan->default_dst.remote_vni == id) + /* For flow based devices, map all packets to VNI 0 */ + if (vs->flags & VXLAN_F_COLLECT_METADATA) + vni = 0; + + hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) { + if (vxlan->default_dst.remote_vni == vni) return vxlan; } @@ -255,7 +259,7 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) } /* Look up VNI in a per net namespace table */ -static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, +static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni, sa_family_t family, __be16 port, u32 flags) { @@ -265,7 +269,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, if (!vs) return NULL; - return vxlan_vs_find_vni(vs, id); + return vxlan_vs_find_vni(vs, vni); } /* Fill in neighbour message in skbuff. */ @@ -315,7 +319,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, nla_put_be16(skb, NDA_PORT, rdst->remote_port)) goto nla_put_failure; if (rdst->remote_vni != vxlan->default_dst.remote_vni && - nla_put_u32(skb, NDA_VNI, rdst->remote_vni)) + nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni))) goto nla_put_failure; if (rdst->remote_ifindex && nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) @@ -383,7 +387,7 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa) }; struct vxlan_rdst remote = { .remote_ip = *ipa, /* goes to NDA_DST */ - .remote_vni = VXLAN_N_VID, + .remote_vni = cpu_to_be32(VXLAN_N_VID), }; vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH); @@ -452,7 +456,7 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, /* caller should hold vxlan->hash_lock */ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, union vxlan_addr *ip, __be16 port, - __u32 vni, __u32 ifindex) + __be32 vni, __u32 ifindex) { struct vxlan_rdst *rd; @@ -469,7 +473,8 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, /* Replace destination of unicast mac */ static int vxlan_fdb_replace(struct vxlan_fdb *f, - union vxlan_addr *ip, __be16 port, __u32 vni, __u32 ifindex) + union vxlan_addr *ip, __be16 port, __be32 vni, + __u32 ifindex) { struct vxlan_rdst *rd; @@ -480,6 +485,8 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f, rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list); if (!rd) return 0; + + dst_cache_reset(&rd->dst_cache); rd->remote_ip = *ip; rd->remote_port = port; rd->remote_vni = vni; @@ -489,7 +496,7 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f, /* Add/update destinations for multicast */ static int vxlan_fdb_append(struct vxlan_fdb *f, - union vxlan_addr *ip, __be16 port, __u32 vni, + union vxlan_addr *ip, __be16 port, __be32 vni, __u32 ifindex, struct vxlan_rdst **rdp) { struct vxlan_rdst *rd; @@ -501,6 +508,12 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, rd = kmalloc(sizeof(*rd), GFP_ATOMIC); if (rd == NULL) return -ENOBUFS; + + if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) { + kfree(rd); + return -ENOBUFS; + } + rd->remote_ip = *ip; rd->remote_port = port; rd->remote_vni = vni; @@ -515,7 +528,8 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, unsigned int off, struct vxlanhdr *vh, size_t hdrlen, - u32 data, struct gro_remcsum *grc, + __be32 vni_field, + struct gro_remcsum *grc, bool nopartial) { size_t start, offset; @@ -526,10 +540,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, if (!NAPI_GRO_CB(skb)->csum_valid) return NULL; - start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; - offset = start + ((data & VXLAN_RCO_UDP) ? - offsetof(struct udphdr, check) : - offsetof(struct tcphdr, check)); + start = vxlan_rco_start(vni_field); + offset = start + vxlan_rco_offset(vni_field); vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen, start, offset, grc, nopartial); @@ -549,7 +561,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, int flush = 1; struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock, udp_offloads); - u32 flags; + __be32 flags; struct gro_remcsum grc; skb_gro_remcsum_init(&grc); @@ -565,11 +577,11 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); - flags = ntohl(vh->vx_flags); + flags = vh->vx_flags; if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr), - ntohl(vh->vx_vni), &grc, + vh->vx_vni, &grc, !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL)); @@ -660,7 +672,7 @@ static void vxlan_notify_del_rx_port(struct vxlan_sock *vs) static int vxlan_fdb_create(struct vxlan_dev *vxlan, const u8 *mac, union vxlan_addr *ip, __u16 state, __u16 flags, - __be16 port, __u32 vni, __u32 ifindex, + __be16 port, __be32 vni, __u32 ifindex, __u8 ndm_flags) { struct vxlan_rdst *rd = NULL; @@ -749,8 +761,10 @@ static void vxlan_fdb_free(struct rcu_head *head) struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); struct vxlan_rdst *rd, *nd; - list_for_each_entry_safe(rd, nd, &f->remotes, list) + list_for_each_entry_safe(rd, nd, &f->remotes, list) { + dst_cache_destroy(&rd->dst_cache); kfree(rd); + } kfree(f); } @@ -767,7 +781,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) } static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, - union vxlan_addr *ip, __be16 *port, u32 *vni, u32 *ifindex) + union vxlan_addr *ip, __be16 *port, __be32 *vni, + u32 *ifindex) { struct net *net = dev_net(vxlan->dev); int err; @@ -800,7 +815,7 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, if (tb[NDA_VNI]) { if (nla_len(tb[NDA_VNI]) != sizeof(u32)) return -EINVAL; - *vni = nla_get_u32(tb[NDA_VNI]); + *vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI])); } else { *vni = vxlan->default_dst.remote_vni; } @@ -830,7 +845,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* struct net *net = dev_net(vxlan->dev); */ union vxlan_addr ip; __be16 port; - u32 vni, ifindex; + __be32 vni; + u32 ifindex; int err; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { @@ -867,7 +883,8 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct vxlan_rdst *rd = NULL; union vxlan_addr ip; __be16 port; - u32 vni, ifindex; + __be32 vni; + u32 ifindex; int err; err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex); @@ -1122,54 +1139,65 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan) return ret; } -static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, - size_t hdrlen, u32 data, bool nopartial) +static bool vxlan_remcsum(struct vxlanhdr *unparsed, + struct sk_buff *skb, u32 vxflags) { size_t start, offset, plen; - if (skb->remcsum_offload) - return vh; + if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload) + goto out; - start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; - offset = start + ((data & VXLAN_RCO_UDP) ? - offsetof(struct udphdr, check) : - offsetof(struct tcphdr, check)); + start = vxlan_rco_start(unparsed->vx_vni); + offset = start + vxlan_rco_offset(unparsed->vx_vni); - plen = hdrlen + offset + sizeof(u16); + plen = sizeof(struct vxlanhdr) + offset + sizeof(u16); if (!pskb_may_pull(skb, plen)) - return NULL; + return false; - vh = (struct vxlanhdr *)(udp_hdr(skb) + 1); + skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset, + !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL)); +out: + unparsed->vx_flags &= ~VXLAN_HF_RCO; + unparsed->vx_vni &= VXLAN_VNI_MASK; + return true; +} - skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset, - nopartial); +static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, + struct vxlan_metadata *md, + struct metadata_dst *tun_dst) +{ + struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed; - return vh; + if (!(unparsed->vx_flags & VXLAN_HF_GBP)) + goto out; + + md->gbp = ntohs(gbp->policy_id); + + if (tun_dst) + tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; + + if (gbp->dont_learn) + md->gbp |= VXLAN_GBP_DONT_LEARN; + + if (gbp->policy_applied) + md->gbp |= VXLAN_GBP_POLICY_APPLIED; + +out: + unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS; } -static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, - struct vxlan_metadata *md, u32 vni, +static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs, + struct sk_buff *skb, struct vxlan_metadata *md, struct metadata_dst *tun_dst) { struct iphdr *oip = NULL; struct ipv6hdr *oip6 = NULL; - struct vxlan_dev *vxlan; struct pcpu_sw_netstats *stats; union vxlan_addr saddr; int err = 0; - /* For flow based devices, map all packets to VNI 0 */ - if (vs->flags & VXLAN_F_COLLECT_METADATA) - vni = 0; - - /* Is this VNI defined? */ - vxlan = vxlan_vs_find_vni(vs, vni); - if (!vxlan) - goto drop; - skb_reset_mac_header(skb); - skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev))); skb->protocol = eth_type_trans(skb, vxlan->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -1246,48 +1274,45 @@ drop: static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct metadata_dst *tun_dst = NULL; + struct vxlan_dev *vxlan; struct vxlan_sock *vs; - struct vxlanhdr *vxh; - u32 flags, vni; + struct vxlanhdr unparsed; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) - goto error; - - vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); - flags = ntohl(vxh->vx_flags); - vni = ntohl(vxh->vx_vni); + return 1; - if (flags & VXLAN_HF_VNI) { - flags &= ~VXLAN_HF_VNI; - } else { - /* VNI flag always required to be set */ - goto bad_flags; + unparsed = *vxlan_hdr(skb); + /* VNI flag always required to be set */ + if (!(unparsed.vx_flags & VXLAN_HF_VNI)) { + netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", + ntohl(vxlan_hdr(skb)->vx_flags), + ntohl(vxlan_hdr(skb)->vx_vni)); + /* Return non vxlan pkt */ + return 1; } - - if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) - goto drop; - vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); + unparsed.vx_flags &= ~VXLAN_HF_VNI; + unparsed.vx_vni &= ~VXLAN_VNI_MASK; vs = rcu_dereference_sk_user_data(sk); if (!vs) goto drop; - if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { - vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni, - !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL)); - if (!vxh) - goto drop; + vxlan = vxlan_vs_find_vni(vs, vxlan_vni(vxlan_hdr(skb)->vx_vni)); + if (!vxlan) + goto drop; - flags &= ~VXLAN_HF_RCO; - vni &= VXLAN_VNI_MASK; - } + if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB), + !net_eq(vxlan->net, dev_net(vxlan->dev)))) + goto drop; if (vxlan_collect_metadata(vs)) { + __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); + tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, - cpu_to_be64(vni >> 8), sizeof(*md)); + vxlan_vni_to_tun_id(vni), sizeof(*md)); if (!tun_dst) goto drop; @@ -1300,25 +1325,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) /* For backwards compatibility, only allow reserved fields to be * used by VXLAN extensions if explicitly requested. */ - if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { - struct vxlanhdr_gbp *gbp; - - gbp = (struct vxlanhdr_gbp *)vxh; - md->gbp = ntohs(gbp->policy_id); - - if (tun_dst) - tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; - - if (gbp->dont_learn) - md->gbp |= VXLAN_GBP_DONT_LEARN; - - if (gbp->policy_applied) - md->gbp |= VXLAN_GBP_POLICY_APPLIED; - - flags &= ~VXLAN_GBP_USED_BITS; - } + if (vs->flags & VXLAN_F_REMCSUM_RX) + if (!vxlan_remcsum(&unparsed, skb, vs->flags)) + goto drop; + if (vs->flags & VXLAN_F_GBP) + vxlan_parse_gbp_hdr(&unparsed, md, tun_dst); - if (flags || vni & ~VXLAN_VNI_MASK) { + if (unparsed.vx_flags || unparsed.vx_vni) { /* If there are any unprocessed flags remaining treat * this as a malformed packet. This behavior diverges from * VXLAN RFC (RFC7348) which stipulates that bits in reserved @@ -1327,28 +1340,19 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) * is more robust and provides a little more security in * adding extensions to VXLAN. */ - - goto bad_flags; + goto drop; } - vxlan_rcv(vs, skb, md, vni >> 8, tun_dst); + vxlan_rcv(vxlan, vs, skb, md, tun_dst); return 0; drop: - /* Consume bad packet */ - kfree_skb(skb); - return 0; - -bad_flags: - netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", - ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); - -error: if (tun_dst) dst_release((struct dst_entry *)tun_dst); - /* Return non vxlan pkt */ - return 1; + /* Consume bad packet */ + kfree_skb(skb); + return 0; } static int arp_reduce(struct net_device *dev, struct sk_buff *skb) @@ -1673,7 +1677,7 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, return; gbp = (struct vxlanhdr_gbp *)vxh; - vxh->vx_flags |= htonl(VXLAN_HF_GBP); + vxh->vx_flags |= VXLAN_HF_GBP; if (md->gbp & VXLAN_GBP_DONT_LEARN) gbp->dont_learn = 1; @@ -1684,20 +1688,15 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); } -#if IS_ENABLED(CONFIG_IPV6) -static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, - struct net_device *dev, struct in6_addr *saddr, - struct in6_addr *daddr, __u8 prio, __u8 ttl, - __be16 src_port, __be16 dst_port, __be32 vni, - struct vxlan_metadata *md, bool xnet, u32 vxflags) +static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, + int iphdr_len, __be32 vni, + struct vxlan_metadata *md, u32 vxflags, + bool udp_sum) { struct vxlanhdr *vxh; int min_headroom; int err; - bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX); int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; - u16 hdrlen = sizeof(struct vxlanhdr); if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { @@ -1706,50 +1705,39 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, if (csum_start <= VXLAN_MAX_REMCSUM_START && !(csum_start & VXLAN_RCO_SHIFT_MASK) && (skb->csum_offset == offsetof(struct udphdr, check) || - skb->csum_offset == offsetof(struct tcphdr, check))) { - udp_sum = false; + skb->csum_offset == offsetof(struct tcphdr, check))) type |= SKB_GSO_TUNNEL_REMCSUM; - } } - skb_scrub_packet(skb, xnet); - min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len - + VXLAN_HLEN + sizeof(struct ipv6hdr) + + VXLAN_HLEN + iphdr_len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); /* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); if (unlikely(err)) { kfree_skb(skb); - goto err; + return err; } skb = vlan_hwaccel_push_inside(skb); - if (WARN_ON(!skb)) { - err = -ENOMEM; - goto err; - } + if (WARN_ON(!skb)) + return -ENOMEM; - skb = iptunnel_handle_offloads(skb, udp_sum, type); - if (IS_ERR(skb)) { - err = -EINVAL; - goto err; - } + skb = iptunnel_handle_offloads(skb, type); + if (IS_ERR(skb)) + return PTR_ERR(skb); vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); - vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(vni); if (type & SKB_GSO_TUNNEL_REMCSUM) { - u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> - VXLAN_RCO_SHIFT; + unsigned int start; - if (skb->csum_offset == offsetof(struct udphdr, check)) - data |= VXLAN_RCO_UDP; - - vxh->vx_vni |= htonl(data); - vxh->vx_flags |= htonl(VXLAN_HF_RCO); + start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr); + vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset); + vxh->vx_flags |= VXLAN_HF_RCO; if (!skb_is_gso(skb)) { skb->ip_summed = CHECKSUM_NONE; @@ -1761,102 +1749,63 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, vxlan_build_gbp_hdr(vxh, vxflags, md); skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - - udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio, - ttl, src_port, dst_port, - !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX)); return 0; -err: - dst_release(dst); - return err; } -#endif -static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni, - struct vxlan_metadata *md, bool xnet, u32 vxflags) +static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, + struct sk_buff *skb, int oif, u8 tos, + __be32 daddr, __be32 *saddr, + struct dst_cache *dst_cache, + struct ip_tunnel_info *info) { - struct vxlanhdr *vxh; - int min_headroom; - int err; - bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); - int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; - u16 hdrlen = sizeof(struct vxlanhdr); - - if ((vxflags & VXLAN_F_REMCSUM_TX) && - skb->ip_summed == CHECKSUM_PARTIAL) { - int csum_start = skb_checksum_start_offset(skb); - - if (csum_start <= VXLAN_MAX_REMCSUM_START && - !(csum_start & VXLAN_RCO_SHIFT_MASK) && - (skb->csum_offset == offsetof(struct udphdr, check) || - skb->csum_offset == offsetof(struct tcphdr, check))) { - udp_sum = false; - type |= SKB_GSO_TUNNEL_REMCSUM; - } - } - - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len - + VXLAN_HLEN + sizeof(struct iphdr) - + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); + struct rtable *rt = NULL; + bool use_cache = false; + struct flowi4 fl4; - /* Need space for new headers (invalidates iph ptr) */ - err = skb_cow_head(skb, min_headroom); - if (unlikely(err)) { - kfree_skb(skb); - return err; + /* when the ip_tunnel_info is availble, the tos used for lookup is + * packet independent, so we can use the cache + */ + if (!skb->mark && (!tos || info)) { + use_cache = true; + rt = dst_cache_get_ip4(dst_cache, saddr); + if (rt) + return rt; } - skb = vlan_hwaccel_push_inside(skb); - if (WARN_ON(!skb)) - return -ENOMEM; - - skb = iptunnel_handle_offloads(skb, udp_sum, type); - if (IS_ERR(skb)) - return PTR_ERR(skb); - - vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); - vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; - - if (type & SKB_GSO_TUNNEL_REMCSUM) { - u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> - VXLAN_RCO_SHIFT; - - if (skb->csum_offset == offsetof(struct udphdr, check)) - data |= VXLAN_RCO_UDP; - - vxh->vx_vni |= htonl(data); - vxh->vx_flags |= htonl(VXLAN_HF_RCO); + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_oif = oif; + fl4.flowi4_tos = RT_TOS(tos); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; + fl4.daddr = daddr; + fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr; - if (!skb_is_gso(skb)) { - skb->ip_summed = CHECKSUM_NONE; - skb->encapsulation = 0; - } + rt = ip_route_output_key(vxlan->net, &fl4); + if (!IS_ERR(rt)) { + *saddr = fl4.saddr; + if (use_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); } - - if (vxflags & VXLAN_F_GBP) - vxlan_build_gbp_hdr(vxh, vxflags, md); - - skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - - udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df, - src_port, dst_port, xnet, - !(vxflags & VXLAN_F_UDP_CSUM)); - return 0; + return rt; } #if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, struct sk_buff *skb, int oif, const struct in6_addr *daddr, - struct in6_addr *saddr) + struct in6_addr *saddr, + struct dst_cache *dst_cache) { struct dst_entry *ndst; struct flowi6 fl6; int err; + if (!skb->mark) { + ndst = dst_cache_get_ip6(dst_cache, saddr); + if (ndst) + return ndst; + } + memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.daddr = *daddr; @@ -1871,6 +1820,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, return ERR_PTR(err); *saddr = fl6.saddr; + if (!skb->mark) + dst_cache_set_ip6(dst_cache, ndst, saddr); return ndst; } #endif @@ -1923,22 +1874,24 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_rdst *rdst, bool did_rsc) { + struct dst_cache *dst_cache; struct ip_tunnel_info *info; struct vxlan_dev *vxlan = netdev_priv(dev); struct sock *sk; struct rtable *rt = NULL; const struct iphdr *old_iph; - struct flowi4 fl4; union vxlan_addr *dst; union vxlan_addr remote_ip; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 src_port = 0, dst_port; - u32 vni; + __be32 vni; __be16 df = 0; __u8 tos, ttl; int err; u32 flags = vxlan->flags; + bool udp_sum = false; + bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev)); info = skb_tunnel_info(skb); @@ -1946,6 +1899,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = rdst->remote_vni; dst = &rdst->remote_ip; + dst_cache = &rdst->dst_cache; } else { if (!info) { WARN_ONCE(1, "%s: Missing encapsulation instructions\n", @@ -1953,13 +1907,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto drop; } dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; - vni = be64_to_cpu(info->key.tun_id); + vni = vxlan_tun_id_to_vni(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); if (remote_ip.sa.sa_family == AF_INET) remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; else remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst; dst = &remote_ip; + dst_cache = &info->dst_cache; } if (vxlan_addr_any(dst)) { @@ -1987,6 +1942,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (info) { ttl = info->key.ttl; tos = info->key.tos; + udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); if (info->options_len) md = ip_tunnel_info_opts(info); @@ -1995,29 +1951,16 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (dst->sa.sa_family == AF_INET) { + __be32 saddr; + if (!vxlan->vn4_sock) goto drop; sk = vxlan->vn4_sock->sock->sk; - if (info) { - if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) - df = htons(IP_DF); - - if (info->key.tun_flags & TUNNEL_CSUM) - flags |= VXLAN_F_UDP_CSUM; - else - flags &= ~VXLAN_F_UDP_CSUM; - } - - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0; - fl4.flowi4_tos = RT_TOS(tos); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = IPPROTO_UDP; - fl4.daddr = dst->sin.sin_addr.s_addr; - fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr; - - rt = ip_route_output_key(vxlan->net, &fl4); + rt = vxlan_get_route(vxlan, skb, + rdst ? rdst->remote_ifindex : 0, tos, + dst->sin.sin_addr.s_addr, &saddr, + dst_cache, info); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &dst->sin.sin_addr.s_addr); @@ -2047,18 +1990,21 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; } + if (!info) + udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX); + else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) + df = htons(IP_DF); + tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); - err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr, - dst->sin.sin_addr.s_addr, tos, ttl, df, - src_port, dst_port, htonl(vni << 8), md, - !net_eq(vxlan->net, dev_net(vxlan->dev)), - flags); - if (err < 0) { - /* skb is already freed. */ - skb = NULL; - goto rt_tx_error; - } + err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr), + vni, md, flags, udp_sum); + if (err < 0) + goto xmit_tx_error; + + udp_tunnel_xmit_skb(rt, sk, skb, saddr, + dst->sin.sin_addr.s_addr, tos, ttl, df, + src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; @@ -2071,7 +2017,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, - &dst->sin6.sin6_addr, &saddr); + &dst->sin6.sin6_addr, &saddr, + dst_cache); if (IS_ERR(ndst)) { netdev_dbg(dev, "no route to %pI6\n", &dst->sin6.sin6_addr); @@ -2103,18 +2050,20 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; } - if (info) { - if (info->key.tun_flags & TUNNEL_CSUM) - flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX; - else - flags |= VXLAN_F_UDP_ZERO_CSUM6_TX; - } + if (!info) + udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); ttl = ttl ? : ip6_dst_hoplimit(ndst); - err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr, - 0, ttl, src_port, dst_port, htonl(vni << 8), md, - !net_eq(vxlan->net, dev_net(vxlan->dev)), - flags); + skb_scrub_packet(skb, xnet); + err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), + vni, md, flags, udp_sum); + if (err < 0) { + dst_release(ndst); + return; + } + udp_tunnel6_xmit_skb(ndst, sk, skb, dev, + &saddr, &dst->sin6.sin6_addr, + 0, ttl, src_port, dst_port, !udp_sum); #endif } @@ -2124,6 +2073,9 @@ drop: dev->stats.tx_dropped++; goto tx_free; +xmit_tx_error: + /* skb is already freed. */ + skb = NULL; rt_tx_error: ip_rt_put(rt); tx_error: @@ -2263,7 +2215,7 @@ static void vxlan_cleanup(unsigned long arg) static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); - __u32 vni = vxlan->default_dst.remote_vni; + __be32 vni = vxlan->default_dst.remote_vni; spin_lock(&vn->sock_lock); hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni)); @@ -2406,31 +2358,6 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu) return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true); } -static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb, - struct ip_tunnel_info *info, - __be16 sport, __be16 dport) -{ - struct vxlan_dev *vxlan = netdev_priv(dev); - struct rtable *rt; - struct flowi4 fl4; - - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_tos = RT_TOS(info->key.tos); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = IPPROTO_UDP; - fl4.daddr = info->key.u.ipv4.dst; - - rt = ip_route_output_key(vxlan->net, &fl4); - if (IS_ERR(rt)) - return PTR_ERR(rt); - ip_rt_put(rt); - - info->key.u.ipv4.src = fl4.saddr; - info->key.tp_src = sport; - info->key.tp_dst = dport; - return 0; -} - static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct vxlan_dev *vxlan = netdev_priv(dev); @@ -2442,9 +2369,16 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) dport = info->key.tp_dst ? : vxlan->cfg.dst_port; if (ip_tunnel_info_af(info) == AF_INET) { + struct rtable *rt; + if (!vxlan->vn4_sock) return -EINVAL; - return egress_ipv4_tun_info(dev, skb, info, sport, dport); + rt = vxlan_get_route(vxlan, skb, 0, info->key.tos, + info->key.u.ipv4.dst, + &info->key.u.ipv4.src, NULL, info); + if (IS_ERR(rt)) + return PTR_ERR(rt); + ip_rt_put(rt); } else { #if IS_ENABLED(CONFIG_IPV6) struct dst_entry *ndst; @@ -2453,17 +2387,16 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) return -EINVAL; ndst = vxlan6_get_route(vxlan, skb, 0, &info->key.u.ipv6.dst, - &info->key.u.ipv6.src); + &info->key.u.ipv6.src, NULL); if (IS_ERR(ndst)) return PTR_ERR(ndst); dst_release(ndst); - - info->key.tp_src = sport; - info->key.tp_dst = dport; #else /* !CONFIG_IPV6 */ return -EPFNOSUPPORT; #endif } + info->key.tp_src = sport; + info->key.tp_dst = dport; return 0; } @@ -2917,7 +2850,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, memset(&conf, 0, sizeof(conf)); if (data[IFLA_VXLAN_ID]) - conf.vni = nla_get_u32(data[IFLA_VXLAN_ID]); + conf.vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID])); if (data[IFLA_VXLAN_GROUP]) { conf.remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]); @@ -2985,8 +2918,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_PORT]) conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]); - if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM])) - conf.flags |= VXLAN_F_UDP_CSUM; + if (data[IFLA_VXLAN_UDP_CSUM] && + !nla_get_u8(data[IFLA_VXLAN_UDP_CSUM])) + conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX; if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] && nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX])) @@ -3021,7 +2955,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, break; case -EEXIST: - pr_info("duplicate VNI %u\n", conf.vni); + pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni)); break; } @@ -3079,7 +3013,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) .high = htons(vxlan->cfg.port_max), }; - if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni)) + if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni))) goto nla_put_failure; if (!vxlan_addr_any(&dst->remote_ip)) { @@ -3130,7 +3064,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) || nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) || nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM, - !!(vxlan->flags & VXLAN_F_UDP_CSUM)) || + !(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) || nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) || nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index ec013fbd6a81..c279211e49f9 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -1215,10 +1215,10 @@ void b43_wireless_core_phy_pll_reset(struct b43_wldev *dev) case B43_BUS_BCMA: bcma_cc = &dev->dev->bdev->bus->drv_cc; - bcma_cc_write32(bcma_cc, BCMA_CC_CHIPCTL_ADDR, 0); - bcma_cc_mask32(bcma_cc, BCMA_CC_CHIPCTL_DATA, ~0x4); - bcma_cc_set32(bcma_cc, BCMA_CC_CHIPCTL_DATA, 0x4); - bcma_cc_mask32(bcma_cc, BCMA_CC_CHIPCTL_DATA, ~0x4); + bcma_cc_write32(bcma_cc, BCMA_CC_PMU_CHIPCTL_ADDR, 0); + bcma_cc_mask32(bcma_cc, BCMA_CC_PMU_CHIPCTL_DATA, ~0x4); + bcma_cc_set32(bcma_cc, BCMA_CC_PMU_CHIPCTL_DATA, 0x4); + bcma_cc_mask32(bcma_cc, BCMA_CC_PMU_CHIPCTL_DATA, ~0x4); break; #endif #ifdef CONFIG_B43_SSB diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 7b01e4ddb315..d00c5c1d58bf 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -247,7 +247,7 @@ static u16 chandef_to_chanspec(struct brcmu_d11inf *d11inf, brcmf_dbg(TRACE, "chandef: control %d center %d width %d\n", ch->chan->center_freq, ch->center_freq1, ch->width); ch_inf.chnum = ieee80211_frequency_to_channel(ch->center_freq1); - primary_offset = ch->center_freq1 - ch->chan->center_freq; + primary_offset = ch->chan->center_freq - ch->center_freq1; switch (ch->width) { case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: @@ -256,24 +256,21 @@ static u16 chandef_to_chanspec(struct brcmu_d11inf *d11inf, break; case NL80211_CHAN_WIDTH_40: ch_inf.bw = BRCMU_CHAN_BW_40; - if (primary_offset < 0) + if (primary_offset > 0) ch_inf.sb = BRCMU_CHAN_SB_U; else ch_inf.sb = BRCMU_CHAN_SB_L; break; case NL80211_CHAN_WIDTH_80: ch_inf.bw = BRCMU_CHAN_BW_80; - if (primary_offset < 0) { - if (primary_offset < -CH_10MHZ_APART) - ch_inf.sb = BRCMU_CHAN_SB_UU; - else - ch_inf.sb = BRCMU_CHAN_SB_UL; - } else { - if (primary_offset > CH_10MHZ_APART) - ch_inf.sb = BRCMU_CHAN_SB_LL; - else - ch_inf.sb = BRCMU_CHAN_SB_LU; - } + if (primary_offset == -30) + ch_inf.sb = BRCMU_CHAN_SB_LL; + else if (primary_offset == -10) + ch_inf.sb = BRCMU_CHAN_SB_LU; + else if (primary_offset == 10) + ch_inf.sb = BRCMU_CHAN_SB_UL; + else + ch_inf.sb = BRCMU_CHAN_SB_UU; break; case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_160: diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c index 82e4382eb177..0e8f2a079907 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c @@ -803,7 +803,14 @@ static int brcmf_chip_dmp_get_regaddr(struct brcmf_chip_priv *ci, u32 *eromaddr, *eromaddr -= 4; return -EFAULT; } - } while (desc != DMP_DESC_ADDRESS); + } while (desc != DMP_DESC_ADDRESS && + desc != DMP_DESC_COMPONENT); + + /* stop if we crossed current component border */ + if (desc == DMP_DESC_COMPONENT) { + *eromaddr -= 4; + return 0; + } /* skip upper 32-bit address descriptor */ if (val & DMP_DESC_ADDRSIZE_GT32) @@ -876,7 +883,8 @@ int brcmf_chip_dmp_erom_scan(struct brcmf_chip_priv *ci) rev = (val & DMP_COMP_REVISION) >> DMP_COMP_REVISION_S; /* need core with ports */ - if (nmw + nsw == 0) + if (nmw + nsw == 0 && + id != BCMA_CORE_PMU) continue; /* try to obtain register address info */ @@ -1006,6 +1014,7 @@ static int brcmf_chip_setup(struct brcmf_chip_priv *chip) { struct brcmf_chip *pub; struct brcmf_core_priv *cc; + struct brcmf_core *pmu; u32 base; u32 val; int ret = 0; @@ -1017,11 +1026,15 @@ static int brcmf_chip_setup(struct brcmf_chip_priv *chip) /* get chipcommon capabilites */ pub->cc_caps = chip->ops->read32(chip->ctx, CORE_CC_REG(base, capabilities)); + pub->cc_caps_ext = chip->ops->read32(chip->ctx, + CORE_CC_REG(base, + capabilities_ext)); /* get pmu caps & rev */ + pmu = brcmf_chip_get_pmu(pub); /* after reading cc_caps_ext */ if (pub->cc_caps & CC_CAP_PMU) { val = chip->ops->read32(chip->ctx, - CORE_CC_REG(base, pmucapabilities)); + CORE_CC_REG(pmu->base, pmucapabilities)); pub->pmurev = val & PCAP_REV_MASK; pub->pmucaps = val; } @@ -1120,6 +1133,23 @@ struct brcmf_core *brcmf_chip_get_chipcommon(struct brcmf_chip *pub) return &cc->pub; } +struct brcmf_core *brcmf_chip_get_pmu(struct brcmf_chip *pub) +{ + struct brcmf_core *cc = brcmf_chip_get_chipcommon(pub); + struct brcmf_core *pmu; + + /* See if there is separated PMU core available */ + if (cc->rev >= 35 && + pub->cc_caps_ext & BCMA_CC_CAP_EXT_AOB_PRESENT) { + pmu = brcmf_chip_get_core(pub, BCMA_CORE_PMU); + if (pmu) + return pmu; + } + + /* Fallback to ChipCommon core for older hardware */ + return cc; +} + bool brcmf_chip_iscoreup(struct brcmf_core *pub) { struct brcmf_core_priv *core; @@ -1290,6 +1320,7 @@ bool brcmf_chip_sr_capable(struct brcmf_chip *pub) { u32 base, addr, reg, pmu_cc3_mask = ~0; struct brcmf_chip_priv *chip; + struct brcmf_core *pmu = brcmf_chip_get_pmu(pub); brcmf_dbg(TRACE, "Enter\n"); @@ -1309,9 +1340,9 @@ bool brcmf_chip_sr_capable(struct brcmf_chip *pub) case BRCM_CC_4335_CHIP_ID: case BRCM_CC_4339_CHIP_ID: /* read PMU chipcontrol register 3 */ - addr = CORE_CC_REG(base, chipcontrol_addr); + addr = CORE_CC_REG(pmu->base, chipcontrol_addr); chip->ops->write32(chip->ctx, addr, 3); - addr = CORE_CC_REG(base, chipcontrol_data); + addr = CORE_CC_REG(pmu->base, chipcontrol_data); reg = chip->ops->read32(chip->ctx, addr); return (reg & pmu_cc3_mask) != 0; case BRCM_CC_43430_CHIP_ID: @@ -1319,12 +1350,12 @@ bool brcmf_chip_sr_capable(struct brcmf_chip *pub) reg = chip->ops->read32(chip->ctx, addr); return reg != 0; default: - addr = CORE_CC_REG(base, pmucapabilities_ext); + addr = CORE_CC_REG(pmu->base, pmucapabilities_ext); reg = chip->ops->read32(chip->ctx, addr); if ((reg & PCAPEXT_SR_SUPPORTED_MASK) == 0) return false; - addr = CORE_CC_REG(base, retention_ctl); + addr = CORE_CC_REG(pmu->base, retention_ctl); reg = chip->ops->read32(chip->ctx, addr); return (reg & (PMU_RCTL_MACPHY_DISABLE_MASK | PMU_RCTL_LOGIC_DISABLE_MASK)) == 0; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h index f6b5feea23d2..dd0ec3eba6a9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h @@ -27,6 +27,7 @@ * @chip: chip identifier. * @chiprev: chip revision. * @cc_caps: chipcommon core capabilities. + * @cc_caps_ext: chipcommon core extended capabilities. * @pmucaps: PMU capabilities. * @pmurev: PMU revision. * @rambase: RAM base address (only applicable for ARM CR4 chips). @@ -38,6 +39,7 @@ struct brcmf_chip { u32 chip; u32 chiprev; u32 cc_caps; + u32 cc_caps_ext; u32 pmucaps; u32 pmurev; u32 rambase; @@ -83,6 +85,7 @@ struct brcmf_chip *brcmf_chip_attach(void *ctx, void brcmf_chip_detach(struct brcmf_chip *chip); struct brcmf_core *brcmf_chip_get_core(struct brcmf_chip *chip, u16 coreid); struct brcmf_core *brcmf_chip_get_chipcommon(struct brcmf_chip *chip); +struct brcmf_core *brcmf_chip_get_pmu(struct brcmf_chip *pub); bool brcmf_chip_iscoreup(struct brcmf_core *core); void brcmf_chip_coredisable(struct brcmf_core *core, u32 prereset, u32 reset); void brcmf_chip_resetcore(struct brcmf_core *core, u32 prereset, u32 reset, diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c index 1365c12b78fc..7269056d0044 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -93,7 +93,7 @@ static enum nvram_parser_state brcmf_nvram_handle_idle(struct nvram_parser *nvp) c = nvp->data[nvp->pos]; if (c == '\n') return COMMENT; - if (is_whitespace(c)) + if (is_whitespace(c) || c == '\0') goto proceed; if (c == '#') return COMMENT; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index 0480b70e3eb8..d5f9ef470447 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -1951,6 +1951,9 @@ static const struct dev_pm_ops brcmf_pciedrvr_pm = { #define BRCMF_PCIE_DEVICE(dev_id) { BRCM_PCIE_VENDOR_ID_BROADCOM, dev_id,\ PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_OTHER << 8, 0xffff00, 0 } +#define BRCMF_PCIE_DEVICE_SUB(dev_id, subvend, subdev) { \ + BRCM_PCIE_VENDOR_ID_BROADCOM, dev_id,\ + subvend, subdev, PCI_CLASS_NETWORK_OTHER << 8, 0xffff00, 0 } static struct pci_device_id brcmf_pcie_devid_table[] = { BRCMF_PCIE_DEVICE(BRCM_PCIE_4350_DEVICE_ID), @@ -1966,6 +1969,7 @@ static struct pci_device_id brcmf_pcie_devid_table[] = { BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_DEVICE_ID), BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_2G_DEVICE_ID), BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_5G_DEVICE_ID), + BRCMF_PCIE_DEVICE_SUB(0x4365, BRCM_PCIE_VENDOR_ID_BROADCOM, 0x4365), BRCMF_PCIE_DEVICE(BRCM_PCIE_4366_DEVICE_ID), BRCMF_PCIE_DEVICE(BRCM_PCIE_4366_2G_DEVICE_ID), BRCMF_PCIE_DEVICE(BRCM_PCIE_4366_5G_DEVICE_ID), diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index a14d9d9da094..c790fa89db05 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -45,8 +45,8 @@ #include "chip.h" #include "firmware.h" -#define DCMD_RESP_TIMEOUT msecs_to_jiffies(2000) -#define CTL_DONE_TIMEOUT msecs_to_jiffies(2000) +#define DCMD_RESP_TIMEOUT msecs_to_jiffies(2500) +#define CTL_DONE_TIMEOUT msecs_to_jiffies(2500) #ifdef DEBUG @@ -3615,7 +3615,6 @@ brcmf_sdio_drivestrengthinit(struct brcmf_sdio_dev *sdiodev, const struct sdiod_drive_str *str_tab = NULL; u32 str_mask; u32 str_shift; - u32 base; u32 i; u32 drivestrength_sel = 0; u32 cc_data_temp; @@ -3658,14 +3657,15 @@ brcmf_sdio_drivestrengthinit(struct brcmf_sdio_dev *sdiodev, } if (str_tab != NULL) { + struct brcmf_core *pmu = brcmf_chip_get_pmu(ci); + for (i = 0; str_tab[i].strength != 0; i++) { if (drivestrength >= str_tab[i].strength) { drivestrength_sel = str_tab[i].sel; break; } } - base = brcmf_chip_get_chipcommon(ci)->base; - addr = CORE_CC_REG(base, chipcontrol_addr); + addr = CORE_CC_REG(pmu->base, chipcontrol_addr); brcmf_sdiod_regwl(sdiodev, addr, 1, NULL); cc_data_temp = brcmf_sdiod_regrl(sdiodev, addr, NULL); cc_data_temp &= ~str_mask; @@ -3835,8 +3835,7 @@ brcmf_sdio_probe_attach(struct brcmf_sdio *bus) goto fail; /* set PMUControl so a backplane reset does PMU state reload */ - reg_addr = CORE_CC_REG(brcmf_chip_get_chipcommon(bus->ci)->base, - pmucontrol); + reg_addr = CORE_CC_REG(brcmf_chip_get_pmu(bus->ci)->base, pmucontrol); reg_val = brcmf_sdiod_regrl(bus->sdiodev, reg_addr, &err); if (err) goto fail; diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig index 7438fbeef744..16c4f383488f 100644 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@ -98,6 +98,18 @@ config IWLWIFI_UAPSD If unsure, say N. +config IWLWIFI_PCIE_RTPM + bool "Enable runtime power management mode for PCIe devices" + depends on IWLMVM && PM + default false + help + Say Y here to enable runtime power management for PCIe + devices. If enabled, the device will go into low power mode + when idle for a short period of time, allowing for improved + power saving during runtime. + + If unsure, say N. + menu "Debugging Options" config IWLWIFI_DEBUG diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/led.c b/drivers/net/wireless/intel/iwlwifi/dvm/led.c index 1aabb5ec096f..1bbd17ada974 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/led.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/led.c @@ -152,11 +152,14 @@ static void iwl_led_brightness_set(struct led_classdev *led_cdev, { struct iwl_priv *priv = container_of(led_cdev, struct iwl_priv, led); unsigned long on = 0; + unsigned long off = 0; if (brightness > 0) on = IWL_LED_SOLID; + else + off = IWL_LED_SOLID; - iwl_led_cmd(priv, on, 0); + iwl_led_cmd(priv, on, off); } static int iwl_led_blink_set(struct led_classdev *led_cdev, diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c index 29ea1c6705b4..4db4cb7aa73a 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c @@ -396,7 +396,7 @@ static int iwlagn_mac_suspend(struct ieee80211_hw *hw, iwl_write32(priv->trans, CSR_UCODE_DRV_GP1_SET, CSR_UCODE_DRV_GP1_BIT_D3_CFG_COMPLETE); - iwl_trans_d3_suspend(priv->trans, false); + iwl_trans_d3_suspend(priv->trans, false, true); goto out; @@ -469,7 +469,7 @@ static int iwlagn_mac_resume(struct ieee80211_hw *hw) /* we'll clear ctx->vif during iwlagn_prepare_restart() */ vif = ctx->vif; - ret = iwl_trans_d3_resume(priv->trans, &d3_status, false); + ret = iwl_trans_d3_resume(priv->trans, &d3_status, false, true); if (ret) goto out_unlock; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index ecbf4822cd69..4b93404f46a7 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -138,7 +138,8 @@ static const struct iwl_tt_params iwl9000_tt_params = { .smem_offset = IWL9000_SMEM_OFFSET, \ .smem_len = IWL9000_SMEM_LEN, \ .thermal_params = &iwl9000_tt_params, \ - .apmg_not_supported = true + .apmg_not_supported = true, \ + .mq_rx_supported = true const struct iwl_cfg iwl9260_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9260", diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index f99048135fb9..dad5570d6cc8 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -311,6 +311,7 @@ struct iwl_pwr_tx_backoff { * @dccm2_len: length of the second DCCM * @smem_offset: offset from which the SMEM begins * @smem_len: the length of SMEM + * @mq_rx_supported: multi-queue rx support * * We enable the driver to be backward compatible wrt. hardware features. * API differences in uCode shouldn't be handled here but through TLVs @@ -362,6 +363,7 @@ struct iwl_cfg { const u32 smem_len; const struct iwl_tt_params *thermal_params; bool apmg_not_supported; + bool mq_rx_supported; }; /* diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h index 5cc6be927eab..4ab6682ea53e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h @@ -6,6 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,6 +32,7 @@ * BSD LICENSE * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -312,6 +314,77 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(unsigned int chnl) #define FH_MEM_TFDIB_REG1_ADDR_BITSHIFT 28 #define FH_MEM_TB_MAX_LENGTH (0x00020000) +/* 9000 rx series registers */ + +#define RFH_Q0_FRBDCB_BA_LSB 0xA08000 /* 64 bit address */ +#define RFH_Q_FRBDCB_BA_LSB(q) (RFH_Q0_FRBDCB_BA_LSB + (q) * 8) +/* Write index table */ +#define RFH_Q0_FRBDCB_WIDX 0xA08080 +#define RFH_Q_FRBDCB_WIDX(q) (RFH_Q0_FRBDCB_WIDX + (q) * 4) +/* Read index table */ +#define RFH_Q0_FRBDCB_RIDX 0xA080C0 +#define RFH_Q_FRBDCB_RIDX(q) (RFH_Q0_FRBDCB_RIDX + (q) * 4) +/* Used list table */ +#define RFH_Q0_URBDCB_BA_LSB 0xA08100 /* 64 bit address */ +#define RFH_Q_URBDCB_BA_LSB(q) (RFH_Q0_URBDCB_BA_LSB + (q) * 8) +/* Write index table */ +#define RFH_Q0_URBDCB_WIDX 0xA08180 +#define RFH_Q_URBDCB_WIDX(q) (RFH_Q0_URBDCB_WIDX + (q) * 4) +#define RFH_Q0_URBDCB_VAID 0xA081C0 +#define RFH_Q_URBDCB_VAID(q) (RFH_Q0_URBDCB_VAID + (q) * 4) +/* stts */ +#define RFH_Q0_URBD_STTS_WPTR_LSB 0xA08200 /*64 bits address */ +#define RFH_Q_URBD_STTS_WPTR_LSB(q) (RFH_Q0_URBD_STTS_WPTR_LSB + (q) * 8) + +#define RFH_Q0_ORB_WPTR_LSB 0xA08280 +#define RFH_Q_ORB_WPTR_LSB(q) (RFH_Q0_ORB_WPTR_LSB + (q) * 8) +#define RFH_RBDBUF_RBD0_LSB 0xA08300 +#define RFH_RBDBUF_RBD_LSB(q) (RFH_RBDBUF_RBD0_LSB + (q) * 8) + +/* DMA configuration */ +#define RFH_RXF_DMA_CFG 0xA09820 +/* RB size */ +#define RFH_RXF_DMA_RB_SIZE_MASK (0x000F0000) /* bits 16-19 */ +#define RFH_RXF_DMA_RB_SIZE_POS 16 +#define RFH_RXF_DMA_RB_SIZE_1K (0x1 << RFH_RXF_DMA_RB_SIZE_POS) +#define RFH_RXF_DMA_RB_SIZE_2K (0x2 << RFH_RXF_DMA_RB_SIZE_POS) +#define RFH_RXF_DMA_RB_SIZE_4K (0x4 << RFH_RXF_DMA_RB_SIZE_POS) +#define RFH_RXF_DMA_RB_SIZE_8K (0x8 << RFH_RXF_DMA_RB_SIZE_POS) +#define RFH_RXF_DMA_RB_SIZE_12K (0x9 << RFH_RXF_DMA_RB_SIZE_POS) +#define RFH_RXF_DMA_RB_SIZE_16K (0xA << RFH_RXF_DMA_RB_SIZE_POS) +#define RFH_RXF_DMA_RB_SIZE_20K (0xB << RFH_RXF_DMA_RB_SIZE_POS) +#define RFH_RXF_DMA_RB_SIZE_24K (0xC << RFH_RXF_DMA_RB_SIZE_POS) +#define RFH_RXF_DMA_RB_SIZE_28K (0xD << RFH_RXF_DMA_RB_SIZE_POS) +#define RFH_RXF_DMA_RB_SIZE_32K (0xE << RFH_RXF_DMA_RB_SIZE_POS) +/* RB Circular Buffer size:defines the table sizes in RBD units */ +#define RFH_RXF_DMA_RBDCB_SIZE_MASK (0x00F00000) /* bits 20-23 */ +#define RFH_RXF_DMA_RBDCB_SIZE_POS 20 +#define RFH_RXF_DMA_RBDCB_SIZE_8 (0x3 << RFH_RXF_DMA_RBDCB_SIZE_POS) +#define RFH_RXF_DMA_RBDCB_SIZE_16 (0x4 << RFH_RXF_DMA_RBDCB_SIZE_POS) +#define RFH_RXF_DMA_RBDCB_SIZE_32 (0x5 << RFH_RXF_DMA_RBDCB_SIZE_POS) +#define RFH_RXF_DMA_RBDCB_SIZE_64 (0x7 << RFH_RXF_DMA_RBDCB_SIZE_POS) +#define RFH_RXF_DMA_RBDCB_SIZE_128 (0x7 << RFH_RXF_DMA_RBDCB_SIZE_POS) +#define RFH_RXF_DMA_RBDCB_SIZE_256 (0x8 << RFH_RXF_DMA_RBDCB_SIZE_POS) +#define RFH_RXF_DMA_RBDCB_SIZE_512 (0x9 << RFH_RXF_DMA_RBDCB_SIZE_POS) +#define RFH_RXF_DMA_RBDCB_SIZE_1024 (0xA << RFH_RXF_DMA_RBDCB_SIZE_POS) +#define RFH_RXF_DMA_RBDCB_SIZE_2048 (0xB << RFH_RXF_DMA_RBDCB_SIZE_POS) +#define RFH_RXF_DMA_MIN_RB_SIZE_MASK (0x03000000) /* bit 24-25 */ +#define RFH_RXF_DMA_MIN_RB_SIZE_POS 24 +#define RFH_RXF_DMA_MIN_RB_4_8 (3 << RFH_RXF_DMA_MIN_RB_SIZE_POS) +#define RFH_RXF_DMA_SINGLE_FRAME_MASK (0x20000000) /* bit 29 */ +#define RFH_DMA_EN_MASK (0xC0000000) /* bits 30-31*/ +#define RFH_DMA_EN_ENABLE_VAL BIT(31) + +#define RFH_RXF_RXQ_ACTIVE 0xA0980C + +#define RFH_GEN_CFG 0xA09800 +#define RFH_GEN_CFG_DEFAULT_RXQ_NUM_MASK 0xF00 +#define RFH_GEN_CFG_SERVICE_DMA_SNOOP BIT(0) +#define RFH_GEN_CFG_RFH_DMA_SNOOP BIT(1) +#define DEFAULT_RXQ_NUM 8 + +/* end of 9000 rx series registers */ + /* TFDB Area - TFDs buffer table */ #define FH_MEM_TFDIB_DRAM_ADDR_LSB_MSK (0xFFFFFFFF) #define FH_TFDIB_LOWER_BOUND (FH_MEM_LOWER_BOUND + 0x900) @@ -434,6 +507,10 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(unsigned int chnl) */ #define FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN (0x00000002) +#define MQ_RX_TABLE_SIZE 512 +#define MQ_RX_TABLE_MASK (MQ_RX_TABLE_SIZE - 1) +#define MQ_RX_POOL_SIZE MQ_RX_TABLE_MASK + #define RX_QUEUE_SIZE 256 #define RX_QUEUE_MASK 255 #define RX_QUEUE_SIZE_LOG 8 diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw-error-dump.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw-error-dump.h index a5aaf6853704..8425e1a587d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fw-error-dump.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw-error-dump.h @@ -293,6 +293,8 @@ iwl_fw_error_next_data(struct iwl_fw_error_dump_data *data) * @FW_DBG_TX_LATENCY: trigger log collection when the tx latency goes above a * threshold. * @FW_DBG_TDLS: trigger log collection upon TDLS related events. + * @FW_DBG_TRIGGER_TX_STATUS: trigger log collection upon tx status when + * the firmware sends a tx reply. */ enum iwl_fw_dbg_trigger { FW_DBG_TRIGGER_INVALID = 0, @@ -309,6 +311,7 @@ enum iwl_fw_dbg_trigger { FW_DBG_TRIGGER_BA, FW_DBG_TRIGGER_TX_LATENCY, FW_DBG_TRIGGER_TDLS, + FW_DBG_TRIGGER_TX_STATUS, /* must be last */ FW_DBG_TRIGGER_MAX, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h index 84f8aeb926c8..e2dbc67a367b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h @@ -297,10 +297,12 @@ typedef unsigned int __bitwise__ iwl_ucode_tlv_capa_t; * @IWL_UCODE_TLV_CAPA_DQA_SUPPORT: supports dynamic queue allocation (DQA), * which also implies support for the scheduler configuration command * @IWL_UCODE_TLV_CAPA_TDLS_CHANNEL_SWITCH: supports TDLS channel switching + * @IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG: Consolidated D3-D0 image * @IWL_UCODE_TLV_CAPA_HOTSPOT_SUPPORT: supports Hot Spot Command * @IWL_UCODE_TLV_CAPA_DC2DC_SUPPORT: supports DC2DC Command * @IWL_UCODE_TLV_CAPA_CSUM_SUPPORT: supports TCP Checksum Offload * @IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS: support radio and beacon statistics + * @IWL_UCODE_TLV_CAPA_P2P_STANDALONE_UAPSD: support p2p standalone U-APSD * @IWL_UCODE_TLV_CAPA_BT_COEX_PLCR: enabled BT Coex packet level co-running * @IWL_UCODE_TLV_CAPA_LAR_MULTI_MCC: ucode supports LAR updates with different * sources for the MCC. This TLV bit is a future replacement to @@ -313,6 +315,8 @@ typedef unsigned int __bitwise__ iwl_ucode_tlv_capa_t; * @IWL_UCODE_TLV_CAPA_BT_MPLUT_SUPPORT: supports bt-coex Multi-priority LUT * @IWL_UCODE_TLV_CAPA_BEACON_ANT_SELECTION: firmware will decide on what * antenna the beacon should be transmitted + * @IWL_UCODE_TLV_CAPA_BEACON_STORING: firmware will store the latest beacon + * from AP and will send it upon d0i3 exit. * @IWL_UCODE_TLV_CAPA_LAR_SUPPORT_V2: support LAR API V2 * * @NUM_IWL_UCODE_TLV_CAPA: number of bits used @@ -330,10 +334,12 @@ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_QUIET_PERIOD_SUPPORT = (__force iwl_ucode_tlv_capa_t)11, IWL_UCODE_TLV_CAPA_DQA_SUPPORT = (__force iwl_ucode_tlv_capa_t)12, IWL_UCODE_TLV_CAPA_TDLS_CHANNEL_SWITCH = (__force iwl_ucode_tlv_capa_t)13, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG = (__force iwl_ucode_tlv_capa_t)17, IWL_UCODE_TLV_CAPA_HOTSPOT_SUPPORT = (__force iwl_ucode_tlv_capa_t)18, IWL_UCODE_TLV_CAPA_DC2DC_CONFIG_SUPPORT = (__force iwl_ucode_tlv_capa_t)19, IWL_UCODE_TLV_CAPA_CSUM_SUPPORT = (__force iwl_ucode_tlv_capa_t)21, IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS = (__force iwl_ucode_tlv_capa_t)22, + IWL_UCODE_TLV_CAPA_P2P_STANDALONE_UAPSD = (__force iwl_ucode_tlv_capa_t)26, IWL_UCODE_TLV_CAPA_BT_COEX_PLCR = (__force iwl_ucode_tlv_capa_t)28, IWL_UCODE_TLV_CAPA_LAR_MULTI_MCC = (__force iwl_ucode_tlv_capa_t)29, IWL_UCODE_TLV_CAPA_BT_COEX_RRC = (__force iwl_ucode_tlv_capa_t)30, @@ -341,7 +347,9 @@ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE = (__force iwl_ucode_tlv_capa_t)64, IWL_UCODE_TLV_CAPA_SHORT_PM_TIMEOUTS = (__force iwl_ucode_tlv_capa_t)65, IWL_UCODE_TLV_CAPA_BT_MPLUT_SUPPORT = (__force iwl_ucode_tlv_capa_t)67, + IWL_UCODE_TLV_CAPA_MULTI_QUEUE_RX_SUPPORT = (__force iwl_ucode_tlv_capa_t)68, IWL_UCODE_TLV_CAPA_BEACON_ANT_SELECTION = (__force iwl_ucode_tlv_capa_t)71, + IWL_UCODE_TLV_CAPA_BEACON_STORING = (__force iwl_ucode_tlv_capa_t)72, IWL_UCODE_TLV_CAPA_LAR_SUPPORT_V2 = (__force iwl_ucode_tlv_capa_t)73, NUM_IWL_UCODE_TLV_CAPA @@ -748,6 +756,19 @@ struct iwl_fw_dbg_trigger_tdls { } __packed; /** + * struct iwl_fw_dbg_trigger_tx_status - configures trigger for tx response + * status. + * @statuses: the list of statuses to trigger the collection on + */ +struct iwl_fw_dbg_trigger_tx_status { + struct tx_status { + u8 status; + u8 reserved[3]; + } __packed statuses[16]; + __le32 reserved[2]; +} __packed; + +/** * struct iwl_fw_dbg_conf_tlv - a TLV that describes a debug configuration. * @id: conf id * @usniffer: should the uSniffer image be used diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h index fd42f63f5e84..b88ecc7892a9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h @@ -108,6 +108,8 @@ enum iwl_amsdu_size { * @power_level: power level, default = 1 * @debug_level: levels are IWL_DL_* * @ant_coupling: antenna coupling in dB, default = 0 + * @nvm_file: specifies a external NVM file + * @uapsd_disable: disable U-APSD, default = 1 * @d0i3_disable: disable d0i3, default = 1, * @d0i3_entry_delay: time to wait after no refs are taken before * entering D0i3 (in msecs) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 7b89bfc8c8ac..50f4cc60cf3e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -539,7 +539,7 @@ static void iwl_set_hw_address_family_8000(struct device *dev, struct iwl_nvm_data *data, const __le16 *mac_override, const __le16 *nvm_hw, - u32 mac_addr0, u32 mac_addr1) + __le32 mac_addr0, __le32 mac_addr1) { const u8 *hw_addr; @@ -583,7 +583,8 @@ static void iwl_set_hw_address_family_8000(struct device *dev, if (!is_valid_ether_addr(data->hw_addr)) IWL_ERR_DEV(dev, - "mac address from hw section is not valid\n"); + "mac address (%pM) from hw section is not valid\n", + data->hw_addr); return; } @@ -597,7 +598,7 @@ iwl_parse_nvm_data(struct device *dev, const struct iwl_cfg *cfg, const __le16 *nvm_calib, const __le16 *regulatory, const __le16 *mac_override, const __le16 *phy_sku, u8 tx_chains, u8 rx_chains, bool lar_fw_supported, - u32 mac_addr0, u32 mac_addr1) + __le32 mac_addr0, __le32 mac_addr1) { struct iwl_nvm_data *data; u32 sku; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index 92466ee72806..4e8e0dc474d4 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -79,7 +79,7 @@ iwl_parse_nvm_data(struct device *dev, const struct iwl_cfg *cfg, const __le16 *nvm_calib, const __le16 *regulatory, const __le16 *mac_override, const __le16 *phy_sku, u8 tx_chains, u8 rx_chains, bool lar_fw_supported, - u32 mac_addr0, u32 mac_addr1); + __le32 mac_addr0, __le32 mac_addr1); /** * iwl_parse_mcc_info - parse MCC (mobile country code) info coming from FW diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 82fb3a97a46d..0ca0f13b69b0 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -506,7 +506,7 @@ struct iwl_trans_config { bool sw_csum_tx; const struct iwl_hcmd_arr *command_groups; int command_groups_size; - + u32 sdio_adma_addr; }; @@ -618,9 +618,9 @@ struct iwl_trans_ops { void (*fw_alive)(struct iwl_trans *trans, u32 scd_addr); void (*stop_device)(struct iwl_trans *trans, bool low_power); - void (*d3_suspend)(struct iwl_trans *trans, bool test); + void (*d3_suspend)(struct iwl_trans *trans, bool test, bool reset); int (*d3_resume)(struct iwl_trans *trans, enum iwl_d3_status *status, - bool test); + bool test, bool reset); int (*send_cmd)(struct iwl_trans *trans, struct iwl_host_cmd *cmd); @@ -736,6 +736,11 @@ enum iwl_plat_pm_mode { IWL_PLAT_PM_MODE_D0I3, }; +/* Max time to wait for trans to become idle/non-idle on d0i3 + * enter/exit (in msecs). + */ +#define IWL_TRANS_IDLE_TIMEOUT 2000 + /** * struct iwl_trans - transport common data * @@ -920,22 +925,23 @@ static inline void iwl_trans_stop_device(struct iwl_trans *trans) _iwl_trans_stop_device(trans, true); } -static inline void iwl_trans_d3_suspend(struct iwl_trans *trans, bool test) +static inline void iwl_trans_d3_suspend(struct iwl_trans *trans, bool test, + bool reset) { might_sleep(); if (trans->ops->d3_suspend) - trans->ops->d3_suspend(trans, test); + trans->ops->d3_suspend(trans, test, reset); } static inline int iwl_trans_d3_resume(struct iwl_trans *trans, enum iwl_d3_status *status, - bool test) + bool test, bool reset) { might_sleep(); if (!trans->ops->d3_resume) return 0; - return trans->ops->d3_resume(trans, status, test); + return trans->ops->d3_resume(trans, status, test, reset); } static inline void iwl_trans_ref(struct iwl_trans *trans) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h index b00c03fcd447..4b560e4417ee 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h @@ -6,7 +6,8 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -32,7 +33,8 @@ * BSD LICENSE * * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2015 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,6 +75,7 @@ #define IWL_MVM_WOWLAN_PS_RX_DATA_TIMEOUT (10 * USEC_PER_MSEC) #define IWL_MVM_SHORT_PS_TX_DATA_TIMEOUT (2 * 1024) /* defined in TU */ #define IWL_MVM_SHORT_PS_RX_DATA_TIMEOUT (40 * 1024) /* defined in TU */ +#define IWL_MVM_P2P_UAPSD_STANDALONE 0 #define IWL_MVM_P2P_LOWLATENCY_PS_ENABLE 0 #define IWL_MVM_UAPSD_RX_DATA_TIMEOUT (50 * USEC_PER_MSEC) #define IWL_MVM_UAPSD_TX_DATA_TIMEOUT (50 * USEC_PER_MSEC) @@ -107,6 +110,7 @@ #define IWL_MVM_RS_80_20_FAR_RANGE_TWEAK 1 #define IWL_MVM_TOF_IS_RESPONDER 0 #define IWL_MVM_SW_TX_CSUM_OFFLOAD 0 +#define IWL_MVM_COLLECT_FW_ERR_DUMP 1 #define IWL_MVM_RS_NUM_TRY_BEFORE_ANT_TOGGLE 1 #define IWL_MVM_RS_HT_VHT_RETRIES_PER_RATE 2 #define IWL_MVM_RS_HT_VHT_RETRIES_PER_RATE_TW 1 diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index d3e21d95cece..346376187ef8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -7,6 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -851,7 +853,8 @@ iwl_mvm_get_wowlan_config(struct iwl_mvm *mvm, wowlan_config_cmd->is_11n_connection = ap_sta->ht_cap.ht_supported; wowlan_config_cmd->flags = ENABLE_L3_FILTERING | - ENABLE_NBNS_FILTERING | ENABLE_DHCP_FILTERING; + ENABLE_NBNS_FILTERING | ENABLE_DHCP_FILTERING | + ENABLE_STORE_BEACON; /* Query the last used seqno and set it */ ret = iwl_mvm_get_last_nonqos_seq(mvm, vif); @@ -1023,14 +1026,18 @@ iwl_mvm_wowlan_config(struct iwl_mvm *mvm, struct ieee80211_sta *ap_sta) { int ret; + bool unified_image = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); - ret = iwl_mvm_switch_to_d3(mvm); - if (ret) - return ret; + if (!unified_image) { + ret = iwl_mvm_switch_to_d3(mvm); + if (ret) + return ret; - ret = iwl_mvm_d3_reprogram(mvm, vif, ap_sta); - if (ret) - return ret; + ret = iwl_mvm_d3_reprogram(mvm, vif, ap_sta); + if (ret) + return ret; + } if (!iwlwifi_mod_params.sw_crypto) { /* @@ -1072,10 +1079,14 @@ iwl_mvm_netdetect_config(struct iwl_mvm *mvm, { struct iwl_wowlan_config_cmd wowlan_config_cmd = {}; int ret; + bool unified_image = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); - ret = iwl_mvm_switch_to_d3(mvm); - if (ret) - return ret; + if (!unified_image) { + ret = iwl_mvm_switch_to_d3(mvm); + if (ret) + return ret; + } /* rfkill release can be either for wowlan or netdetect */ if (wowlan->rfkill_release) @@ -1151,6 +1162,8 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, }; int ret; int len __maybe_unused; + bool unified_image = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); if (!wowlan) { /* @@ -1236,7 +1249,7 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, clear_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status); - iwl_trans_d3_suspend(mvm->trans, test); + iwl_trans_d3_suspend(mvm->trans, test, !unified_image); out: if (ret < 0) { iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN); @@ -1299,7 +1312,7 @@ int iwl_mvm_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) __set_bit(D0I3_DEFER_WAKEUP, &mvm->d0i3_suspend_flags); mutex_unlock(&mvm->d0i3_suspend_mutex); - iwl_trans_d3_suspend(trans, false); + iwl_trans_d3_suspend(trans, false, false); return 0; } @@ -2041,9 +2054,14 @@ static void iwl_mvm_d3_disconnect_iter(void *data, u8 *mac, static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) { struct ieee80211_vif *vif = NULL; - int ret; + int ret = 1; enum iwl_d3_status d3_status; bool keep = false; + bool unified_image = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); + + u32 flags = CMD_ASYNC | CMD_HIGH_PRIO | CMD_SEND_IN_IDLE | + CMD_WAKE_UP_TRANS; mutex_lock(&mvm->mutex); @@ -2052,7 +2070,7 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) if (IS_ERR_OR_NULL(vif)) goto err; - ret = iwl_trans_d3_resume(mvm->trans, &d3_status, test); + ret = iwl_trans_d3_resume(mvm->trans, &d3_status, test, !unified_image); if (ret) goto err; @@ -2095,17 +2113,28 @@ out_iterate: iwl_mvm_d3_disconnect_iter, keep ? vif : NULL); out: - /* return 1 to reconfigure the device */ + if (unified_image && !ret) { + ret = iwl_mvm_send_cmd_pdu(mvm, D0I3_END_CMD, flags, 0, NULL); + if (!ret) /* D3 ended successfully - no need to reset device */ + return 0; + } + + /* + * Reconfigure the device in one of the following cases: + * 1. We are not using a unified image + * 2. We are using a unified image but had an error while exiting D3 + */ set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status); set_bit(IWL_MVM_STATUS_D3_RECONFIG, &mvm->status); - - /* We always return 1, which causes mac80211 to do a reconfig - * with IEEE80211_RECONFIG_TYPE_RESTART. This type of - * reconfig calls iwl_mvm_restart_complete(), where we unref - * the IWL_MVM_REF_UCODE_DOWN, so we need to take the - * reference here. + /* + * When switching images we return 1, which causes mac80211 + * to do a reconfig with IEEE80211_RECONFIG_TYPE_RESTART. + * This type of reconfig calls iwl_mvm_restart_complete(), + * where we unref the IWL_MVM_REF_UCODE_DOWN, so we need + * to take the reference here. */ iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN); + return 1; } @@ -2122,7 +2151,7 @@ static int iwl_mvm_resume_d0i3(struct iwl_mvm *mvm) enum iwl_d3_status d3_status; struct iwl_trans *trans = mvm->trans; - iwl_trans_d3_resume(trans, &d3_status, false); + iwl_trans_d3_resume(trans, &d3_status, false, false); /* * make sure to clear D0I3_DEFER_WAKEUP before diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 9e0d46368cdd..14004456bf55 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -7,6 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1255,6 +1257,7 @@ static ssize_t iwl_dbgfs_low_latency_write(struct ieee80211_vif *vif, char *buf, { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct iwl_mvm *mvm = mvmvif->mvm; + bool prev; u8 value; int ret; @@ -1265,7 +1268,9 @@ static ssize_t iwl_dbgfs_low_latency_write(struct ieee80211_vif *vif, char *buf, return -EINVAL; mutex_lock(&mvm->mutex); - iwl_mvm_update_low_latency(mvm, vif, value); + prev = iwl_mvm_vif_low_latency(mvmvif); + mvmvif->low_latency_dbgfs = value; + iwl_mvm_update_low_latency(mvm, vif, prev); mutex_unlock(&mvm->mutex); return count; @@ -1277,11 +1282,15 @@ static ssize_t iwl_dbgfs_low_latency_read(struct file *file, { struct ieee80211_vif *vif = file->private_data; struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - char buf[2]; + char buf[30] = {}; + int len; - buf[0] = mvmvif->low_latency ? '1' : '0'; - buf[1] = '\n'; - return simple_read_from_buffer(user_buf, count, ppos, buf, sizeof(buf)); + len = snprintf(buf, sizeof(buf) - 1, + "traffic=%d\ndbgfs=%d\nvcmd=%d\n", + mvmvif->low_latency_traffic, + mvmvif->low_latency_dbgfs, + mvmvif->low_latency_vcmd); + return simple_read_from_buffer(user_buf, count, ppos, buf, len); } static ssize_t iwl_dbgfs_uapsd_misbehaving_read(struct file *file, @@ -1363,6 +1372,59 @@ static ssize_t iwl_dbgfs_rx_phyinfo_read(struct file *file, return simple_read_from_buffer(user_buf, count, ppos, buf, sizeof(buf)); } +static void iwl_dbgfs_quota_check(void *data, u8 *mac, + struct ieee80211_vif *vif) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + int *ret = data; + + if (mvmvif->dbgfs_quota_min) + *ret = -EINVAL; +} + +static ssize_t iwl_dbgfs_quota_min_write(struct ieee80211_vif *vif, char *buf, + size_t count, loff_t *ppos) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm *mvm = mvmvif->mvm; + u16 value; + int ret; + + ret = kstrtou16(buf, 0, &value); + if (ret) + return ret; + + if (value > 95) + return -EINVAL; + + mutex_lock(&mvm->mutex); + + mvmvif->dbgfs_quota_min = 0; + ieee80211_iterate_interfaces(mvm->hw, IEEE80211_IFACE_ITER_NORMAL, + iwl_dbgfs_quota_check, &ret); + if (ret == 0) { + mvmvif->dbgfs_quota_min = value; + iwl_mvm_update_quotas(mvm, false, NULL); + } + mutex_unlock(&mvm->mutex); + + return ret ?: count; +} + +static ssize_t iwl_dbgfs_quota_min_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_vif *vif = file->private_data; + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + char buf[10]; + int len; + + len = snprintf(buf, sizeof(buf), "%d\n", mvmvif->dbgfs_quota_min); + + return simple_read_from_buffer(user_buf, count, ppos, buf, len); +} + #define MVM_DEBUGFS_WRITE_FILE_OPS(name, bufsz) \ _MVM_DEBUGFS_WRITE_FILE_OPS(name, bufsz, struct ieee80211_vif) #define MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz) \ @@ -1386,6 +1448,7 @@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_range_req_ext, 32); MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_range_abort, 32); MVM_DEBUGFS_READ_FILE_OPS(tof_range_response); MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_responder_params, 32); +MVM_DEBUGFS_READ_WRITE_FILE_OPS(quota_min, 32); void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { @@ -1423,6 +1486,8 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) S_IRUSR | S_IWUSR); MVM_DEBUGFS_ADD_FILE_VIF(rx_phyinfo, mvmvif->dbgfs_dir, S_IRUSR | S_IWUSR); + MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir, + S_IRUSR | S_IWUSR); if (vif->type == NL80211_IFTYPE_STATION && !vif->p2p && mvmvif == mvm->bf_allowed_vif) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 90500e2d107b..c529e5355803 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -7,6 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -261,17 +262,18 @@ static ssize_t iwl_dbgfs_nic_temp_read(struct file *file, { struct iwl_mvm *mvm = file->private_data; char buf[16]; - int pos, temp; + int pos, ret; + s32 temp; if (!mvm->ucode_loaded) return -EIO; mutex_lock(&mvm->mutex); - temp = iwl_mvm_get_temp(mvm); + ret = iwl_mvm_get_temp(mvm, &temp); mutex_unlock(&mvm->mutex); - if (temp < 0) - return temp; + if (ret) + return -EIO; pos = scnprintf(buf , sizeof(buf), "%d\n", temp); @@ -942,6 +944,47 @@ iwl_dbgfs_scan_ant_rxchain_write(struct iwl_mvm *mvm, char *buf, return count; } +static ssize_t iwl_dbgfs_indirection_tbl_write(struct iwl_mvm *mvm, + char *buf, size_t count, + loff_t *ppos) +{ + struct iwl_rss_config_cmd cmd = { + .flags = cpu_to_le32(IWL_RSS_ENABLE), + .hash_mask = IWL_RSS_HASH_TYPE_IPV4_TCP | + IWL_RSS_HASH_TYPE_IPV4_PAYLOAD | + IWL_RSS_HASH_TYPE_IPV6_TCP | + IWL_RSS_HASH_TYPE_IPV6_PAYLOAD, + }; + int ret, i, num_repeats, nbytes = count / 2; + + ret = hex2bin(cmd.indirection_table, buf, nbytes); + if (ret) + return ret; + + /* + * The input is the redirection table, partial or full. + * Repeat the pattern if needed. + * For example, input of 01020F will be repeated 42 times, + * indirecting RSS hash results to queues 1, 2, 15 (skipping + * queues 3 - 14). + */ + num_repeats = ARRAY_SIZE(cmd.indirection_table) / nbytes; + for (i = 1; i < num_repeats; i++) + memcpy(&cmd.indirection_table[i * nbytes], + cmd.indirection_table, nbytes); + /* handle cut in the middle pattern for the last places */ + memcpy(&cmd.indirection_table[i * nbytes], cmd.indirection_table, + ARRAY_SIZE(cmd.indirection_table) % nbytes); + + memcpy(cmd.secret_key, mvm->secret_key, ARRAY_SIZE(cmd.secret_key)); + + mutex_lock(&mvm->mutex); + ret = iwl_mvm_send_cmd_pdu(mvm, RSS_CONFIG_CMD, 0, sizeof(cmd), &cmd); + mutex_unlock(&mvm->mutex); + + return ret ?: count; +} + static ssize_t iwl_dbgfs_fw_dbg_conf_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) @@ -983,7 +1026,7 @@ static ssize_t iwl_dbgfs_cont_recording_write(struct iwl_mvm *mvm, trans->cfg->device_family != IWL_DEVICE_FAMILY_8000) return -EOPNOTSUPP; - ret = kstrtouint(buf, 0, &rec_mode); + ret = kstrtoint(buf, 0, &rec_mode); if (ret) return ret; @@ -1454,6 +1497,7 @@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(d0i3_refs, 8); MVM_DEBUGFS_READ_WRITE_FILE_OPS(fw_dbg_conf, 8); MVM_DEBUGFS_WRITE_FILE_OPS(fw_dbg_collect, 64); MVM_DEBUGFS_WRITE_FILE_OPS(cont_recording, 8); +MVM_DEBUGFS_WRITE_FILE_OPS(indirection_tbl, 16); #ifdef CONFIG_IWLWIFI_BCAST_FILTERING MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters, 256); @@ -1498,11 +1542,15 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) MVM_DEBUGFS_ADD_FILE(fw_dbg_collect, mvm->debugfs_dir, S_IWUSR); MVM_DEBUGFS_ADD_FILE(send_echo_cmd, mvm->debugfs_dir, S_IWUSR); MVM_DEBUGFS_ADD_FILE(cont_recording, mvm->debugfs_dir, S_IWUSR); + MVM_DEBUGFS_ADD_FILE(indirection_tbl, mvm->debugfs_dir, S_IWUSR); if (!debugfs_create_bool("enable_scan_iteration_notif", S_IRUSR | S_IWUSR, mvm->debugfs_dir, &mvm->scan_iter_notif_enabled)) goto err; + if (!debugfs_create_bool("drop_bcn_ap_mode", S_IRUSR | S_IWUSR, + mvm->debugfs_dir, &mvm->drop_bcn_ap_mode)) + goto err; #ifdef CONFIG_IWLWIFI_BCAST_FILTERING if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BCAST_FILTERING) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-d3.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-d3.h index 62b9a0a96700..eec52c57f718 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-d3.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-d3.h @@ -251,6 +251,7 @@ enum iwl_wowlan_flags { ENABLE_L3_FILTERING = BIT(1), ENABLE_NBNS_FILTERING = BIT(2), ENABLE_DHCP_FILTERING = BIT(3), + ENABLE_STORE_BEACON = BIT(4), }; struct iwl_wowlan_config_cmd { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rx.h index fb6d341d6f3d..df939f51d9b9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rx.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rx.h @@ -7,7 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015 - 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -34,7 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015 - 2016 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -287,16 +287,13 @@ enum iwl_rx_mpdu_status { IWL_RX_MPDU_STATUS_KEY_ERROR = BIT(4), IWL_RX_MPDU_STATUS_ICV_OK = BIT(5), IWL_RX_MPDU_STATUS_MIC_OK = BIT(6), - /* TODO - verify this is the correct value */ IWL_RX_MPDU_RES_STATUS_TTAK_OK = BIT(7), IWL_RX_MPDU_STATUS_SEC_MASK = 0x7 << 8, IWL_RX_MPDU_STATUS_SEC_NONE = 0x0 << 8, IWL_RX_MPDU_STATUS_SEC_WEP = 0x1 << 8, IWL_RX_MPDU_STATUS_SEC_CCM = 0x2 << 8, IWL_RX_MPDU_STATUS_SEC_TKIP = 0x3 << 8, - /* TODO - define IWL_RX_MPDU_STATUS_SEC_EXT_ENC - this is a stub */ IWL_RX_MPDU_STATUS_SEC_EXT_ENC = 0x4 << 8, - /* TODO - define IWL_RX_MPDU_STATUS_SEC_GCM - this is a stub */ IWL_RX_MPDU_STATUS_SEC_GCM = 0x5 << 8, IWL_RX_MPDU_STATUS_DECRYPTED = BIT(11), IWL_RX_MPDU_STATUS_WEP_MATCH = BIT(12), @@ -350,11 +347,11 @@ struct iwl_rx_mpdu_desc { /* DW8 */ __le32 filter_match; /* DW9 */ - __le32 gp2_on_air_rise; - /* DW10 */ __le32 rate_n_flags; + /* DW10 */ + u8 energy_a, energy_b, channel, reserved; /* DW11 */ - u8 energy_a, energy_b, energy_c, channel; + __le32 gp2_on_air_rise; /* DW12 & DW13 */ __le64 tsf_on_air_rise; } __packed; @@ -365,4 +362,33 @@ struct iwl_frame_release { __le16 nssn; }; +enum iwl_rss_hash_func_en { + IWL_RSS_HASH_TYPE_IPV4_TCP, + IWL_RSS_HASH_TYPE_IPV4_UDP, + IWL_RSS_HASH_TYPE_IPV4_PAYLOAD, + IWL_RSS_HASH_TYPE_IPV6_TCP, + IWL_RSS_HASH_TYPE_IPV6_UDP, + IWL_RSS_HASH_TYPE_IPV6_PAYLOAD, +}; + +#define IWL_RSS_HASH_KEY_CNT 10 +#define IWL_RSS_INDIRECTION_TABLE_SIZE 128 +#define IWL_RSS_ENABLE 1 + +/** + * struct iwl_rss_config_cmd - RSS (Receive Side Scaling) configuration + * + * @flags: 1 - enable, 0 - disable + * @hash_mask: Type of RSS to use. Values are from %iwl_rss_hash_func_en + * @secret_key: 320 bit input of random key configuration from driver + * @indirection_table: indirection table + */ +struct iwl_rss_config_cmd { + __le32 flags; + u8 hash_mask; + u8 reserved[3]; + __le32 secret_key[IWL_RSS_HASH_KEY_CNT]; + u8 indirection_table[IWL_RSS_INDIRECTION_TABLE_SIZE]; +} __packed; /* RSS_CONFIG_CMD_API_S_VER_1 */ + #endif /* __fw_api_rx_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h index 6fca4fb1d306..90d911394836 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h @@ -7,6 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -253,6 +255,68 @@ struct iwl_mvm_keyinfo { __le64 hw_tkip_mic_tx_key; } __packed; +#define IWL_ADD_STA_STATUS_MASK 0xFF +#define IWL_ADD_STA_BAID_MASK 0xFF00 + +/** + * struct iwl_mvm_add_sta_cmd_v7 - Add/modify a station in the fw's sta table. + * ( REPLY_ADD_STA = 0x18 ) + * @add_modify: 1: modify existing, 0: add new station + * @awake_acs: + * @tid_disable_tx: is tid BIT(tid) enabled for Tx. Clear BIT(x) to enable + * AMPDU for tid x. Set %STA_MODIFY_TID_DISABLE_TX to change this field. + * @mac_id_n_color: the Mac context this station belongs to + * @addr[ETH_ALEN]: station's MAC address + * @sta_id: index of station in uCode's station table + * @modify_mask: STA_MODIFY_*, selects which parameters to modify vs. leave + * alone. 1 - modify, 0 - don't change. + * @station_flags: look at %iwl_sta_flags + * @station_flags_msk: what of %station_flags have changed + * @add_immediate_ba_tid: tid for which to add block-ack support (Rx) + * Set %STA_MODIFY_ADD_BA_TID to use this field, and also set + * add_immediate_ba_ssn. + * @remove_immediate_ba_tid: tid for which to remove block-ack support (Rx) + * Set %STA_MODIFY_REMOVE_BA_TID to use this field + * @add_immediate_ba_ssn: ssn for the Rx block-ack session. Used together with + * add_immediate_ba_tid. + * @sleep_tx_count: number of packets to transmit to station even though it is + * asleep. Used to synchronise PS-poll and u-APSD responses while ucode + * keeps track of STA sleep state. + * @sleep_state_flags: Look at %iwl_sta_sleep_flag. + * @assoc_id: assoc_id to be sent in VHT PLCP (9-bit), for grp use 0, for AP + * mac-addr. + * @beamform_flags: beam forming controls + * @tfd_queue_msk: tfd queues used by this station + * + * The device contains an internal table of per-station information, with info + * on security keys, aggregation parameters, and Tx rates for initial Tx + * attempt and any retries (set by REPLY_TX_LINK_QUALITY_CMD). + * + * ADD_STA sets up the table entry for one station, either creating a new + * entry, or modifying a pre-existing one. + */ +struct iwl_mvm_add_sta_cmd_v7 { + u8 add_modify; + u8 awake_acs; + __le16 tid_disable_tx; + __le32 mac_id_n_color; + u8 addr[ETH_ALEN]; /* _STA_ID_MODIFY_INFO_API_S_VER_1 */ + __le16 reserved2; + u8 sta_id; + u8 modify_mask; + __le16 reserved3; + __le32 station_flags; + __le32 station_flags_msk; + u8 add_immediate_ba_tid; + u8 remove_immediate_ba_tid; + __le16 add_immediate_ba_ssn; + __le16 sleep_tx_count; + __le16 sleep_state_flags; + __le16 assoc_id; + __le16 beamform_flags; + __le32 tfd_queue_msk; +} __packed; /* ADD_STA_CMD_API_S_VER_7 */ + /** * struct iwl_mvm_add_sta_cmd - Add/modify a station in the fw's sta table. * ( REPLY_ADD_STA = 0x18 ) @@ -282,6 +346,7 @@ struct iwl_mvm_keyinfo { * mac-addr. * @beamform_flags: beam forming controls * @tfd_queue_msk: tfd queues used by this station + * @rx_ba_window: aggregation window size * * The device contains an internal table of per-station information, with info * on security keys, aggregation parameters, and Tx rates for initial Tx @@ -310,7 +375,9 @@ struct iwl_mvm_add_sta_cmd { __le16 assoc_id; __le16 beamform_flags; __le32 tfd_queue_msk; -} __packed; /* ADD_STA_CMD_API_S_VER_7 */ + __le16 rx_ba_window; + __le16 reserved; +} __packed; /* ADD_STA_CMD_API_S_VER_8 */ /** * struct iwl_mvm_add_sta_key_cmd - add/modify sta key diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index 82049bb139c2..f332497e29d1 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -213,6 +213,8 @@ enum { MFUART_LOAD_NOTIFICATION = 0xb1, + RSS_CONFIG_CMD = 0xb3, + REPLY_RX_PHY_CMD = 0xc0, REPLY_RX_MPDU_CMD = 0xc1, FRAME_RELEASE = 0xc3, @@ -280,11 +282,16 @@ enum iwl_phy_ops_subcmd_ids { DTS_MEASUREMENT_NOTIF_WIDE = 0xFF, }; +enum iwl_prot_offload_subcmd_ids { + STORED_BEACON_NTF = 0xFF, +}; + /* command groups */ enum { LEGACY_GROUP = 0x0, LONG_GROUP = 0x1, PHY_OPS_GROUP = 0x4, + PROT_OFFLOAD_GROUP = 0xb, }; /** @@ -1851,4 +1858,28 @@ struct iwl_shared_mem_cfg { __le32 page_buff_size; } __packed; /* SHARED_MEM_ALLOC_API_S_VER_1 */ +#define MAX_STORED_BEACON_SIZE 600 + +/** + * Stored beacon notification + * + * @system_time: system time on air rise + * @tsf: TSF on air rise + * @beacon_timestamp: beacon on air rise + * @phy_flags: general phy flags: band, modulation, etc. + * @channel: channel this beacon was received on + * @rates: rate in ucode internal format + * @byte_count: frame's byte count + */ +struct iwl_stored_beacon_notif { + __le32 system_time; + __le64 tsf; + __le32 beacon_timestamp; + __le16 phy_flags; + __le16 channel; + __le32 rates; + __le32 byte_count; + u8 data[MAX_STORED_BEACON_SIZE]; +} __packed; /* WOWLAN_STROED_BEACON_INFO_S_VER_1 */ + #endif /* __fw_api_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c index 0813f8184e10..4856eac120f6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c @@ -435,6 +435,10 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm) bool monitor_dump_only = false; int i; + if (!IWL_MVM_COLLECT_FW_ERR_DUMP && + !mvm->trans->dbg_dest_tlv) + return; + lockdep_assert_held(&mvm->mutex); /* there's no point in fw dump if the bus is dead */ @@ -640,8 +644,6 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm) /* Dump fw's virtual image */ if (mvm->fw->img[mvm->cur_ucode].paging_mem_size) { - u32 i; - for (i = 1; i < mvm->num_of_paging_blk + 1; i++) { struct iwl_fw_error_dump_paging *paging; struct page *pages = diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 4ed5180c547b..070e2af05ca2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -7,6 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -107,6 +108,24 @@ static int iwl_send_tx_ant_cfg(struct iwl_mvm *mvm, u8 valid_tx_ant) sizeof(tx_ant_cmd), &tx_ant_cmd); } +static int iwl_send_rss_cfg_cmd(struct iwl_mvm *mvm) +{ + int i; + struct iwl_rss_config_cmd cmd = { + .flags = cpu_to_le32(IWL_RSS_ENABLE), + .hash_mask = IWL_RSS_HASH_TYPE_IPV4_TCP | + IWL_RSS_HASH_TYPE_IPV4_PAYLOAD | + IWL_RSS_HASH_TYPE_IPV6_TCP | + IWL_RSS_HASH_TYPE_IPV6_PAYLOAD, + }; + + for (i = 0; i < ARRAY_SIZE(cmd.indirection_table); i++) + cmd.indirection_table[i] = i % mvm->trans->num_rx_queues; + memcpy(cmd.secret_key, mvm->secret_key, ARRAY_SIZE(cmd.secret_key)); + + return iwl_mvm_send_cmd_pdu(mvm, RSS_CONFIG_CMD, 0, sizeof(cmd), &cmd); +} + static void iwl_free_fw_paging(struct iwl_mvm *mvm) { int i; @@ -894,6 +913,16 @@ int iwl_mvm_up(struct iwl_mvm *mvm) if (ret) goto error; + /* Init RSS configuration */ + if (iwl_mvm_has_new_rx_api(mvm)) { + ret = iwl_send_rss_cfg_cmd(mvm); + if (ret) { + IWL_ERR(mvm, "Failed to configure RSS queues: %d\n", + ret); + goto error; + } + } + /* init the fw <-> mac80211 STA mapping */ for (i = 0; i < IWL_MVM_STATION_COUNT; i++) RCU_INIT_POINTER(mvm->fw_id_to_mac_id[i], NULL); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index bf1e5eb5dbdb..535134d639e0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -7,7 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015 - 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -34,7 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015 - 2016 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -744,7 +744,7 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, * wake-ups. */ cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); - if (mvmvif->ap_assoc_sta_count) { + if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) { cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON); IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n"); } else { @@ -1462,3 +1462,40 @@ void iwl_mvm_rx_missed_beacons_notif(struct iwl_mvm *mvm, iwl_mvm_beacon_loss_iterator, mb); } + +void iwl_mvm_rx_stored_beacon_notif(struct iwl_mvm *mvm, + struct iwl_rx_cmd_buffer *rxb) +{ + struct iwl_rx_packet *pkt = rxb_addr(rxb); + struct iwl_stored_beacon_notif *sb = (void *)pkt->data; + struct ieee80211_rx_status rx_status; + struct sk_buff *skb; + u32 size = le32_to_cpu(sb->byte_count); + + if (size == 0) + return; + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + IWL_ERR(mvm, "alloc_skb failed\n"); + return; + } + + /* update rx_status according to the notification's metadata */ + memset(&rx_status, 0, sizeof(rx_status)); + rx_status.mactime = le64_to_cpu(sb->tsf); + rx_status.device_timestamp = le32_to_cpu(sb->system_time); + rx_status.band = + (sb->phy_flags & cpu_to_le16(RX_RES_PHY_FLAGS_BAND_24)) ? + IEEE80211_BAND_2GHZ : IEEE80211_BAND_5GHZ; + rx_status.freq = + ieee80211_channel_to_frequency(le16_to_cpu(sb->channel), + rx_status.band); + + /* copy the data */ + memcpy(skb_put(skb, size), sb->data, size); + memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status)); + + /* pass it as regular rx to mac80211 */ + ieee80211_rx_napi(mvm->hw, skb, NULL); +} diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index d70a1716f3e0..01476f545695 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -884,10 +884,10 @@ static int iwl_mvm_mac_ampdu_action(struct ieee80211_hw *hw, ret = -EINVAL; break; } - ret = iwl_mvm_sta_rx_agg(mvm, sta, tid, *ssn, true); + ret = iwl_mvm_sta_rx_agg(mvm, sta, tid, *ssn, true, buf_size); break; case IEEE80211_AMPDU_RX_STOP: - ret = iwl_mvm_sta_rx_agg(mvm, sta, tid, 0, false); + ret = iwl_mvm_sta_rx_agg(mvm, sta, tid, 0, false, buf_size); break; case IEEE80211_AMPDU_TX_START: if (!iwl_enable_tx_ampdu(mvm->cfg)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 5f3ac8cccf49..ebe37bb0ce4c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -346,8 +346,9 @@ struct iwl_mvm_vif_bf_data { * @pm_enabled - Indicate if MAC power management is allowed * @monitor_active: indicates that monitor context is configured, and that the * interface should get quota etc. - * @low_latency: indicates that this interface is in low-latency mode - * (VMACLowLatencyMode) + * @low_latency_traffic: indicates low latency traffic was detected + * @low_latency_dbgfs: low latency mode set from debugfs + * @low_latency_vcmd: low latency mode set from vendor command * @ps_disabled: indicates that this interface requires PS to be disabled * @queue_params: QoS params for this MAC * @bcast_sta: station used for broadcast packets. Used by the following @@ -375,7 +376,7 @@ struct iwl_mvm_vif { bool ap_ibss_active; bool pm_enabled; bool monitor_active; - bool low_latency; + bool low_latency_traffic, low_latency_dbgfs, low_latency_vcmd; bool ps_disabled; struct iwl_mvm_vif_bf_data bf_data; @@ -432,6 +433,7 @@ struct iwl_mvm_vif { struct iwl_dbgfs_pm dbgfs_pm; struct iwl_dbgfs_bf dbgfs_bf; struct iwl_mac_power_cmd mac_pwr_cmd; + int dbgfs_quota_min; #endif enum ieee80211_smps_mode smps_requests[NUM_IWL_MVM_SMPS_REQ]; @@ -645,6 +647,7 @@ struct iwl_mvm { atomic_t pending_frames[IWL_MVM_STATION_COUNT]; u32 tfd_drained[IWL_MVM_STATION_COUNT]; u8 rx_ba_sessions; + u32 secret_key[IWL_RSS_HASH_KEY_CNT]; /* configured by mac80211 */ u32 rts_threshold; @@ -856,6 +859,12 @@ struct iwl_mvm { u32 ciphers[6]; struct iwl_mvm_tof_data tof_data; + + /* + * Drop beacons from other APs in AP mode when there are no connected + * clients. + */ + bool drop_bcn_ap_mode; }; /* Extract MVM priv from op_mode and _hw */ @@ -1005,10 +1014,18 @@ static inline bool iwl_mvm_is_mplut_supported(struct iwl_mvm *mvm) IWL_MVM_BT_COEX_MPLUT; } +static inline +bool iwl_mvm_is_p2p_standalone_uapsd_supported(struct iwl_mvm *mvm) +{ + return fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_P2P_STANDALONE_UAPSD) && + IWL_MVM_P2P_UAPSD_STANDALONE; +} + static inline bool iwl_mvm_has_new_rx_api(struct iwl_mvm *mvm) { - /* firmware flag isn't defined yet */ - return false; + return fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_MULTI_QUEUE_RX_SUPPORT); } extern const u8 iwl_mvm_ac_to_tx_fifo[]; @@ -1184,6 +1201,8 @@ void iwl_mvm_rx_beacon_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb); void iwl_mvm_rx_missed_beacons_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb); +void iwl_mvm_rx_stored_beacon_notif(struct iwl_mvm *mvm, + struct iwl_rx_cmd_buffer *rxb); void iwl_mvm_mac_ctxt_recalc_tsf_id(struct iwl_mvm *mvm, struct ieee80211_vif *vif); unsigned long iwl_mvm_get_used_hw_queues(struct iwl_mvm *mvm, @@ -1417,8 +1436,9 @@ static inline bool iwl_mvm_vif_low_latency(struct iwl_mvm_vif *mvmvif) * binding, so this has no real impact. For now, just return * the current desired low-latency state. */ - - return mvmvif->low_latency; + return mvmvif->low_latency_dbgfs || + mvmvif->low_latency_traffic || + mvmvif->low_latency_vcmd; } /* hw scheduler queue config */ @@ -1481,7 +1501,7 @@ void iwl_mvm_tt_handler(struct iwl_mvm *mvm); void iwl_mvm_tt_initialize(struct iwl_mvm *mvm, u32 min_backoff); void iwl_mvm_tt_exit(struct iwl_mvm *mvm); void iwl_mvm_set_hw_ctkill_state(struct iwl_mvm *mvm, bool state); -int iwl_mvm_get_temp(struct iwl_mvm *mvm); +int iwl_mvm_get_temp(struct iwl_mvm *mvm, s32 *temp); /* Location Aware Regulatory */ struct iwl_mcc_update_resp * diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 7a3da2da6fd0..c446e0da9789 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -300,7 +300,7 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) struct iwl_nvm_section *sections = mvm->nvm_sections; const __le16 *hw, *sw, *calib, *regulatory, *mac_override, *phy_sku; bool lar_enabled; - u32 mac_addr0, mac_addr1; + __le32 mac_addr0, mac_addr1; /* Checking for required sections */ if (mvm->trans->cfg->device_family != IWL_DEVICE_FAMILY_8000) { @@ -337,8 +337,10 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) return NULL; /* read the mac address from WFMP registers */ - mac_addr0 = iwl_trans_read_prph(mvm->trans, WFMP_MAC_ADDR_0); - mac_addr1 = iwl_trans_read_prph(mvm->trans, WFMP_MAC_ADDR_1); + mac_addr0 = cpu_to_le32(iwl_trans_read_prph(mvm->trans, + WFMP_MAC_ADDR_0)); + mac_addr1 = cpu_to_le32(iwl_trans_read_prph(mvm->trans, + WFMP_MAC_ADDR_1)); hw = (const __le16 *)sections[mvm->cfg->nvm_hw_section_num].data; sw = (const __le16 *)sections[NVM_SECTION_TYPE_SW].data; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 89ea70deeb84..09a94a5efb61 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -33,6 +33,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -267,6 +268,8 @@ static const struct iwl_rx_handlers iwl_mvm_rx_handlers[] = { true), RX_HANDLER(MFUART_LOAD_NOTIFICATION, iwl_mvm_rx_mfuart_notif, false), RX_HANDLER(TOF_NOTIFICATION, iwl_mvm_tof_resp_handler, true), + RX_HANDLER_GRP(PROT_OFFLOAD_GROUP, STORED_BEACON_NTF, + iwl_mvm_rx_stored_beacon_notif, false), }; #undef RX_HANDLER @@ -344,6 +347,7 @@ static const struct iwl_hcmd_names iwl_mvm_legacy_names[] = { HCMD_NAME(MAC_PM_POWER_TABLE), HCMD_NAME(TDLS_CHANNEL_SWITCH_NOTIFICATION), HCMD_NAME(MFUART_LOAD_NOTIFICATION), + HCMD_NAME(RSS_CONFIG_CMD), HCMD_NAME(SCAN_ITERATION_COMPLETE_UMAC), HCMD_NAME(REPLY_RX_PHY_CMD), HCMD_NAME(REPLY_RX_MPDU_CMD), @@ -386,13 +390,20 @@ static const struct iwl_hcmd_names iwl_mvm_phy_names[] = { HCMD_NAME(DTS_MEASUREMENT_NOTIF_WIDE), }; +/* Please keep this array *SORTED* by hex value. + * Access is done through binary search + */ +static const struct iwl_hcmd_names iwl_mvm_prot_offload_names[] = { + HCMD_NAME(STORED_BEACON_NTF), +}; + static const struct iwl_hcmd_arr iwl_mvm_groups[] = { [LEGACY_GROUP] = HCMD_ARR(iwl_mvm_legacy_names), [LONG_GROUP] = HCMD_ARR(iwl_mvm_legacy_names), [PHY_OPS_GROUP] = HCMD_ARR(iwl_mvm_phy_names), + [PROT_OFFLOAD_GROUP] = HCMD_ARR(iwl_mvm_prot_offload_names), }; - /* this forward declaration can avoid to export the function */ static void iwl_mvm_async_handlers_wk(struct work_struct *wk); static void iwl_mvm_d0i3_exit_work(struct work_struct *wk); @@ -481,6 +492,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, } mvm->sf_state = SF_UNINIT; mvm->cur_ucode = IWL_UCODE_INIT; + mvm->drop_bcn_ap_mode = true; mutex_init(&mvm->mutex); mutex_init(&mvm->d0i3_suspend_mutex); @@ -641,6 +653,9 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, iwl_mvm_tof_init(mvm); + /* init RSS hash key */ + get_random_bytes(mvm->secret_key, ARRAY_SIZE(mvm->secret_key)); + return op_mode; out_unregister: @@ -1196,7 +1211,7 @@ static void iwl_mvm_set_wowlan_data(struct iwl_mvm *mvm, cmd->is_11n_connection = ap_sta->ht_cap.ht_supported; cmd->offloading_tid = iter_data->offloading_tid; cmd->flags = ENABLE_L3_FILTERING | ENABLE_NBNS_FILTERING | - ENABLE_DHCP_FILTERING; + ENABLE_DHCP_FILTERING | ENABLE_STORE_BEACON; /* * The d0i3 uCode takes care of the nonqos counters, * so configure only the qos seq ones. @@ -1217,8 +1232,7 @@ int iwl_mvm_enter_d0i3(struct iwl_op_mode *op_mode) struct iwl_wowlan_config_cmd wowlan_config_cmd = { .wakeup_filter = cpu_to_le32(IWL_WOWLAN_WAKEUP_RX_FRAME | IWL_WOWLAN_WAKEUP_BEACON_MISS | - IWL_WOWLAN_WAKEUP_LINK_CHANGE | - IWL_WOWLAN_WAKEUP_BCN_FILTERING), + IWL_WOWLAN_WAKEUP_LINK_CHANGE), }; struct iwl_d3_manager_config d3_cfg_cmd = { .min_sleep_time = cpu_to_le32(1000), @@ -1268,6 +1282,12 @@ int iwl_mvm_enter_d0i3(struct iwl_op_mode *op_mode) /* configure wowlan configuration only if needed */ if (mvm->d0i3_ap_sta_id != IWL_MVM_STATION_COUNT) { + /* wake on beacons only if beacon storing isn't supported */ + if (!fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_BEACON_STORING)) + wowlan_config_cmd.wakeup_filter |= + cpu_to_le32(IWL_WOWLAN_WAKEUP_BCN_FILTERING); + iwl_mvm_wowlan_config_key_params(mvm, d0i3_iter_data.connected_vif, true, flags); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c index 9de159f1ef2d..f313910cd026 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c @@ -7,7 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015 - 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -34,7 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015 - 2016 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -259,6 +259,26 @@ static void iwl_mvm_power_configure_uapsd(struct iwl_mvm *mvm, IWL_MVM_PS_HEAVY_RX_THLD_PERCENT; } +static void iwl_mvm_p2p_standalone_iterator(void *_data, u8 *mac, + struct ieee80211_vif *vif) +{ + bool *is_p2p_standalone = _data; + + switch (ieee80211_vif_type_p2p(vif)) { + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_AP: + *is_p2p_standalone = false; + break; + case NL80211_IFTYPE_STATION: + if (vif->bss_conf.assoc) + *is_p2p_standalone = false; + break; + + default: + break; + } +} + static bool iwl_mvm_power_allow_uapsd(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { @@ -268,9 +288,6 @@ static bool iwl_mvm_power_allow_uapsd(struct iwl_mvm *mvm, ETH_ALEN)) return false; - if (vif->p2p && - !(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD)) - return false; /* * Avoid using uAPSD if P2P client is associated to GO that uses * opportunistic power save. This is due to current FW limitation. @@ -287,6 +304,22 @@ static bool iwl_mvm_power_allow_uapsd(struct iwl_mvm *mvm, if (iwl_mvm_phy_ctx_count(mvm) >= 2) return false; + if (vif->p2p) { + /* Allow U-APSD only if p2p is stand alone */ + bool is_p2p_standalone = true; + + if (!iwl_mvm_is_p2p_standalone_uapsd_supported(mvm)) + return false; + + ieee80211_iterate_active_interfaces_atomic(mvm->hw, + IEEE80211_IFACE_ITER_NORMAL, + iwl_mvm_p2p_standalone_iterator, + &is_p2p_standalone); + + if (!is_p2p_standalone) + return false; + } + return true; } @@ -544,7 +577,6 @@ void iwl_mvm_power_uapsd_misbehaving_ap_notif(struct iwl_mvm *mvm, struct iwl_power_vifs { struct iwl_mvm *mvm; - struct ieee80211_vif *bf_vif; struct ieee80211_vif *bss_vif; struct ieee80211_vif *p2p_vif; struct ieee80211_vif *ap_vif; @@ -617,11 +649,6 @@ static void iwl_mvm_power_get_vifs_iterator(void *_data, u8 *mac, if (mvmvif->phy_ctxt) if (mvmvif->phy_ctxt->id < MAX_PHYS) power_iterator->bss_active = true; - - if (mvmvif->bf_data.bf_enabled && - !WARN_ON(power_iterator->bf_vif)) - power_iterator->bf_vif = vif; - break; default: @@ -850,29 +877,9 @@ int iwl_mvm_enable_beacon_filter(struct iwl_mvm *mvm, return _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd, flags, false); } -static int iwl_mvm_update_beacon_abort(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - bool enable) -{ - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_beacon_filter_cmd cmd = { - IWL_BF_CMD_CONFIG_DEFAULTS, - .bf_enable_beacon_filter = cpu_to_le32(1), - }; - - if (!mvmvif->bf_data.bf_enabled) - return 0; - - if (mvm->cur_ucode == IWL_UCODE_WOWLAN) - cmd.ba_escape_timer = cpu_to_le32(IWL_BA_ESCAPE_TIMER_D3); - - mvmvif->bf_data.ba_enabled = enable; - return _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd, 0, false); -} - -int iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - u32 flags) +static int _iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + u32 flags, bool d0i3) { struct iwl_beacon_filter_cmd cmd = {}; struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); @@ -883,12 +890,20 @@ int iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm, ret = iwl_mvm_beacon_filter_send_cmd(mvm, &cmd, flags); - if (!ret) + /* don't change bf_enabled in case of temporary d0i3 configuration */ + if (!ret && !d0i3) mvmvif->bf_data.bf_enabled = false; return ret; } +int iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + u32 flags) +{ + return _iwl_mvm_disable_beacon_filter(mvm, vif, flags, false); +} + static int iwl_mvm_power_set_ps(struct iwl_mvm *mvm) { bool disable_ps; @@ -918,21 +933,26 @@ static int iwl_mvm_power_set_ps(struct iwl_mvm *mvm) } static int iwl_mvm_power_set_ba(struct iwl_mvm *mvm, - struct iwl_power_vifs *vifs) + struct ieee80211_vif *vif) { - struct iwl_mvm_vif *mvmvif; - bool ba_enable; + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_beacon_filter_cmd cmd = { + IWL_BF_CMD_CONFIG_DEFAULTS, + .bf_enable_beacon_filter = cpu_to_le32(1), + }; - if (!vifs->bf_vif) + if (!mvmvif->bf_data.bf_enabled) return 0; - mvmvif = iwl_mvm_vif_from_mac80211(vifs->bf_vif); + if (mvm->cur_ucode == IWL_UCODE_WOWLAN) + cmd.ba_escape_timer = cpu_to_le32(IWL_BA_ESCAPE_TIMER_D3); - ba_enable = !(!mvmvif->pm_enabled || mvm->ps_disabled || - !vifs->bf_vif->bss_conf.ps || - iwl_mvm_vif_low_latency(mvmvif)); + mvmvif->bf_data.ba_enabled = !(!mvmvif->pm_enabled || + mvm->ps_disabled || + !vif->bss_conf.ps || + iwl_mvm_vif_low_latency(mvmvif)); - return iwl_mvm_update_beacon_abort(mvm, vifs->bf_vif, ba_enable); + return _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd, 0, false); } int iwl_mvm_power_update_ps(struct iwl_mvm *mvm) @@ -953,7 +973,10 @@ int iwl_mvm_power_update_ps(struct iwl_mvm *mvm) if (ret) return ret; - return iwl_mvm_power_set_ba(mvm, &vifs); + if (vifs.bss_vif) + return iwl_mvm_power_set_ba(mvm, vifs.bss_vif); + + return 0; } int iwl_mvm_power_update_mac(struct iwl_mvm *mvm) @@ -988,7 +1011,10 @@ int iwl_mvm_power_update_mac(struct iwl_mvm *mvm) return ret; } - return iwl_mvm_power_set_ba(mvm, &vifs); + if (vifs.bss_vif) + return iwl_mvm_power_set_ba(mvm, vifs.bss_vif); + + return 0; } int iwl_mvm_update_d0i3_power_mode(struct iwl_mvm *mvm, @@ -1025,8 +1051,17 @@ int iwl_mvm_update_d0i3_power_mode(struct iwl_mvm *mvm, IWL_BF_CMD_CONFIG_D0I3, .bf_enable_beacon_filter = cpu_to_le32(1), }; - ret = _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd_bf, - flags, true); + /* + * When beacon storing is supported - disable beacon filtering + * altogether - the latest beacon will be sent when exiting d0i3 + */ + if (fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_BEACON_STORING)) + ret = _iwl_mvm_disable_beacon_filter(mvm, vif, flags, + true); + else + ret = _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd_bf, + flags, true); } else { if (mvmvif->bf_data.bf_enabled) ret = iwl_mvm_enable_beacon_filter(mvm, vif, flags); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/quota.c b/drivers/net/wireless/intel/iwlwifi/mvm/quota.c index 0b762b4f8fad..2141db5bff82 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/quota.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/quota.c @@ -7,6 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -74,6 +76,9 @@ struct iwl_mvm_quota_iterator_data { int n_interfaces[MAX_BINDINGS]; int colors[MAX_BINDINGS]; int low_latency[MAX_BINDINGS]; +#ifdef CONFIG_IWLWIFI_DEBUGFS + int dbgfs_min[MAX_BINDINGS]; +#endif int n_low_latency_bindings; struct ieee80211_vif *disabled_vif; }; @@ -129,6 +134,12 @@ static void iwl_mvm_quota_iterator(void *_data, u8 *mac, data->n_interfaces[id]++; +#ifdef CONFIG_IWLWIFI_DEBUGFS + if (mvmvif->dbgfs_quota_min) + data->dbgfs_min[id] = max(data->dbgfs_min[id], + mvmvif->dbgfs_quota_min); +#endif + if (iwl_mvm_vif_low_latency(mvmvif) && !data->low_latency[id]) { data->n_low_latency_bindings++; data->low_latency[id] = true; @@ -259,6 +270,11 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm, if (data.n_interfaces[i] <= 0) cmd.quotas[idx].quota = cpu_to_le32(0); +#ifdef CONFIG_IWLWIFI_DEBUGFS + else if (data.dbgfs_min[i]) + cmd.quotas[idx].quota = + cpu_to_le32(data.dbgfs_min[i] * QUOTA_100 / 100); +#endif else if (data.n_low_latency_bindings == 1 && n_non_lowlat && data.low_latency[i]) /* diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 94caa88df442..6e7e78a37879 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -2062,7 +2062,8 @@ static enum tpc_action rs_get_tpc_action(struct iwl_mvm *mvm, } /* try decreasing first if applicable */ - if (weak != TPC_INVALID) { + if (sr >= RS_PERCENT(IWL_MVM_RS_TPC_SR_NO_INCREASE) && + weak != TPC_INVALID) { if (weak_tpt == IWL_INVALID_VALUE && (strong_tpt == IWL_INVALID_VALUE || current_tpt >= strong_tpt)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 0c073e02fd4c..615dea143d4e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -201,25 +201,22 @@ static void iwl_mvm_get_signal_strength(struct iwl_mvm *mvm, struct iwl_rx_mpdu_desc *desc, struct ieee80211_rx_status *rx_status) { - int energy_a, energy_b, energy_c, max_energy; + int energy_a, energy_b, max_energy; energy_a = desc->energy_a; energy_a = energy_a ? -energy_a : S8_MIN; energy_b = desc->energy_b; energy_b = energy_b ? -energy_b : S8_MIN; - energy_c = desc->energy_c; - energy_c = energy_c ? -energy_c : S8_MIN; max_energy = max(energy_a, energy_b); - max_energy = max(max_energy, energy_c); - IWL_DEBUG_STATS(mvm, "energy In A %d B %d C %d , and max %d\n", - energy_a, energy_b, energy_c, max_energy); + IWL_DEBUG_STATS(mvm, "energy In A %d B %d, and max %d\n", + energy_a, energy_b, max_energy); rx_status->signal = max_energy; rx_status->chains = 0; /* TODO: phy info */ rx_status->chain_signal[0] = energy_a; rx_status->chain_signal[1] = energy_b; - rx_status->chain_signal[2] = energy_c; + rx_status->chain_signal[2] = S8_MIN; } static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr, @@ -294,7 +291,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, struct ieee80211_rx_status *rx_status; struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_rx_mpdu_desc *desc = (void *)pkt->data; - struct ieee80211_hdr *hdr = (void *)(desc + 1); + struct ieee80211_hdr *hdr = (void *)(pkt->data + sizeof(*desc)); u32 len = le16_to_cpu(desc->mpdu_len); u32 rate_n_flags = le32_to_cpu(desc->rate_n_flags); struct ieee80211_sta *sta = NULL; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index ea1e177c2ea1..aa6d8074f63a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -930,8 +930,11 @@ int iwl_mvm_config_scan(struct iwl_mvm *mvm) if (WARN_ON(num_channels > mvm->fw->ucode_capa.n_scan_channels)) return -ENOBUFS; - if (type == mvm->scan_type) + if (type == mvm->scan_type) { + IWL_DEBUG_SCAN(mvm, + "Ignoring UMAC scan config of the same type\n"); return 0; + } cmd_size = sizeof(*scan_config) + mvm->fw->ucode_capa.n_scan_channels; @@ -1109,7 +1112,7 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif, cmd->general_flags = cpu_to_le32(iwl_mvm_scan_umac_flags(mvm, params, vif)); - if (type == IWL_MVM_SCAN_SCHED) + if (type == IWL_MVM_SCAN_SCHED || type == IWL_MVM_SCAN_NETDETECT) cmd->flags = cpu_to_le32(IWL_UMAC_SCAN_FLAG_PREEMPTIVE); if (iwl_mvm_scan_use_ebs(mvm, vif)) @@ -1355,7 +1358,7 @@ int iwl_mvm_sched_scan_start(struct iwl_mvm *mvm, if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_UMAC_SCAN)) { hcmd.id = iwl_cmd_id(SCAN_REQ_UMAC, IWL_ALWAYS_LONG_GROUP, 0); - ret = iwl_mvm_scan_umac(mvm, vif, ¶ms, IWL_MVM_SCAN_SCHED); + ret = iwl_mvm_scan_umac(mvm, vif, ¶ms, type); } else { hcmd.id = SCAN_OFFLOAD_REQUEST_CMD; ret = iwl_mvm_scan_lmac(mvm, vif, ¶ms); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index b556e33658d7..4854e79cbda8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -7,6 +7,7 @@ * * Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * * Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -68,6 +70,18 @@ #include "sta.h" #include "rs.h" +/* + * New version of ADD_STA_sta command added new fields at the end of the + * structure, so sending the size of the relevant API's structure is enough to + * support both API versions. + */ +static inline int iwl_mvm_add_sta_cmd_size(struct iwl_mvm *mvm) +{ + return iwl_mvm_has_new_rx_api(mvm) ? + sizeof(struct iwl_mvm_add_sta_cmd) : + sizeof(struct iwl_mvm_add_sta_cmd_v7); +} + static int iwl_mvm_find_free_sta_id(struct iwl_mvm *mvm, enum nl80211_iftype iftype) { @@ -187,12 +201,13 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, cpu_to_le32(mpdu_dens << STA_FLG_AGG_MPDU_DENS_SHIFT); status = ADD_STA_SUCCESS; - ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(add_sta_cmd), + ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, + iwl_mvm_add_sta_cmd_size(mvm), &add_sta_cmd, &status); if (ret) return ret; - switch (status) { + switch (status & IWL_ADD_STA_STATUS_MASK) { case ADD_STA_SUCCESS: IWL_DEBUG_ASSOC(mvm, "ADD_STA PASSED\n"); break; @@ -357,12 +372,13 @@ int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, cmd.station_flags_msk = cpu_to_le32(STA_FLG_DRAIN_FLOW); status = ADD_STA_SUCCESS; - ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(cmd), + ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, + iwl_mvm_add_sta_cmd_size(mvm), &cmd, &status); if (ret) return ret; - switch (status) { + switch (status & IWL_ADD_STA_STATUS_MASK) { case ADD_STA_SUCCESS: IWL_DEBUG_INFO(mvm, "Frames for staid %d will drained in fw\n", mvmsta->sta_id); @@ -623,12 +639,13 @@ static int iwl_mvm_add_int_sta_common(struct iwl_mvm *mvm, if (addr) memcpy(cmd.addr, addr, ETH_ALEN); - ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(cmd), + ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, + iwl_mvm_add_sta_cmd_size(mvm), &cmd, &status); if (ret) return ret; - switch (status) { + switch (status & IWL_ADD_STA_STATUS_MASK) { case ADD_STA_SUCCESS: IWL_DEBUG_INFO(mvm, "Internal station added.\n"); return 0; @@ -819,7 +836,7 @@ int iwl_mvm_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) #define IWL_MAX_RX_BA_SESSIONS 16 int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, - int tid, u16 ssn, bool start) + int tid, u16 ssn, bool start, u8 buf_size) { struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta); struct iwl_mvm_add_sta_cmd cmd = {}; @@ -839,6 +856,7 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, if (start) { cmd.add_immediate_ba_tid = (u8) tid; cmd.add_immediate_ba_ssn = cpu_to_le16(ssn); + cmd.rx_ba_window = cpu_to_le16((u16)buf_size); } else { cmd.remove_immediate_ba_tid = (u8) tid; } @@ -846,12 +864,13 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, STA_MODIFY_REMOVE_BA_TID; status = ADD_STA_SUCCESS; - ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(cmd), + ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, + iwl_mvm_add_sta_cmd_size(mvm), &cmd, &status); if (ret) return ret; - switch (status) { + switch (status & IWL_ADD_STA_STATUS_MASK) { case ADD_STA_SUCCESS: IWL_DEBUG_INFO(mvm, "RX BA Session %sed in fw\n", start ? "start" : "stopp"); @@ -904,12 +923,13 @@ static int iwl_mvm_sta_tx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, cmd.tid_disable_tx = cpu_to_le16(mvm_sta->tid_disable_agg); status = ADD_STA_SUCCESS; - ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, sizeof(cmd), + ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA, + iwl_mvm_add_sta_cmd_size(mvm), &cmd, &status); if (ret) return ret; - switch (status) { + switch (status & IWL_ADD_STA_STATUS_MASK) { case ADD_STA_SUCCESS: break; default: @@ -1640,7 +1660,8 @@ void iwl_mvm_sta_modify_ps_wake(struct iwl_mvm *mvm, }; int ret; - ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC, sizeof(cmd), &cmd); + ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC, + iwl_mvm_add_sta_cmd_size(mvm), &cmd); if (ret) IWL_ERR(mvm, "Failed to send ADD_STA command (%d)\n", ret); } @@ -1731,7 +1752,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC | CMD_WANT_ASYNC_CALLBACK, - sizeof(cmd), &cmd); + iwl_mvm_add_sta_cmd_size(mvm), &cmd); if (ret) IWL_ERR(mvm, "Failed to send ADD_STA command (%d)\n", ret); } @@ -1766,7 +1787,8 @@ void iwl_mvm_sta_modify_disable_tx(struct iwl_mvm *mvm, }; int ret; - ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC, sizeof(cmd), &cmd); + ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC, + iwl_mvm_add_sta_cmd_size(mvm), &cmd); if (ret) IWL_ERR(mvm, "Failed to send ADD_STA command (%d)\n", ret); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index 39fdf5224e81..e3b9446ee995 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -401,7 +401,7 @@ void iwl_mvm_rx_eosp_notif(struct iwl_mvm *mvm, /* AMPDU */ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, - int tid, u16 ssn, bool start); + int tid, u16 ssn, bool start, u8 buf_size); int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta, u16 tid, u16 *ssn); int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index fb76004eede4..758d05a8c6aa 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -194,12 +194,12 @@ static int iwl_mvm_get_temp_cmd(struct iwl_mvm *mvm) return iwl_mvm_send_cmd_pdu(mvm, cmdid, 0, sizeof(extcmd), &extcmd); } -int iwl_mvm_get_temp(struct iwl_mvm *mvm) +int iwl_mvm_get_temp(struct iwl_mvm *mvm, s32 *temp) { struct iwl_notification_wait wait_temp_notif; static u16 temp_notif[] = { WIDE_ID(PHY_OPS_GROUP, DTS_MEASUREMENT_NOTIF_WIDE) }; - int ret, temp; + int ret; if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_WIDE_CMD_HDR)) temp_notif[0] = DTS_MEASUREMENT_NOTIFICATION; @@ -208,7 +208,7 @@ int iwl_mvm_get_temp(struct iwl_mvm *mvm) iwl_init_notification_wait(&mvm->notif_wait, &wait_temp_notif, temp_notif, ARRAY_SIZE(temp_notif), - iwl_mvm_temp_notif_wait, &temp); + iwl_mvm_temp_notif_wait, temp); ret = iwl_mvm_get_temp_cmd(mvm); if (ret) { @@ -219,12 +219,10 @@ int iwl_mvm_get_temp(struct iwl_mvm *mvm) ret = iwl_wait_notification(&mvm->notif_wait, &wait_temp_notif, IWL_MVM_TEMP_NOTIF_WAIT_TIMEOUT); - if (ret) { + if (ret) IWL_ERR(mvm, "Getting the temperature timed out\n"); - return ret; - } - return temp; + return ret; } static void check_exit_ctkill(struct work_struct *work) @@ -233,6 +231,7 @@ static void check_exit_ctkill(struct work_struct *work) struct iwl_mvm *mvm; u32 duration; s32 temp; + int ret; tt = container_of(work, struct iwl_mvm_tt_mgmt, ct_kill_exit.work); mvm = container_of(tt, struct iwl_mvm, thermal_throttle); @@ -250,13 +249,13 @@ static void check_exit_ctkill(struct work_struct *work) goto reschedule; } - temp = iwl_mvm_get_temp(mvm); + ret = iwl_mvm_get_temp(mvm, &temp); iwl_mvm_unref(mvm, IWL_MVM_REF_CHECK_CTKILL); __iwl_mvm_mac_stop(mvm); - if (temp < 0) + if (ret) goto reschedule; IWL_DEBUG_TEMP(mvm, "NIC temperature: %d\n", temp); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 0914ec2fd574..8a1638ec7e28 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -736,6 +736,37 @@ static void iwl_mvm_hwrate_to_tx_status(u32 rate_n_flags, iwl_mvm_hwrate_to_tx_rate(rate_n_flags, info->band, r); } +static void iwl_mvm_tx_status_check_trigger(struct iwl_mvm *mvm, + u32 status) +{ + struct iwl_fw_dbg_trigger_tlv *trig; + struct iwl_fw_dbg_trigger_tx_status *status_trig; + int i; + + if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_TX_STATUS)) + return; + + trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_TX_STATUS); + status_trig = (void *)trig->data; + + if (!iwl_fw_dbg_trigger_check_stop(mvm, NULL, trig)) + return; + + for (i = 0; i < ARRAY_SIZE(status_trig->statuses); i++) { + /* don't collect on status 0 */ + if (!status_trig->statuses[i].status) + break; + + if (status_trig->statuses[i].status != (status & TX_STATUS_MSK)) + continue; + + iwl_mvm_fw_dbg_collect_trig(mvm, trig, + "Tx status %d was received", + status & TX_STATUS_MSK); + break; + } +} + static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, struct iwl_rx_packet *pkt) { @@ -784,6 +815,8 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, break; } + iwl_mvm_tx_status_check_trigger(mvm, status); + info->status.rates[0].count = tx_resp->failure_frame + 1; iwl_mvm_hwrate_to_tx_status(le32_to_cpu(tx_resp->initial_rate), info); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 3a989f5c20db..59453c176580 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -937,18 +937,16 @@ bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm) } int iwl_mvm_update_low_latency(struct iwl_mvm *mvm, struct ieee80211_vif *vif, - bool value) + bool prev) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); int res; lockdep_assert_held(&mvm->mutex); - if (mvmvif->low_latency == value) + if (iwl_mvm_vif_low_latency(mvmvif) == prev) return 0; - mvmvif->low_latency = value; - res = iwl_mvm_update_quotas(mvm, false, NULL); if (res) return res; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 00335ea6b3eb..753ec6785912 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -7,6 +7,7 @@ * * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -66,6 +67,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> +#include <linux/pm_runtime.h> #include <linux/pci.h> #include <linux/pci-aspm.h> #include <linux/acpi.h> @@ -627,6 +629,15 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_free_drv; + /* if RTPM is in use, enable it in our device */ + if (iwl_trans->runtime_pm_mode != IWL_PLAT_PM_MODE_DISABLED) { + pm_runtime_set_active(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, + iwlwifi_mod_params.d0i3_entry_delay); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_allow(&pdev->dev); + } + return 0; out_free_drv: @@ -693,15 +704,132 @@ static int iwl_pci_resume(struct device *device) return 0; } -static SIMPLE_DEV_PM_OPS(iwl_dev_pm_ops, iwl_pci_suspend, iwl_pci_resume); +int iwl_pci_fw_enter_d0i3(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int ret; + + if (test_bit(STATUS_FW_ERROR, &trans->status)) + return 0; + + set_bit(STATUS_TRANS_GOING_IDLE, &trans->status); + + /* config the fw */ + ret = iwl_op_mode_enter_d0i3(trans->op_mode); + if (ret == 1) { + IWL_DEBUG_RPM(trans, "aborting d0i3 entrance\n"); + clear_bit(STATUS_TRANS_GOING_IDLE, &trans->status); + return -EBUSY; + } + if (ret) + goto err; + + ret = wait_event_timeout(trans_pcie->d0i3_waitq, + test_bit(STATUS_TRANS_IDLE, &trans->status), + msecs_to_jiffies(IWL_TRANS_IDLE_TIMEOUT)); + if (!ret) { + IWL_ERR(trans, "Timeout entering D0i3\n"); + ret = -ETIMEDOUT; + goto err; + } + + clear_bit(STATUS_TRANS_GOING_IDLE, &trans->status); + + return 0; +err: + clear_bit(STATUS_TRANS_GOING_IDLE, &trans->status); + iwl_trans_fw_error(trans); + return ret; +} + +int iwl_pci_fw_exit_d0i3(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int ret; + + /* sometimes a D0i3 entry is not followed through */ + if (!test_bit(STATUS_TRANS_IDLE, &trans->status)) + return 0; + + /* config the fw */ + ret = iwl_op_mode_exit_d0i3(trans->op_mode); + if (ret) + goto err; + + /* we clear STATUS_TRANS_IDLE only when D0I3_END command is completed */ + + ret = wait_event_timeout(trans_pcie->d0i3_waitq, + !test_bit(STATUS_TRANS_IDLE, &trans->status), + msecs_to_jiffies(IWL_TRANS_IDLE_TIMEOUT)); + if (!ret) { + IWL_ERR(trans, "Timeout exiting D0i3\n"); + ret = -ETIMEDOUT; + goto err; + } + + return 0; +err: + clear_bit(STATUS_TRANS_IDLE, &trans->status); + iwl_trans_fw_error(trans); + return ret; +} + +#ifdef CONFIG_IWLWIFI_PCIE_RTPM +static int iwl_pci_runtime_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct iwl_trans *trans = pci_get_drvdata(pdev); + int ret; + + IWL_DEBUG_RPM(trans, "entering runtime suspend\n"); + + if (test_bit(STATUS_DEVICE_ENABLED, &trans->status)) { + ret = iwl_pci_fw_enter_d0i3(trans); + if (ret < 0) + return ret; + } + + trans->system_pm_mode = IWL_PLAT_PM_MODE_D0I3; + + iwl_trans_d3_suspend(trans, false, false); + + return 0; +} + +static int iwl_pci_runtime_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct iwl_trans *trans = pci_get_drvdata(pdev); + enum iwl_d3_status d3_status; + + IWL_DEBUG_RPM(trans, "exiting runtime suspend (resume)\n"); + + iwl_trans_d3_resume(trans, &d3_status, false, false); + + if (test_bit(STATUS_DEVICE_ENABLED, &trans->status)) + return iwl_pci_fw_exit_d0i3(trans); + + return 0; +} +#endif /* CONFIG_IWLWIFI_PCIE_RTPM */ + +static const struct dev_pm_ops iwl_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(iwl_pci_suspend, + iwl_pci_resume) +#ifdef CONFIG_IWLWIFI_PCIE_RTPM + SET_RUNTIME_PM_OPS(iwl_pci_runtime_suspend, + iwl_pci_runtime_resume, + NULL) +#endif /* CONFIG_IWLWIFI_PCIE_RTPM */ +}; #define IWL_PM_OPS (&iwl_dev_pm_ops) -#else +#else /* CONFIG_PM_SLEEP */ #define IWL_PM_OPS NULL -#endif +#endif /* CONFIG_PM_SLEEP */ static struct pci_driver iwl_pci_driver = { .name = DRV_NAME, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 73c95594eabe..542bbc5e2b24 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -2,6 +2,7 @@ * * Copyright(c) 2003 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. @@ -56,17 +57,23 @@ #define RX_NUM_QUEUES 1 #define RX_POST_REQ_ALLOC 2 #define RX_CLAIM_REQ_ALLOC 8 -#define RX_POOL_SIZE ((RX_CLAIM_REQ_ALLOC - RX_POST_REQ_ALLOC) * RX_NUM_QUEUES) -#define RX_LOW_WATERMARK 8 +#define RX_PENDING_WATERMARK 16 struct iwl_host_cmd; /*This file includes the declaration that are internal to the * trans_pcie layer */ +/** + * struct iwl_rx_mem_buffer + * @page_dma: bus address of rxb page + * @page: driver's pointer to the rxb page + * @vid: index of this rxb in the global table + */ struct iwl_rx_mem_buffer { dma_addr_t page_dma; struct page *page; + u16 vid; struct list_head list; }; @@ -90,8 +97,12 @@ struct isr_statistics { /** * struct iwl_rxq - Rx queue - * @bd: driver's pointer to buffer of receive buffer descriptors (rbd) + * @id: queue index + * @bd: driver's pointer to buffer of receive buffer descriptors (rbd). + * Address size is 32 bit in pre-9000 devices and 64 bit in 9000 devices. * @bd_dma: bus address of buffer of receive buffer descriptors (rbd) + * @ubd: driver's pointer to buffer of used receive buffer descriptors (rbd) + * @ubd_dma: physical address of buffer of used receive buffer descriptors (rbd) * @read: Shared index to newest available Rx buffer * @write: Shared index to oldest written Rx packet * @free_count: Number of pre-allocated buffers in rx_free @@ -103,32 +114,34 @@ struct isr_statistics { * @rb_stts: driver's pointer to receive buffer status * @rb_stts_dma: bus address of receive buffer status * @lock: - * @pool: initial pool of iwl_rx_mem_buffer for the queue - * @queue: actual rx queue + * @queue: actual rx queue. Not used for multi-rx queue. * * NOTE: rx_free and rx_used are used as a FIFO for iwl_rx_mem_buffers */ struct iwl_rxq { - __le32 *bd; + int id; + void *bd; dma_addr_t bd_dma; + __le32 *used_bd; + dma_addr_t used_bd_dma; u32 read; u32 write; u32 free_count; u32 used_count; u32 write_actual; + u32 queue_size; struct list_head rx_free; struct list_head rx_used; bool need_update; struct iwl_rb_status *rb_stts; dma_addr_t rb_stts_dma; spinlock_t lock; - struct iwl_rx_mem_buffer pool[RX_QUEUE_SIZE]; + struct napi_struct napi; struct iwl_rx_mem_buffer *queue[RX_QUEUE_SIZE]; }; /** * struct iwl_rb_allocator - Rx allocator - * @pool: initial pool of allocator * @req_pending: number of requests the allcator had not processed yet * @req_ready: number of requests honored and ready for claiming * @rbd_allocated: RBDs with pages allocated and ready to be handled to @@ -140,7 +153,6 @@ struct iwl_rxq { * @rx_alloc: work struct for background calls */ struct iwl_rb_allocator { - struct iwl_rx_mem_buffer pool[RX_POOL_SIZE]; atomic_t req_pending; atomic_t req_ready; struct list_head rbd_allocated; @@ -280,6 +292,7 @@ struct iwl_txq { bool ampdu; bool block; unsigned long wd_timeout; + struct sk_buff_head overflow_q; }; static inline dma_addr_t @@ -297,6 +310,8 @@ struct iwl_tso_hdr_page { /** * struct iwl_trans_pcie - PCIe transport specific data * @rxq: all the RX queue data + * @rx_pool: initial pool of iwl_rx_mem_buffer for all the queues + * @global_table: table mapping received VID from hw to rxb * @rba: allocator for RX replenishing * @drv - pointer to iwl_drv * @trans: pointer to the generic transport area @@ -323,13 +338,14 @@ struct iwl_tso_hdr_page { * @fw_mon_size: size of the buffer for the firmware monitor */ struct iwl_trans_pcie { - struct iwl_rxq rxq; + struct iwl_rxq *rxq; + struct iwl_rx_mem_buffer rx_pool[MQ_RX_POOL_SIZE]; + struct iwl_rx_mem_buffer *global_table[MQ_RX_TABLE_SIZE]; struct iwl_rb_allocator rba; struct iwl_trans *trans; struct iwl_drv *drv; struct net_device napi_dev; - struct napi_struct napi; struct __percpu iwl_tso_hdr_page *tso_hdr_page; @@ -359,6 +375,7 @@ struct iwl_trans_pcie { bool ucode_write_complete; wait_queue_head_t ucode_write_waitq; wait_queue_head_t wait_command_queue; + wait_queue_head_t d0i3_waitq; u8 cmd_queue; u8 cmd_fifo; @@ -588,4 +605,7 @@ static inline int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans) } #endif +int iwl_pci_fw_exit_d0i3(struct iwl_trans *trans); +int iwl_pci_fw_enter_d0i3(struct iwl_trans *trans); + #endif /* __iwl_trans_int_pcie_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 152cf9ad9566..07973ef826c1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -2,6 +2,7 @@ * * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. @@ -140,8 +141,8 @@ */ static int iwl_rxq_space(const struct iwl_rxq *rxq) { - /* Make sure RX_QUEUE_SIZE is a power of 2 */ - BUILD_BUG_ON(RX_QUEUE_SIZE & (RX_QUEUE_SIZE - 1)); + /* Make sure rx queue size is a power of 2 */ + WARN_ON(rxq->queue_size & (rxq->queue_size - 1)); /* * There can be up to (RX_QUEUE_SIZE - 1) free slots, to avoid ambiguity @@ -149,7 +150,7 @@ static int iwl_rxq_space(const struct iwl_rxq *rxq) * The following is equivalent to modulo by RX_QUEUE_SIZE and is well * defined for negative dividends. */ - return (rxq->read - rxq->write - 1) & (RX_QUEUE_SIZE - 1); + return (rxq->read - rxq->write - 1) & (rxq->queue_size - 1); } /* @@ -160,6 +161,12 @@ static inline __le32 iwl_pcie_dma_addr2rbd_ptr(dma_addr_t dma_addr) return cpu_to_le32((u32)(dma_addr >> 8)); } +static void iwl_pcie_write_prph_64(struct iwl_trans *trans, u64 ofs, u64 val) +{ + iwl_write_prph(trans, ofs, val & 0xffffffff); + iwl_write_prph(trans, ofs + 4, val >> 32); +} + /* * iwl_pcie_rx_stop - stops the Rx DMA */ @@ -173,10 +180,9 @@ int iwl_pcie_rx_stop(struct iwl_trans *trans) /* * iwl_pcie_rxq_inc_wr_ptr - Update the write pointer for the RX queue */ -static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans) +static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans, + struct iwl_rxq *rxq) { - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; u32 reg; lockdep_assert_held(&rxq->lock); @@ -201,24 +207,73 @@ static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans) } rxq->write_actual = round_down(rxq->write, 8); - iwl_write32(trans, FH_RSCSR_CHNL0_WPTR, rxq->write_actual); + if (trans->cfg->mq_rx_supported) + iwl_write_prph(trans, RFH_Q_FRBDCB_WIDX(rxq->id), + rxq->write_actual); + else + iwl_write32(trans, FH_RSCSR_CHNL0_WPTR, rxq->write_actual); } static void iwl_pcie_rxq_check_wrptr(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; + int i; - spin_lock(&rxq->lock); + for (i = 0; i < trans->num_rx_queues; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i]; + + if (!rxq->need_update) + continue; + spin_lock(&rxq->lock); + iwl_pcie_rxq_inc_wr_ptr(trans, rxq); + rxq->need_update = false; + spin_unlock(&rxq->lock); + } +} + +static void iwl_pcie_rxq_mq_restock(struct iwl_trans *trans, + struct iwl_rxq *rxq) +{ + struct iwl_rx_mem_buffer *rxb; + + /* + * If the device isn't enabled - no need to try to add buffers... + * This can happen when we stop the device and still have an interrupt + * pending. We stop the APM before we sync the interrupts because we + * have to (see comment there). On the other hand, since the APM is + * stopped, we cannot access the HW (in particular not prph). + * So don't try to restock if the APM has been already stopped. + */ + if (!test_bit(STATUS_DEVICE_ENABLED, &trans->status)) + return; - if (!rxq->need_update) - goto exit_unlock; + spin_lock(&rxq->lock); + while (rxq->free_count) { + __le64 *bd = (__le64 *)rxq->bd; - iwl_pcie_rxq_inc_wr_ptr(trans); - rxq->need_update = false; + /* Get next free Rx buffer, remove from free list */ + rxb = list_first_entry(&rxq->rx_free, struct iwl_rx_mem_buffer, + list); + list_del(&rxb->list); - exit_unlock: + /* 12 first bits are expected to be empty */ + WARN_ON(rxb->page_dma & DMA_BIT_MASK(12)); + /* Point to Rx buffer via next RBD in circular buffer */ + bd[rxq->write] = cpu_to_le64(rxb->page_dma | rxb->vid); + rxq->write = (rxq->write + 1) & MQ_RX_TABLE_MASK; + rxq->free_count--; + } spin_unlock(&rxq->lock); + + /* + * If we've added more space for the firmware to place data, tell it. + * Increment device's write pointer in multiples of 8. + */ + if (rxq->write_actual != (rxq->write & ~0x7)) { + spin_lock(&rxq->lock); + iwl_pcie_rxq_inc_wr_ptr(trans, rxq); + spin_unlock(&rxq->lock); + } } /* @@ -232,10 +287,8 @@ static void iwl_pcie_rxq_check_wrptr(struct iwl_trans *trans) * also updates the memory address in the firmware to reference the new * target buffer. */ -static void iwl_pcie_rxq_restock(struct iwl_trans *trans) +static void iwl_pcie_rxq_restock(struct iwl_trans *trans, struct iwl_rxq *rxq) { - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_rx_mem_buffer *rxb; /* @@ -251,6 +304,7 @@ static void iwl_pcie_rxq_restock(struct iwl_trans *trans) spin_lock(&rxq->lock); while ((iwl_rxq_space(rxq) > 0) && (rxq->free_count)) { + __le32 *bd = (__le32 *)rxq->bd; /* The overwritten rxb must be a used one */ rxb = rxq->queue[rxq->write]; BUG_ON(rxb && rxb->page); @@ -261,7 +315,7 @@ static void iwl_pcie_rxq_restock(struct iwl_trans *trans) list_del(&rxb->list); /* Point to Rx buffer via next RBD in circular buffer */ - rxq->bd[rxq->write] = iwl_pcie_dma_addr2rbd_ptr(rxb->page_dma); + bd[rxq->write] = iwl_pcie_dma_addr2rbd_ptr(rxb->page_dma); rxq->queue[rxq->write] = rxb; rxq->write = (rxq->write + 1) & RX_QUEUE_MASK; rxq->free_count--; @@ -272,7 +326,7 @@ static void iwl_pcie_rxq_restock(struct iwl_trans *trans) * Increment device's write pointer in multiples of 8. */ if (rxq->write_actual != (rxq->write & ~0x7)) { spin_lock(&rxq->lock); - iwl_pcie_rxq_inc_wr_ptr(trans); + iwl_pcie_rxq_inc_wr_ptr(trans, rxq); spin_unlock(&rxq->lock); } } @@ -285,13 +339,9 @@ static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans, gfp_t priority) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct page *page; gfp_t gfp_mask = priority; - if (rxq->free_count > RX_LOW_WATERMARK) - gfp_mask |= __GFP_NOWARN; - if (trans_pcie->rx_page_order > 0) gfp_mask |= __GFP_COMP; @@ -301,16 +351,13 @@ static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans, if (net_ratelimit()) IWL_DEBUG_INFO(trans, "alloc_pages failed, order: %d\n", trans_pcie->rx_page_order); - /* Issue an error if the hardware has consumed more than half - * of its free buffer list and we don't have enough - * pre-allocated buffers. + /* + * Issue an error if we don't have enough pre-allocated + * buffers. ` */ - if (rxq->free_count <= RX_LOW_WATERMARK && - iwl_rxq_space(rxq) > (RX_QUEUE_SIZE / 2) && - net_ratelimit()) + if (!(gfp_mask & __GFP_NOWARN) && net_ratelimit()) IWL_CRIT(trans, - "Failed to alloc_pages with GFP_KERNEL. Only %u free buffers remaining.\n", - rxq->free_count); + "Failed to alloc_pages\n"); return NULL; } return page; @@ -325,10 +372,10 @@ static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans, * iwl_pcie_rxq_restock. The latter function will update the HW to use the newly * allocated buffers. */ -static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority) +static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority, + struct iwl_rxq *rxq) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_rx_mem_buffer *rxb; struct page *page; @@ -372,10 +419,6 @@ static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority) __free_pages(page, trans_pcie->rx_page_order); return; } - /* dma address must be no more than 36 bits */ - BUG_ON(rxb->page_dma & ~DMA_BIT_MASK(36)); - /* and also 256 byte aligned! */ - BUG_ON(rxb->page_dma & DMA_BIT_MASK(8)); spin_lock(&rxq->lock); @@ -386,41 +429,24 @@ static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority) } } -static void iwl_pcie_rxq_free_rbs(struct iwl_trans *trans) +static void iwl_pcie_free_rbs_pool(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; int i; - lockdep_assert_held(&rxq->lock); - - for (i = 0; i < RX_QUEUE_SIZE; i++) { - if (!rxq->pool[i].page) + for (i = 0; i < MQ_RX_POOL_SIZE; i++) { + if (!trans_pcie->rx_pool[i].page) continue; - dma_unmap_page(trans->dev, rxq->pool[i].page_dma, + dma_unmap_page(trans->dev, trans_pcie->rx_pool[i].page_dma, PAGE_SIZE << trans_pcie->rx_page_order, DMA_FROM_DEVICE); - __free_pages(rxq->pool[i].page, trans_pcie->rx_page_order); - rxq->pool[i].page = NULL; + __free_pages(trans_pcie->rx_pool[i].page, + trans_pcie->rx_page_order); + trans_pcie->rx_pool[i].page = NULL; } } /* - * iwl_pcie_rx_replenish - Move all used buffers from rx_used to rx_free - * - * When moving to rx_free an page is allocated for the slot. - * - * Also restock the Rx queue via iwl_pcie_rxq_restock. - * This is called only during initialization - */ -static void iwl_pcie_rx_replenish(struct iwl_trans *trans) -{ - iwl_pcie_rxq_alloc_rbs(trans, GFP_KERNEL); - - iwl_pcie_rxq_restock(trans); -} - -/* * iwl_pcie_rx_allocator - Allocates pages in the background for RX queues * * Allocates for each received request 8 pages @@ -444,6 +470,11 @@ static void iwl_pcie_rx_allocator(struct iwl_trans *trans) while (pending) { int i; struct list_head local_allocated; + gfp_t gfp_mask = GFP_KERNEL; + + /* Do not post a warning if there are only a few requests */ + if (pending < RX_PENDING_WATERMARK) + gfp_mask |= __GFP_NOWARN; INIT_LIST_HEAD(&local_allocated); @@ -463,7 +494,7 @@ static void iwl_pcie_rx_allocator(struct iwl_trans *trans) BUG_ON(rxb->page); /* Alloc a new receive buffer */ - page = iwl_pcie_rx_alloc_page(trans, GFP_KERNEL); + page = iwl_pcie_rx_alloc_page(trans, gfp_mask); if (!page) continue; rxb->page = page; @@ -477,10 +508,6 @@ static void iwl_pcie_rx_allocator(struct iwl_trans *trans) __free_pages(page, trans_pcie->rx_page_order); continue; } - /* dma address must be no more than 36 bits */ - BUG_ON(rxb->page_dma & ~DMA_BIT_MASK(36)); - /* and also 256 byte aligned! */ - BUG_ON(rxb->page_dma & DMA_BIT_MASK(8)); /* move the allocated entry to the out list */ list_move(&rxb->list, &local_allocated); @@ -561,38 +588,83 @@ static void iwl_pcie_rx_allocator_work(struct work_struct *data) static int iwl_pcie_rx_alloc(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_rb_allocator *rba = &trans_pcie->rba; struct device *dev = trans->dev; + int i; + int free_size = trans->cfg->mq_rx_supported ? sizeof(__le64) : + sizeof(__le32); + + if (WARN_ON(trans_pcie->rxq)) + return -EINVAL; - memset(&trans_pcie->rxq, 0, sizeof(trans_pcie->rxq)); + trans_pcie->rxq = kcalloc(trans->num_rx_queues, sizeof(struct iwl_rxq), + GFP_KERNEL); + if (!trans_pcie->rxq) + return -EINVAL; - spin_lock_init(&rxq->lock); spin_lock_init(&rba->lock); - if (WARN_ON(rxq->bd || rxq->rb_stts)) - return -EINVAL; + for (i = 0; i < trans->num_rx_queues; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i]; - /* Allocate the circular buffer of Read Buffer Descriptors (RBDs) */ - rxq->bd = dma_zalloc_coherent(dev, sizeof(__le32) * RX_QUEUE_SIZE, - &rxq->bd_dma, GFP_KERNEL); - if (!rxq->bd) - goto err_bd; + spin_lock_init(&rxq->lock); + if (trans->cfg->mq_rx_supported) + rxq->queue_size = MQ_RX_TABLE_SIZE; + else + rxq->queue_size = RX_QUEUE_SIZE; - /*Allocate the driver's pointer to receive buffer status */ - rxq->rb_stts = dma_zalloc_coherent(dev, sizeof(*rxq->rb_stts), - &rxq->rb_stts_dma, GFP_KERNEL); - if (!rxq->rb_stts) - goto err_rb_stts; + /* + * Allocate the circular buffer of Read Buffer Descriptors + * (RBDs) + */ + rxq->bd = dma_zalloc_coherent(dev, + free_size * rxq->queue_size, + &rxq->bd_dma, GFP_KERNEL); + if (!rxq->bd) + goto err; + + if (trans->cfg->mq_rx_supported) { + rxq->used_bd = dma_zalloc_coherent(dev, + sizeof(__le32) * + rxq->queue_size, + &rxq->used_bd_dma, + GFP_KERNEL); + if (!rxq->used_bd) + goto err; + } + /*Allocate the driver's pointer to receive buffer status */ + rxq->rb_stts = dma_zalloc_coherent(dev, sizeof(*rxq->rb_stts), + &rxq->rb_stts_dma, + GFP_KERNEL); + if (!rxq->rb_stts) + goto err; + } return 0; -err_rb_stts: - dma_free_coherent(dev, sizeof(__le32) * RX_QUEUE_SIZE, - rxq->bd, rxq->bd_dma); - rxq->bd_dma = 0; - rxq->bd = NULL; -err_bd: +err: + for (i = 0; i < trans->num_rx_queues; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i]; + + if (rxq->bd) + dma_free_coherent(dev, free_size * rxq->queue_size, + rxq->bd, rxq->bd_dma); + rxq->bd_dma = 0; + rxq->bd = NULL; + + if (rxq->rb_stts) + dma_free_coherent(trans->dev, + sizeof(struct iwl_rb_status), + rxq->rb_stts, rxq->rb_stts_dma); + + if (rxq->used_bd) + dma_free_coherent(dev, sizeof(__le32) * rxq->queue_size, + rxq->used_bd, rxq->used_bd_dma); + rxq->used_bd_dma = 0; + rxq->used_bd = NULL; + } + kfree(trans_pcie->rxq); + return -ENOMEM; } @@ -659,65 +731,103 @@ static void iwl_pcie_rx_hw_init(struct iwl_trans *trans, struct iwl_rxq *rxq) iwl_set_bit(trans, CSR_INT_COALESCING, IWL_HOST_INT_OPER_MODE); } -static void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq) +static void iwl_pcie_rx_mq_hw_init(struct iwl_trans *trans) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u32 rb_size, enabled = 0; int i; - lockdep_assert_held(&rxq->lock); - - INIT_LIST_HEAD(&rxq->rx_free); - INIT_LIST_HEAD(&rxq->rx_used); - rxq->free_count = 0; - rxq->used_count = 0; + switch (trans_pcie->rx_buf_size) { + case IWL_AMSDU_4K: + rb_size = RFH_RXF_DMA_RB_SIZE_4K; + break; + case IWL_AMSDU_8K: + rb_size = RFH_RXF_DMA_RB_SIZE_8K; + break; + case IWL_AMSDU_12K: + rb_size = RFH_RXF_DMA_RB_SIZE_12K; + break; + default: + WARN_ON(1); + rb_size = RFH_RXF_DMA_RB_SIZE_4K; + } - for (i = 0; i < RX_QUEUE_SIZE; i++) - list_add(&rxq->pool[i].list, &rxq->rx_used); -} + /* Stop Rx DMA */ + iwl_write_prph(trans, RFH_RXF_DMA_CFG, 0); + /* disable free amd used rx queue operation */ + iwl_write_prph(trans, RFH_RXF_RXQ_ACTIVE, 0); + + for (i = 0; i < trans->num_rx_queues; i++) { + /* Tell device where to find RBD free table in DRAM */ + iwl_pcie_write_prph_64(trans, RFH_Q_FRBDCB_BA_LSB(i), + (u64)(trans_pcie->rxq[i].bd_dma)); + /* Tell device where to find RBD used table in DRAM */ + iwl_pcie_write_prph_64(trans, RFH_Q_URBDCB_BA_LSB(i), + (u64)(trans_pcie->rxq[i].used_bd_dma)); + /* Tell device where in DRAM to update its Rx status */ + iwl_pcie_write_prph_64(trans, RFH_Q_URBD_STTS_WPTR_LSB(i), + trans_pcie->rxq[i].rb_stts_dma); + /* Reset device indice tables */ + iwl_write_prph(trans, RFH_Q_FRBDCB_WIDX(i), 0); + iwl_write_prph(trans, RFH_Q_FRBDCB_RIDX(i), 0); + iwl_write_prph(trans, RFH_Q_URBDCB_WIDX(i), 0); + + enabled |= BIT(i) | BIT(i + 16); + } -static void iwl_pcie_rx_init_rba(struct iwl_rb_allocator *rba) -{ - int i; + /* restock default queue */ + iwl_pcie_rxq_mq_restock(trans, &trans_pcie->rxq[0]); - lockdep_assert_held(&rba->lock); + /* + * Enable Rx DMA + * Single frame mode + * Rx buffer size 4 or 8k or 12k + * Min RB size 4 or 8 + * 512 RBDs + */ + iwl_write_prph(trans, RFH_RXF_DMA_CFG, + RFH_DMA_EN_ENABLE_VAL | + rb_size | RFH_RXF_DMA_SINGLE_FRAME_MASK | + RFH_RXF_DMA_MIN_RB_4_8 | + RFH_RXF_DMA_RBDCB_SIZE_512); - INIT_LIST_HEAD(&rba->rbd_allocated); - INIT_LIST_HEAD(&rba->rbd_empty); + iwl_write_prph(trans, RFH_GEN_CFG, RFH_GEN_CFG_RFH_DMA_SNOOP | + RFH_GEN_CFG_SERVICE_DMA_SNOOP); + iwl_write_prph(trans, RFH_RXF_RXQ_ACTIVE, enabled); - for (i = 0; i < RX_POOL_SIZE; i++) - list_add(&rba->pool[i].list, &rba->rbd_empty); + /* Set interrupt coalescing timer to default (2048 usecs) */ + iwl_write8(trans, CSR_INT_COALESCING, IWL_HOST_INT_TIMEOUT_DEF); } -static void iwl_pcie_rx_free_rba(struct iwl_trans *trans) +static void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq) { - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rb_allocator *rba = &trans_pcie->rba; - int i; + lockdep_assert_held(&rxq->lock); - lockdep_assert_held(&rba->lock); + INIT_LIST_HEAD(&rxq->rx_free); + INIT_LIST_HEAD(&rxq->rx_used); + rxq->free_count = 0; + rxq->used_count = 0; +} - for (i = 0; i < RX_POOL_SIZE; i++) { - if (!rba->pool[i].page) - continue; - dma_unmap_page(trans->dev, rba->pool[i].page_dma, - PAGE_SIZE << trans_pcie->rx_page_order, - DMA_FROM_DEVICE); - __free_pages(rba->pool[i].page, trans_pcie->rx_page_order); - rba->pool[i].page = NULL; - } +static int iwl_pcie_dummy_napi_poll(struct napi_struct *napi, int budget) +{ + WARN_ON(1); + return 0; } int iwl_pcie_rx_init(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; + struct iwl_rxq *def_rxq; struct iwl_rb_allocator *rba = &trans_pcie->rba; - int i, err; + int i, err, num_rbds, allocator_pool_size; - if (!rxq->bd) { + if (!trans_pcie->rxq) { err = iwl_pcie_rx_alloc(trans); if (err) return err; } + def_rxq = trans_pcie->rxq; if (!rba->alloc_wq) rba->alloc_wq = alloc_workqueue("rb_allocator", WQ_HIGHPRI | WQ_UNBOUND, 1); @@ -726,34 +836,68 @@ int iwl_pcie_rx_init(struct iwl_trans *trans) spin_lock(&rba->lock); atomic_set(&rba->req_pending, 0); atomic_set(&rba->req_ready, 0); - /* free all first - we might be reconfigured for a different size */ - iwl_pcie_rx_free_rba(trans); - iwl_pcie_rx_init_rba(rba); + INIT_LIST_HEAD(&rba->rbd_allocated); + INIT_LIST_HEAD(&rba->rbd_empty); spin_unlock(&rba->lock); - spin_lock(&rxq->lock); - /* free all first - we might be reconfigured for a different size */ - iwl_pcie_rxq_free_rbs(trans); - iwl_pcie_rx_init_rxb_lists(rxq); + iwl_pcie_free_rbs_pool(trans); for (i = 0; i < RX_QUEUE_SIZE; i++) - rxq->queue[i] = NULL; + def_rxq->queue[i] = NULL; - /* Set us so that we have processed and used all buffers, but have - * not restocked the Rx queue with fresh buffers */ - rxq->read = rxq->write = 0; - rxq->write_actual = 0; - memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts)); - spin_unlock(&rxq->lock); + for (i = 0; i < trans->num_rx_queues; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i]; - iwl_pcie_rx_replenish(trans); + rxq->id = i; - iwl_pcie_rx_hw_init(trans, rxq); + spin_lock(&rxq->lock); + /* + * Set read write pointer to reflect that we have processed + * and used all buffers, but have not restocked the Rx queue + * with fresh buffers + */ + rxq->read = 0; + rxq->write = 0; + rxq->write_actual = 0; + memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts)); - spin_lock(&rxq->lock); - iwl_pcie_rxq_inc_wr_ptr(trans); - spin_unlock(&rxq->lock); + iwl_pcie_rx_init_rxb_lists(rxq); + + if (!rxq->napi.poll) + netif_napi_add(&trans_pcie->napi_dev, &rxq->napi, + iwl_pcie_dummy_napi_poll, 64); + + spin_unlock(&rxq->lock); + } + + /* move the pool to the default queue and allocator ownerships */ + num_rbds = trans->cfg->mq_rx_supported ? + MQ_RX_POOL_SIZE : RX_QUEUE_SIZE; + allocator_pool_size = trans->num_rx_queues * + (RX_CLAIM_REQ_ALLOC - RX_POST_REQ_ALLOC); + for (i = 0; i < num_rbds; i++) { + struct iwl_rx_mem_buffer *rxb = &trans_pcie->rx_pool[i]; + + if (i < allocator_pool_size) + list_add(&rxb->list, &rba->rbd_empty); + else + list_add(&rxb->list, &def_rxq->rx_used); + trans_pcie->global_table[i] = rxb; + rxb->vid = (u16)i; + } + + iwl_pcie_rxq_alloc_rbs(trans, GFP_KERNEL, def_rxq); + if (trans->cfg->mq_rx_supported) { + iwl_pcie_rx_mq_hw_init(trans); + } else { + iwl_pcie_rxq_restock(trans, def_rxq); + iwl_pcie_rx_hw_init(trans, def_rxq); + } + + spin_lock(&def_rxq->lock); + iwl_pcie_rxq_inc_wr_ptr(trans, def_rxq); + spin_unlock(&def_rxq->lock); return 0; } @@ -761,12 +905,16 @@ int iwl_pcie_rx_init(struct iwl_trans *trans) void iwl_pcie_rx_free(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_rb_allocator *rba = &trans_pcie->rba; + int free_size = trans->cfg->mq_rx_supported ? sizeof(__le64) : + sizeof(__le32); + int i; - /*if rxq->bd is NULL, it means that nothing has been allocated, - * exit now */ - if (!rxq->bd) { + /* + * if rxq is NULL, it means that nothing has been allocated, + * exit now + */ + if (!trans_pcie->rxq) { IWL_DEBUG_INFO(trans, "Free NULL rx context\n"); return; } @@ -777,27 +925,37 @@ void iwl_pcie_rx_free(struct iwl_trans *trans) rba->alloc_wq = NULL; } - spin_lock(&rba->lock); - iwl_pcie_rx_free_rba(trans); - spin_unlock(&rba->lock); - - spin_lock(&rxq->lock); - iwl_pcie_rxq_free_rbs(trans); - spin_unlock(&rxq->lock); - - dma_free_coherent(trans->dev, sizeof(__le32) * RX_QUEUE_SIZE, - rxq->bd, rxq->bd_dma); - rxq->bd_dma = 0; - rxq->bd = NULL; - - if (rxq->rb_stts) - dma_free_coherent(trans->dev, - sizeof(struct iwl_rb_status), - rxq->rb_stts, rxq->rb_stts_dma); - else - IWL_DEBUG_INFO(trans, "Free rxq->rb_stts which is NULL\n"); - rxq->rb_stts_dma = 0; - rxq->rb_stts = NULL; + iwl_pcie_free_rbs_pool(trans); + + for (i = 0; i < trans->num_rx_queues; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i]; + + if (rxq->bd) + dma_free_coherent(trans->dev, + free_size * rxq->queue_size, + rxq->bd, rxq->bd_dma); + rxq->bd_dma = 0; + rxq->bd = NULL; + + if (rxq->rb_stts) + dma_free_coherent(trans->dev, + sizeof(struct iwl_rb_status), + rxq->rb_stts, rxq->rb_stts_dma); + else + IWL_DEBUG_INFO(trans, + "Free rxq->rb_stts which is NULL\n"); + + if (rxq->used_bd) + dma_free_coherent(trans->dev, + sizeof(__le32) * rxq->queue_size, + rxq->used_bd, rxq->used_bd_dma); + rxq->used_bd_dma = 0; + rxq->used_bd = NULL; + + if (rxq->napi.poll) + netif_napi_del(&rxq->napi); + } + kfree(trans_pcie->rxq); } /* @@ -841,11 +999,11 @@ static void iwl_pcie_rx_reuse_rbd(struct iwl_trans *trans, } static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, + struct iwl_rxq *rxq, struct iwl_rx_mem_buffer *rxb, bool emergency) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue]; bool page_stolen = false; int max_len = PAGE_SIZE << trans_pcie->rx_page_order; @@ -911,7 +1069,12 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, index = SEQ_TO_INDEX(sequence); cmd_index = get_cmd_index(&txq->q, index); - iwl_op_mode_rx(trans->op_mode, &trans_pcie->napi, &rxcb); + if (rxq->id == 0) + iwl_op_mode_rx(trans->op_mode, &rxq->napi, + &rxcb); + else + iwl_op_mode_rx_rss(trans->op_mode, &rxq->napi, + &rxcb, rxq->id); if (reclaim) { kzfree(txq->entries[cmd_index].free_buf); @@ -975,7 +1138,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, static void iwl_pcie_rx_handle(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; + struct iwl_rxq *rxq = &trans_pcie->rxq[0]; u32 r, i, j, count = 0; bool emergency = false; @@ -993,16 +1156,26 @@ restart: while (i != r) { struct iwl_rx_mem_buffer *rxb; - if (unlikely(rxq->used_count == RX_QUEUE_SIZE / 2)) + if (unlikely(rxq->used_count == rxq->queue_size / 2)) emergency = true; - rxb = rxq->queue[i]; - rxq->queue[i] = NULL; + if (trans->cfg->mq_rx_supported) { + /* + * used_bd is a 32 bit but only 12 are used to retrieve + * the vid + */ + u16 vid = (u16)le32_to_cpu(rxq->used_bd[i]); + + rxb = trans_pcie->global_table[vid]; + } else { + rxb = rxq->queue[i]; + rxq->queue[i] = NULL; + } IWL_DEBUG_RX(trans, "rxbuf: HW = %d, SW = %d\n", r, i); - iwl_pcie_rx_handle_rb(trans, rxb, emergency); + iwl_pcie_rx_handle_rb(trans, rxq, rxb, emergency); - i = (i + 1) & RX_QUEUE_MASK; + i = (i + 1) & (rxq->queue_size - 1); /* If we have RX_CLAIM_REQ_ALLOC released rx buffers - * try to claim the pre-allocated buffers from the allocator */ @@ -1040,10 +1213,10 @@ restart: count++; if (count == 8) { count = 0; - if (rxq->used_count < RX_QUEUE_SIZE / 3) + if (rxq->used_count < rxq->queue_size / 3) emergency = false; spin_unlock(&rxq->lock); - iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC); + iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC, rxq); spin_lock(&rxq->lock); } } @@ -1055,7 +1228,10 @@ restart: if (rxq->free_count >= RX_CLAIM_REQ_ALLOC) { rxq->read = i; spin_unlock(&rxq->lock); - iwl_pcie_rxq_restock(trans); + if (trans->cfg->mq_rx_supported) + iwl_pcie_rxq_mq_restock(trans, rxq); + else + iwl_pcie_rxq_restock(trans, rxq); goto restart; } } @@ -1077,10 +1253,10 @@ restart: * will be restocked by the next call of iwl_pcie_rxq_restock. */ if (unlikely(emergency && count)) - iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC); + iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC, rxq); - if (trans_pcie->napi.poll) - napi_gro_flush(&trans_pcie->napi, false); + if (rxq->napi.poll) + napi_gro_flush(&rxq->napi, false); } /* diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 5a854c609477..58591ca051fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -72,6 +72,7 @@ #include <linux/bitops.h> #include <linux/gfp.h> #include <linux/vmalloc.h> +#include <linux/pm_runtime.h> #include "iwl-drv.h" #include "iwl-trans.h" @@ -1248,11 +1249,12 @@ void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state) _iwl_trans_pcie_stop_device(trans, true); } -static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test) +static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test, + bool reset) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3) { + if (!reset) { /* Enable persistence mode to avoid reset */ iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, CSR_HW_IF_CONFIG_REG_PERSIST_MODE); @@ -1276,7 +1278,7 @@ static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test) iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE); - if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D3) { + if (reset) { /* * reset TX queues -- some of their registers reset during S3 * so if we don't reset everything here the D3 image would try @@ -1290,7 +1292,7 @@ static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test) static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans, enum iwl_d3_status *status, - bool test) + bool test, bool reset) { u32 val; int ret; @@ -1325,7 +1327,7 @@ static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans, iwl_pcie_set_pwr(trans, false); - if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3) { + if (!reset) { iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); } else { @@ -1383,6 +1385,10 @@ static int _iwl_trans_pcie_start_hw(struct iwl_trans *trans, bool low_power) /* ... rfkill can call stop_device and set it false if needed */ iwl_trans_pcie_rf_kill(trans, hw_rfkill); + /* Make sure we sync here, because we'll need full access later */ + if (low_power) + pm_runtime_resume(trans->dev); + return 0; } @@ -1452,12 +1458,6 @@ static void iwl_trans_pcie_write_prph(struct iwl_trans *trans, u32 addr, iwl_trans_pcie_write32(trans, HBUS_TARG_PRPH_WDAT, val); } -static int iwl_pcie_dummy_napi_poll(struct napi_struct *napi, int budget) -{ - WARN_ON(1); - return 0; -} - static void iwl_trans_pcie_configure(struct iwl_trans *trans, const struct iwl_trans_config *trans_cfg) { @@ -1494,11 +1494,8 @@ static void iwl_trans_pcie_configure(struct iwl_trans *trans, * As this function may be called again in some corner cases don't * do anything if NAPI was already initialized. */ - if (!trans_pcie->napi.poll) { + if (trans_pcie->napi_dev.reg_state != NETREG_DUMMY) init_dummy_netdev(&trans_pcie->napi_dev); - netif_napi_add(&trans_pcie->napi_dev, &trans_pcie->napi, - iwl_pcie_dummy_napi_poll, 64); - } } void iwl_trans_pcie_free(struct iwl_trans *trans) @@ -1506,6 +1503,9 @@ void iwl_trans_pcie_free(struct iwl_trans *trans) struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int i; + /* TODO: check if this is really needed */ + pm_runtime_disable(trans->dev); + synchronize_irq(trans_pcie->pci_dev->irq); iwl_pcie_tx_free(trans); @@ -1519,9 +1519,6 @@ void iwl_trans_pcie_free(struct iwl_trans *trans) pci_release_regions(trans_pcie->pci_dev); pci_disable_device(trans_pcie->pci_dev); - if (trans_pcie->napi.poll) - netif_napi_del(&trans_pcie->napi); - iwl_pcie_free_fw_monitor(trans); for_each_possible_cpu(i) { @@ -1861,6 +1858,7 @@ void iwl_trans_pcie_ref(struct iwl_trans *trans) spin_lock_irqsave(&trans_pcie->ref_lock, flags); IWL_DEBUG_RPM(trans, "ref_counter: %d\n", trans_pcie->ref_count); trans_pcie->ref_count++; + pm_runtime_get(&trans_pcie->pci_dev->dev); spin_unlock_irqrestore(&trans_pcie->ref_lock, flags); } @@ -1879,6 +1877,10 @@ void iwl_trans_pcie_unref(struct iwl_trans *trans) return; } trans_pcie->ref_count--; + + pm_runtime_mark_last_busy(&trans_pcie->pci_dev->dev); + pm_runtime_put_autosuspend(&trans_pcie->pci_dev->dev); + spin_unlock_irqrestore(&trans_pcie->ref_lock, flags); } @@ -2031,29 +2033,48 @@ static ssize_t iwl_dbgfs_rx_queue_read(struct file *file, { struct iwl_trans *trans = file->private_data; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; - char buf[256]; - int pos = 0; - const size_t bufsz = sizeof(buf); - - pos += scnprintf(buf + pos, bufsz - pos, "read: %u\n", - rxq->read); - pos += scnprintf(buf + pos, bufsz - pos, "write: %u\n", - rxq->write); - pos += scnprintf(buf + pos, bufsz - pos, "write_actual: %u\n", - rxq->write_actual); - pos += scnprintf(buf + pos, bufsz - pos, "need_update: %d\n", - rxq->need_update); - pos += scnprintf(buf + pos, bufsz - pos, "free_count: %u\n", - rxq->free_count); - if (rxq->rb_stts) { - pos += scnprintf(buf + pos, bufsz - pos, "closed_rb_num: %u\n", - le16_to_cpu(rxq->rb_stts->closed_rb_num) & 0x0FFF); - } else { - pos += scnprintf(buf + pos, bufsz - pos, - "closed_rb_num: Not Allocated\n"); + char *buf; + int pos = 0, i, ret; + size_t bufsz = sizeof(buf); + + bufsz = sizeof(char) * 121 * trans->num_rx_queues; + + if (!trans_pcie->rxq) + return -EAGAIN; + + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (i = 0; i < trans->num_rx_queues && pos < bufsz; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i]; + + pos += scnprintf(buf + pos, bufsz - pos, "queue#: %2d\n", + i); + pos += scnprintf(buf + pos, bufsz - pos, "\tread: %u\n", + rxq->read); + pos += scnprintf(buf + pos, bufsz - pos, "\twrite: %u\n", + rxq->write); + pos += scnprintf(buf + pos, bufsz - pos, "\twrite_actual: %u\n", + rxq->write_actual); + pos += scnprintf(buf + pos, bufsz - pos, "\tneed_update: %2d\n", + rxq->need_update); + pos += scnprintf(buf + pos, bufsz - pos, "\tfree_count: %u\n", + rxq->free_count); + if (rxq->rb_stts) { + pos += scnprintf(buf + pos, bufsz - pos, + "\tclosed_rb_num: %u\n", + le16_to_cpu(rxq->rb_stts->closed_rb_num) & + 0x0FFF); + } else { + pos += scnprintf(buf + pos, bufsz - pos, + "\tclosed_rb_num: Not Allocated\n"); + } } - return simple_read_from_buffer(user_buf, count, ppos, buf, pos); + ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos); + kfree(buf); + + return ret; } static ssize_t iwl_dbgfs_interrupt_read(struct file *file, @@ -2218,7 +2239,8 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int max_len = PAGE_SIZE << trans_pcie->rx_page_order; - struct iwl_rxq *rxq = &trans_pcie->rxq; + /* Dump RBs is supported only for pre-9000 devices (1 queue) */ + struct iwl_rxq *rxq = &trans_pcie->rxq[0]; u32 i, r, j, rb_len = 0; spin_lock(&rxq->lock); @@ -2413,7 +2435,8 @@ static struct iwl_trans_dump_data u32 len, num_rbs; u32 monitor_len; int i, ptr; - bool dump_rbs = test_bit(STATUS_FW_ERROR, &trans->status); + bool dump_rbs = test_bit(STATUS_FW_ERROR, &trans->status) && + !trans->cfg->mq_rx_supported; /* transport dump header */ len = sizeof(*dump_data); @@ -2468,11 +2491,12 @@ static struct iwl_trans_dump_data len += sizeof(*data) + (FH_MEM_UPPER_BOUND - FH_MEM_LOWER_BOUND); if (dump_rbs) { + /* Dump RBs is supported only for pre-9000 devices (1 queue) */ + struct iwl_rxq *rxq = &trans_pcie->rxq[0]; /* RBs */ - num_rbs = le16_to_cpu(ACCESS_ONCE( - trans_pcie->rxq.rb_stts->closed_rb_num)) + num_rbs = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; - num_rbs = (num_rbs - trans_pcie->rxq.read) & RX_QUEUE_MASK; + num_rbs = (num_rbs - rxq->read) & RX_QUEUE_MASK; len += num_rbs * (sizeof(*data) + sizeof(struct iwl_fw_error_dump_rb) + (PAGE_SIZE << trans_pcie->rx_page_order)); @@ -2523,6 +2547,22 @@ static struct iwl_trans_dump_data return dump_data; } +#ifdef CONFIG_PM_SLEEP +static int iwl_trans_pcie_suspend(struct iwl_trans *trans) +{ + if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3) + return iwl_pci_fw_enter_d0i3(trans); + + return 0; +} + +static void iwl_trans_pcie_resume(struct iwl_trans *trans) +{ + if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3) + iwl_pci_fw_exit_d0i3(trans); +} +#endif /* CONFIG_PM_SLEEP */ + static const struct iwl_trans_ops trans_ops_pcie = { .start_hw = iwl_trans_pcie_start_hw, .op_mode_leave = iwl_trans_pcie_op_mode_leave, @@ -2533,6 +2573,11 @@ static const struct iwl_trans_ops trans_ops_pcie = { .d3_suspend = iwl_trans_pcie_d3_suspend, .d3_resume = iwl_trans_pcie_d3_resume, +#ifdef CONFIG_PM_SLEEP + .suspend = iwl_trans_pcie_suspend, + .resume = iwl_trans_pcie_resume, +#endif /* CONFIG_PM_SLEEP */ + .send_cmd = iwl_trans_pcie_send_hcmd, .tx = iwl_trans_pcie_tx, @@ -2571,7 +2616,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, struct iwl_trans_pcie *trans_pcie; struct iwl_trans *trans; u16 pci_cmd; - int ret; + int ret, addr_size; trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), &pdev->dev, cfg, &trans_ops_pcie, 0); @@ -2609,11 +2654,17 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, PCIE_LINK_STATE_CLKPM); } + if (cfg->mq_rx_supported) + addr_size = 64; + else + addr_size = 36; + pci_set_master(pdev); - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(36)); + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size)); if (!ret) - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(36)); + ret = pci_set_consistent_dma_mask(pdev, + DMA_BIT_MASK(addr_size)); if (ret) { ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (!ret) @@ -2716,6 +2767,8 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, /* Initialize the wait queue for commands */ init_waitqueue_head(&trans_pcie->wait_command_queue); + init_waitqueue_head(&trans_pcie->d0i3_waitq); + ret = iwl_pcie_alloc_ict(trans); if (ret) goto out_pci_disable_msi; @@ -2730,6 +2783,12 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->inta_mask = CSR_INI_SET_MASK; +#ifdef CONFIG_IWLWIFI_PCIE_RTPM + trans->runtime_pm_mode = IWL_PLAT_PM_MODE_D0I3; +#else + trans->runtime_pm_mode = IWL_PLAT_PM_MODE_DISABLED; +#endif /* CONFIG_IWLWIFI_PCIE_RTPM */ + return trans; out_free_ict: diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 5262028b5505..837a7d536874 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1,7 +1,8 @@ /****************************************************************************** * * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. @@ -33,7 +34,6 @@ #include <linux/sched.h> #include <net/ip6_checksum.h> #include <net/tso.h> -#include <net/ip6_checksum.h> #include "iwl-debug.h" #include "iwl-csr.h" @@ -571,6 +571,7 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, return ret; spin_lock_init(&txq->lock); + __skb_queue_head_init(&txq->overflow_q); /* * Tell nic where to find circular buffer of Tx Frame Descriptors for @@ -621,6 +622,13 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) q->read_ptr = iwl_queue_inc_wrap(q->read_ptr); } txq->active = false; + + while (!skb_queue_empty(&txq->overflow_q)) { + struct sk_buff *skb = __skb_dequeue(&txq->overflow_q); + + iwl_op_mode_free_skb(trans->op_mode, skb); + } + spin_unlock_bh(&txq->lock); /* just in case - this queue may have been stopped */ @@ -1052,8 +1060,41 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, iwl_pcie_txq_progress(txq); - if (iwl_queue_space(&txq->q) > txq->q.low_mark) - iwl_wake_queue(trans, txq); + if (iwl_queue_space(&txq->q) > txq->q.low_mark && + test_bit(txq_id, trans_pcie->queue_stopped)) { + struct sk_buff_head skbs; + + __skb_queue_head_init(&skbs); + skb_queue_splice_init(&txq->overflow_q, &skbs); + + /* + * This is tricky: we are in reclaim path which is non + * re-entrant, so noone will try to take the access the + * txq data from that path. We stopped tx, so we can't + * have tx as well. Bottom line, we can unlock and re-lock + * later. + */ + spin_unlock_bh(&txq->lock); + + while (!skb_queue_empty(&skbs)) { + struct sk_buff *skb = __skb_dequeue(&skbs); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + u8 dev_cmd_idx = IWL_TRANS_FIRST_DRIVER_DATA + 1; + struct iwl_device_cmd *dev_cmd = + info->driver_data[dev_cmd_idx]; + + /* + * Note that we can very well be overflowing again. + * In that case, iwl_queue_space will be small again + * and we won't wake mac80211's queue. + */ + iwl_trans_pcie_tx(trans, skb, dev_cmd, txq_id); + } + spin_lock_bh(&txq->lock); + + if (iwl_queue_space(&txq->q) > txq->q.low_mark) + iwl_wake_queue(trans, txq); + } if (q->read_ptr == q->write_ptr) { IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", q->id); @@ -1686,6 +1727,20 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans, wake_up(&trans_pcie->wait_command_queue); } + if (meta->flags & CMD_MAKE_TRANS_IDLE) { + IWL_DEBUG_INFO(trans, "complete %s - mark trans as idle\n", + iwl_get_cmd_string(trans, cmd->hdr.cmd)); + set_bit(STATUS_TRANS_IDLE, &trans->status); + wake_up(&trans_pcie->d0i3_waitq); + } + + if (meta->flags & CMD_WAKE_UP_TRANS) { + IWL_DEBUG_INFO(trans, "complete %s - clear trans idle flag\n", + iwl_get_cmd_string(trans, cmd->hdr.cmd)); + clear_bit(STATUS_TRANS_IDLE, &trans->status); + wake_up(&trans_pcie->d0i3_waitq); + } + meta->flags = 0; spin_unlock_bh(&txq->lock); @@ -2161,6 +2216,8 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, csum = skb_checksum(skb, offs, skb->len - offs, 0); *(__sum16 *)(skb->data + csum_offs) = csum_fold(csum); + + skb->ip_summed = CHECKSUM_UNNECESSARY; } if (skb_is_nonlinear(skb) && @@ -2177,6 +2234,22 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, spin_lock(&txq->lock); + if (iwl_queue_space(q) < q->high_mark) { + iwl_stop_queue(trans, txq); + + /* don't put the packet on the ring, if there is no room */ + if (unlikely(iwl_queue_space(q) < 3)) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + info->driver_data[IWL_TRANS_FIRST_DRIVER_DATA + 1] = + dev_cmd; + __skb_queue_tail(&txq->overflow_q, skb); + + spin_unlock(&txq->lock); + return 0; + } + } + /* In AGG mode, the index in the ring must correspond to the WiFi * sequence number. This is a HW requirements to help the SCD to parse * the BA. @@ -2281,12 +2354,6 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, * At this point the frame is "transmitted" successfully * and we will get a TX status notification eventually. */ - if (iwl_queue_space(q) < q->high_mark) { - if (wait_write_ptr) - iwl_pcie_txq_inc_wr_ptr(trans, txq); - else - iwl_stop_queue(trans, txq); - } spin_unlock(&txq->lock); return 0; out_err: diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c index 6df3ee561d52..515aa3f993f3 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_hw.c +++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c @@ -836,25 +836,30 @@ static int hfa384x_get_rid(struct net_device *dev, u16 rid, void *buf, int len, spin_lock_bh(&local->baplock); res = hfa384x_setup_bap(dev, BAP0, rid, 0); - if (!res) - res = hfa384x_from_bap(dev, BAP0, &rec, sizeof(rec)); + if (res) + goto unlock; + + res = hfa384x_from_bap(dev, BAP0, &rec, sizeof(rec)); + if (res) + goto unlock; if (le16_to_cpu(rec.len) == 0) { /* RID not available */ res = -ENODATA; + goto unlock; } rlen = (le16_to_cpu(rec.len) - 1) * 2; - if (!res && exact_len && rlen != len) { + if (exact_len && rlen != len) { printk(KERN_DEBUG "%s: hfa384x_get_rid - RID len mismatch: " "rid=0x%04x, len=%d (expected %d)\n", dev->name, rid, rlen, len); res = -ENODATA; } - if (!res) - res = hfa384x_from_bap(dev, BAP0, buf, len); + res = hfa384x_from_bap(dev, BAP0, buf, len); +unlock: spin_unlock_bh(&local->baplock); mutex_unlock(&local->rid_bap_mtx); diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 86955c416b30..2eea76a340b7 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -2039,6 +2039,43 @@ static int lbs_leave_ibss(struct wiphy *wiphy, struct net_device *dev) +int lbs_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, + bool enabled, int timeout) +{ + struct lbs_private *priv = wiphy_priv(wiphy); + + if (!(priv->fwcapinfo & FW_CAPINFO_PS)) { + if (!enabled) + return 0; + else + return -EINVAL; + } + /* firmware does not work well with too long latency with power saving + * enabled, so do not enable it if there is only polling, no + * interrupts (like in some sdio hosts which can only + * poll for sdio irqs) + */ + if (priv->is_polling) { + if (!enabled) + return 0; + else + return -EINVAL; + } + if (!enabled) { + priv->psmode = LBS802_11POWERMODECAM; + if (priv->psstate != PS_STATE_FULL_POWER) + lbs_set_ps_mode(priv, + PS_MODE_ACTION_EXIT_PS, + true); + return 0; + } + if (priv->psmode != LBS802_11POWERMODECAM) + return 0; + priv->psmode = LBS802_11POWERMODEMAX_PSP; + if (priv->connect_status == LBS_CONNECTED) + lbs_set_ps_mode(priv, PS_MODE_ACTION_ENTER_PS, true); + return 0; +} /* * Initialization @@ -2057,6 +2094,7 @@ static struct cfg80211_ops lbs_cfg80211_ops = { .change_virtual_intf = lbs_change_intf, .join_ibss = lbs_join_ibss, .leave_ibss = lbs_leave_ibss, + .set_power_mgmt = lbs_set_power_mgmt, }; diff --git a/drivers/net/wireless/marvell/libertas/cmd.c b/drivers/net/wireless/marvell/libertas/cmd.c index 0387a5b380c8..4ddd0e5a6b85 100644 --- a/drivers/net/wireless/marvell/libertas/cmd.c +++ b/drivers/net/wireless/marvell/libertas/cmd.c @@ -957,7 +957,7 @@ static void lbs_queue_cmd(struct lbs_private *priv, /* Exit_PS command needs to be queued in the header always. */ if (le16_to_cpu(cmdnode->cmdbuf->command) == CMD_802_11_PS_MODE) { - struct cmd_ds_802_11_ps_mode *psm = (void *) &cmdnode->cmdbuf; + struct cmd_ds_802_11_ps_mode *psm = (void *)cmdnode->cmdbuf; if (psm->action == cpu_to_le16(PS_MODE_ACTION_EXIT_PS)) { if (priv->psstate != PS_STATE_FULL_POWER) @@ -1387,7 +1387,7 @@ int lbs_execute_next_command(struct lbs_private *priv) * PS command. Ignore it if it is not Exit_PS. * otherwise send it down immediately. */ - struct cmd_ds_802_11_ps_mode *psm = (void *)&cmd[1]; + struct cmd_ds_802_11_ps_mode *psm = (void *)cmd; lbs_deb_host( "EXEC_NEXT_CMD: PS cmd, action 0x%02x\n", @@ -1428,40 +1428,14 @@ int lbs_execute_next_command(struct lbs_private *priv) * check if in power save mode, if yes, put the device back * to PS mode */ -#ifdef TODO - /* - * This was the old code for libertas+wext. Someone that - * understands this beast should re-code it in a sane way. - * - * I actually don't understand why this is related to WPA - * and to connection status, shouldn't powering should be - * independ of such things? - */ if ((priv->psmode != LBS802_11POWERMODECAM) && (priv->psstate == PS_STATE_FULL_POWER) && - ((priv->connect_status == LBS_CONNECTED) || - lbs_mesh_connected(priv))) { - if (priv->secinfo.WPAenabled || - priv->secinfo.WPA2enabled) { - /* check for valid WPA group keys */ - if (priv->wpa_mcast_key.len || - priv->wpa_unicast_key.len) { - lbs_deb_host( - "EXEC_NEXT_CMD: WPA enabled and GTK_SET" - " go back to PS_SLEEP"); - lbs_set_ps_mode(priv, - PS_MODE_ACTION_ENTER_PS, - false); - } - } else { - lbs_deb_host( - "EXEC_NEXT_CMD: cmdpendingq empty, " - "go back to PS_SLEEP"); - lbs_set_ps_mode(priv, PS_MODE_ACTION_ENTER_PS, - false); - } + (priv->connect_status == LBS_CONNECTED)) { + lbs_deb_host( + "EXEC_NEXT_CMD: cmdpendingq empty, go back to PS_SLEEP"); + lbs_set_ps_mode(priv, PS_MODE_ACTION_ENTER_PS, + false); } -#endif } ret = 0; diff --git a/drivers/net/wireless/marvell/libertas/cmdresp.c b/drivers/net/wireless/marvell/libertas/cmdresp.c index e5442e8956f7..c95bf6dc9522 100644 --- a/drivers/net/wireless/marvell/libertas/cmdresp.c +++ b/drivers/net/wireless/marvell/libertas/cmdresp.c @@ -123,7 +123,10 @@ int lbs_process_command_response(struct lbs_private *priv, u8 *data, u32 len) priv->cmd_timed_out = 0; if (respcmd == CMD_RET(CMD_802_11_PS_MODE)) { - struct cmd_ds_802_11_ps_mode *psmode = (void *) &resp[1]; + /* struct cmd_ds_802_11_ps_mode also contains + * the header + */ + struct cmd_ds_802_11_ps_mode *psmode = (void *)resp; u16 action = le16_to_cpu(psmode->action); lbs_deb_host( @@ -254,6 +257,10 @@ int lbs_process_event(struct lbs_private *priv, u32 event) "EVENT: in FULL POWER mode, ignoring PS_SLEEP\n"); break; } + if (!list_empty(&priv->cmdpendingq)) { + lbs_deb_cmd("EVENT: commands in queue, do not sleep\n"); + break; + } priv->psstate = PS_STATE_PRE_SLEEP; lbs_ps_confirm_sleep(priv); diff --git a/drivers/net/wireless/marvell/libertas/dev.h b/drivers/net/wireless/marvell/libertas/dev.h index 6bd1608992b0..edf710bc5e77 100644 --- a/drivers/net/wireless/marvell/libertas/dev.h +++ b/drivers/net/wireless/marvell/libertas/dev.h @@ -99,6 +99,7 @@ struct lbs_private { /* Hardware access */ void *card; bool iface_running; + u8 is_polling; /* host has to poll the card irq */ u8 fw_ready; u8 surpriseremoved; u8 setup_fw_on_resume; diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c index 68fd3a9779bd..13eae9ff8c35 100644 --- a/drivers/net/wireless/marvell/libertas/if_sdio.c +++ b/drivers/net/wireless/marvell/libertas/if_sdio.c @@ -1267,7 +1267,7 @@ static int if_sdio_probe(struct sdio_func *func, priv->reset_card = if_sdio_reset_card; priv->power_save = if_sdio_power_save; priv->power_restore = if_sdio_power_restore; - + priv->is_polling = !(func->card->host->caps & MMC_CAP_SDIO_IRQ); ret = if_sdio_power_on(card); if (ret) goto err_activate_card; diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index dff08a2896a3..aba0c9995b14 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -267,6 +267,7 @@ static int if_usb_probe(struct usb_interface *intf, priv->enter_deep_sleep = NULL; priv->exit_deep_sleep = NULL; priv->reset_deep_sleep_wakeup = NULL; + priv->is_polling = false; #ifdef CONFIG_OLPC if (machine_is_olpc()) priv->reset_card = if_usb_reset_olpc_card; diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c index 8079560f4965..b35b8bcce24c 100644 --- a/drivers/net/wireless/marvell/libertas/main.c +++ b/drivers/net/wireless/marvell/libertas/main.c @@ -1060,7 +1060,12 @@ void lbs_remove_card(struct lbs_private *priv) if (priv->psmode == LBS802_11POWERMODEMAX_PSP) { priv->psmode = LBS802_11POWERMODECAM; - lbs_set_ps_mode(priv, PS_MODE_ACTION_EXIT_PS, true); + /* no need to wakeup if already woken up, + * on suspend, this exit ps command is not processed + * the driver hangs + */ + if (priv->psstate != PS_STATE_FULL_POWER) + lbs_set_ps_mode(priv, PS_MODE_ACTION_EXIT_PS, true); } if (priv->is_deep_sleep) { diff --git a/drivers/net/wireless/marvell/mwifiex/README b/drivers/net/wireless/marvell/mwifiex/README index 2f0f9b5609d0..24e649b1eb24 100644 --- a/drivers/net/wireless/marvell/mwifiex/README +++ b/drivers/net/wireless/marvell/mwifiex/README @@ -237,4 +237,14 @@ device_dump cat fw_dump +verext + This command is used to get extended firmware version string using + different configuration parameters. + + Usage: + echo "[version_str_sel]" > verext + cat verext + + [version_str_sel]: firmware support several extend version + string cases, include 0/1/10/20/21/99 =============================================================================== diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index e7adef72c05f..f2dce81ba36e 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -1962,6 +1962,9 @@ mwifiex_cfg80211_disconnect(struct wiphy *wiphy, struct net_device *dev, { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); + if (!mwifiex_stop_bg_scan(priv)) + cfg80211_sched_scan_stopped_rtnl(priv->wdev.wiphy); + if (mwifiex_deauthenticate(priv, NULL)) return -EFAULT; @@ -2217,6 +2220,9 @@ mwifiex_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev, "info: Trying to associate to %s and bssid %pM\n", (char *)sme->ssid, sme->bssid); + if (!mwifiex_stop_bg_scan(priv)) + cfg80211_sched_scan_stopped_rtnl(priv->wdev.wiphy); + ret = mwifiex_cfg80211_assoc(priv, sme->ssid_len, sme->ssid, sme->bssid, priv->bss_mode, sme->channel, sme, 0); if (!ret) { @@ -2420,6 +2426,9 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, return -EBUSY; } + if (!mwifiex_stop_bg_scan(priv)) + cfg80211_sched_scan_stopped_rtnl(priv->wdev.wiphy); + user_scan_cfg = kzalloc(sizeof(*user_scan_cfg), GFP_KERNEL); if (!user_scan_cfg) return -ENOMEM; @@ -2487,6 +2496,125 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, return 0; } +/* CFG802.11 operation handler for sched_scan_start. + * + * This function issues a bgscan config request to the firmware based upon + * the user specified sched_scan configuration. On successful completion, + * firmware will generate BGSCAN_REPORT event, driver should issue bgscan + * query command to get sched_scan results from firmware. + */ +static int +mwifiex_cfg80211_sched_scan_start(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_sched_scan_request *request) +{ + struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); + int i, offset; + struct ieee80211_channel *chan; + struct mwifiex_bg_scan_cfg *bgscan_cfg; + struct ieee_types_header *ie; + + if (!request || (!request->n_ssids && !request->n_match_sets)) { + wiphy_err(wiphy, "%s : Invalid Sched_scan parameters", + __func__); + return -EINVAL; + } + + wiphy_info(wiphy, "sched_scan start : n_ssids=%d n_match_sets=%d ", + request->n_ssids, request->n_match_sets); + wiphy_info(wiphy, "n_channels=%d interval=%d ie_len=%d\n", + request->n_channels, request->scan_plans->interval, + (int)request->ie_len); + + bgscan_cfg = kzalloc(sizeof(*bgscan_cfg), GFP_KERNEL); + if (!bgscan_cfg) + return -ENOMEM; + + if (priv->scan_request || priv->scan_aborting) + bgscan_cfg->start_later = true; + + bgscan_cfg->num_ssids = request->n_match_sets; + bgscan_cfg->ssid_list = request->match_sets; + + if (request->ie && request->ie_len) { + offset = 0; + for (i = 0; i < MWIFIEX_MAX_VSIE_NUM; i++) { + if (priv->vs_ie[i].mask != MWIFIEX_VSIE_MASK_CLEAR) + continue; + priv->vs_ie[i].mask = MWIFIEX_VSIE_MASK_BGSCAN; + ie = (struct ieee_types_header *)(request->ie + offset); + memcpy(&priv->vs_ie[i].ie, ie, sizeof(*ie) + ie->len); + offset += sizeof(*ie) + ie->len; + + if (offset >= request->ie_len) + break; + } + } + + for (i = 0; i < min_t(u32, request->n_channels, + MWIFIEX_BG_SCAN_CHAN_MAX); i++) { + chan = request->channels[i]; + bgscan_cfg->chan_list[i].chan_number = chan->hw_value; + bgscan_cfg->chan_list[i].radio_type = chan->band; + + if ((chan->flags & IEEE80211_CHAN_NO_IR) || !request->n_ssids) + bgscan_cfg->chan_list[i].scan_type = + MWIFIEX_SCAN_TYPE_PASSIVE; + else + bgscan_cfg->chan_list[i].scan_type = + MWIFIEX_SCAN_TYPE_ACTIVE; + + bgscan_cfg->chan_list[i].scan_time = 0; + } + + bgscan_cfg->chan_per_scan = min_t(u32, request->n_channels, + MWIFIEX_BG_SCAN_CHAN_MAX); + + /* Use at least 15 second for per scan cycle */ + bgscan_cfg->scan_interval = (request->scan_plans->interval > + MWIFIEX_BGSCAN_INTERVAL) ? + request->scan_plans->interval : + MWIFIEX_BGSCAN_INTERVAL; + + bgscan_cfg->repeat_count = MWIFIEX_BGSCAN_REPEAT_COUNT; + bgscan_cfg->report_condition = MWIFIEX_BGSCAN_SSID_MATCH | + MWIFIEX_BGSCAN_WAIT_ALL_CHAN_DONE; + bgscan_cfg->bss_type = MWIFIEX_BSS_MODE_INFRA; + bgscan_cfg->action = MWIFIEX_BGSCAN_ACT_SET; + bgscan_cfg->enable = true; + if (request->min_rssi_thold != NL80211_SCAN_RSSI_THOLD_OFF) { + bgscan_cfg->report_condition |= MWIFIEX_BGSCAN_SSID_RSSI_MATCH; + bgscan_cfg->rssi_threshold = request->min_rssi_thold; + } + + if (mwifiex_send_cmd(priv, HostCmd_CMD_802_11_BG_SCAN_CONFIG, + HostCmd_ACT_GEN_SET, 0, bgscan_cfg, true)) { + kfree(bgscan_cfg); + return -EFAULT; + } + + priv->sched_scanning = true; + + kfree(bgscan_cfg); + return 0; +} + +/* CFG802.11 operation handler for sched_scan_stop. + * + * This function issues a bgscan config command to disable + * previous bgscan configuration in the firmware + */ +static int mwifiex_cfg80211_sched_scan_stop(struct wiphy *wiphy, + struct net_device *dev) +{ + struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); + + wiphy_info(wiphy, "sched scan stop!"); + mwifiex_stop_bg_scan(priv); + + return 0; +} + static void mwifiex_setup_vht_caps(struct ieee80211_sta_vht_cap *vht_info, struct mwifiex_private *priv) { @@ -2848,6 +2976,9 @@ int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) mwifiex_dev_debugfs_remove(priv); #endif + if (priv->sched_scanning) + priv->sched_scanning = false; + mwifiex_stop_net_dev_queue(priv->netdev, adapter); skb_queue_walk_safe(&priv->bypass_txq, skb, tmp) @@ -3044,10 +3175,12 @@ static int mwifiex_set_wowlan_mef_entry(struct mwifiex_private *priv, sizeof(byte_seq)); mef_entry->filter[filt_num].filt_type = TYPE_EQ; - if (first_pat) + if (first_pat) { first_pat = false; - else + mwifiex_dbg(priv->adapter, INFO, "Wake on patterns\n"); + } else { mef_entry->filter[filt_num].filt_action = TYPE_AND; + } filt_num++; } @@ -3073,6 +3206,7 @@ static int mwifiex_set_wowlan_mef_entry(struct mwifiex_private *priv, mef_entry->filter[filt_num].offset = 56; mef_entry->filter[filt_num].filt_type = TYPE_EQ; mef_entry->filter[filt_num].filt_action = TYPE_OR; + mwifiex_dbg(priv->adapter, INFO, "Wake on magic packet\n"); } return ret; } @@ -3143,7 +3277,7 @@ static int mwifiex_cfg80211_suspend(struct wiphy *wiphy, priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_STA); - if (!priv->media_connected) { + if (!priv->media_connected && !wowlan->nd_config) { mwifiex_dbg(adapter, ERROR, "Can not configure WOWLAN in disconnected state\n"); return 0; @@ -3155,19 +3289,30 @@ static int mwifiex_cfg80211_suspend(struct wiphy *wiphy, return ret; } + memset(&hs_cfg, 0, sizeof(hs_cfg)); + hs_cfg.conditions = le32_to_cpu(adapter->hs_cfg.conditions); + + if (wowlan->nd_config) { + mwifiex_dbg(adapter, INFO, "Wake on net detect\n"); + hs_cfg.conditions |= HS_CFG_COND_MAC_EVENT; + mwifiex_cfg80211_sched_scan_start(wiphy, priv->netdev, + wowlan->nd_config); + } + if (wowlan->disconnect) { - memset(&hs_cfg, 0, sizeof(hs_cfg)); - hs_cfg.is_invoke_hostcmd = false; - hs_cfg.conditions = HS_CFG_COND_MAC_EVENT; - hs_cfg.gpio = adapter->hs_cfg.gpio; - hs_cfg.gap = adapter->hs_cfg.gap; - ret = mwifiex_set_hs_params(priv, HostCmd_ACT_GEN_SET, - MWIFIEX_SYNC_CMD, &hs_cfg); - if (ret) { - mwifiex_dbg(adapter, ERROR, - "Failed to set HS params\n"); - return ret; - } + hs_cfg.conditions |= HS_CFG_COND_MAC_EVENT; + mwifiex_dbg(priv->adapter, INFO, "Wake on device disconnect\n"); + } + + hs_cfg.is_invoke_hostcmd = false; + hs_cfg.gpio = adapter->hs_cfg.gpio; + hs_cfg.gap = adapter->hs_cfg.gap; + ret = mwifiex_set_hs_params(priv, HostCmd_ACT_GEN_SET, + MWIFIEX_SYNC_CMD, &hs_cfg); + if (ret) { + mwifiex_dbg(adapter, ERROR, + "Failed to set HS params\n"); + return ret; } return ret; @@ -3175,6 +3320,64 @@ static int mwifiex_cfg80211_suspend(struct wiphy *wiphy, static int mwifiex_cfg80211_resume(struct wiphy *wiphy) { + struct mwifiex_adapter *adapter = mwifiex_cfg80211_get_adapter(wiphy); + struct mwifiex_private *priv = + mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_STA); + struct mwifiex_ds_wakeup_reason wakeup_reason; + struct cfg80211_wowlan_wakeup wakeup_report; + int i; + + mwifiex_get_wakeup_reason(priv, HostCmd_ACT_GEN_GET, MWIFIEX_SYNC_CMD, + &wakeup_reason); + memset(&wakeup_report, 0, sizeof(struct cfg80211_wowlan_wakeup)); + + wakeup_report.pattern_idx = -1; + + switch (wakeup_reason.hs_wakeup_reason) { + case NO_HSWAKEUP_REASON: + break; + case BCAST_DATA_MATCHED: + break; + case MCAST_DATA_MATCHED: + break; + case UCAST_DATA_MATCHED: + break; + case MASKTABLE_EVENT_MATCHED: + break; + case NON_MASKABLE_EVENT_MATCHED: + if (wiphy->wowlan_config->disconnect) + wakeup_report.disconnect = true; + if (wiphy->wowlan_config->nd_config) + wakeup_report.net_detect = adapter->nd_info; + break; + case NON_MASKABLE_CONDITION_MATCHED: + break; + case MAGIC_PATTERN_MATCHED: + if (wiphy->wowlan_config->magic_pkt) + wakeup_report.magic_pkt = true; + if (wiphy->wowlan_config->n_patterns) + wakeup_report.pattern_idx = 1; + break; + case CONTROL_FRAME_MATCHED: + break; + case MANAGEMENT_FRAME_MATCHED: + break; + default: + break; + } + + if ((wakeup_reason.hs_wakeup_reason > 0) && + (wakeup_reason.hs_wakeup_reason <= 7)) + cfg80211_report_wowlan_wakeup(&priv->wdev, &wakeup_report, + GFP_KERNEL); + + if (adapter->nd_info) { + for (i = 0 ; i < adapter->nd_info->n_matches ; i++) + kfree(adapter->nd_info->matches[i]); + kfree(adapter->nd_info); + adapter->nd_info = NULL; + } + return 0; } @@ -3590,8 +3793,8 @@ static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy, freq = ieee80211_channel_to_frequency(curr_bss->channel, band); chan = ieee80211_get_channel(wiphy, freq); - if (curr_bss->bcn_ht_oper) { - second_chan_offset = curr_bss->bcn_ht_oper->ht_param & + if (priv->ht_param_present) { + second_chan_offset = priv->assoc_resp_ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; chan_type = mwifiex_sec_chan_offset_to_chan_type (second_chan_offset); @@ -3701,6 +3904,8 @@ static struct cfg80211_ops mwifiex_cfg80211_ops = { .set_cqm_rssi_config = mwifiex_cfg80211_set_cqm_rssi_config, .set_antenna = mwifiex_cfg80211_set_antenna, .del_station = mwifiex_cfg80211_del_station, + .sched_scan_start = mwifiex_cfg80211_sched_scan_start, + .sched_scan_stop = mwifiex_cfg80211_sched_scan_stop, #ifdef CONFIG_PM .suspend = mwifiex_cfg80211_suspend, .resume = mwifiex_cfg80211_resume, @@ -3720,11 +3925,13 @@ static struct cfg80211_ops mwifiex_cfg80211_ops = { #ifdef CONFIG_PM static const struct wiphy_wowlan_support mwifiex_wowlan_support = { - .flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT, + .flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT | + WIPHY_WOWLAN_NET_DETECT, .n_patterns = MWIFIEX_MEF_MAX_FILTERS, .pattern_min_len = 1, .pattern_max_len = MWIFIEX_MAX_PATTERN_LEN, .max_pkt_offset = MWIFIEX_MAX_OFFSET_LEN, + .max_nd_match_sets = MWIFIEX_MAX_ND_MATCH_SETS, }; #endif @@ -3829,6 +4036,7 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) wiphy->flags |= WIPHY_FLAG_HAVE_AP_SME | WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD | WIPHY_FLAG_AP_UAPSD | + WIPHY_FLAG_SUPPORTS_SCHED_SCAN | WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL | WIPHY_FLAG_HAS_CHANNEL_SWITCH | WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -3847,6 +4055,10 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 | NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P; + wiphy->max_sched_scan_ssids = MWIFIEX_MAX_SSID_LIST_LENGTH; + wiphy->max_sched_scan_ie_len = MWIFIEX_MAX_VSIE_LEN; + wiphy->max_match_sets = MWIFIEX_MAX_SSID_LIST_LENGTH; + wiphy->available_antennas_tx = BIT(adapter->number_of_antenna) - 1; wiphy->available_antennas_rx = BIT(adapter->number_of_antenna) - 1; diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index cb25aa7e90db..a12adee776c6 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -1657,3 +1657,16 @@ int mwifiex_ret_get_hw_spec(struct mwifiex_private *priv, return 0; } + +/* This function handles the command response of hs wakeup reason + * command. + */ +int mwifiex_ret_wakeup_reason(struct mwifiex_private *priv, + struct host_cmd_ds_command *resp, + struct host_cmd_ds_wakeup_reason *wakeup_reason) +{ + wakeup_reason->wakeup_reason = + resp->params.hs_wakeup_reason.wakeup_reason; + + return 0; +} diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c index 0b9c580af988..df28836a1d11 100644 --- a/drivers/net/wireless/marvell/mwifiex/debugfs.c +++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c @@ -95,8 +95,7 @@ mwifiex_info_read(struct file *file, char __user *ubuf, mwifiex_drv_get_driver_version(priv->adapter, fmt, sizeof(fmt) - 1); - if (!priv->version_str[0]) - mwifiex_get_ver_ext(priv); + mwifiex_get_ver_ext(priv, 0); p += sprintf(p, "driver_name = " "\"mwifiex\"\n"); p += sprintf(p, "driver_version = %s", fmt); @@ -583,6 +582,52 @@ done: return ret; } +/* debugfs verext file write handler. + * This function is called when the 'verext' file is opened for write + */ +static ssize_t +mwifiex_verext_write(struct file *file, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + int ret; + u32 versionstrsel; + struct mwifiex_private *priv = (void *)file->private_data; + char buf[16]; + + memset(buf, 0, sizeof(buf)); + + if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) + return -EFAULT; + + ret = kstrtou32(buf, 10, &versionstrsel); + if (ret) + return ret; + + priv->versionstrsel = versionstrsel; + + return count; +} + +/* Proc verext file read handler. + * This function is called when the 'verext' file is opened for reading + * This function can be used read driver exteneed verion string. + */ +static ssize_t +mwifiex_verext_read(struct file *file, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct mwifiex_private *priv = + (struct mwifiex_private *)file->private_data; + char buf[256]; + int ret; + + mwifiex_get_ver_ext(priv, priv->versionstrsel); + ret = snprintf(buf, sizeof(buf), "version string: %s\n", + priv->version_str); + + return simple_read_from_buffer(ubuf, count, ppos, buf, ret); +} + /* Proc memrw file write handler. * This function is called when the 'memrw' file is opened for writing * This function can be used to write to a memory location. @@ -940,6 +985,7 @@ MWIFIEX_DFS_FILE_OPS(histogram); MWIFIEX_DFS_FILE_OPS(debug_mask); MWIFIEX_DFS_FILE_OPS(timeshare_coex); MWIFIEX_DFS_FILE_WRITE_OPS(reset); +MWIFIEX_DFS_FILE_OPS(verext); /* * This function creates the debug FS directory structure and the files. @@ -968,6 +1014,7 @@ mwifiex_dev_debugfs_init(struct mwifiex_private *priv) MWIFIEX_DFS_ADD_FILE(debug_mask); MWIFIEX_DFS_ADD_FILE(timeshare_coex); MWIFIEX_DFS_ADD_FILE(reset); + MWIFIEX_DFS_ADD_FILE(verext); } /* diff --git a/drivers/net/wireless/marvell/mwifiex/decl.h b/drivers/net/wireless/marvell/mwifiex/decl.h index d9c15cd36f12..bec300b9c2ea 100644 --- a/drivers/net/wireless/marvell/mwifiex/decl.h +++ b/drivers/net/wireless/marvell/mwifiex/decl.h @@ -122,6 +122,8 @@ #define BLOCK_NUMBER_OFFSET 15 #define SDIO_HEADER_OFFSET 28 +#define MWIFIEX_SIZE_4K 0x4000 + enum mwifiex_bss_type { MWIFIEX_BSS_TYPE_STA = 0, MWIFIEX_BSS_TYPE_UAP = 1, @@ -270,4 +272,26 @@ struct mwifiex_11h_intf_state { bool is_11h_enabled; bool is_11h_active; } __packed; + +#define MWIFIEX_FW_DUMP_IDX 0xff +#define MWIFIEX_FW_DUMP_MAX_MEMSIZE 0x160000 +#define MWIFIEX_DRV_INFO_IDX 20 +#define FW_DUMP_MAX_NAME_LEN 8 +#define FW_DUMP_HOST_READY 0xEE +#define FW_DUMP_DONE 0xFF +#define FW_DUMP_READ_DONE 0xFE + +struct memory_type_mapping { + u8 mem_name[FW_DUMP_MAX_NAME_LEN]; + u8 *mem_ptr; + u32 mem_size; + u8 done_flag; +}; + +enum rdwr_status { + RDWR_STATUS_SUCCESS = 0, + RDWR_STATUS_FAILURE = 1, + RDWR_STATUS_DONE = 2 +}; + #endif /* !_MWIFIEX_DECL_H_ */ diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index ced7af2be29a..c134cf865291 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -96,7 +96,7 @@ enum KEY_TYPE_ID { #define WAPI_KEY_LEN (WLAN_KEY_LEN_SMS4 + PN_LEN + 2) #define MAX_POLL_TRIES 100 -#define MAX_FIRMWARE_POLL_TRIES 100 +#define MAX_FIRMWARE_POLL_TRIES 150 #define FIRMWARE_READY_SDIO 0xfedc #define FIRMWARE_READY_PCIE 0xfedcba00 @@ -144,6 +144,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_TYPE_WILDCARDSSID (PROPRIETARY_TLV_BASE_ID + 18) #define TLV_TYPE_TSFTIMESTAMP (PROPRIETARY_TLV_BASE_ID + 19) #define TLV_TYPE_RSSI_HIGH (PROPRIETARY_TLV_BASE_ID + 22) +#define TLV_TYPE_BGSCAN_START_LATER (PROPRIETARY_TLV_BASE_ID + 30) #define TLV_TYPE_AUTH_TYPE (PROPRIETARY_TLV_BASE_ID + 31) #define TLV_TYPE_STA_MAC_ADDR (PROPRIETARY_TLV_BASE_ID + 32) #define TLV_TYPE_BSSID (PROPRIETARY_TLV_BASE_ID + 35) @@ -177,6 +178,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_TYPE_TX_PAUSE (PROPRIETARY_TLV_BASE_ID + 148) #define TLV_TYPE_COALESCE_RULE (PROPRIETARY_TLV_BASE_ID + 154) #define TLV_TYPE_KEY_PARAM_V2 (PROPRIETARY_TLV_BASE_ID + 156) +#define TLV_TYPE_REPEAT_COUNT (PROPRIETARY_TLV_BASE_ID + 176) #define TLV_TYPE_MULTI_CHAN_INFO (PROPRIETARY_TLV_BASE_ID + 183) #define TLV_TYPE_MC_GROUP_INFO (PROPRIETARY_TLV_BASE_ID + 184) #define TLV_TYPE_TDLS_IDLE_TIMEOUT (PROPRIETARY_TLV_BASE_ID + 194) @@ -331,6 +333,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define HostCmd_CMD_802_11_MAC_ADDRESS 0x004D #define HostCmd_CMD_802_11D_DOMAIN_INFO 0x005b #define HostCmd_CMD_802_11_KEY_MATERIAL 0x005e +#define HostCmd_CMD_802_11_BG_SCAN_CONFIG 0x006b #define HostCmd_CMD_802_11_BG_SCAN_QUERY 0x006c #define HostCmd_CMD_WMM_GET_STATUS 0x0071 #define HostCmd_CMD_802_11_SUBSCRIBE_EVENT 0x0075 @@ -370,6 +373,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define HostCmd_CMD_MGMT_FRAME_REG 0x010c #define HostCmd_CMD_REMAIN_ON_CHAN 0x010d #define HostCmd_CMD_11AC_CFG 0x0112 +#define HostCmd_CMD_HS_WAKEUP_REASON 0x0116 #define HostCmd_CMD_TDLS_CONFIG 0x0100 #define HostCmd_CMD_MC_POLICY 0x0121 #define HostCmd_CMD_TDLS_OPER 0x0122 @@ -523,6 +527,7 @@ enum P2P_MODES { #define EVENT_CHANNEL_REPORT_RDY 0x00000054 #define EVENT_TX_DATA_PAUSE 0x00000055 #define EVENT_EXT_SCAN_REPORT 0x00000058 +#define EVENT_BG_SCAN_STOPPED 0x00000065 #define EVENT_REMAIN_ON_CHAN_EXPIRED 0x0000005f #define EVENT_MULTI_CHAN_INFO 0x0000006a #define EVENT_TX_STATUS_REPORT 0x00000074 @@ -539,6 +544,8 @@ enum P2P_MODES { #define MWIFIEX_MAX_PATTERN_LEN 40 #define MWIFIEX_MAX_OFFSET_LEN 100 +#define MWIFIEX_MAX_ND_MATCH_SETS 10 + #define STACK_NBYTES 100 #define TYPE_DNUM 1 #define TYPE_BYTESEQ 2 @@ -601,6 +608,20 @@ struct mwifiex_ie_types_data { #define MWIFIEX_RXPD_FLAGS_TDLS_PACKET 0x01 #define MWIFIEX_TXPD_FLAGS_REQ_TX_STATUS 0x20 +enum HS_WAKEUP_REASON { + NO_HSWAKEUP_REASON = 0, + BCAST_DATA_MATCHED, + MCAST_DATA_MATCHED, + UCAST_DATA_MATCHED, + MASKTABLE_EVENT_MATCHED, + NON_MASKABLE_EVENT_MATCHED, + NON_MASKABLE_CONDITION_MATCHED, + MAGIC_PATTERN_MATCHED, + CONTROL_FRAME_MATCHED, + MANAGEMENT_FRAME_MATCHED, + RESERVED +}; + struct txpd { u8 bss_type; u8 bss_num; @@ -733,6 +754,21 @@ struct mwifiex_ie_types_num_probes { __le16 num_probes; } __packed; +struct mwifiex_ie_types_repeat_count { + struct mwifiex_ie_types_header header; + __le16 repeat_count; +} __packed; + +struct mwifiex_ie_types_min_rssi_threshold { + struct mwifiex_ie_types_header header; + __le16 rssi_threshold; +} __packed; + +struct mwifiex_ie_types_bgscan_start_later { + struct mwifiex_ie_types_header header; + __le16 start_later; +} __packed; + struct mwifiex_ie_types_scan_chan_gap { struct mwifiex_ie_types_header header; /* time gap in TUs to be used between two consecutive channels scan */ @@ -1027,7 +1063,7 @@ struct ieee_types_assoc_rsp { __le16 cap_info_bitmap; __le16 status_code; __le16 a_id; - u8 ie_buffer[1]; + u8 ie_buffer[0]; } __packed; struct host_cmd_ds_802_11_associate_rsp { @@ -1425,6 +1461,36 @@ struct mwifiex_user_scan_cfg { u16 scan_chan_gap; } __packed; +#define MWIFIEX_BG_SCAN_CHAN_MAX 38 +#define MWIFIEX_BSS_MODE_INFRA 1 +#define MWIFIEX_BGSCAN_ACT_GET 0x0000 +#define MWIFIEX_BGSCAN_ACT_SET 0x0001 +#define MWIFIEX_BGSCAN_ACT_SET_ALL 0xff01 +/** ssid match */ +#define MWIFIEX_BGSCAN_SSID_MATCH 0x0001 +/** ssid match and RSSI exceeded */ +#define MWIFIEX_BGSCAN_SSID_RSSI_MATCH 0x0004 +/**wait for all channel scan to complete to report scan result*/ +#define MWIFIEX_BGSCAN_WAIT_ALL_CHAN_DONE 0x80000000 + +struct mwifiex_bg_scan_cfg { + u16 action; + u8 enable; + u8 bss_type; + u8 chan_per_scan; + u32 scan_interval; + u32 report_condition; + u8 num_probes; + u8 rssi_threshold; + u8 snr_threshold; + u16 repeat_count; + u16 start_later; + struct cfg80211_match_set *ssid_list; + u8 num_ssids; + struct mwifiex_user_scan_chan chan_list[MWIFIEX_BG_SCAN_CHAN_MAX]; + u16 scan_chan_gap; +} __packed; + struct ie_body { u8 grp_key_oui[4]; u8 ptk_cnt[2]; @@ -1470,6 +1536,20 @@ struct mwifiex_ie_types_bss_scan_info { __le64 tsf; } __packed; +struct host_cmd_ds_802_11_bg_scan_config { + __le16 action; + u8 enable; + u8 bss_type; + u8 chan_per_scan; + u8 reserved; + __le16 reserved1; + __le32 scan_interval; + __le32 reserved2; + __le32 report_condition; + __le16 reserved3; + u8 tlv[0]; +} __packed; + struct host_cmd_ds_802_11_bg_scan_query { u8 flush; } __packed; @@ -2099,6 +2179,10 @@ struct host_cmd_ds_robust_coex { __le16 reserved; } __packed; +struct host_cmd_ds_wakeup_reason { + u16 wakeup_reason; +} __packed; + struct host_cmd_ds_command { __le16 command; __le16 size; @@ -2124,6 +2208,7 @@ struct host_cmd_ds_command { struct host_cmd_ds_802_11_scan scan; struct host_cmd_ds_802_11_scan_ext ext_scan; struct host_cmd_ds_802_11_scan_rsp scan_resp; + struct host_cmd_ds_802_11_bg_scan_config bg_scan_config; struct host_cmd_ds_802_11_bg_scan_query bg_scan_query; struct host_cmd_ds_802_11_bg_scan_query_rsp bg_scan_query_resp; struct host_cmd_ds_802_11_associate associate; @@ -2170,6 +2255,7 @@ struct host_cmd_ds_command { struct host_cmd_sdio_sp_rx_aggr_cfg sdio_rx_aggr_cfg; struct host_cmd_ds_multi_chan_policy mc_policy; struct host_cmd_ds_robust_coex coex; + struct host_cmd_ds_wakeup_reason hs_wakeup_reason; } params; } __packed; diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c index 6f7876ec31b7..517653b3adab 100644 --- a/drivers/net/wireless/marvell/mwifiex/init.c +++ b/drivers/net/wireless/marvell/mwifiex/init.c @@ -741,8 +741,6 @@ int mwifiex_dnld_fw(struct mwifiex_adapter *adapter, u32 poll_num = 1; if (adapter->if_ops.check_fw_status) { - adapter->winner = 0; - /* check if firmware is already running */ ret = adapter->if_ops.check_fw_status(adapter, poll_num); if (!ret) { @@ -750,13 +748,23 @@ int mwifiex_dnld_fw(struct mwifiex_adapter *adapter, "WLAN FW already running! Skip FW dnld\n"); return 0; } + } + + /* check if we are the winner for downloading FW */ + if (adapter->if_ops.check_winner_status) { + adapter->winner = 0; + ret = adapter->if_ops.check_winner_status(adapter); poll_num = MAX_FIRMWARE_POLL_TRIES; + if (ret) { + mwifiex_dbg(adapter, MSG, + "WLAN read winner status failed!\n"); + return ret; + } - /* check if we are the winner for downloading FW */ if (!adapter->winner) { mwifiex_dbg(adapter, MSG, - "FW already running! Skip FW dnld\n"); + "WLAN is not the winner! Skip FW dnld\n"); goto poll_fw; } } diff --git a/drivers/net/wireless/marvell/mwifiex/ioctl.h b/drivers/net/wireless/marvell/mwifiex/ioctl.h index 4f0174c64946..14cfa37deb00 100644 --- a/drivers/net/wireless/marvell/mwifiex/ioctl.h +++ b/drivers/net/wireless/marvell/mwifiex/ioctl.h @@ -271,6 +271,10 @@ struct mwifiex_ds_hs_cfg { u32 gap; }; +struct mwifiex_ds_wakeup_reason { + u16 hs_wakeup_reason; +}; + #define DEEP_SLEEP_ON 1 #define DEEP_SLEEP_OFF 0 #define DEEP_SLEEP_IDLE_TIME 100 @@ -414,6 +418,7 @@ struct mwifiex_ds_mef_cfg { #define MWIFIEX_VSIE_MASK_SCAN 0x01 #define MWIFIEX_VSIE_MASK_ASSOC 0x02 #define MWIFIEX_VSIE_MASK_ADHOC 0x04 +#define MWIFIEX_VSIE_MASK_BGSCAN 0x08 enum { MWIFIEX_FUNC_INIT = 1, diff --git a/drivers/net/wireless/marvell/mwifiex/join.c b/drivers/net/wireless/marvell/mwifiex/join.c index cc09a81dbf6a..62211fca91b7 100644 --- a/drivers/net/wireless/marvell/mwifiex/join.c +++ b/drivers/net/wireless/marvell/mwifiex/join.c @@ -644,6 +644,8 @@ int mwifiex_ret_802_11_associate(struct mwifiex_private *priv, struct mwifiex_bssdescriptor *bss_desc; bool enable_data = true; u16 cap_info, status_code, aid; + const u8 *ie_ptr; + struct ieee80211_ht_operation *assoc_resp_ht_oper; assoc_rsp = (struct ieee_types_assoc_rsp *) &resp->params; @@ -733,6 +735,19 @@ int mwifiex_ret_802_11_associate(struct mwifiex_private *priv, = ((bss_desc->wmm_ie.qos_info_bitmap & IEEE80211_WMM_IE_AP_QOSINFO_UAPSD) ? 1 : 0); + /* Store the bandwidth information from assoc response */ + ie_ptr = cfg80211_find_ie(WLAN_EID_HT_OPERATION, assoc_rsp->ie_buffer, + priv->assoc_rsp_size + - sizeof(struct ieee_types_assoc_rsp)); + if (ie_ptr) { + assoc_resp_ht_oper = (struct ieee80211_ht_operation *)(ie_ptr + + sizeof(struct ieee_types_header)); + priv->assoc_resp_ht_param = assoc_resp_ht_oper->ht_param; + priv->ht_param_present = true; + } else { + priv->ht_param_present = false; + } + mwifiex_dbg(priv->adapter, INFO, "info: ASSOC_RESP: curr_pkt_filter is %#x\n", priv->curr_pkt_filter); diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 79c16de8743e..3cfa94677a8e 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -132,6 +132,13 @@ static int mwifiex_unregister(struct mwifiex_adapter *adapter) } } + if (adapter->nd_info) { + for (i = 0 ; i < adapter->nd_info->n_matches ; i++) + kfree(adapter->nd_info->matches[i]); + kfree(adapter->nd_info); + adapter->nd_info = NULL; + } + vfree(adapter->chan_stats); kfree(adapter); return 0; @@ -746,6 +753,13 @@ int mwifiex_queue_tx_pkt(struct mwifiex_private *priv, struct sk_buff *skb) mwifiex_queue_main_work(priv->adapter); + if (priv->sched_scanning) { + mwifiex_dbg(priv->adapter, INFO, + "aborting bgscan on ndo_stop\n"); + mwifiex_stop_bg_scan(priv); + cfg80211_sched_scan_stopped(priv->wdev.wiphy); + } + return 0; } diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 2f7f478ce04b..aea7aee46cf7 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -198,6 +198,11 @@ do { \ buf, len, false); \ } while (0) +/** Min BGSCAN interval 15 second */ +#define MWIFIEX_BGSCAN_INTERVAL 15000 +/** default repeat count */ +#define MWIFIEX_BGSCAN_REPEAT_COUNT 6 + struct mwifiex_dbg { u32 num_cmd_host_to_card_failure; u32 num_cmd_sleep_cfm_host_to_card_failure; @@ -293,6 +298,7 @@ struct mwifiex_tid_tbl { #define WMM_HIGHEST_PRIORITY 7 #define HIGH_PRIO_TID 7 #define LOW_PRIO_TID 0 +#define MWIFIEX_WMM_DRV_DELAY_MAX 510 struct mwifiex_wmm_desc { struct mwifiex_tid_tbl tid_tbl_ptr[MAX_NUM_TID]; @@ -483,26 +489,6 @@ struct mwifiex_roc_cfg { struct ieee80211_channel chan; }; -#define MWIFIEX_FW_DUMP_IDX 0xff -#define MWIFIEX_DRV_INFO_IDX 20 -#define FW_DUMP_MAX_NAME_LEN 8 -#define FW_DUMP_HOST_READY 0xEE -#define FW_DUMP_DONE 0xFF -#define FW_DUMP_READ_DONE 0xFE - -struct memory_type_mapping { - u8 mem_name[FW_DUMP_MAX_NAME_LEN]; - u8 *mem_ptr; - u32 mem_size; - u8 done_flag; -}; - -enum rdwr_status { - RDWR_STATUS_SUCCESS = 0, - RDWR_STATUS_FAILURE = 1, - RDWR_STATUS_DONE = 2 -}; - enum mwifiex_iface_work_flags { MWIFIEX_IFACE_WORK_DEVICE_DUMP, MWIFIEX_IFACE_WORK_CARD_RESET, @@ -616,6 +602,7 @@ struct mwifiex_private { spinlock_t curr_bcn_buf_lock; struct wireless_dev wdev; struct mwifiex_chan_freq_power cfp; + u32 versionstrsel; char version_str[128]; #ifdef CONFIG_DEBUG_FS struct dentry *dfs_dev_dir; @@ -640,6 +627,7 @@ struct mwifiex_private { u32 mgmt_frame_mask; struct mwifiex_roc_cfg roc_cfg; bool scan_aborting; + u8 sched_scanning; u8 csa_chan; unsigned long csa_expire_time; u8 del_list_idx; @@ -667,6 +655,8 @@ struct mwifiex_private { struct mwifiex_ds_mem_rw mem_rw; struct sk_buff_head bypass_txq; struct mwifiex_user_scan_chan hidden_chan[MWIFIEX_USER_SCAN_CHAN_MAX]; + u8 assoc_resp_ht_param; + bool ht_param_present; }; @@ -791,6 +781,7 @@ struct mwifiex_if_ops { int (*init_if) (struct mwifiex_adapter *); void (*cleanup_if) (struct mwifiex_adapter *); int (*check_fw_status) (struct mwifiex_adapter *, u32); + int (*check_winner_status)(struct mwifiex_adapter *); int (*prog_fw) (struct mwifiex_adapter *, struct mwifiex_fw_image *); int (*register_dev) (struct mwifiex_adapter *); void (*unregister_dev) (struct mwifiex_adapter *); @@ -994,6 +985,7 @@ struct mwifiex_adapter { u8 active_scan_triggered; bool usb_mc_status; bool usb_mc_setup; + struct cfg80211_wowlan_nd_info *nd_info; }; void mwifiex_process_tx_queue(struct mwifiex_adapter *adapter); @@ -1196,6 +1188,10 @@ int mwifiex_ret_802_11_scan_ext(struct mwifiex_private *priv, struct host_cmd_ds_command *resp); int mwifiex_handle_event_ext_scan_report(struct mwifiex_private *priv, void *buf); +int mwifiex_cmd_802_11_bg_scan_config(struct mwifiex_private *priv, + struct host_cmd_ds_command *cmd, + void *data_buf); +int mwifiex_stop_bg_scan(struct mwifiex_private *priv); /* * This function checks if the queuing is RA based or not. @@ -1417,7 +1413,7 @@ int mwifiex_set_encode(struct mwifiex_private *priv, struct key_params *kp, int mwifiex_set_gen_ie(struct mwifiex_private *priv, const u8 *ie, int ie_len); -int mwifiex_get_ver_ext(struct mwifiex_private *priv); +int mwifiex_get_ver_ext(struct mwifiex_private *priv, u32 version_str_sel); int mwifiex_remain_on_chan_cfg(struct mwifiex_private *priv, u16 action, struct ieee80211_channel *chan, @@ -1586,6 +1582,12 @@ void mwifiex_drv_info_dump(struct mwifiex_adapter *adapter); void mwifiex_upload_device_dump(struct mwifiex_adapter *adapter); void *mwifiex_alloc_dma_align_buf(int rx_len, gfp_t flags); void mwifiex_queue_main_work(struct mwifiex_adapter *adapter); +int mwifiex_get_wakeup_reason(struct mwifiex_private *priv, u16 action, + int cmd_type, + struct mwifiex_ds_wakeup_reason *wakeup_reason); +int mwifiex_ret_wakeup_reason(struct mwifiex_private *priv, + struct host_cmd_ds_command *resp, + struct host_cmd_ds_wakeup_reason *wakeup_reason); void mwifiex_coex_ampdu_rxwinsize(struct mwifiex_adapter *adapter); void mwifiex_11n_delba(struct mwifiex_private *priv, int tid); int mwifiex_send_domain_info_cmd_fw(struct wiphy *wiphy); diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 6d0dc40e20e5..cc072142411a 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -37,17 +37,6 @@ static struct mwifiex_if_ops pcie_ops; static struct semaphore add_remove_card_sem; -static struct memory_type_mapping mem_type_mapping_tbl[] = { - {"ITCM", NULL, 0, 0xF0}, - {"DTCM", NULL, 0, 0xF1}, - {"SQRAM", NULL, 0, 0xF2}, - {"IRAM", NULL, 0, 0xF3}, - {"APU", NULL, 0, 0xF4}, - {"CIU", NULL, 0, 0xF5}, - {"ICU", NULL, 0, 0xF6}, - {"MAC", NULL, 0, 0xF7}, -}; - static int mwifiex_map_pci_memory(struct mwifiex_adapter *adapter, struct sk_buff *skb, size_t size, int flags) @@ -206,6 +195,8 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev, card->pcie.blksz_fw_dl = data->blksz_fw_dl; card->pcie.tx_buf_size = data->tx_buf_size; card->pcie.can_dump_fw = data->can_dump_fw; + card->pcie.mem_type_mapping_tbl = data->mem_type_mapping_tbl; + card->pcie.num_mem_types = data->num_mem_types; card->pcie.can_ext_scan = data->can_ext_scan; } @@ -323,6 +314,8 @@ static int mwifiex_read_reg(struct mwifiex_adapter *adapter, int reg, u32 *data) struct pcie_service_card *card = adapter->card; *data = ioread32(card->pci_mmap1 + reg); + if (*data == 0xffffffff) + return 0xffffffff; return 0; } @@ -2007,14 +2000,12 @@ done: /* * This function checks the firmware status in card. - * - * The winner interface is also determined by this function. */ static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) { int ret = 0; - u32 firmware_stat, winner_status; + u32 firmware_stat; struct pcie_service_card *card = adapter->card; const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; u32 tries; @@ -2054,19 +2045,28 @@ mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) } } - if (ret) { - if (mwifiex_read_reg(adapter, reg->fw_status, - &winner_status)) - ret = -1; - else if (!winner_status) { - mwifiex_dbg(adapter, INFO, - "PCI-E is the winner\n"); - adapter->winner = 1; - } else { - mwifiex_dbg(adapter, ERROR, - "PCI-E is not the winner <%#x,%d>, exit dnld\n", - ret, adapter->winner); - } + return ret; +} + +/* This function checks if WLAN is the winner. + */ +static int +mwifiex_check_winner_status(struct mwifiex_adapter *adapter) +{ + u32 winner = 0; + int ret = 0; + struct pcie_service_card *card = adapter->card; + const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; + + if (mwifiex_read_reg(adapter, reg->fw_status, &winner)) { + ret = -1; + } else if (!winner) { + mwifiex_dbg(adapter, INFO, "PCI-E is the winner\n"); + adapter->winner = 1; + } else { + mwifiex_dbg(adapter, ERROR, + "PCI-E is not the winner <%#x,%d>, exit dnld\n", + ret, adapter->winner); } return ret; @@ -2075,20 +2075,28 @@ mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) /* * This function reads the interrupt status from card. */ -static void mwifiex_interrupt_status(struct mwifiex_adapter *adapter) +static void mwifiex_interrupt_status(struct mwifiex_adapter *adapter, + int msg_id) { u32 pcie_ireg; unsigned long flags; + struct pcie_service_card *card = adapter->card; if (!mwifiex_pcie_ok_to_access_hw(adapter)) return; - if (mwifiex_read_reg(adapter, PCIE_HOST_INT_STATUS, &pcie_ireg)) { - mwifiex_dbg(adapter, ERROR, "Read register failed\n"); - return; - } + if (card->msix_enable && msg_id >= 0) { + pcie_ireg = BIT(msg_id); + } else { + if (mwifiex_read_reg(adapter, PCIE_HOST_INT_STATUS, + &pcie_ireg)) { + mwifiex_dbg(adapter, ERROR, "Read register failed\n"); + return; + } + + if ((pcie_ireg == 0xFFFFFFFF) || !pcie_ireg) + return; - if ((pcie_ireg != 0xFFFFFFFF) && (pcie_ireg)) { mwifiex_pcie_disable_host_int(adapter); @@ -2099,21 +2107,24 @@ static void mwifiex_interrupt_status(struct mwifiex_adapter *adapter) "Write register failed\n"); return; } - spin_lock_irqsave(&adapter->int_lock, flags); - adapter->int_status |= pcie_ireg; - spin_unlock_irqrestore(&adapter->int_lock, flags); - - if (!adapter->pps_uapsd_mode && - adapter->ps_state == PS_STATE_SLEEP && - mwifiex_pcie_ok_to_access_hw(adapter)) { - /* Potentially for PCIe we could get other - * interrupts like shared. Don't change power - * state until cookie is set */ - adapter->ps_state = PS_STATE_AWAKE; - adapter->pm_wakeup_fw_try = false; - del_timer(&adapter->wakeup_timer); - } } + + if (!adapter->pps_uapsd_mode && + adapter->ps_state == PS_STATE_SLEEP && + mwifiex_pcie_ok_to_access_hw(adapter)) { + /* Potentially for PCIe we could get other + * interrupts like shared. Don't change power + * state until cookie is set + */ + adapter->ps_state = PS_STATE_AWAKE; + adapter->pm_wakeup_fw_try = false; + del_timer(&adapter->wakeup_timer); + } + + spin_lock_irqsave(&adapter->int_lock, flags); + adapter->int_status |= pcie_ireg; + spin_unlock_irqrestore(&adapter->int_lock, flags); + mwifiex_dbg(adapter, INTR, "ireg: 0x%08x\n", pcie_ireg); } /* @@ -2124,7 +2135,8 @@ static void mwifiex_interrupt_status(struct mwifiex_adapter *adapter) */ static irqreturn_t mwifiex_pcie_interrupt(int irq, void *context) { - struct pci_dev *pdev = (struct pci_dev *)context; + struct mwifiex_msix_context *ctx = context; + struct pci_dev *pdev = ctx->dev; struct pcie_service_card *card; struct mwifiex_adapter *adapter; @@ -2144,7 +2156,11 @@ static irqreturn_t mwifiex_pcie_interrupt(int irq, void *context) if (adapter->surprise_removed) goto exit; - mwifiex_interrupt_status(adapter); + if (card->msix_enable) + mwifiex_interrupt_status(adapter, ctx->msg_id); + else + mwifiex_interrupt_status(adapter, -1); + mwifiex_queue_main_work(adapter); exit: @@ -2164,7 +2180,7 @@ exit: * In case of Rx packets received, the packets are uploaded from card to * host and processed accordingly. */ -static int mwifiex_process_int_status(struct mwifiex_adapter *adapter) +static int mwifiex_process_pcie_int(struct mwifiex_adapter *adapter) { int ret; u32 pcie_ireg; @@ -2244,6 +2260,69 @@ static int mwifiex_process_int_status(struct mwifiex_adapter *adapter) return 0; } +static int mwifiex_process_msix_int(struct mwifiex_adapter *adapter) +{ + int ret; + u32 pcie_ireg; + unsigned long flags; + + spin_lock_irqsave(&adapter->int_lock, flags); + /* Clear out unused interrupts */ + pcie_ireg = adapter->int_status; + adapter->int_status = 0; + spin_unlock_irqrestore(&adapter->int_lock, flags); + + if (pcie_ireg & HOST_INTR_DNLD_DONE) { + mwifiex_dbg(adapter, INTR, + "info: TX DNLD Done\n"); + ret = mwifiex_pcie_send_data_complete(adapter); + if (ret) + return ret; + } + if (pcie_ireg & HOST_INTR_UPLD_RDY) { + mwifiex_dbg(adapter, INTR, + "info: Rx DATA\n"); + ret = mwifiex_pcie_process_recv_data(adapter); + if (ret) + return ret; + } + if (pcie_ireg & HOST_INTR_EVENT_RDY) { + mwifiex_dbg(adapter, INTR, + "info: Rx EVENT\n"); + ret = mwifiex_pcie_process_event_ready(adapter); + if (ret) + return ret; + } + + if (pcie_ireg & HOST_INTR_CMD_DONE) { + if (adapter->cmd_sent) { + mwifiex_dbg(adapter, INTR, + "info: CMD sent Interrupt\n"); + adapter->cmd_sent = false; + } + /* Handle command response */ + ret = mwifiex_pcie_process_cmd_complete(adapter); + if (ret) + return ret; + } + + mwifiex_dbg(adapter, INTR, + "info: cmd_sent=%d data_sent=%d\n", + adapter->cmd_sent, adapter->data_sent); + + return 0; +} + +static int mwifiex_process_int_status(struct mwifiex_adapter *adapter) +{ + struct pcie_service_card *card = adapter->card; + + if (card->msix_enable) + return mwifiex_process_msix_int(adapter); + else + return mwifiex_process_pcie_int(adapter); +} + /* * This function downloads data from driver to card. * @@ -2278,10 +2357,15 @@ mwifiex_pcie_rdwr_firmware(struct mwifiex_adapter *adapter, u8 doneflag) { int ret, tries; u8 ctrl_data; + u32 fw_status; struct pcie_service_card *card = adapter->card; const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; - ret = mwifiex_write_reg(adapter, reg->fw_dump_ctrl, FW_DUMP_HOST_READY); + if (mwifiex_read_reg(adapter, reg->fw_status, &fw_status)) + return RDWR_STATUS_FAILURE; + + ret = mwifiex_write_reg(adapter, reg->fw_dump_ctrl, + reg->fw_dump_host_ready); if (ret) { mwifiex_dbg(adapter, ERROR, "PCIE write err\n"); @@ -2294,11 +2378,11 @@ mwifiex_pcie_rdwr_firmware(struct mwifiex_adapter *adapter, u8 doneflag) return RDWR_STATUS_SUCCESS; if (doneflag && ctrl_data == doneflag) return RDWR_STATUS_DONE; - if (ctrl_data != FW_DUMP_HOST_READY) { + if (ctrl_data != reg->fw_dump_host_ready) { mwifiex_dbg(adapter, WARN, "The ctrl reg was changed, re-try again!\n"); ret = mwifiex_write_reg(adapter, reg->fw_dump_ctrl, - FW_DUMP_HOST_READY); + reg->fw_dump_host_ready); if (ret) { mwifiex_dbg(adapter, ERROR, "PCIE write err\n"); @@ -2318,7 +2402,8 @@ static void mwifiex_pcie_fw_dump(struct mwifiex_adapter *adapter) struct pcie_service_card *card = adapter->card; const struct mwifiex_pcie_card_reg *creg = card->pcie.reg; unsigned int reg, reg_start, reg_end; - u8 *dbg_ptr, *end_ptr, dump_num, idx, i, read_reg, doneflag = 0; + u8 *dbg_ptr, *end_ptr, *tmp_ptr, fw_dump_num, dump_num; + u8 idx, i, read_reg, doneflag = 0; enum rdwr_status stat; u32 memory_size; int ret; @@ -2326,8 +2411,9 @@ static void mwifiex_pcie_fw_dump(struct mwifiex_adapter *adapter) if (!card->pcie.can_dump_fw) return; - for (idx = 0; idx < ARRAY_SIZE(mem_type_mapping_tbl); idx++) { - struct memory_type_mapping *entry = &mem_type_mapping_tbl[idx]; + for (idx = 0; idx < adapter->num_mem_types; idx++) { + struct memory_type_mapping *entry = + &adapter->mem_type_mapping_tbl[idx]; if (entry->mem_ptr) { vfree(entry->mem_ptr); @@ -2336,7 +2422,7 @@ static void mwifiex_pcie_fw_dump(struct mwifiex_adapter *adapter) entry->mem_size = 0; } - mwifiex_dbg(adapter, DUMP, "== mwifiex firmware dump start ==\n"); + mwifiex_dbg(adapter, MSG, "== mwifiex firmware dump start ==\n"); /* Read the number of the memories which will dump */ stat = mwifiex_pcie_rdwr_firmware(adapter, doneflag); @@ -2344,28 +2430,38 @@ static void mwifiex_pcie_fw_dump(struct mwifiex_adapter *adapter) return; reg = creg->fw_dump_start; - mwifiex_read_reg_byte(adapter, reg, &dump_num); + mwifiex_read_reg_byte(adapter, reg, &fw_dump_num); + + /* W8997 chipset firmware dump will be restore in single region*/ + if (fw_dump_num == 0) + dump_num = 1; + else + dump_num = fw_dump_num; /* Read the length of every memory which will dump */ for (idx = 0; idx < dump_num; idx++) { - struct memory_type_mapping *entry = &mem_type_mapping_tbl[idx]; - - stat = mwifiex_pcie_rdwr_firmware(adapter, doneflag); - if (stat == RDWR_STATUS_FAILURE) - return; - + struct memory_type_mapping *entry = + &adapter->mem_type_mapping_tbl[idx]; memory_size = 0; - reg = creg->fw_dump_start; - for (i = 0; i < 4; i++) { - mwifiex_read_reg_byte(adapter, reg, &read_reg); - memory_size |= (read_reg << (i * 8)); + if (fw_dump_num != 0) { + stat = mwifiex_pcie_rdwr_firmware(adapter, doneflag); + if (stat == RDWR_STATUS_FAILURE) + return; + + reg = creg->fw_dump_start; + for (i = 0; i < 4; i++) { + mwifiex_read_reg_byte(adapter, reg, &read_reg); + memory_size |= (read_reg << (i * 8)); reg++; + } + } else { + memory_size = MWIFIEX_FW_DUMP_MAX_MEMSIZE; } if (memory_size == 0) { mwifiex_dbg(adapter, MSG, "Firmware dump Finished!\n"); ret = mwifiex_write_reg(adapter, creg->fw_dump_ctrl, - FW_DUMP_READ_DONE); + creg->fw_dump_read_done); if (ret) { mwifiex_dbg(adapter, ERROR, "PCIE write err\n"); return; @@ -2400,11 +2496,21 @@ static void mwifiex_pcie_fw_dump(struct mwifiex_adapter *adapter) mwifiex_read_reg_byte(adapter, reg, dbg_ptr); if (dbg_ptr < end_ptr) { dbg_ptr++; - } else { - mwifiex_dbg(adapter, ERROR, - "Allocated buf not enough\n"); - return; + continue; } + mwifiex_dbg(adapter, ERROR, + "pre-allocated buf not enough\n"); + tmp_ptr = + vzalloc(memory_size + MWIFIEX_SIZE_4K); + if (!tmp_ptr) + return; + memcpy(tmp_ptr, entry->mem_ptr, memory_size); + vfree(entry->mem_ptr); + entry->mem_ptr = tmp_ptr; + tmp_ptr = NULL; + dbg_ptr = entry->mem_ptr + memory_size; + memory_size += MWIFIEX_SIZE_4K; + end_ptr = entry->mem_ptr + memory_size; } if (stat != RDWR_STATUS_DONE) @@ -2416,7 +2522,7 @@ static void mwifiex_pcie_fw_dump(struct mwifiex_adapter *adapter) break; } while (true); } - mwifiex_dbg(adapter, DUMP, "== mwifiex firmware dump end ==\n"); + mwifiex_dbg(adapter, MSG, "== mwifiex firmware dump end ==\n"); } static void mwifiex_pcie_device_dump_work(struct mwifiex_adapter *adapter) @@ -2595,10 +2701,43 @@ static void mwifiex_pcie_cleanup(struct mwifiex_adapter *adapter) static int mwifiex_pcie_request_irq(struct mwifiex_adapter *adapter) { - int ret; + int ret, i, j; struct pcie_service_card *card = adapter->card; struct pci_dev *pdev = card->dev; + if (card->pcie.reg->msix_support) { + for (i = 0; i < MWIFIEX_NUM_MSIX_VECTORS; i++) + card->msix_entries[i].entry = i; + ret = pci_enable_msix_exact(pdev, card->msix_entries, + MWIFIEX_NUM_MSIX_VECTORS); + if (!ret) { + for (i = 0; i < MWIFIEX_NUM_MSIX_VECTORS; i++) { + card->msix_ctx[i].dev = pdev; + card->msix_ctx[i].msg_id = i; + + ret = request_irq(card->msix_entries[i].vector, + mwifiex_pcie_interrupt, 0, + "MWIFIEX_PCIE_MSIX", + &card->msix_ctx[i]); + if (ret) + break; + } + + if (ret) { + mwifiex_dbg(adapter, INFO, "request_irq fail: %d\n", + ret); + for (j = 0; j < i; j++) + free_irq(card->msix_entries[j].vector, + &card->msix_ctx[i]); + pci_disable_msix(pdev); + } else { + mwifiex_dbg(adapter, MSG, "MSIx enabled!"); + card->msix_enable = 1; + return 0; + } + } + } + if (pci_enable_msi(pdev) != 0) pci_disable_msi(pdev); else @@ -2606,8 +2745,10 @@ static int mwifiex_pcie_request_irq(struct mwifiex_adapter *adapter) mwifiex_dbg(adapter, INFO, "msi_enable = %d\n", card->msi_enable); + card->share_irq_ctx.dev = pdev; + card->share_irq_ctx.msg_id = -1; ret = request_irq(pdev->irq, mwifiex_pcie_interrupt, IRQF_SHARED, - "MRVL_PCIE", pdev); + "MRVL_PCIE", &card->share_irq_ctx); if (ret) { pr_err("request_irq failed: ret=%d\n", ret); adapter->card = NULL; @@ -2635,8 +2776,8 @@ static int mwifiex_register_dev(struct mwifiex_adapter *adapter) return -1; adapter->tx_buf_size = card->pcie.tx_buf_size; - adapter->mem_type_mapping_tbl = mem_type_mapping_tbl; - adapter->num_mem_types = ARRAY_SIZE(mem_type_mapping_tbl); + adapter->mem_type_mapping_tbl = card->pcie.mem_type_mapping_tbl; + adapter->num_mem_types = card->pcie.num_mem_types; strcpy(adapter->fw_name, card->pcie.firmware); adapter->ext_scan = card->pcie.can_ext_scan; @@ -2653,11 +2794,28 @@ static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) { struct pcie_service_card *card = adapter->card; const struct mwifiex_pcie_card_reg *reg; + struct pci_dev *pdev = card->dev; + int i; if (card) { - mwifiex_dbg(adapter, INFO, - "%s(): calling free_irq()\n", __func__); - free_irq(card->dev->irq, card->dev); + if (card->msix_enable) { + for (i = 0; i < MWIFIEX_NUM_MSIX_VECTORS; i++) + synchronize_irq(card->msix_entries[i].vector); + + for (i = 0; i < MWIFIEX_NUM_MSIX_VECTORS; i++) + free_irq(card->msix_entries[i].vector, + &card->msix_ctx[i]); + + card->msix_enable = 0; + pci_disable_msix(pdev); + } else { + mwifiex_dbg(adapter, INFO, + "%s(): calling free_irq()\n", __func__); + free_irq(card->dev->irq, &card->share_irq_ctx); + + if (card->msi_enable) + pci_disable_msi(pdev); + } reg = card->pcie.reg; if (reg->sleep_cookie) @@ -2675,6 +2833,7 @@ static struct mwifiex_if_ops pcie_ops = { .init_if = mwifiex_pcie_init, .cleanup_if = mwifiex_pcie_cleanup, .check_fw_status = mwifiex_check_fw_status, + .check_winner_status = mwifiex_check_winner_status, .prog_fw = mwifiex_prog_fw_w_helper, .register_dev = mwifiex_register_dev, .unregister_dev = mwifiex_unregister_dev, diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.h b/drivers/net/wireless/marvell/mwifiex/pcie.h index 6fc28737b576..29e58ce877e3 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.h +++ b/drivers/net/wireless/marvell/mwifiex/pcie.h @@ -26,6 +26,7 @@ #include <linux/pcieport_if.h> #include <linux/interrupt.h> +#include "decl.h" #include "main.h" #define PCIE8766_DEFAULT_FW_NAME "mrvl/pcie8766_uapsta.bin" @@ -135,6 +136,9 @@ struct mwifiex_pcie_card_reg { u16 fw_dump_ctrl; u16 fw_dump_start; u16 fw_dump_end; + u8 fw_dump_host_ready; + u8 fw_dump_read_done; + u8 msix_support; }; static const struct mwifiex_pcie_card_reg mwifiex_reg_8766 = { @@ -166,6 +170,7 @@ static const struct mwifiex_pcie_card_reg mwifiex_reg_8766 = { .ring_tx_start_ptr = 0, .pfu_enabled = 0, .sleep_cookie = 1, + .msix_support = 0, }; static const struct mwifiex_pcie_card_reg mwifiex_reg_8897 = { @@ -200,6 +205,9 @@ static const struct mwifiex_pcie_card_reg mwifiex_reg_8897 = { .fw_dump_ctrl = 0xcf4, .fw_dump_start = 0xcf8, .fw_dump_end = 0xcff, + .fw_dump_host_ready = 0xee, + .fw_dump_read_done = 0xfe, + .msix_support = 0, }; static const struct mwifiex_pcie_card_reg mwifiex_reg_8997 = { @@ -231,6 +239,27 @@ static const struct mwifiex_pcie_card_reg mwifiex_reg_8997 = { .ring_tx_start_ptr = MWIFIEX_BD_FLAG_TX_START_PTR, .pfu_enabled = 1, .sleep_cookie = 0, + .fw_dump_ctrl = 0xcf4, + .fw_dump_start = 0xcf8, + .fw_dump_end = 0xcff, + .fw_dump_host_ready = 0xcc, + .fw_dump_read_done = 0xdd, + .msix_support = 1, +}; + +static struct memory_type_mapping mem_type_mapping_tbl_w8897[] = { + {"ITCM", NULL, 0, 0xF0}, + {"DTCM", NULL, 0, 0xF1}, + {"SQRAM", NULL, 0, 0xF2}, + {"IRAM", NULL, 0, 0xF3}, + {"APU", NULL, 0, 0xF4}, + {"CIU", NULL, 0, 0xF5}, + {"ICU", NULL, 0, 0xF6}, + {"MAC", NULL, 0, 0xF7}, +}; + +static struct memory_type_mapping mem_type_mapping_tbl_w8997[] = { + {"DUMP", NULL, 0, 0xDD}, }; struct mwifiex_pcie_device { @@ -239,6 +268,8 @@ struct mwifiex_pcie_device { u16 blksz_fw_dl; u16 tx_buf_size; bool can_dump_fw; + struct memory_type_mapping *mem_type_mapping_tbl; + u8 num_mem_types; bool can_ext_scan; }; @@ -257,6 +288,8 @@ static const struct mwifiex_pcie_device mwifiex_pcie8897 = { .blksz_fw_dl = MWIFIEX_PCIE_BLOCK_SIZE_FW_DNLD, .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K, .can_dump_fw = true, + .mem_type_mapping_tbl = mem_type_mapping_tbl_w8897, + .num_mem_types = ARRAY_SIZE(mem_type_mapping_tbl_w8897), .can_ext_scan = true, }; @@ -265,7 +298,9 @@ static const struct mwifiex_pcie_device mwifiex_pcie8997 = { .reg = &mwifiex_reg_8997, .blksz_fw_dl = MWIFIEX_PCIE_BLOCK_SIZE_FW_DNLD, .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K, - .can_dump_fw = false, + .can_dump_fw = true, + .mem_type_mapping_tbl = mem_type_mapping_tbl_w8997, + .num_mem_types = ARRAY_SIZE(mem_type_mapping_tbl_w8997), .can_ext_scan = true, }; @@ -290,6 +325,13 @@ struct mwifiex_pfu_buf_desc { u32 reserved; } __packed; +#define MWIFIEX_NUM_MSIX_VECTORS 4 + +struct mwifiex_msix_context { + struct pci_dev *dev; + u16 msg_id; +}; + struct pcie_service_card { struct pci_dev *dev; struct mwifiex_adapter *adapter; @@ -327,6 +369,12 @@ struct pcie_service_card { void __iomem *pci_mmap; void __iomem *pci_mmap1; int msi_enable; + int msix_enable; +#ifdef CONFIG_PCI + struct msix_entry msix_entries[MWIFIEX_NUM_MSIX_VECTORS]; +#endif + struct mwifiex_msix_context msix_ctx[MWIFIEX_NUM_MSIX_VECTORS]; + struct mwifiex_msix_context share_irq_ctx; }; static inline int diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index c20017ced566..489f7a911a83 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -547,6 +547,61 @@ mwifiex_scan_create_channel_list(struct mwifiex_private *priv, return chan_idx; } +/* This function creates a channel list tlv for bgscan config, based + * on region/band information. + */ +static int +mwifiex_bgscan_create_channel_list(struct mwifiex_private *priv, + const struct mwifiex_bg_scan_cfg + *bgscan_cfg_in, + struct mwifiex_chan_scan_param_set + *scan_chan_list) +{ + enum ieee80211_band band; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *ch; + struct mwifiex_adapter *adapter = priv->adapter; + int chan_idx = 0, i; + + for (band = 0; (band < IEEE80211_NUM_BANDS); band++) { + if (!priv->wdev.wiphy->bands[band]) + continue; + + sband = priv->wdev.wiphy->bands[band]; + + for (i = 0; (i < sband->n_channels) ; i++) { + ch = &sband->channels[i]; + if (ch->flags & IEEE80211_CHAN_DISABLED) + continue; + scan_chan_list[chan_idx].radio_type = band; + + if (bgscan_cfg_in->chan_list[0].scan_time) + scan_chan_list[chan_idx].max_scan_time = + cpu_to_le16((u16)bgscan_cfg_in-> + chan_list[0].scan_time); + else if (ch->flags & IEEE80211_CHAN_NO_IR) + scan_chan_list[chan_idx].max_scan_time = + cpu_to_le16(adapter->passive_scan_time); + else + scan_chan_list[chan_idx].max_scan_time = + cpu_to_le16(adapter-> + specific_scan_time); + + if (ch->flags & IEEE80211_CHAN_NO_IR) + scan_chan_list[chan_idx].chan_scan_mode_bitmap + |= MWIFIEX_PASSIVE_SCAN; + else + scan_chan_list[chan_idx].chan_scan_mode_bitmap + &= ~MWIFIEX_PASSIVE_SCAN; + + scan_chan_list[chan_idx].chan_number = + (u32)ch->hw_value; + chan_idx++; + } + } + return chan_idx; +} + /* This function appends rate TLV to scan config command. */ static int mwifiex_append_rate_tlv(struct mwifiex_private *priv, @@ -2037,6 +2092,8 @@ int mwifiex_ret_802_11_scan(struct mwifiex_private *priv, u8 is_bgscan_resp; __le64 fw_tsf = 0; u8 *radio_type; + struct cfg80211_wowlan_nd_match *pmatch; + struct cfg80211_sched_scan_request *nd_config = NULL; is_bgscan_resp = (le16_to_cpu(resp->command) == HostCmd_CMD_802_11_BG_SCAN_QUERY); @@ -2099,6 +2156,21 @@ int mwifiex_ret_802_11_scan(struct mwifiex_private *priv, (struct mwifiex_ie_types_data **) &chan_band_tlv); +#ifdef CONFIG_PM + if (priv->wdev.wiphy->wowlan_config) + nd_config = priv->wdev.wiphy->wowlan_config->nd_config; +#endif + + if (nd_config) { + adapter->nd_info = + kzalloc(sizeof(struct cfg80211_wowlan_nd_match) + + sizeof(struct cfg80211_wowlan_nd_match *) * + scan_rsp->number_of_sets, GFP_ATOMIC); + + if (adapter->nd_info) + adapter->nd_info->n_matches = scan_rsp->number_of_sets; + } + for (idx = 0; idx < scan_rsp->number_of_sets && bytes_left; idx++) { /* * If the TSF TLV was appended to the scan results, save this @@ -2117,6 +2189,23 @@ int mwifiex_ret_802_11_scan(struct mwifiex_private *priv, radio_type = NULL; } + if (chan_band_tlv && adapter->nd_info) { + adapter->nd_info->matches[idx] = + kzalloc(sizeof(*pmatch) + + sizeof(u32), GFP_ATOMIC); + + pmatch = adapter->nd_info->matches[idx]; + + if (pmatch) { + memset(pmatch, 0, sizeof(*pmatch)); + if (chan_band_tlv) { + pmatch->n_channels = 1; + pmatch->channels[0] = + chan_band->chan_number; + } + } + } + ret = mwifiex_parse_single_response_buf(priv, &bss_info, &bytes_left, le64_to_cpu(fw_tsf), @@ -2155,6 +2244,227 @@ int mwifiex_cmd_802_11_scan_ext(struct mwifiex_private *priv, return 0; } +/* This function prepares an background scan config command to be sent + * to the firmware + */ +int mwifiex_cmd_802_11_bg_scan_config(struct mwifiex_private *priv, + struct host_cmd_ds_command *cmd, + void *data_buf) +{ + struct host_cmd_ds_802_11_bg_scan_config *bgscan_config = + &cmd->params.bg_scan_config; + struct mwifiex_bg_scan_cfg *bgscan_cfg_in = data_buf; + u8 *tlv_pos = bgscan_config->tlv; + u8 num_probes; + u32 ssid_len, chan_idx, scan_type, scan_dur, chan_num; + int i; + struct mwifiex_ie_types_num_probes *num_probes_tlv; + struct mwifiex_ie_types_repeat_count *repeat_count_tlv; + struct mwifiex_ie_types_min_rssi_threshold *rssi_threshold_tlv; + struct mwifiex_ie_types_bgscan_start_later *start_later_tlv; + struct mwifiex_ie_types_wildcard_ssid_params *wildcard_ssid_tlv; + struct mwifiex_ie_types_chan_list_param_set *chan_list_tlv; + struct mwifiex_chan_scan_param_set *temp_chan; + + cmd->command = cpu_to_le16(HostCmd_CMD_802_11_BG_SCAN_CONFIG); + cmd->size = cpu_to_le16(sizeof(*bgscan_config) + S_DS_GEN); + + bgscan_config->action = cpu_to_le16(bgscan_cfg_in->action); + bgscan_config->enable = bgscan_cfg_in->enable; + bgscan_config->bss_type = bgscan_cfg_in->bss_type; + bgscan_config->scan_interval = + cpu_to_le32(bgscan_cfg_in->scan_interval); + bgscan_config->report_condition = + cpu_to_le32(bgscan_cfg_in->report_condition); + + /* stop sched scan */ + if (!bgscan_config->enable) + return 0; + + bgscan_config->chan_per_scan = bgscan_cfg_in->chan_per_scan; + + num_probes = (bgscan_cfg_in->num_probes ? bgscan_cfg_in-> + num_probes : priv->adapter->scan_probes); + + if (num_probes) { + num_probes_tlv = (struct mwifiex_ie_types_num_probes *)tlv_pos; + num_probes_tlv->header.type = cpu_to_le16(TLV_TYPE_NUMPROBES); + num_probes_tlv->header.len = + cpu_to_le16(sizeof(num_probes_tlv->num_probes)); + num_probes_tlv->num_probes = cpu_to_le16((u16)num_probes); + + tlv_pos += sizeof(num_probes_tlv->header) + + le16_to_cpu(num_probes_tlv->header.len); + } + + if (bgscan_cfg_in->repeat_count) { + repeat_count_tlv = + (struct mwifiex_ie_types_repeat_count *)tlv_pos; + repeat_count_tlv->header.type = + cpu_to_le16(TLV_TYPE_REPEAT_COUNT); + repeat_count_tlv->header.len = + cpu_to_le16(sizeof(repeat_count_tlv->repeat_count)); + repeat_count_tlv->repeat_count = + cpu_to_le16(bgscan_cfg_in->repeat_count); + + tlv_pos += sizeof(repeat_count_tlv->header) + + le16_to_cpu(repeat_count_tlv->header.len); + } + + if (bgscan_cfg_in->rssi_threshold) { + rssi_threshold_tlv = + (struct mwifiex_ie_types_min_rssi_threshold *)tlv_pos; + rssi_threshold_tlv->header.type = + cpu_to_le16(TLV_TYPE_RSSI_LOW); + rssi_threshold_tlv->header.len = + cpu_to_le16(sizeof(rssi_threshold_tlv->rssi_threshold)); + rssi_threshold_tlv->rssi_threshold = + cpu_to_le16(bgscan_cfg_in->rssi_threshold); + + tlv_pos += sizeof(rssi_threshold_tlv->header) + + le16_to_cpu(rssi_threshold_tlv->header.len); + } + + for (i = 0; i < bgscan_cfg_in->num_ssids; i++) { + ssid_len = bgscan_cfg_in->ssid_list[i].ssid.ssid_len; + + wildcard_ssid_tlv = + (struct mwifiex_ie_types_wildcard_ssid_params *)tlv_pos; + wildcard_ssid_tlv->header.type = + cpu_to_le16(TLV_TYPE_WILDCARDSSID); + wildcard_ssid_tlv->header.len = cpu_to_le16( + (u16)(ssid_len + sizeof(wildcard_ssid_tlv-> + max_ssid_length))); + + /* max_ssid_length = 0 tells firmware to perform + * specific scan for the SSID filled, whereas + * max_ssid_length = IEEE80211_MAX_SSID_LEN is for + * wildcard scan. + */ + if (ssid_len) + wildcard_ssid_tlv->max_ssid_length = 0; + else + wildcard_ssid_tlv->max_ssid_length = + IEEE80211_MAX_SSID_LEN; + + memcpy(wildcard_ssid_tlv->ssid, + bgscan_cfg_in->ssid_list[i].ssid.ssid, ssid_len); + + tlv_pos += (sizeof(wildcard_ssid_tlv->header) + + le16_to_cpu(wildcard_ssid_tlv->header.len)); + } + + chan_list_tlv = (struct mwifiex_ie_types_chan_list_param_set *)tlv_pos; + + if (bgscan_cfg_in->chan_list[0].chan_number) { + dev_dbg(priv->adapter->dev, "info: bgscan: Using supplied channel list\n"); + + chan_list_tlv->header.type = cpu_to_le16(TLV_TYPE_CHANLIST); + + for (chan_idx = 0; + chan_idx < MWIFIEX_BG_SCAN_CHAN_MAX && + bgscan_cfg_in->chan_list[chan_idx].chan_number; + chan_idx++) { + temp_chan = chan_list_tlv->chan_scan_param + chan_idx; + + /* Increment the TLV header length by size appended */ + le16_add_cpu(&chan_list_tlv->header.len, + sizeof(chan_list_tlv->chan_scan_param)); + + temp_chan->chan_number = + bgscan_cfg_in->chan_list[chan_idx].chan_number; + temp_chan->radio_type = + bgscan_cfg_in->chan_list[chan_idx].radio_type; + + scan_type = + bgscan_cfg_in->chan_list[chan_idx].scan_type; + + if (scan_type == MWIFIEX_SCAN_TYPE_PASSIVE) + temp_chan->chan_scan_mode_bitmap + |= MWIFIEX_PASSIVE_SCAN; + else + temp_chan->chan_scan_mode_bitmap + &= ~MWIFIEX_PASSIVE_SCAN; + + if (bgscan_cfg_in->chan_list[chan_idx].scan_time) { + scan_dur = (u16)bgscan_cfg_in-> + chan_list[chan_idx].scan_time; + } else { + scan_dur = (scan_type == + MWIFIEX_SCAN_TYPE_PASSIVE) ? + priv->adapter->passive_scan_time : + priv->adapter->specific_scan_time; + } + + temp_chan->min_scan_time = cpu_to_le16(scan_dur); + temp_chan->max_scan_time = cpu_to_le16(scan_dur); + } + } else { + dev_dbg(priv->adapter->dev, + "info: bgscan: Creating full region channel list\n"); + chan_num = + mwifiex_bgscan_create_channel_list(priv, bgscan_cfg_in, + chan_list_tlv-> + chan_scan_param); + le16_add_cpu(&chan_list_tlv->header.len, + chan_num * + sizeof(chan_list_tlv->chan_scan_param[0])); + } + + tlv_pos += (sizeof(chan_list_tlv->header) + + le16_to_cpu(chan_list_tlv->header.len)); + + if (bgscan_cfg_in->start_later) { + start_later_tlv = + (struct mwifiex_ie_types_bgscan_start_later *)tlv_pos; + start_later_tlv->header.type = + cpu_to_le16(TLV_TYPE_BGSCAN_START_LATER); + start_later_tlv->header.len = + cpu_to_le16(sizeof(start_later_tlv->start_later)); + start_later_tlv->start_later = + cpu_to_le16(bgscan_cfg_in->start_later); + + tlv_pos += sizeof(start_later_tlv->header) + + le16_to_cpu(start_later_tlv->header.len); + } + + /* Append vendor specific IE TLV */ + mwifiex_cmd_append_vsie_tlv(priv, MWIFIEX_VSIE_MASK_BGSCAN, &tlv_pos); + + le16_add_cpu(&cmd->size, tlv_pos - bgscan_config->tlv); + + return 0; +} + +int mwifiex_stop_bg_scan(struct mwifiex_private *priv) +{ + struct mwifiex_bg_scan_cfg *bgscan_cfg; + + if (!priv->sched_scanning) { + dev_dbg(priv->adapter->dev, "bgscan already stopped!\n"); + return 0; + } + + bgscan_cfg = kzalloc(sizeof(*bgscan_cfg), GFP_KERNEL); + if (!bgscan_cfg) + return -ENOMEM; + + bgscan_cfg->bss_type = MWIFIEX_BSS_MODE_INFRA; + bgscan_cfg->action = MWIFIEX_BGSCAN_ACT_SET; + bgscan_cfg->enable = false; + + if (mwifiex_send_cmd(priv, HostCmd_CMD_802_11_BG_SCAN_CONFIG, + HostCmd_ACT_GEN_SET, 0, bgscan_cfg, true)) { + kfree(bgscan_cfg); + return -EFAULT; + } + + kfree(bgscan_cfg); + priv->sched_scanning = false; + + return 0; +} + static void mwifiex_update_chan_statistics(struct mwifiex_private *priv, struct mwifiex_ietypes_chanstats *tlv_stat) diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index 4c8cae682c89..abf15dbdfe08 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -181,7 +181,7 @@ static int mwifiex_sdio_resume(struct device *dev) /* Disable Host Sleep */ mwifiex_cancel_hs(mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_STA), - MWIFIEX_ASYNC_CMD); + MWIFIEX_SYNC_CMD); return 0; } @@ -1039,19 +1039,14 @@ done: /* * This function checks the firmware status in card. - * - * The winner interface is also determined by this function. */ static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) { - struct sdio_mmc_card *card = adapter->card; int ret = 0; u16 firmware_stat; u32 tries; - u8 winner_status; - /* Wait for firmware initialization event */ for (tries = 0; tries < poll_num; tries++) { ret = mwifiex_sdio_read_fw_status(adapter, &firmware_stat); if (ret) @@ -1065,16 +1060,25 @@ static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, } } - if (ret) { - if (mwifiex_read_reg - (adapter, card->reg->status_reg_0, &winner_status)) - winner_status = 0; + return ret; +} + +/* This function checks if WLAN is the winner. + */ +static int mwifiex_check_winner_status(struct mwifiex_adapter *adapter) +{ + int ret = 0; + u8 winner = 0; + struct sdio_mmc_card *card = adapter->card; + + if (mwifiex_read_reg(adapter, card->reg->status_reg_0, &winner)) + return -1; + + if (winner) + adapter->winner = 0; + else + adapter->winner = 1; - if (winner_status) - adapter->winner = 0; - else - adapter->winner = 1; - } return ret; } @@ -2620,6 +2624,7 @@ static struct mwifiex_if_ops sdio_ops = { .init_if = mwifiex_init_sdio, .cleanup_if = mwifiex_cleanup_sdio, .check_fw_status = mwifiex_check_fw_status, + .check_winner_status = mwifiex_check_winner_status, .prog_fw = mwifiex_prog_fw_w_helper, .register_dev = mwifiex_register_dev, .unregister_dev = mwifiex_unregister_dev, diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index e486867a4c67..30f152601c57 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -1813,6 +1813,22 @@ static int mwifiex_cmd_sdio_rx_aggr_cfg(struct host_cmd_ds_command *cmd, return 0; } +/* This function prepares command to get HS wakeup reason. + * + * Preparation includes - + * - Setting command ID, action and proper size + * - Ensuring correct endian-ness + */ +static int mwifiex_cmd_get_wakeup_reason(struct mwifiex_private *priv, + struct host_cmd_ds_command *cmd) +{ + cmd->command = cpu_to_le16(HostCmd_CMD_HS_WAKEUP_REASON); + cmd->size = cpu_to_le16(sizeof(struct host_cmd_ds_wakeup_reason) + + S_DS_GEN); + + return 0; +} + /* * This function prepares the commands before sending them to the firmware. * @@ -1873,6 +1889,10 @@ int mwifiex_sta_prepare_cmd(struct mwifiex_private *priv, uint16_t cmd_no, case HostCmd_CMD_802_11_SCAN: ret = mwifiex_cmd_802_11_scan(cmd_ptr, data_buf); break; + case HostCmd_CMD_802_11_BG_SCAN_CONFIG: + ret = mwifiex_cmd_802_11_bg_scan_config(priv, cmd_ptr, + data_buf); + break; case HostCmd_CMD_802_11_BG_SCAN_QUERY: ret = mwifiex_cmd_802_11_bg_scan_query(cmd_ptr); break; @@ -2063,6 +2083,9 @@ int mwifiex_sta_prepare_cmd(struct mwifiex_private *priv, uint16_t cmd_no, ret = mwifiex_cmd_sdio_rx_aggr_cfg(cmd_ptr, cmd_action, data_buf); break; + case HostCmd_CMD_HS_WAKEUP_REASON: + ret = mwifiex_cmd_get_wakeup_reason(priv, cmd_ptr); + break; case HostCmd_CMD_MC_POLICY: ret = mwifiex_cmd_set_mc_policy(priv, cmd_ptr, cmd_action, data_buf); diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index 9ac7aa2431b4..d96523e10eb4 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -1076,9 +1076,12 @@ int mwifiex_process_sta_cmdresp(struct mwifiex_private *priv, u16 cmdresp_no, break; case HostCmd_CMD_802_11_BG_SCAN_QUERY: ret = mwifiex_ret_802_11_scan(priv, resp); + cfg80211_sched_scan_results(priv->wdev.wiphy); mwifiex_dbg(adapter, CMD, "info: CMD_RESP: BG_SCAN result is ready!\n"); break; + case HostCmd_CMD_802_11_BG_SCAN_CONFIG: + break; case HostCmd_CMD_TXPWR_CFG: ret = mwifiex_ret_tx_power_cfg(priv, resp); break; @@ -1233,6 +1236,9 @@ int mwifiex_process_sta_cmdresp(struct mwifiex_private *priv, u16 cmdresp_no, case HostCmd_CMD_SDIO_SP_RX_AGGR_CFG: ret = mwifiex_ret_sdio_rx_aggr_cfg(priv, resp); break; + case HostCmd_CMD_HS_WAKEUP_REASON: + ret = mwifiex_ret_wakeup_reason(priv, resp, data_buf); + break; case HostCmd_CMD_TDLS_CONFIG: break; case HostCmd_CMD_ROBUST_COEX: diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index ff3ee9dfbbd5..070bce401151 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -92,6 +92,9 @@ mwifiex_reset_connect_state(struct mwifiex_private *priv, u16 reason_code) priv->is_data_rate_auto = true; priv->data_rate = 0; + priv->assoc_resp_ht_param = 0; + priv->ht_param_present = false; + if ((GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_STA || GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_UAP) && priv->hist_data) mwifiex_hist_data_reset(priv); @@ -607,11 +610,13 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) case EVENT_PS_AWAKE: mwifiex_dbg(adapter, EVENT, "info: EVENT: AWAKE\n"); - if (!adapter->pps_uapsd_mode && priv->port_open && + if (!adapter->pps_uapsd_mode && + (priv->port_open || + (priv->bss_mode == NL80211_IFTYPE_ADHOC)) && priv->media_connected && adapter->sleep_period.period) { - adapter->pps_uapsd_mode = true; - mwifiex_dbg(adapter, EVENT, - "event: PPS/UAPSD mode activated\n"); + adapter->pps_uapsd_mode = true; + mwifiex_dbg(adapter, EVENT, + "event: PPS/UAPSD mode activated\n"); } adapter->tx_lock_flag = false; if (adapter->pps_uapsd_mode && adapter->gen_null_pkt) { @@ -686,6 +691,13 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) HostCmd_ACT_GEN_GET, 0, NULL, false); break; + case EVENT_BG_SCAN_STOPPED: + dev_dbg(adapter->dev, "event: BGS_STOPPED\n"); + cfg80211_sched_scan_stopped(priv->wdev.wiphy); + if (priv->sched_scanning) + priv->sched_scanning = false; + break; + case EVENT_PORT_RELEASE: mwifiex_dbg(adapter, EVENT, "event: PORT RELEASE\n"); priv->port_open = true; diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index 6a4fc5d183cf..5cbee58f8781 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -504,6 +504,20 @@ int mwifiex_enable_hs(struct mwifiex_adapter *adapter) } } + priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_STA); + + if (priv && priv->sched_scanning) { +#ifdef CONFIG_PM + if (!priv->wdev.wiphy->wowlan_config->nd_config) { +#endif + mwifiex_dbg(adapter, CMD, "aborting bgscan!\n"); + mwifiex_stop_bg_scan(priv); + cfg80211_sched_scan_stopped(priv->wdev.wiphy); +#ifdef CONFIG_PM + } +#endif + } + if (adapter->hs_activated) { mwifiex_dbg(adapter, CMD, "cmd: HS Already activated\n"); @@ -1114,11 +1128,12 @@ int mwifiex_set_encode(struct mwifiex_private *priv, struct key_params *kp, * with requisite parameters and calls the IOCTL handler. */ int -mwifiex_get_ver_ext(struct mwifiex_private *priv) +mwifiex_get_ver_ext(struct mwifiex_private *priv, u32 version_str_sel) { struct mwifiex_ver_ext ver_ext; memset(&ver_ext, 0, sizeof(struct host_cmd_ds_version_ext)); + ver_ext.version_str_sel = version_str_sel; if (mwifiex_send_cmd(priv, HostCmd_CMD_VERSION_EXT, HostCmd_ACT_GEN_GET, 0, &ver_ext, true)) return -1; @@ -1450,3 +1465,19 @@ mwifiex_set_gen_ie(struct mwifiex_private *priv, const u8 *ie, int ie_len) return 0; } + +/* This function get Host Sleep wake up reason. + * + */ +int mwifiex_get_wakeup_reason(struct mwifiex_private *priv, u16 action, + int cmd_type, + struct mwifiex_ds_wakeup_reason *wakeup_reason) +{ + int status = 0; + + status = mwifiex_send_cmd(priv, HostCmd_CMD_HS_WAKEUP_REASON, + HostCmd_ACT_GEN_GET, 0, wakeup_reason, + cmd_type == MWIFIEX_SYNC_CMD); + + return status; +} diff --git a/drivers/net/wireless/marvell/mwifiex/wmm.c b/drivers/net/wireless/marvell/mwifiex/wmm.c index acccd6734e3b..0eb246502e1d 100644 --- a/drivers/net/wireless/marvell/mwifiex/wmm.c +++ b/drivers/net/wireless/marvell/mwifiex/wmm.c @@ -438,6 +438,7 @@ mwifiex_wmm_init(struct mwifiex_adapter *adapter) mwifiex_set_ba_params(priv); mwifiex_reset_11n_rx_seq_num(priv); + priv->wmm.drv_pkt_delay_max = MWIFIEX_WMM_DRV_DELAY_MAX; atomic_set(&priv->wmm.tx_pkts_queued, 0); atomic_set(&priv->wmm.highest_queued_prio, HIGH_PRIO_TID); } @@ -475,7 +476,8 @@ mwifiex_wmm_lists_empty(struct mwifiex_adapter *adapter) priv = adapter->priv[i]; if (!priv) continue; - if (!priv->port_open) + if (!priv->port_open && + (priv->bss_mode != NL80211_IFTYPE_ADHOC)) continue; if (adapter->if_ops.is_port_ready && !adapter->if_ops.is_port_ready(priv)) @@ -1099,7 +1101,8 @@ mwifiex_wmm_get_highest_priolist_ptr(struct mwifiex_adapter *adapter, priv_tmp = adapter->bss_prio_tbl[j].bss_prio_cur->priv; - if (!priv_tmp->port_open || + if (((priv_tmp->bss_mode != NL80211_IFTYPE_ADHOC) && + !priv_tmp->port_open) || (atomic_read(&priv_tmp->wmm.tx_pkts_queued) == 0)) continue; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 26427140a963..6418620f95ff 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -107,7 +107,7 @@ * amount of bytes needed to move the data. */ #define ALIGN_SIZE(__skb, __header) \ - ( ((unsigned long)((__skb)->data + (__header))) & 3 ) + (((unsigned long)((__skb)->data + (__header))) & 3) /* * Constants for extra TX headroom for alignment purposes. @@ -128,14 +128,14 @@ #define SLOT_TIME 20 #define SHORT_SLOT_TIME 9 #define SIFS 10 -#define PIFS ( SIFS + SLOT_TIME ) -#define SHORT_PIFS ( SIFS + SHORT_SLOT_TIME ) -#define DIFS ( PIFS + SLOT_TIME ) -#define SHORT_DIFS ( SHORT_PIFS + SHORT_SLOT_TIME ) -#define EIFS ( SIFS + DIFS + \ - GET_DURATION(IEEE80211_HEADER + ACK_SIZE, 10) ) -#define SHORT_EIFS ( SIFS + SHORT_DIFS + \ - GET_DURATION(IEEE80211_HEADER + ACK_SIZE, 10) ) +#define PIFS (SIFS + SLOT_TIME) +#define SHORT_PIFS (SIFS + SHORT_SLOT_TIME) +#define DIFS (PIFS + SLOT_TIME) +#define SHORT_DIFS (SHORT_PIFS + SHORT_SLOT_TIME) +#define EIFS (SIFS + DIFS + \ + GET_DURATION(IEEE80211_HEADER + ACK_SIZE, 10)) +#define SHORT_EIFS (SIFS + SHORT_DIFS + \ + GET_DURATION(IEEE80211_HEADER + ACK_SIZE, 10)) enum rt2x00_chip_intf { RT2X00_CHIP_INTF_PCI, diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c index 90fdb02b55e7..25ee3cb8e982 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c @@ -629,7 +629,7 @@ static struct dentry *rt2x00debug_create_file_chipset(const char *name, data += sprintf(data, "register\tbase\twords\twordsize\n"); #define RT2X00DEBUGFS_SPRINTF_REGISTER(__name) \ { \ - if(debug->__name.read) \ + if (debug->__name.read) \ data += sprintf(data, __stringify(__name) \ "\t%d\t%d\t%d\n", \ debug->__name.word_base, \ @@ -699,7 +699,7 @@ void rt2x00debug_register(struct rt2x00_dev *rt2x00dev) #define RT2X00DEBUGFS_CREATE_REGISTER_ENTRY(__intf, __name) \ ({ \ - if(debug->__name.read) { \ + if (debug->__name.read) { \ (__intf)->__name##_off_entry = \ debugfs_create_u32(__stringify(__name) "_offset", \ S_IRUSR | S_IWUSR, \ diff --git a/drivers/net/wireless/ralink/rt2x00/rt61pci.h b/drivers/net/wireless/ralink/rt2x00/rt61pci.h index 1442075a8382..ab8641547a1f 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt61pci.h +++ b/drivers/net/wireless/ralink/rt2x00/rt61pci.h @@ -138,14 +138,14 @@ #define PAIRWISE_TA_TABLE_BASE 0x1a00 #define SHARED_KEY_ENTRY(__idx) \ - ( SHARED_KEY_TABLE_BASE + \ - ((__idx) * sizeof(struct hw_key_entry)) ) + (SHARED_KEY_TABLE_BASE + \ + ((__idx) * sizeof(struct hw_key_entry))) #define PAIRWISE_KEY_ENTRY(__idx) \ - ( PAIRWISE_KEY_TABLE_BASE + \ - ((__idx) * sizeof(struct hw_key_entry)) ) + (PAIRWISE_KEY_TABLE_BASE + \ + ((__idx) * sizeof(struct hw_key_entry))) #define PAIRWISE_TA_ENTRY(__idx) \ - ( PAIRWISE_TA_TABLE_BASE + \ - ((__idx) * sizeof(struct hw_pairwise_ta_entry)) ) + (PAIRWISE_TA_TABLE_BASE + \ + ((__idx) * sizeof(struct hw_pairwise_ta_entry))) struct hw_key_entry { u8 key[16]; @@ -180,7 +180,7 @@ struct hw_pairwise_ta_entry { #define HW_BEACON_BASE3 0x2f00 #define HW_BEACON_OFFSET(__index) \ - ( HW_BEACON_BASE0 + (__index * 0x0100) ) + (HW_BEACON_BASE0 + (__index * 0x0100)) /* * HOST-MCU shared memory. @@ -1287,9 +1287,9 @@ struct hw_pairwise_ta_entry { /* * DMA descriptor defines. */ -#define TXD_DESC_SIZE ( 16 * sizeof(__le32) ) -#define TXINFO_SIZE ( 6 * sizeof(__le32) ) -#define RXD_DESC_SIZE ( 16 * sizeof(__le32) ) +#define TXD_DESC_SIZE (16 * sizeof(__le32)) +#define TXINFO_SIZE (6 * sizeof(__le32)) +#define RXD_DESC_SIZE (16 * sizeof(__le32)) /* * TX descriptor format for TX, PRIO and Beacon Ring. diff --git a/drivers/net/wireless/ti/wlcore/Kconfig b/drivers/net/wireless/ti/wlcore/Kconfig index 969c9d79bfc8..8a8f1e711384 100644 --- a/drivers/net/wireless/ti/wlcore/Kconfig +++ b/drivers/net/wireless/ti/wlcore/Kconfig @@ -13,7 +13,7 @@ config WLCORE config WLCORE_SPI tristate "TI wlcore SPI support" - depends on WLCORE && SPI_MASTER + depends on WLCORE && SPI_MASTER && OF select CRC7 ---help--- This module adds support for the SPI interface of adapters using diff --git a/drivers/net/wireless/ti/wlcore/event.c b/drivers/net/wireless/ti/wlcore/event.c index c96405498bf4..4b59f67724de 100644 --- a/drivers/net/wireless/ti/wlcore/event.c +++ b/drivers/net/wireless/ti/wlcore/event.c @@ -38,7 +38,7 @@ int wlcore_event_fw_logger(struct wl1271 *wl) { - u32 ret; + int ret; struct fw_logger_information fw_log; u8 *buffer; u32 internal_fw_addrbase = WL18XX_DATA_RAM_BASE_ADDRESS; diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 44f059f7f34e..020ac1a4b408 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -30,6 +30,8 @@ #include <linux/spi/spi.h> #include <linux/wl12xx.h> #include <linux/platform_device.h> +#include <linux/of_irq.h> +#include <linux/regulator/consumer.h> #include "wlcore.h" #include "wl12xx_80211.h" @@ -81,6 +83,7 @@ struct wl12xx_spi_glue { struct device *dev; struct platform_device *core; + struct regulator *reg; /* Power regulator */ }; static void wl12xx_spi_reset(struct device *child) @@ -318,14 +321,76 @@ static int __must_check wl12xx_spi_raw_write(struct device *child, int addr, return 0; } +/** + * wl12xx_spi_set_power - power on/off the wl12xx unit + * @child: wl12xx device handle. + * @enable: true/false to power on/off the unit. + * + * use the WiFi enable regulator to enable/disable the WiFi unit. + */ +static int wl12xx_spi_set_power(struct device *child, bool enable) +{ + int ret = 0; + struct wl12xx_spi_glue *glue = dev_get_drvdata(child->parent); + + WARN_ON(!glue->reg); + + /* Update regulator state */ + if (enable) { + ret = regulator_enable(glue->reg); + if (ret) + dev_err(child, "Power enable failure\n"); + } else { + ret = regulator_disable(glue->reg); + if (ret) + dev_err(child, "Power disable failure\n"); + } + + return ret; +} + static struct wl1271_if_operations spi_ops = { .read = wl12xx_spi_raw_read, .write = wl12xx_spi_raw_write, .reset = wl12xx_spi_reset, .init = wl12xx_spi_init, + .power = wl12xx_spi_set_power, .set_block_size = NULL, }; +static const struct of_device_id wlcore_spi_of_match_table[] = { + { .compatible = "ti,wl1271" }, + { } +}; +MODULE_DEVICE_TABLE(of, wlcore_spi_of_match_table); + +/** + * wlcore_probe_of - DT node parsing. + * @spi: SPI slave device parameters. + * @res: resource parameters. + * @glue: wl12xx SPI bus to slave device glue parameters. + * @pdev_data: wlcore device parameters + */ +static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue, + struct wlcore_platdev_data *pdev_data) +{ + struct device_node *dt_node = spi->dev.of_node; + int ret; + + if (of_find_property(dt_node, "clock-xtal", NULL)) + pdev_data->ref_clock_xtal = true; + + ret = of_property_read_u32(dt_node, "ref-clock-frequency", + &pdev_data->ref_clock_freq); + if (IS_ERR_VALUE(ret)) { + dev_err(glue->dev, + "can't get reference clock frequency (%d)\n", ret); + return ret; + } + + return 0; +} + static int wl1271_probe(struct spi_device *spi) { struct wl12xx_spi_glue *glue; @@ -335,8 +400,6 @@ static int wl1271_probe(struct spi_device *spi) memset(&pdev_data, 0x00, sizeof(pdev_data)); - /* TODO: add DT parsing when needed */ - pdev_data.if_ops = &spi_ops; glue = devm_kzalloc(&spi->dev, sizeof(*glue), GFP_KERNEL); @@ -353,6 +416,21 @@ static int wl1271_probe(struct spi_device *spi) * comes from the board-peripherals file */ spi->bits_per_word = 32; + glue->reg = devm_regulator_get(&spi->dev, "vwlan"); + if (PTR_ERR(glue->reg) == -EPROBE_DEFER) + return -EPROBE_DEFER; + if (IS_ERR(glue->reg)) { + dev_err(glue->dev, "can't get regulator\n"); + return PTR_ERR(glue->reg); + } + + ret = wlcore_probe_of(spi, glue, &pdev_data); + if (IS_ERR_VALUE(ret)) { + dev_err(glue->dev, + "can't get device tree parameters (%d)\n", ret); + return ret; + } + ret = spi_setup(spi); if (ret < 0) { dev_err(glue->dev, "spi_setup failed\n"); @@ -370,7 +448,7 @@ static int wl1271_probe(struct spi_device *spi) memset(res, 0x00, sizeof(res)); res[0].start = spi->irq; - res[0].flags = IORESOURCE_IRQ; + res[0].flags = IORESOURCE_IRQ | irq_get_trigger_type(spi->irq); res[0].name = "irq"; ret = platform_device_add_resources(glue->core, res, ARRAY_SIZE(res)); @@ -408,10 +486,10 @@ static int wl1271_remove(struct spi_device *spi) return 0; } - static struct spi_driver wl1271_spi_driver = { .driver = { .name = "wl1271_spi", + .of_match_table = of_match_ptr(wlcore_spi_of_match_table), }, .probe = wl1271_probe, diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 0333ab0fd926..112825200d41 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -251,6 +251,7 @@ struct xenvif { unsigned int stalled_queues; struct xenbus_watch credit_watch; + struct xenbus_watch mcast_ctrl_watch; spinlock_t lock; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 56ebd8267386..39a303de20dd 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -327,7 +327,7 @@ static int netback_probe(struct xenbus_device *dev, goto abort_transaction; } - /* We support multicast-control. */ + /* We support dynamic multicast-control. */ err = xenbus_printf(xbt, dev->nodename, "feature-multicast-control", "%d", 1); if (err) { @@ -335,6 +335,14 @@ static int netback_probe(struct xenbus_device *dev, goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, + "feature-dynamic-multicast-control", + "%d", 1); + if (err) { + message = "writing feature-dynamic-multicast-control"; + goto abort_transaction; + } + err = xenbus_transaction_end(xbt, 0); } while (err == -EAGAIN); @@ -683,7 +691,8 @@ static void xen_net_rate_changed(struct xenbus_watch *watch, } } -static int xen_register_watchers(struct xenbus_device *dev, struct xenvif *vif) +static int xen_register_credit_watch(struct xenbus_device *dev, + struct xenvif *vif) { int err = 0; char *node; @@ -708,7 +717,7 @@ static int xen_register_watchers(struct xenbus_device *dev, struct xenvif *vif) return err; } -static void xen_unregister_watchers(struct xenvif *vif) +static void xen_unregister_credit_watch(struct xenvif *vif) { if (vif->credit_watch.node) { unregister_xenbus_watch(&vif->credit_watch); @@ -717,6 +726,75 @@ static void xen_unregister_watchers(struct xenvif *vif) } } +static void xen_mcast_ctrl_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct xenvif *vif = container_of(watch, struct xenvif, + mcast_ctrl_watch); + struct xenbus_device *dev = xenvif_to_xenbus_device(vif); + int val; + + if (xenbus_scanf(XBT_NIL, dev->otherend, + "request-multicast-control", "%d", &val) < 0) + val = 0; + vif->multicast_control = !!val; +} + +static int xen_register_mcast_ctrl_watch(struct xenbus_device *dev, + struct xenvif *vif) +{ + int err = 0; + char *node; + unsigned maxlen = strlen(dev->otherend) + + sizeof("/request-multicast-control"); + + if (vif->mcast_ctrl_watch.node) { + pr_err_ratelimited("Watch is already registered\n"); + return -EADDRINUSE; + } + + node = kmalloc(maxlen, GFP_KERNEL); + if (!node) { + pr_err("Failed to allocate memory for watch\n"); + return -ENOMEM; + } + snprintf(node, maxlen, "%s/request-multicast-control", + dev->otherend); + vif->mcast_ctrl_watch.node = node; + vif->mcast_ctrl_watch.callback = xen_mcast_ctrl_changed; + err = register_xenbus_watch(&vif->mcast_ctrl_watch); + if (err) { + pr_err("Failed to set watcher %s\n", + vif->mcast_ctrl_watch.node); + kfree(node); + vif->mcast_ctrl_watch.node = NULL; + vif->mcast_ctrl_watch.callback = NULL; + } + return err; +} + +static void xen_unregister_mcast_ctrl_watch(struct xenvif *vif) +{ + if (vif->mcast_ctrl_watch.node) { + unregister_xenbus_watch(&vif->mcast_ctrl_watch); + kfree(vif->mcast_ctrl_watch.node); + vif->mcast_ctrl_watch.node = NULL; + } +} + +static void xen_register_watchers(struct xenbus_device *dev, + struct xenvif *vif) +{ + xen_register_credit_watch(dev, vif); + xen_register_mcast_ctrl_watch(dev, vif); +} + +static void xen_unregister_watchers(struct xenvif *vif) +{ + xen_unregister_mcast_ctrl_watch(vif); + xen_unregister_credit_watch(vif); +} + static void unregister_hotplug_status_watch(struct backend_info *be) { if (be->have_hotplug_status_watch) { @@ -1030,11 +1108,6 @@ static int read_xenbus_vif_flags(struct backend_info *be) val = 0; vif->ipv6_csum = !!val; - if (xenbus_scanf(XBT_NIL, dev->otherend, "request-multicast-control", - "%d", &val) < 0) - val = 0; - vif->multicast_control = !!val; - return 0; } diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 3feb1b2d75d8..0367c63f5960 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -151,6 +151,8 @@ struct bcma_host_ops { #define BCMA_CORE_PCIE2 0x83C /* PCI Express Gen2 */ #define BCMA_CORE_USB30_DEV 0x83D #define BCMA_CORE_ARM_CR4 0x83E +#define BCMA_CORE_GCI 0x840 +#define BCMA_CORE_CMEM 0x846 /* CNDS DDR2/3 memory controller */ #define BCMA_CORE_ARM_CA7 0x847 #define BCMA_CORE_SYS_MEM 0x849 #define BCMA_CORE_DEFAULT 0xFFF @@ -199,6 +201,7 @@ struct bcma_host_ops { #define BCMA_PKG_ID_BCM4707 1 #define BCMA_PKG_ID_BCM4708 2 #define BCMA_PKG_ID_BCM4709 0 +#define BCMA_CHIP_ID_BCM47094 53030 #define BCMA_CHIP_ID_BCM53018 53018 /* Board types (on PCI usually equals to the subsystem dev id) */ diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index db51a6ffb7d6..700d0c6f7480 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -217,6 +217,11 @@ #define BCMA_CC_CLKDIV_JTAG_SHIFT 8 #define BCMA_CC_CLKDIV_UART 0x000000FF #define BCMA_CC_CAP_EXT 0x00AC /* Capabilities */ +#define BCMA_CC_CAP_EXT_SECI_PRESENT 0x00000001 +#define BCMA_CC_CAP_EXT_GSIO_PRESENT 0x00000002 +#define BCMA_CC_CAP_EXT_GCI_PRESENT 0x00000004 +#define BCMA_CC_CAP_EXT_SECI_PUART_PRESENT 0x00000008 /* UART present */ +#define BCMA_CC_CAP_EXT_AOB_PRESENT 0x00000040 #define BCMA_CC_PLLONDELAY 0x00B0 /* Rev >= 4 only */ #define BCMA_CC_FREFSELDELAY 0x00B4 /* Rev >= 4 only */ #define BCMA_CC_SLOWCLKCTL 0x00B8 /* 6 <= Rev <= 9 only */ @@ -351,12 +356,12 @@ #define BCMA_CC_PMU_RES_REQTS 0x0640 /* PMU res req timer sel */ #define BCMA_CC_PMU_RES_REQT 0x0644 /* PMU res req timer */ #define BCMA_CC_PMU_RES_REQM 0x0648 /* PMU res req mask */ -#define BCMA_CC_CHIPCTL_ADDR 0x0650 -#define BCMA_CC_CHIPCTL_DATA 0x0654 -#define BCMA_CC_REGCTL_ADDR 0x0658 -#define BCMA_CC_REGCTL_DATA 0x065C -#define BCMA_CC_PLLCTL_ADDR 0x0660 -#define BCMA_CC_PLLCTL_DATA 0x0664 +#define BCMA_CC_PMU_CHIPCTL_ADDR 0x0650 +#define BCMA_CC_PMU_CHIPCTL_DATA 0x0654 +#define BCMA_CC_PMU_REGCTL_ADDR 0x0658 +#define BCMA_CC_PMU_REGCTL_DATA 0x065C +#define BCMA_CC_PMU_PLLCTL_ADDR 0x0660 +#define BCMA_CC_PMU_PLLCTL_DATA 0x0664 #define BCMA_CC_PMU_STRAPOPT 0x0668 /* (corerev >= 28) */ #define BCMA_CC_PMU_XTAL_FREQ 0x066C /* (pmurev >= 10) */ #define BCMA_CC_PMU_XTAL_FREQ_ILPCTL_MASK 0x00001FFF @@ -566,6 +571,7 @@ * Check availability with ((struct bcma_chipcommon)->capabilities & BCMA_CC_CAP_PMU) */ struct bcma_chipcommon_pmu { + struct bcma_device *core; /* Can be separated core or just ChipCommon one */ u8 rev; /* PMU revision */ u32 crystalfreq; /* The active crystal frequency (in kHz) */ }; @@ -660,6 +666,19 @@ struct bcma_drv_cc_b { #define bcma_cc_maskset32(cc, offset, mask, set) \ bcma_cc_write32(cc, offset, (bcma_cc_read32(cc, offset) & (mask)) | (set)) +/* PMU registers access */ +#define bcma_pmu_read32(cc, offset) \ + bcma_read32((cc)->pmu.core, offset) +#define bcma_pmu_write32(cc, offset, val) \ + bcma_write32((cc)->pmu.core, offset, val) + +#define bcma_pmu_mask32(cc, offset, mask) \ + bcma_pmu_write32(cc, offset, bcma_pmu_read32(cc, offset) & (mask)) +#define bcma_pmu_set32(cc, offset, set) \ + bcma_pmu_write32(cc, offset, bcma_pmu_read32(cc, offset) | (set)) +#define bcma_pmu_maskset32(cc, offset, mask, set) \ + bcma_pmu_write32(cc, offset, (bcma_pmu_read32(cc, offset) & (mask)) | (set)) + extern u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks); extern u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc); diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 9653fdb76a42..e9b0b9ab07e5 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -59,6 +59,8 @@ * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region + * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words) + * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words) */ /* @@ -163,6 +165,14 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); +extern unsigned int bitmap_from_u32array(unsigned long *bitmap, + unsigned int nbits, + const u32 *buf, + unsigned int nwords); +extern unsigned int bitmap_to_u32array(u32 *buf, + unsigned int nwords, + const unsigned long *bitmap, + unsigned int nbits); #ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 83d1926c61e4..51e498e5470e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -65,6 +65,7 @@ enum bpf_arg_type { */ ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */ ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ @@ -151,6 +152,7 @@ struct bpf_array { union { char value[0] __aligned(8); void *ptrs[0] __aligned(8); + void __percpu *pptrs[0] __aligned(8); }; }; #define MAX_TAIL_CALL_CNT 32 @@ -182,6 +184,29 @@ int bpf_prog_new_fd(struct bpf_prog *prog); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname); +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, + u64 flags); +int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, + u64 flags); + +/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and + * forced to use 'long' read/writes to try to atomically copy long counters. + * Best-effort only. No barriers here, since it _will_ race with concurrent + * updates from BPF programs. Called from bpf syscall and mostly used with + * size 8 or 16 bytes, so ask compiler to inline it. + */ +static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) +{ + const long *lsrc = src; + long *ldst = dst; + + size /= sizeof(long); + while (size--) + *ldst++ = *lsrc++; +} + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else @@ -213,6 +238,7 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; +extern const struct bpf_func_proto bpf_get_stackid_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 653dc9c4ebac..472d7d7b01c2 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -201,6 +201,16 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * @get_module_eeprom: Get the eeprom information from the plug-in module * @get_eee: Get Energy-Efficient (EEE) supported and status. * @set_eee: Set EEE status (enable/disable) as well as LPI timers. + * @get_per_queue_coalesce: Get interrupt coalescing parameters per queue. + * It must check that the given queue number is valid. If neither a RX nor + * a TX queue has this number, return -EINVAL. If only a RX queue or a TX + * queue has this number, set the inapplicable fields to ~0 and return 0. + * Returns a negative error code or zero. + * @set_per_queue_coalesce: Set interrupt coalescing parameters per queue. + * It must check that the given queue number is valid. If neither a RX nor + * a TX queue has this number, return -EINVAL. If only a RX queue or a TX + * queue has this number, ignore the inapplicable fields. + * Returns a negative error code or zero. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -279,7 +289,10 @@ struct ethtool_ops { const struct ethtool_tunable *, void *); int (*set_tunable)(struct net_device *, const struct ethtool_tunable *, const void *); - + int (*get_per_queue_coalesce)(struct net_device *, u32, + struct ethtool_coalesce *); + int (*set_per_queue_coalesce)(struct net_device *, u32, + struct ethtool_coalesce *); }; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/if_team.h b/include/linux/if_team.h index b84e49c3a738..174f43f43aff 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -24,6 +24,7 @@ struct team_pcpu_stats { struct u64_stats_sync syncp; u32 rx_dropped; u32 tx_dropped; + u32 rx_nohandler; }; struct team; diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 9c9de11549a7..12f6fba6d21a 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -37,11 +37,6 @@ static inline struct igmpv3_query * return (struct igmpv3_query *)skb_transport_header(skb); } -extern int sysctl_igmp_llm_reports; -extern int sysctl_igmp_max_memberships; -extern int sysctl_igmp_max_msf; -extern int sysctl_igmp_qrv; - struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; diff --git a/include/linux/inet_lro.h b/include/linux/inet_lro.h deleted file mode 100644 index 9a715cfa1fe3..000000000000 --- a/include/linux/inet_lro.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * linux/include/linux/inet_lro.h - * - * Large Receive Offload (ipv4 / tcp) - * - * (C) Copyright IBM Corp. 2007 - * - * Authors: - * Jan-Bernd Themann <themann@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __INET_LRO_H_ -#define __INET_LRO_H_ - -#include <net/ip.h> -#include <net/tcp.h> - -/* - * LRO statistics - */ - -struct net_lro_stats { - unsigned long aggregated; - unsigned long flushed; - unsigned long no_desc; -}; - -/* - * LRO descriptor for a tcp session - */ -struct net_lro_desc { - struct sk_buff *parent; - struct sk_buff *last_skb; - struct skb_frag_struct *next_frag; - struct iphdr *iph; - struct tcphdr *tcph; - __wsum data_csum; - __be32 tcp_rcv_tsecr; - __be32 tcp_rcv_tsval; - __be32 tcp_ack; - u32 tcp_next_seq; - u32 skb_tot_frags_len; - u16 ip_tot_len; - u16 tcp_saw_tstamp; /* timestamps enabled */ - __be16 tcp_window; - int pkt_aggr_cnt; /* counts aggregated packets */ - int vlan_packet; - int mss; - int active; -}; - -/* - * Large Receive Offload (LRO) Manager - * - * Fields must be set by driver - */ - -struct net_lro_mgr { - struct net_device *dev; - struct net_lro_stats stats; - - /* LRO features */ - unsigned long features; -#define LRO_F_NAPI 1 /* Pass packets to stack via NAPI */ -#define LRO_F_EXTRACT_VLAN_ID 2 /* Set flag if VLAN IDs are extracted - from received packets and eth protocol - is still ETH_P_8021Q */ - - /* - * Set for generated SKBs that are not added to - * the frag list in fragmented mode - */ - u32 ip_summed; - u32 ip_summed_aggr; /* Set in aggregated SKBs: CHECKSUM_UNNECESSARY - * or CHECKSUM_NONE */ - - int max_desc; /* Max number of LRO descriptors */ - int max_aggr; /* Max number of LRO packets to be aggregated */ - - int frag_align_pad; /* Padding required to properly align layer 3 - * headers in generated skb when using frags */ - - struct net_lro_desc *lro_arr; /* Array of LRO descriptors */ - - /* - * Optimized driver functions - * - * get_skb_header: returns tcp and ip header for packet in SKB - */ - int (*get_skb_header)(struct sk_buff *skb, void **ip_hdr, - void **tcpudp_hdr, u64 *hdr_flags, void *priv); - - /* hdr_flags: */ -#define LRO_IPV4 1 /* ip_hdr is IPv4 header */ -#define LRO_TCP 2 /* tcpudp_hdr is TCP header */ - - /* - * get_frag_header: returns mac, tcp and ip header for packet in SKB - * - * @hdr_flags: Indicate what kind of LRO has to be done - * (IPv4/IPv6/TCP/UDP) - */ - int (*get_frag_header)(struct skb_frag_struct *frag, void **mac_hdr, - void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags, - void *priv); -}; - -/* - * Processes a SKB - * - * @lro_mgr: LRO manager to use - * @skb: SKB to aggregate - * @priv: Private data that may be used by driver functions - * (for example get_tcp_ip_hdr) - */ - -void lro_receive_skb(struct net_lro_mgr *lro_mgr, - struct sk_buff *skb, - void *priv); -/* - * Forward all aggregated SKBs held by lro_mgr to network stack - */ - -void lro_flush_all(struct net_lro_mgr *lro_mgr); - -#endif diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 402753bccafa..4b2267e1b7c3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -50,11 +50,13 @@ struct ipv6_devconf { __s32 mc_forwarding; #endif __s32 disable_ipv6; + __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; __s32 ndisc_notify; __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; + __s32 drop_unsolicited_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index d9654f0eecb3..a734bf43d190 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -67,6 +67,8 @@ enum { NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_BUSY_POLL_BIT, /* Busy poll */ + NETIF_F_HW_TC_BIT, /* Offload TC infrastructure */ + /* * Add your fresh new feature above and remember to update * netdev_features_strings[] in net/core/ethtool.c and maybe @@ -124,6 +126,7 @@ enum { #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) +#define NETIF_F_HW_TC __NETIF_F(HW_TC) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5440b7b705eb..e52077ffe5ed 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,7 @@ #include <linux/neighbour.h> #include <uapi/linux/netdevice.h> #include <uapi/linux/if_bonding.h> +#include <uapi/linux/pkt_cls.h> struct netpoll_info; struct device; @@ -778,6 +779,25 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); +/* These structures hold the attributes of qdisc and classifiers + * that are being passed to the netdevice through the setup_tc op. + */ +enum { + TC_SETUP_MQPRIO, + TC_SETUP_CLSU32, +}; + +struct tc_cls_u32_offload; + +struct tc_to_netdev { + unsigned int type; + union { + u8 tc; + struct tc_cls_u32_offload *cls_u32; + }; +}; + + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1150,7 +1170,10 @@ struct net_device_ops { int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); - int (*ndo_setup_tc)(struct net_device *dev, u8 tc); + int (*ndo_setup_tc)(struct net_device *dev, + u32 handle, + __be16 protocol, + struct tc_to_netdev *tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); @@ -1291,6 +1314,7 @@ struct net_device_ops { * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device + * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1318,6 +1342,7 @@ enum netdev_priv_flags { IFF_OPENVSWITCH = 1<<22, IFF_L3MDEV_SLAVE = 1<<23, IFF_TEAM = 1<<24, + IFF_RXFH_CONFIGURED = 1<<25, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1345,6 +1370,7 @@ enum netdev_priv_flags { #define IFF_OPENVSWITCH IFF_OPENVSWITCH #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM +#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED /** * struct net_device - The DEVICE structure. @@ -1397,6 +1423,8 @@ enum netdev_priv_flags { * do not use this in drivers * @tx_dropped: Dropped packets by core network, * do not use this in drivers + * @rx_nohandler: nohandler dropped packets by core network on + * inactive devices, do not use this in drivers * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, @@ -1611,6 +1639,7 @@ struct net_device { atomic_long_t rx_dropped; atomic_long_t tx_dropped; + atomic_long_t rx_nohandler; #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def * wireless_handlers; @@ -3741,7 +3770,7 @@ void netdev_lower_state_changed(struct net_device *lower_dev, /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 -extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; +extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev, @@ -4045,6 +4074,11 @@ static inline bool netif_is_lag_port(const struct net_device *dev) return netif_is_bond_slave(dev) || netif_is_team_port(dev); } +static inline bool netif_is_rxfh_configured(const struct net_device *dev) +{ + return dev->priv_flags & IFF_RXFH_CONFIGURED; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ba0d9789eb6e..1d82dd5e9a08 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -34,8 +34,6 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); int nfnetlink_has_listeners(struct net *net, unsigned int group); -struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, - u32 dst_portid, gfp_t gfp_mask); int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags); int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 0b41959aab9f..da14ab61f363 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -69,16 +69,6 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group) extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); -extern struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, - unsigned int ldiff, u32 dst_portid, - gfp_t gfp_mask); -static inline struct sk_buff * -netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid, - gfp_t gfp_mask) -{ - return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask); -} - extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b35a61a481fa..7da3c25999df 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -964,11 +964,20 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern struct perf_callchain_entry * +get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, + bool crosstask, bool add_mark); +extern int get_callchain_buffers(void); +extern void put_callchain_buffers(void); -static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) { - if (entry->nr < PERF_MAX_STACK_DEPTH) + if (entry->nr < PERF_MAX_STACK_DEPTH) { entry->ip[entry->nr++] = ip; + return 0; + } else { + return -1; /* no more room, stop walking the stack */ + } } extern int sysctl_perf_event_paranoid; diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 1d1ba2c5ee7a..53ecb37ae563 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -11,9 +11,11 @@ #define CORE_SPQE_PAGE_SIZE_BYTES 4096 +#define X_FINAL_CLEANUP_AGG_INT 1 + #define FW_MAJOR_VERSION 8 -#define FW_MINOR_VERSION 4 -#define FW_REVISION_VERSION 2 +#define FW_MINOR_VERSION 7 +#define FW_REVISION_VERSION 3 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -152,6 +154,9 @@ /* number of queues in a PF queue group */ #define QM_PF_QUEUE_GROUP_SIZE 8 +/* the size of a single queue element in bytes */ +#define QM_PQ_ELEMENT_SIZE 4 + /* base number of Tx PQs in the CM PQ representation. * should be used when storing PQ IDs in CM PQ registers and context */ @@ -285,6 +290,16 @@ #define PXP_NUM_ILT_RECORDS_K2 11000 #define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2) +#define SDM_COMP_TYPE_NONE 0 +#define SDM_COMP_TYPE_WAKE_THREAD 1 +#define SDM_COMP_TYPE_AGG_INT 2 +#define SDM_COMP_TYPE_CM 3 +#define SDM_COMP_TYPE_LOADER 4 +#define SDM_COMP_TYPE_PXP 5 +#define SDM_COMP_TYPE_INDICATE_ERROR 6 +#define SDM_COMP_TYPE_RELEASE_THREAD 7 +#define SDM_COMP_TYPE_RAM 8 + /******************/ /* PBF CONSTANTS */ /******************/ @@ -335,7 +350,7 @@ struct event_ring_entry { /* Multi function mode */ enum mf_mode { - SF, + ERROR_MODE /* Unsupported mode */, MF_OVLAN, MF_NPAR, MAX_MF_MODE @@ -606,4 +621,19 @@ struct status_block { #define STATUS_BLOCK_ZERO_PAD3_SHIFT 24 }; +struct tunnel_parsing_flags { + u8 flags; +#define TUNNEL_PARSING_FLAGS_TYPE_MASK 0x3 +#define TUNNEL_PARSING_FLAGS_TYPE_SHIFT 0 +#define TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_MASK 0x1 +#define TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_SHIFT 2 +#define TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK 0x3 +#define TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT 3 +#define TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_MASK 0x1 +#define TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_SHIFT 5 +#define TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_MASK 0x1 +#define TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_SHIFT 6 +#define TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_MASK 0x1 +#define TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT 7 +}; #endif /* __COMMON_HSI__ */ diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index 320b3373ac1d..092cb0c1afcb 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -17,10 +17,8 @@ #define ETH_MAX_RAMROD_PER_CON 8 #define ETH_TX_BD_PAGE_SIZE_BYTES 4096 #define ETH_RX_BD_PAGE_SIZE_BYTES 4096 -#define ETH_RX_SGE_PAGE_SIZE_BYTES 4096 #define ETH_RX_CQE_PAGE_SIZE_BYTES 4096 #define ETH_RX_NUM_NEXT_PAGE_BDS 2 -#define ETH_RX_NUM_NEXT_PAGE_SGES 2 #define ETH_TX_MIN_BDS_PER_NON_LSO_PKT 1 #define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET 18 @@ -34,7 +32,8 @@ #define ETH_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS -#define ETH_REG_CQE_PBL_SIZE 3 +/* Maximum number of buffers, used for RX packet placement */ +#define ETH_RX_MAX_BUFF_PER_PKT 5 /* num of MAC/VLAN filters */ #define ETH_NUM_MAC_FILTERS 512 @@ -54,9 +53,9 @@ /* TPA constants */ #define ETH_TPA_MAX_AGGS_NUM 64 -#define ETH_TPA_CQE_START_SGL_SIZE 3 -#define ETH_TPA_CQE_CONT_SGL_SIZE 6 -#define ETH_TPA_CQE_END_SGL_SIZE 4 +#define ETH_TPA_CQE_START_LEN_LIST_SIZE ETH_RX_MAX_BUFF_PER_PKT +#define ETH_TPA_CQE_CONT_LEN_LIST_SIZE 6 +#define ETH_TPA_CQE_END_LEN_LIST_SIZE 4 /* Queue Zone sizes */ #define TSTORM_QZONE_SIZE 0 @@ -74,18 +73,18 @@ struct coalescing_timeset { struct eth_tx_1st_bd_flags { u8 bitfields; +#define ETH_TX_1ST_BD_FLAGS_START_BD_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT 0 #define ETH_TX_1ST_BD_FLAGS_FORCE_VLAN_MODE_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_FORCE_VLAN_MODE_SHIFT 0 +#define ETH_TX_1ST_BD_FLAGS_FORCE_VLAN_MODE_SHIFT 1 #define ETH_TX_1ST_BD_FLAGS_IP_CSUM_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT 1 +#define ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT 2 #define ETH_TX_1ST_BD_FLAGS_L4_CSUM_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT 2 +#define ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT 3 #define ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT 3 +#define ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT 4 #define ETH_TX_1ST_BD_FLAGS_LSO_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_LSO_SHIFT 4 -#define ETH_TX_1ST_BD_FLAGS_START_BD_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT 5 +#define ETH_TX_1ST_BD_FLAGS_LSO_SHIFT 5 #define ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_MASK 0x1 #define ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT 6 #define ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK 0x1 @@ -97,38 +96,44 @@ struct eth_tx_data_1st_bd { __le16 vlan; u8 nbds; struct eth_tx_1st_bd_flags bd_flags; - __le16 fw_use_only; + __le16 bitfields; +#define ETH_TX_DATA_1ST_BD_TUNN_CFG_OVERRIDE_MASK 0x1 +#define ETH_TX_DATA_1ST_BD_TUNN_CFG_OVERRIDE_SHIFT 0 +#define ETH_TX_DATA_1ST_BD_RESERVED0_MASK 0x1 +#define ETH_TX_DATA_1ST_BD_RESERVED0_SHIFT 1 +#define ETH_TX_DATA_1ST_BD_FW_USE_ONLY_MASK 0x3FFF +#define ETH_TX_DATA_1ST_BD_FW_USE_ONLY_SHIFT 2 }; /* The parsing information data for the second tx bd of a given packet. */ struct eth_tx_data_2nd_bd { __le16 tunn_ip_size; - __le16 bitfields; -#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK 0x1FFF -#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT 0 -#define ETH_TX_DATA_2ND_BD_RESERVED0_MASK 0x7 -#define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 - __le16 bitfields2; + __le16 bitfields1; #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK 0xF #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT 0 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_MASK 0x3 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_SHIFT 4 #define ETH_TX_DATA_2ND_BD_DEST_PORT_MODE_MASK 0x3 #define ETH_TX_DATA_2ND_BD_DEST_PORT_MODE_SHIFT 6 +#define ETH_TX_DATA_2ND_BD_START_BD_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_START_BD_SHIFT 8 #define ETH_TX_DATA_2ND_BD_TUNN_TYPE_MASK 0x3 -#define ETH_TX_DATA_2ND_BD_TUNN_TYPE_SHIFT 8 +#define ETH_TX_DATA_2ND_BD_TUNN_TYPE_SHIFT 9 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_SHIFT 10 +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_SHIFT 11 #define ETH_TX_DATA_2ND_BD_IPV6_EXT_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT 11 +#define ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT 12 #define ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_SHIFT 12 +#define ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_SHIFT 13 #define ETH_TX_DATA_2ND_BD_L4_UDP_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT 13 +#define ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT 14 #define ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT 14 -#define ETH_TX_DATA_2ND_BD_RESERVED1_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_RESERVED1_SHIFT 15 +#define ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT 15 + __le16 bitfields2; +#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK 0x1FFF +#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT 0 +#define ETH_TX_DATA_2ND_BD_RESERVED0_MASK 0x7 +#define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 }; /* Regular ETH Rx FP CQE. */ @@ -145,11 +150,68 @@ struct eth_fast_path_rx_reg_cqe { struct parsing_and_err_flags pars_flags; __le16 vlan_tag; __le32 rss_hash; - __le16 len_on_bd; + __le16 len_on_first_bd; u8 placement_offset; - u8 reserved; - __le16 pbl[ETH_REG_CQE_PBL_SIZE]; - u8 reserved1[10]; + struct tunnel_parsing_flags tunnel_pars_flags; + u8 bd_num; + u8 reserved[7]; + u32 fw_debug; + u8 reserved1[3]; + u8 flags; +#define ETH_FAST_PATH_RX_REG_CQE_VALID_MASK 0x1 +#define ETH_FAST_PATH_RX_REG_CQE_VALID_SHIFT 0 +#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_MASK 0x1 +#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_SHIFT 1 +#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_MASK 0x3F +#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_SHIFT 2 +}; + +/* TPA-continue ETH Rx FP CQE. */ +struct eth_fast_path_rx_tpa_cont_cqe { + u8 type; + u8 tpa_agg_index; + __le16 len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 reserved[5]; + u8 reserved1; + __le16 reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; +}; + +/* TPA-end ETH Rx FP CQE. */ +struct eth_fast_path_rx_tpa_end_cqe { + u8 type; + u8 tpa_agg_index; + __le16 total_packet_len; + u8 num_of_bds; + u8 end_reason; + __le16 num_of_coalesced_segs; + __le32 ts_delta; + __le16 len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + u8 reserved1[3]; + u8 reserved2; + __le16 reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE]; +}; + +/* TPA-start ETH Rx FP CQE. */ +struct eth_fast_path_rx_tpa_start_cqe { + u8 type; + u8 bitfields; +#define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_MASK 0x7 +#define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_SHIFT 0 +#define ETH_FAST_PATH_RX_TPA_START_CQE_TC_MASK 0xF +#define ETH_FAST_PATH_RX_TPA_START_CQE_TC_SHIFT 3 +#define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_MASK 0x1 +#define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_SHIFT 7 + __le16 seg_len; + struct parsing_and_err_flags pars_flags; + __le16 vlan_tag; + __le32 rss_hash; + __le16 len_on_first_bd; + u8 placement_offset; + struct tunnel_parsing_flags tunnel_pars_flags; + u8 tpa_agg_index; + u8 header_len; + __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; + u32 fw_debug; }; /* The L4 pseudo checksum mode for Ethernet */ @@ -168,13 +230,26 @@ struct eth_slow_path_rx_cqe { u8 type; u8 ramrod_cmd_id; u8 error_flag; - u8 reserved[27]; + u8 reserved[25]; __le16 echo; + u8 reserved1; + u8 flags; +/* for PMD mode - valid indication */ +#define ETH_SLOW_PATH_RX_CQE_VALID_MASK 0x1 +#define ETH_SLOW_PATH_RX_CQE_VALID_SHIFT 0 +/* for PMD mode - valid toggle indication */ +#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_MASK 0x1 +#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_SHIFT 1 +#define ETH_SLOW_PATH_RX_CQE_RESERVED2_MASK 0x3F +#define ETH_SLOW_PATH_RX_CQE_RESERVED2_SHIFT 2 }; /* union for all ETH Rx CQE types */ union eth_rx_cqe { struct eth_fast_path_rx_reg_cqe fast_path_regular; + struct eth_fast_path_rx_tpa_start_cqe fast_path_tpa_start; + struct eth_fast_path_rx_tpa_cont_cqe fast_path_tpa_cont; + struct eth_fast_path_rx_tpa_end_cqe fast_path_tpa_end; struct eth_slow_path_rx_cqe slow_path; }; @@ -183,15 +258,18 @@ enum eth_rx_cqe_type { ETH_RX_CQE_TYPE_UNUSED, ETH_RX_CQE_TYPE_REGULAR, ETH_RX_CQE_TYPE_SLOW_PATH, + ETH_RX_CQE_TYPE_TPA_START, + ETH_RX_CQE_TYPE_TPA_CONT, + ETH_RX_CQE_TYPE_TPA_END, MAX_ETH_RX_CQE_TYPE }; /* ETH Rx producers data */ struct eth_rx_prod_data { __le16 bd_prod; - __le16 sge_prod; __le16 cqe_prod; __le16 reserved; + __le16 reserved1; }; /* The first tx bd of a given packet */ @@ -211,12 +289,17 @@ struct eth_tx_2nd_bd { /* The parsing information data for the third tx bd of a given packet. */ struct eth_tx_data_3rd_bd { __le16 lso_mss; - u8 bitfields; + __le16 bitfields; #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK 0xF #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT 0 #define ETH_TX_DATA_3RD_BD_HDR_NBD_MASK 0xF #define ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT 4 - u8 resereved0[3]; +#define ETH_TX_DATA_3RD_BD_START_BD_MASK 0x1 +#define ETH_TX_DATA_3RD_BD_START_BD_SHIFT 8 +#define ETH_TX_DATA_3RD_BD_RESERVED0_MASK 0x7F +#define ETH_TX_DATA_3RD_BD_RESERVED0_SHIFT 9 + u8 tunn_l4_hdr_start_offset_w; + u8 tunn_hdr_size_w; }; /* The third tx bd of a given packet */ @@ -226,12 +309,24 @@ struct eth_tx_3rd_bd { struct eth_tx_data_3rd_bd data; }; +/* Complementary information for the regular tx bd of a given packet. */ +struct eth_tx_data_bd { + __le16 reserved0; + __le16 bitfields; +#define ETH_TX_DATA_BD_RESERVED1_MASK 0xFF +#define ETH_TX_DATA_BD_RESERVED1_SHIFT 0 +#define ETH_TX_DATA_BD_START_BD_MASK 0x1 +#define ETH_TX_DATA_BD_START_BD_SHIFT 8 +#define ETH_TX_DATA_BD_RESERVED2_MASK 0x7F +#define ETH_TX_DATA_BD_RESERVED2_SHIFT 9 + __le16 reserved3; +}; + /* The common non-special TX BD ring element */ struct eth_tx_bd { struct regpair addr; __le16 nbytes; - __le16 reserved0; - __le32 reserved1; + struct eth_tx_data_bd data; }; union eth_tx_bd_types { diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 41b9049b57e2..5f8fcaaa6504 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -19,6 +19,10 @@ /* dma_addr_t manip */ #define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) #define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) +#define DMA_REGPAIR_LE(x, val) do { \ + (x).hi = DMA_HI_LE((val)); \ + (x).lo = DMA_LO_LE((val)); \ + } while (0) #define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) #define HILO_DMA(hi, lo) HILO_GEN(hi, lo, dma_addr_t) diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 81ab178e31c1..e53b0ca49e41 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -33,6 +33,8 @@ struct qed_update_vport_params { u8 vport_id; u8 update_vport_active_flg; u8 vport_active_flg; + u8 update_accept_any_vlan_flg; + u8 accept_any_vlan; u8 update_rss_flg; struct qed_update_vport_rss_params rss_params; }; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index d4a32e878180..3d43c1d4ecef 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -80,7 +80,7 @@ struct qed_dev_info { u8 num_hwfns; u8 hw_mac[ETH_ALEN]; - bool is_mf; + bool is_mf_default; /* FW version */ u16 fw_major; @@ -360,6 +360,12 @@ enum DP_MODULE { /* to be added...up to 0x8000000 */ }; +enum qed_mf_mode { + QED_MF_DEFAULT, + QED_MF_OVLAN, + QED_MF_NPAR, +}; + struct qed_eth_stats { u64 no_buff_discards; u64 packet_too_big_discard; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4ce9ff7086f4..eab4f8fbed58 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2162,6 +2162,11 @@ static inline int skb_checksum_start_offset(const struct sk_buff *skb) return skb->csum_start - skb_headroom(skb); } +static inline unsigned char *skb_checksum_start(const struct sk_buff *skb) +{ + return skb->head + skb->csum_start; +} + static inline int skb_transport_offset(const struct sk_buff *skb) { return skb_transport_header(skb) - skb->data; @@ -2400,6 +2405,10 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, { return __napi_alloc_skb(napi, length, GFP_ATOMIC); } +void napi_consume_skb(struct sk_buff *skb, int budget); + +void __kfree_skb_flush(void); +void __kfree_skb_defer(struct sk_buff *skb); /** * __dev_alloc_pages - allocate page for network Rx @@ -2622,6 +2631,13 @@ static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len skb_headroom(skb) + len <= skb->hdr_len; } +static inline int skb_try_make_writable(struct sk_buff *skb, + unsigned int write_len) +{ + return skb_cloned(skb) && !skb_clone_writable(skb, write_len) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} + static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, int cloned) { @@ -3550,6 +3566,7 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) struct skb_gso_cb { int mac_offset; int encap_level; + __wsum csum; __u16 csum_start; }; #define SKB_SGO_CB_OFFSET 32 @@ -3576,6 +3593,16 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) return 0; } +static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res) +{ + /* Do not update partial checksums if remote checksum is enabled. */ + if (skb->remcsum_offload) + return; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head; +} + /* Compute the checksum for a gso segment. First compute the checksum value * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and * then add in skb->csum (checksum from csum_start to end of packet). @@ -3586,15 +3613,14 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) */ static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) { - int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - - skb_transport_offset(skb); - __wsum partial; + unsigned char *csum_start = skb_transport_header(skb); + int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start; + __wsum partial = SKB_GSO_CB(skb)->csum; - partial = csum_partial(skb_transport_header(skb), plen, skb->csum); - skb->csum = res; - SKB_GSO_CB(skb)->csum_start -= plen; + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = csum_start - skb->head; - return csum_fold(partial); + return csum_fold(csum_partial(csum_start, plen, partial)); } static inline bool skb_is_gso(const struct sk_buff *skb) @@ -3684,5 +3710,30 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) return hdr_len + skb_gso_transport_seglen(skb); } +/* Local Checksum Offload. + * Compute outer checksum based on the assumption that the + * inner checksum will be offloaded later. + * See Documentation/networking/checksum-offloads.txt for + * explanation of how this works. + * Fill in outer checksum adjustment (e.g. with sum of outer + * pseudo-header) before calling. + * Also ensure that inner checksum is in linear data area. + */ +static inline __wsum lco_csum(struct sk_buff *skb) +{ + unsigned char *csum_start = skb_checksum_start(skb); + unsigned char *l4_hdr = skb_transport_header(skb); + __wsum partial; + + /* Start with complement of inner checksum adjustment */ + partial = ~csum_unfold(*(__force __sum16 *)(csum_start + + skb->csum_offset)); + + /* Add in checksum of our headers (incl. outer checksum + * adjustment filled in by caller) and return result. + */ + return csum_partial(l4_hdr, csum_start - l4_hdr, partial); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b386361ba3e8..bcbf51da4e1e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -29,9 +29,14 @@ static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb) return (struct tcphdr *)skb_transport_header(skb); } +static inline unsigned int __tcp_hdrlen(const struct tcphdr *th) +{ + return th->doff * 4; +} + static inline unsigned int tcp_hdrlen(const struct sk_buff *skb) { - return tcp_hdr(skb)->doff * 4; + return __tcp_hdrlen(tcp_hdr(skb)); } static inline struct tcphdr *inner_tcp_hdr(const struct sk_buff *skb) @@ -256,6 +261,7 @@ struct tcp_sock { u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ + u32 delivered; /* Total data packets delivered incl. rexmits */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 47f52d3cd8df..730d856683e5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -87,6 +87,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); +int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard); int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, bool match_wildcard); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index f1fbc3b11962..f358ad5e4214 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -306,5 +306,6 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); int bond_3ad_set_carrier(struct bonding *bond); void bond_3ad_update_lacp_rate(struct bonding *bond); +void bond_3ad_update_ad_actor_settings(struct bonding *bond); #endif /* _NET_BOND_3AD_H */ diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h new file mode 100644 index 000000000000..151accae708b --- /dev/null +++ b/include/net/dst_cache.h @@ -0,0 +1,97 @@ +#ifndef _NET_DST_CACHE_H +#define _NET_DST_CACHE_H + +#include <linux/jiffies.h> +#include <net/dst.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_fib.h> +#endif + +struct dst_cache { + struct dst_cache_pcpu __percpu *cache; + unsigned long reset_ts; +}; + +/** + * dst_cache_get - perform cache lookup + * @dst_cache: the cache + * + * The caller should use dst_cache_get_ip4() if it need to retrieve the + * source address to be used when xmitting to the cached dst. + * local BH must be disabled. + */ +struct dst_entry *dst_cache_get(struct dst_cache *dst_cache); + +/** + * dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address + * @dst_cache: the cache + * @saddr: return value for the retrieved source address + * + * local BH must be disabled. + */ +struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr); + +/** + * dst_cache_set_ip4 - store the ipv4 dst into the cache + * @dst_cache: the cache + * @dst: the entry to be cached + * @saddr: the source address to be stored inside the cache + * + * local BH must be disabled. + */ +void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst, + __be32 saddr); + +#if IS_ENABLED(CONFIG_IPV6) + +/** + * dst_cache_set_ip6 - store the ipv6 dst into the cache + * @dst_cache: the cache + * @dst: the entry to be cached + * @saddr: the source address to be stored inside the cache + * + * local BH must be disabled. + */ +void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, + const struct in6_addr *addr); + +/** + * dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address + * @dst_cache: the cache + * @saddr: return value for the retrieved source address + * + * local BH must be disabled. + */ +struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, + struct in6_addr *saddr); +#endif + +/** + * dst_cache_reset - invalidate the cache contents + * @dst_cache: the cache + * + * This do not free the cached dst to avoid races and contentions. + * the dst will be freed on later cache lookup. + */ +static inline void dst_cache_reset(struct dst_cache *dst_cache) +{ + dst_cache->reset_ts = jiffies; +} + +/** + * dst_cache_init - initialize the cache, allocating the required storage + * @dst_cache: the cache + * @gfp: allocation flags + */ +int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp); + +/** + * dst_cache_destroy - empty the cache and free the allocated storage + * @dst_cache: the cache + * + * No synchronization is enforced: it must be called only when the cache + * is unsed. + */ +void dst_cache_destroy(struct dst_cache *dst_cache); + +#endif diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 30a56ab2ccfb..84b833af6882 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -62,6 +62,7 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, sizeof(a->u.tun_info) + a->u.tun_info.options_len); } +void metadata_dst_free(struct metadata_dst *); struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 43c0e771f417..8d4608ce8716 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -83,7 +83,6 @@ struct genl_family { * @attrs: netlink attributes * @_net: network namespace * @user_ptr: user pointers - * @dst_sk: destination socket */ struct genl_info { u32 snd_seq; @@ -94,7 +93,6 @@ struct genl_info { struct nlattr ** attrs; possible_net_t _net; void * user_ptr[2]; - struct sock * dst_sk; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -188,8 +186,6 @@ int genl_unregister_family(struct genl_family *family); void genl_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags); -struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, - gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd); diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 7ff588ca6817..28332bdac333 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -53,6 +53,7 @@ struct sock *__inet6_lookup_established(struct net *net, struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, @@ -60,6 +61,7 @@ struct sock *inet6_lookup_listener(struct net *net, static inline struct sock *__inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, @@ -71,12 +73,12 @@ static inline struct sock *__inet6_lookup(struct net *net, if (sk) return sk; - return inet6_lookup_listener(net, hashinfo, saddr, sport, + return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, hnum, dif); } static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, - struct sk_buff *skb, + struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, int iif) @@ -86,16 +88,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, if (sk) return sk; - return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, - &ipv6_hdr(skb)->saddr, sport, + return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, + doff, &ipv6_hdr(skb)->saddr, sport, &ipv6_hdr(skb)->daddr, ntohs(dport), iif); } struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif); + +int inet6_hash(struct sock *sk); #endif /* IS_ENABLED(CONFIG_IPV6) */ #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 12aac0fd6ee7..909972aa3acd 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -13,6 +13,7 @@ struct netns_frags { int timeout; int high_thresh; int low_thresh; + int max_dist; }; /** diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index de2e3ade6102..50f635c2c536 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -207,12 +207,16 @@ void inet_hashinfo_init(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); -void __inet_hash(struct sock *sk, struct sock *osk); -void inet_hash(struct sock *sk); +int __inet_hash(struct sock *sk, struct sock *osk, + int (*saddr_same)(const struct sock *sk1, + const struct sock *sk2, + bool match_wildcard)); +int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const unsigned short hnum, @@ -220,10 +224,11 @@ struct sock *__inet_lookup_listener(struct net *net, static inline struct sock *inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __inet_lookup_listener(net, hashinfo, saddr, sport, + return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, ntohs(dport), dif); } @@ -299,6 +304,7 @@ static inline struct sock * static inline struct sock *__inet_lookup(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif) @@ -307,12 +313,13 @@ static inline struct sock *__inet_lookup(struct net *net, struct sock *sk = __inet_lookup_established(net, hashinfo, saddr, sport, daddr, hnum, dif); - return sk ? : __inet_lookup_listener(net, hashinfo, saddr, sport, - daddr, hnum, dif); + return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr, + sport, daddr, hnum, dif); } static inline struct sock *inet_lookup(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif) @@ -320,7 +327,8 @@ static inline struct sock *inet_lookup(struct net *net, struct sock *sk; local_bh_disable(); - sk = __inet_lookup(net, hashinfo, saddr, sport, daddr, dport, dif); + sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, + dport, dif); local_bh_enable(); return sk; @@ -328,6 +336,7 @@ static inline struct sock *inet_lookup(struct net *net, static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, + int doff, const __be16 sport, const __be16 dport) { @@ -337,8 +346,8 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, if (sk) return sk; else - return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, - iph->saddr, sport, + return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, + doff, iph->saddr, sport, iph->daddr, dport, inet_iif(skb)); } diff --git a/include/net/ip.h b/include/net/ip.h index 1a98f1ca1638..cbb134b2f0e4 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -245,12 +245,6 @@ extern int inet_peer_threshold; extern int inet_peer_minttl; extern int inet_peer_maxttl; -/* From ip_input.c */ -extern int sysctl_ip_early_demux; - -/* From ip_output.c */ -extern int sysctl_ip_dynaddr; - void ipfrag_init(void); void ip_static_sysctl_init(void); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 0d0ce0b2d870..499a707765ea 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -6,6 +6,7 @@ #include <linux/if_tunnel.h> #include <linux/ip6_tunnel.h> #include <net/ip_tunnels.h> +#include <net/dst_cache.h> #define IP6TUNNEL_ERR_TIMEO (30*HZ) @@ -33,12 +34,6 @@ struct __ip6_tnl_parm { __be32 o_key; }; -struct ip6_tnl_dst { - seqlock_t lock; - struct dst_entry __rcu *dst; - u32 cookie; -}; - /* IPv6 tunnel */ struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ @@ -46,7 +41,7 @@ struct ip6_tnl { struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ - struct ip6_tnl_dst __percpu *dst_cache; /* cached dst */ + struct dst_cache dst_cache; /* cached dst */ int err_count; unsigned long err_time; @@ -66,11 +61,6 @@ struct ipv6_tlv_tnl_enc_lim { __u8 encap_limit; /* tunnel encapsulation limit */ } __packed; -struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t); -int ip6_tnl_dst_init(struct ip6_tnl *t); -void ip6_tnl_dst_destroy(struct ip6_tnl *t); -void ip6_tnl_dst_reset(struct ip6_tnl *t); -void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst); int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index dda9abf6b89c..5f28b606633e 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -13,6 +13,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/lwtunnel.h> +#include <net/dst_cache.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> @@ -57,6 +58,9 @@ struct ip_tunnel_key { struct ip_tunnel_info { struct ip_tunnel_key key; +#ifdef CONFIG_DST_CACHE + struct dst_cache dst_cache; +#endif u8 options_len; u8 mode; }; @@ -85,11 +89,6 @@ struct ip_tunnel_prl_entry { struct rcu_head rcu_head; }; -struct ip_tunnel_dst { - struct dst_entry __rcu *dst; - __be32 saddr; -}; - struct metadata_dst; struct ip_tunnel { @@ -108,7 +107,7 @@ struct ip_tunnel { int tun_hlen; /* Precalculated header length */ int mlink; - struct ip_tunnel_dst __percpu *dst_cache; + struct dst_cache dst_cache; struct ip_tunnel_parm parms; @@ -248,7 +247,6 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); void ip_tunnel_setup(struct net_device *dev, int net_id); -void ip_tunnel_dst_reset_all(struct ip_tunnel *t); int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); @@ -273,15 +271,15 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, return INET_ECN_encapsulate(tos, inner); } -int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, + bool xnet); void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); -struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum, - int gso_type_mask); +struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 6570f379aba2..f3c9857c645d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -259,8 +259,12 @@ static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) rcu_read_lock(); opt = rcu_dereference(np->opt); - if (opt && !atomic_inc_not_zero(&opt->refcnt)) - opt = NULL; + if (opt) { + if (!atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + else + opt = rcu_pointer_handoff(opt); + } rcu_read_unlock(); return opt; } diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 66350ce3e955..e9f116e29c22 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -170,6 +170,8 @@ static inline int lwtunnel_input(struct sk_buff *skb) return -EOPNOTSUPP; } -#endif +#endif /* CONFIG_LWTUNNEL */ + +#define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type)) #endif /* __NET_LWTUNNEL_H */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2b7907a35568..a69cde3ce460 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -80,9 +80,13 @@ struct netns_ipv4 { int sysctl_tcp_ecn; int sysctl_tcp_ecn_fallback; + int sysctl_ip_default_ttl; int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; int sysctl_ip_nonlocal_bind; + /* Shall we try to damage output packets if routing dev changes? */ + int sysctl_ip_dynaddr; + int sysctl_ip_early_demux; int sysctl_fwmark_reflect; int sysctl_tcp_fwmark_accept; @@ -98,6 +102,21 @@ struct netns_ipv4 { int sysctl_tcp_keepalive_probes; int sysctl_tcp_keepalive_intvl; + int sysctl_tcp_syn_retries; + int sysctl_tcp_synack_retries; + int sysctl_tcp_syncookies; + int sysctl_tcp_reordering; + int sysctl_tcp_retries1; + int sysctl_tcp_retries2; + int sysctl_tcp_orphan_retries; + int sysctl_tcp_fin_timeout; + unsigned int sysctl_tcp_notsent_lowat; + + int sysctl_igmp_max_memberships; + int sysctl_igmp_max_msf; + int sysctl_igmp_llm_reports; + int sysctl_igmp_qrv; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 68e509750caa..039cc29cb4a8 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -51,7 +51,7 @@ void pn_sock_init(void); struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa); void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb); void phonet_get_local_port_range(int *min, int *max); -void pn_sock_hash(struct sock *sk); +int pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); diff --git a/include/net/ping.h b/include/net/ping.h index ac80cb45e630..5fd7cc244833 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -65,7 +65,7 @@ struct pingfakehdr { }; int ping_get_port(struct sock *sk, unsigned short ident); -void ping_hash(struct sock *sk); +int ping_hash(struct sock *sk); void ping_unhash(struct sock *sk); int ping_init_sock(struct sock *sk); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bc49967e1a68..2121df574262 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -358,4 +358,38 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ +struct tc_cls_u32_knode { + struct tcf_exts *exts; + struct tc_u32_sel *sel; + u32 handle; + u32 val; + u32 mask; + u32 link_handle; + u8 fshift; +}; + +struct tc_cls_u32_hnode { + u32 handle; + u32 prio; + unsigned int divisor; +}; + +enum tc_clsu32_command { + TC_CLSU32_NEW_KNODE, + TC_CLSU32_REPLACE_KNODE, + TC_CLSU32_DELETE_KNODE, + TC_CLSU32_NEW_HNODE, + TC_CLSU32_REPLACE_HNODE, + TC_CLSU32_DELETE_HNODE, +}; + +struct tc_cls_u32_offload { + /* knode values */ + enum tc_clsu32_command command; + union { + struct tc_cls_u32_knode knode; + struct tc_cls_u32_hnode hnode; + }; +}; + #endif diff --git a/include/net/raw.h b/include/net/raw.h index 6a40c6562dd2..3e789008394d 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -57,7 +57,7 @@ int raw_seq_open(struct inode *ino, struct file *file, #endif -void raw_hash_sk(struct sock *sk); +int raw_hash_sk(struct sock *sk); void raw_unhash_sk(struct sock *sk); struct raw_sock { diff --git a/include/net/route.h b/include/net/route.h index a3b9ef74a389..9b0a523bb428 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -329,14 +329,13 @@ static inline int inet_iif(const struct sk_buff *skb) return skb->skb_iif; } -extern int sysctl_ip_default_ttl; - static inline int ip4_dst_hoplimit(const struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); + struct net *net = dev_net(dst->dev); if (hoplimit == 0) - hoplimit = sysctl_ip_default_ttl; + hoplimit = net->ipv4.sysctl_ip_default_ttl; return hoplimit; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 205630bb5010..d05b56641abc 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -535,7 +535,6 @@ struct sctp_datamsg { struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *, struct sctp_sndrcvinfo *, struct iov_iter *); -void sctp_datamsg_free(struct sctp_datamsg *); void sctp_datamsg_put(struct sctp_datamsg *); void sctp_chunk_fail(struct sctp_chunk *, int error); int sctp_chunk_abandoned(struct sctp_chunk *); diff --git a/include/net/sock.h b/include/net/sock.h index f5ea148853e2..255d3e03727b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -984,7 +984,7 @@ struct proto { void (*release_cb)(struct sock *sk); /* Keeping track of sk's, looking them up, and port selection methods. */ - void (*hash)(struct sock *sk); + int (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); @@ -1194,10 +1194,10 @@ static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, /* With per-bucket locks this operation is not-atomic, so that * this version is not worse. */ -static inline void __sk_prot_rehash(struct sock *sk) +static inline int __sk_prot_rehash(struct sock *sk) { sk->sk_prot->unhash(sk); - sk->sk_prot->hash(sk); + return sk->sk_prot->hash(sk); } void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 592a6bc02b0b..04a31830711b 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -2,6 +2,7 @@ #define __NET_TC_GACT_H #include <net/act_api.h> +#include <linux/tc_act/tc_gact.h> struct tcf_gact { struct tcf_common common; @@ -15,4 +16,19 @@ struct tcf_gact { #define to_gact(a) \ container_of(a->priv, struct tcf_gact, common) +#ifdef CONFIG_NET_CLS_ACT +static inline bool is_tcf_gact_shot(const struct tc_action *a) +{ + struct tcf_gact *gact; + + if (a->ops && a->ops->type != TCA_ACT_GACT) + return false; + + gact = a->priv; + if (gact->tcf_action == TC_ACT_SHOT) + return true; + + return false; +} +#endif #endif /* __NET_TC_GACT_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index ae6468f5c9f3..e90db8546806 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -239,13 +239,6 @@ extern struct inet_timewait_death_row tcp_death_row; extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; -extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_syn_retries; -extern int sysctl_tcp_synack_retries; -extern int sysctl_tcp_retries1; -extern int sysctl_tcp_retries2; -extern int sysctl_tcp_orphan_retries; -extern int sysctl_tcp_syncookies; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; @@ -274,7 +267,6 @@ extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; -extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; @@ -568,6 +560,7 @@ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); +void tcp_fin(struct sock *sk); /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); @@ -963,9 +956,11 @@ static inline void tcp_enable_fack(struct tcp_sock *tp) */ static inline void tcp_enable_early_retrans(struct tcp_sock *tp) { + struct net *net = sock_net((struct sock *)tp); + tp->do_early_retrans = sysctl_tcp_early_retrans && sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && - sysctl_tcp_reordering == 3; + net->ipv4.sysctl_tcp_reordering == 3; } static inline void tcp_disable_early_retrans(struct tcp_sock *tp) @@ -1252,7 +1247,7 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) @@ -1437,6 +1432,7 @@ void tcp_free_fastopen_req(struct tcp_sock *tp); extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; int tcp_fastopen_reset_cipher(void *key, unsigned int len); +void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, @@ -1685,7 +1681,8 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { - return tp->notsent_lowat ?: sysctl_tcp_notsent_lowat; + struct net *net = sock_net((struct sock *)tp); + return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; } static inline bool tcp_stream_memory_free(const struct sock *sk) diff --git a/include/net/udp.h b/include/net/udp.h index 2842541e28e7..92927f729ac8 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -177,9 +177,10 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) } /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ -static inline void udp_lib_hash(struct sock *sk) +static inline int udp_lib_hash(struct sock *sk) { BUG(); + return 0; } void udp_lib_unhash(struct sock *sk); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index cca2ad3082c3..97f5adb121a6 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -103,7 +103,7 @@ static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, { int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; - return iptunnel_handle_offloads(skb, udp_csum, type); + return iptunnel_handle_offloads(skb, type); } static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0fb86442544b..6eda4ed4d78b 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -9,17 +9,71 @@ #include <linux/udp.h> #include <net/dst_metadata.h> +/* VXLAN protocol (RFC 7348) header: + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|R|R|I|R|R|R| Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * I = VXLAN Network Identifier (VNI) present. + */ +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +}; + +/* VXLAN header flags. */ +#define VXLAN_HF_VNI cpu_to_be32(BIT(27)) + +#define VXLAN_N_VID (1u << 24) +#define VXLAN_VID_MASK (VXLAN_N_VID - 1) +#define VXLAN_VNI_MASK cpu_to_be32(VXLAN_VID_MASK << 8) +#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) + #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) +#define FDB_HASH_BITS 8 +#define FDB_HASH_SIZE (1<<FDB_HASH_BITS) + +/* Remote checksum offload for VXLAN (VXLAN_F_REMCSUM_[RT]X): + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|R|R|I|R|R|R|R|R|C| Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) |O| Csum start | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * C = Remote checksum offload bit. When set indicates that the + * remote checksum offload data is present. + * + * O = Offset bit. Indicates the checksum offset relative to + * checksum start. + * + * Csum start = Checksum start divided by two. + * + * http://tools.ietf.org/html/draft-herbert-vxlan-rco + */ + +/* VXLAN-RCO header flags. */ +#define VXLAN_HF_RCO cpu_to_be32(BIT(21)) + +/* Remote checksum offload header option */ +#define VXLAN_RCO_MASK cpu_to_be32(0x7f) /* Last byte of vni field */ +#define VXLAN_RCO_UDP cpu_to_be32(0x80) /* Indicate UDP RCO (TCP when not set *) */ +#define VXLAN_RCO_SHIFT 1 /* Left shift of start */ +#define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1) +#define VXLAN_MAX_REMCSUM_START (0x7f << VXLAN_RCO_SHIFT) /* - * VXLAN Group Based Policy Extension: + * VXLAN Group Based Policy Extension (VXLAN_F_GBP): * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R| Group Policy ID | + * |G|R|R|R|I|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | VXLAN Network Identifier (VNI) | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * + * G = Group Policy ID present. + * * D = Don't Learn bit. When set, this bit indicates that the egress * VTEP MUST NOT learn the source address of the encapsulated frame. * @@ -27,18 +81,18 @@ * this packet. Policies MUST NOT be applied by devices when the * A bit is set. * - * [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy + * https://tools.ietf.org/html/draft-smith-vxlan-group-policy */ struct vxlanhdr_gbp { - __u8 vx_flags; + u8 vx_flags; #ifdef __LITTLE_ENDIAN_BITFIELD - __u8 reserved_flags1:3, + u8 reserved_flags1:3, policy_applied:1, reserved_flags2:2, dont_learn:1, reserved_flags3:1; #elif defined(__BIG_ENDIAN_BITFIELD) - __u8 reserved_flags1:1, + u8 reserved_flags1:1, dont_learn:1, reserved_flags2:2, policy_applied:1, @@ -50,7 +104,10 @@ struct vxlanhdr_gbp { __be32 vx_vni; }; -#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF) +/* VXLAN-GBP header flags. */ +#define VXLAN_HF_GBP cpu_to_be32(BIT(31)) + +#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF)) /* skb->mark mapping * @@ -62,44 +119,6 @@ struct vxlanhdr_gbp { #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) -/* VXLAN protocol header: - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * |G|R|R|R|I|R|R|C| Reserved | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | VXLAN Network Identifier (VNI) | Reserved | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * - * G = 1 Group Policy (VXLAN-GBP) - * I = 1 VXLAN Network Identifier (VNI) present - * C = 1 Remote checksum offload (RCO) - */ -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -}; - -/* VXLAN header flags. */ -#define VXLAN_HF_RCO BIT(21) -#define VXLAN_HF_VNI BIT(27) -#define VXLAN_HF_GBP BIT(31) - -/* Remote checksum offload header option */ -#define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ -#define VXLAN_RCO_UDP 0x80 /* Indicate UDP RCO (TCP when not set *) */ -#define VXLAN_RCO_SHIFT 1 /* Left shift of start */ -#define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1) -#define VXLAN_MAX_REMCSUM_START (VXLAN_RCO_MASK << VXLAN_RCO_SHIFT) - -#define VXLAN_N_VID (1u << 24) -#define VXLAN_VID_MASK (VXLAN_N_VID - 1) -#define VXLAN_VNI_MASK (VXLAN_VID_MASK << 8) -#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) - -#define VNI_HASH_BITS 10 -#define VNI_HASH_SIZE (1<<VNI_HASH_BITS) -#define FDB_HASH_BITS 8 -#define FDB_HASH_SIZE (1<<FDB_HASH_BITS) - struct vxlan_metadata { u32 gbp; }; @@ -125,23 +144,24 @@ union vxlan_addr { struct vxlan_rdst { union vxlan_addr remote_ip; __be16 remote_port; - u32 remote_vni; + __be32 remote_vni; u32 remote_ifindex; struct list_head list; struct rcu_head rcu; + struct dst_cache dst_cache; }; struct vxlan_config { union vxlan_addr remote_ip; union vxlan_addr saddr; - u32 vni; + __be32 vni; int remote_ifindex; int mtu; __be16 dst_port; - __u16 port_min; - __u16 port_max; - __u8 tos; - __u8 ttl; + u16 port_min; + u16 port_max; + u8 tos; + u8 ttl; u32 flags; unsigned long age_interval; unsigned int addrmax; @@ -177,7 +197,7 @@ struct vxlan_dev { #define VXLAN_F_L2MISS 0x08 #define VXLAN_F_L3MISS 0x10 #define VXLAN_F_IPV6 0x20 -#define VXLAN_F_UDP_CSUM 0x40 +#define VXLAN_F_UDP_ZERO_CSUM_TX 0x40 #define VXLAN_F_UDP_ZERO_CSUM6_TX 0x80 #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 #define VXLAN_F_REMCSUM_TX 0x200 @@ -242,6 +262,68 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, /* IPv6 header + UDP + VXLAN + Ethernet header */ #define VXLAN6_HEADROOM (40 + 8 + 8 + 14) +static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) +{ + return (struct vxlanhdr *)(udp_hdr(skb) + 1); +} + +static inline __be32 vxlan_vni(__be32 vni_field) +{ +#if defined(__BIG_ENDIAN) + return vni_field >> 8; +#else + return (vni_field & VXLAN_VNI_MASK) << 8; +#endif +} + +static inline __be32 vxlan_vni_field(__be32 vni) +{ +#if defined(__BIG_ENDIAN) + return vni << 8; +#else + return vni >> 8; +#endif +} + +static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id) +{ +#if defined(__BIG_ENDIAN) + return tun_id; +#else + return tun_id >> 32; +#endif +} + +static inline __be64 vxlan_vni_to_tun_id(__be32 vni) +{ +#if defined(__BIG_ENDIAN) + return (__be64)vni; +#else + return (__be64)vni << 32; +#endif +} + +static inline size_t vxlan_rco_start(__be32 vni_field) +{ + return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; +} + +static inline size_t vxlan_rco_offset(__be32 vni_field) +{ + return (vni_field & VXLAN_RCO_UDP) ? + offsetof(struct udphdr, check) : + offsetof(struct tcphdr, check); +} + +static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) +{ + __be32 vni_field = cpu_to_be32(start >> VXLAN_RCO_SHIFT); + + if (offset == offsetof(struct udphdr, check)) + vni_field |= VXLAN_RCO_UDP; + return vni_field; +} + #if IS_ENABLED(CONFIG_VXLAN) void vxlan_get_rx_port(struct net_device *netdev); #else diff --git a/include/trace/events/sunvnet.h b/include/trace/events/sunvnet.h new file mode 100644 index 000000000000..eb080b267e55 --- /dev/null +++ b/include/trace/events/sunvnet.h @@ -0,0 +1,139 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sunvnet + +#if !defined(_TRACE_SUNVNET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SUNVNET_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(vnet_rx_one, + + TP_PROTO(int lsid, int rsid, int index, int needs_ack), + + TP_ARGS(lsid, rsid, index, needs_ack), + + TP_STRUCT__entry( + __field(int, lsid) + __field(int, rsid) + __field(int, index) + __field(int, needs_ack) + ), + + TP_fast_assign( + __entry->lsid = lsid; + __entry->rsid = rsid; + __entry->index = index; + __entry->needs_ack = needs_ack; + ), + + TP_printk("(%x:%x) walk_rx_one index %d; needs_ack %d", + __entry->lsid, __entry->rsid, + __entry->index, __entry->needs_ack) +); + +DECLARE_EVENT_CLASS(vnet_tx_stopped_ack_template, + + TP_PROTO(int lsid, int rsid, int ack_end, int npkts), + + TP_ARGS(lsid, rsid, ack_end, npkts), + + TP_STRUCT__entry( + __field(int, lsid) + __field(int, rsid) + __field(int, ack_end) + __field(int, npkts) + ), + + TP_fast_assign( + __entry->lsid = lsid; + __entry->rsid = rsid; + __entry->ack_end = ack_end; + __entry->npkts = npkts; + ), + + TP_printk("(%x:%x) stopped ack for %d; npkts %d", + __entry->lsid, __entry->rsid, + __entry->ack_end, __entry->npkts) +); +DEFINE_EVENT(vnet_tx_stopped_ack_template, vnet_tx_send_stopped_ack, + TP_PROTO(int lsid, int rsid, int ack_end, int npkts), + TP_ARGS(lsid, rsid, ack_end, npkts)); +DEFINE_EVENT(vnet_tx_stopped_ack_template, vnet_tx_defer_stopped_ack, + TP_PROTO(int lsid, int rsid, int ack_end, int npkts), + TP_ARGS(lsid, rsid, ack_end, npkts)); +DEFINE_EVENT(vnet_tx_stopped_ack_template, vnet_tx_pending_stopped_ack, + TP_PROTO(int lsid, int rsid, int ack_end, int npkts), + TP_ARGS(lsid, rsid, ack_end, npkts)); + +TRACE_EVENT(vnet_rx_stopped_ack, + + TP_PROTO(int lsid, int rsid, int end), + + TP_ARGS(lsid, rsid, end), + + TP_STRUCT__entry( + __field(int, lsid) + __field(int, rsid) + __field(int, end) + ), + + TP_fast_assign( + __entry->lsid = lsid; + __entry->rsid = rsid; + __entry->end = end; + ), + + TP_printk("(%x:%x) stopped ack for index %d", + __entry->lsid, __entry->rsid, __entry->end) +); + +TRACE_EVENT(vnet_tx_trigger, + + TP_PROTO(int lsid, int rsid, int start, int err), + + TP_ARGS(lsid, rsid, start, err), + + TP_STRUCT__entry( + __field(int, lsid) + __field(int, rsid) + __field(int, start) + __field(int, err) + ), + + TP_fast_assign( + __entry->lsid = lsid; + __entry->rsid = rsid; + __entry->start = start; + __entry->err = err; + ), + + TP_printk("(%x:%x) Tx trigger for %d sent with err %d %s", + __entry->lsid, __entry->rsid, __entry->start, + __entry->err, __entry->err > 0 ? "(ok)" : " ") +); + +TRACE_EVENT(vnet_skip_tx_trigger, + + TP_PROTO(int lsid, int rsid, int last), + + TP_ARGS(lsid, rsid, last), + + TP_STRUCT__entry( + __field(int, lsid) + __field(int, rsid) + __field(int, last) + ), + + TP_fast_assign( + __entry->lsid = lsid; + __entry->rsid = rsid; + __entry->last = last; + ), + + TP_printk("(%x:%x) Skip Tx trigger. Last trigger sent was %d", + __entry->lsid, __entry->rsid, __entry->last) +); +#endif /* _TRACE_SOCK_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aa6f8571de13..6496f98d3d68 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -81,6 +81,9 @@ enum bpf_map_type { BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_PERF_EVENT_ARRAY, + BPF_MAP_TYPE_PERCPU_HASH, + BPF_MAP_TYPE_PERCPU_ARRAY, + BPF_MAP_TYPE_STACK_TRACE, }; enum bpf_prog_type { @@ -270,6 +273,31 @@ enum bpf_func_id { */ BPF_FUNC_perf_event_output, BPF_FUNC_skb_load_bytes, + + /** + * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - numer of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + */ + BPF_FUNC_get_stackid, + + /** + * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff + * @from: raw from buffer + * @from_size: length of from buffer + * @to: raw to buffer + * @to_size: length of to buffer + * @seed: optional seed + * Return: csum result + */ + BPF_FUNC_csum_diff, __BPF_FUNC_MAX_ID, }; @@ -285,6 +313,7 @@ enum bpf_func_id { /* BPF_FUNC_l4_csum_replace flags. */ #define BPF_F_PSEUDO_HDR (1ULL << 4) +#define BPF_F_MARK_MANGLED_0 (1ULL << 5) /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ #define BPF_F_INGRESS (1ULL << 0) @@ -292,6 +321,12 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ #define BPF_F_TUNINFO_IPV6 (1ULL << 0) +/* BPF_FUNC_get_stackid flags. */ +#define BPF_F_SKIP_FIELD_MASK 0xffULL +#define BPF_F_USER_STACK (1ULL << 8) +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define BPF_F_REUSE_STACKID (1ULL << 10) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 57fa39005e79..f15ae02621a1 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -31,7 +31,7 @@ * physical connectors and other link features that are * advertised through autonegotiation or enabled for * auto-detection. - * @speed: Low bits of the speed + * @speed: Low bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN * @duplex: Duplex mode; one of %DUPLEX_* * @port: Physical connector type; one of %PORT_* * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not @@ -47,7 +47,7 @@ * obsoleted by &struct ethtool_coalesce. Read-only; deprecated. * @maxrxpkt: Historically used to report RX IRQ coalescing; now * obsoleted by &struct ethtool_coalesce. Read-only; deprecated. - * @speed_hi: High bits of the speed + * @speed_hi: High bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of * %ETH_TP_MDI_*. If the status is unknown or not applicable, the * value will be %ETH_TP_MDI_INVALID. Read-only. @@ -748,6 +748,56 @@ struct ethtool_usrip4_spec { __u8 proto; }; +/** + * struct ethtool_tcpip6_spec - flow specification for TCP/IPv6 etc. + * @ip6src: Source host + * @ip6dst: Destination host + * @psrc: Source port + * @pdst: Destination port + * @tclass: Traffic Class + * + * This can be used to specify a TCP/IPv6, UDP/IPv6 or SCTP/IPv6 flow. + */ +struct ethtool_tcpip6_spec { + __be32 ip6src[4]; + __be32 ip6dst[4]; + __be16 psrc; + __be16 pdst; + __u8 tclass; +}; + +/** + * struct ethtool_ah_espip6_spec - flow specification for IPsec/IPv6 + * @ip6src: Source host + * @ip6dst: Destination host + * @spi: Security parameters index + * @tclass: Traffic Class + * + * This can be used to specify an IPsec transport or tunnel over IPv6. + */ +struct ethtool_ah_espip6_spec { + __be32 ip6src[4]; + __be32 ip6dst[4]; + __be32 spi; + __u8 tclass; +}; + +/** + * struct ethtool_usrip6_spec - general flow specification for IPv6 + * @ip6src: Source host + * @ip6dst: Destination host + * @l4_4_bytes: First 4 bytes of transport (layer 4) header + * @tclass: Traffic Class + * @l4_proto: Transport protocol number (nexthdr after any Extension Headers) + */ +struct ethtool_usrip6_spec { + __be32 ip6src[4]; + __be32 ip6dst[4]; + __be32 l4_4_bytes; + __u8 tclass; + __u8 l4_proto; +}; + union ethtool_flow_union { struct ethtool_tcpip4_spec tcp_ip4_spec; struct ethtool_tcpip4_spec udp_ip4_spec; @@ -755,6 +805,12 @@ union ethtool_flow_union { struct ethtool_ah_espip4_spec ah_ip4_spec; struct ethtool_ah_espip4_spec esp_ip4_spec; struct ethtool_usrip4_spec usr_ip4_spec; + struct ethtool_tcpip6_spec tcp_ip6_spec; + struct ethtool_tcpip6_spec udp_ip6_spec; + struct ethtool_tcpip6_spec sctp_ip6_spec; + struct ethtool_ah_espip6_spec ah_ip6_spec; + struct ethtool_ah_espip6_spec esp_ip6_spec; + struct ethtool_usrip6_spec usr_ip6_spec; struct ethhdr ether_spec; __u8 hdata[52]; }; @@ -1146,6 +1202,21 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) #define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT) +#define MAX_NUM_QUEUE 4096 + +/** + * struct ethtool_per_queue_op - apply sub command to the queues in mask. + * @cmd: ETHTOOL_PERQUEUE + * @sub_command: the sub command which apply to each queues + * @queue_mask: Bitmap of the queues which sub command apply to + * @data: A complete command structure following for each of the queues addressed + */ +struct ethtool_per_queue_op { + __u32 cmd; + __u32 sub_command; + __u32 queue_mask[DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; + char data[]; +}; /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* Get settings. */ @@ -1229,6 +1300,8 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ #define ETHTOOL_GPHYSTATS 0x0000004a /* get PHY-specific statistics */ +#define ETHTOOL_PERQUEUE 0x0000004b /* Set per queue options */ + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET @@ -1303,7 +1376,7 @@ enum ethtool_sfeatures_retval_bits { * it was forced up into this mode or autonegotiated. */ -/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|5|10|20|25|40|50|56|100]GbE. */ +/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. */ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 @@ -1319,11 +1392,28 @@ enum ethtool_sfeatures_retval_bits { #define SPEED_UNKNOWN -1 +static inline int ethtool_validate_speed(__u32 speed) +{ + return speed <= INT_MAX || speed == SPEED_UNKNOWN; +} + /* Duplex, half or full. */ #define DUPLEX_HALF 0x00 #define DUPLEX_FULL 0x01 #define DUPLEX_UNKNOWN 0xff +static inline int ethtool_validate_duplex(__u8 duplex) +{ + switch (duplex) { + case DUPLEX_HALF: + case DUPLEX_FULL: + case DUPLEX_UNKNOWN: + return 1; + } + + return 0; +} + /* Which connector port. */ #define PORT_TP 0x00 #define PORT_AUI 0x01 @@ -1367,15 +1457,17 @@ enum ethtool_sfeatures_retval_bits { #define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ #define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ #define AH_ESP_V4_FLOW 0x04 /* hash only */ -#define TCP_V6_FLOW 0x05 /* hash only */ -#define UDP_V6_FLOW 0x06 /* hash only */ -#define SCTP_V6_FLOW 0x07 /* hash only */ +#define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */ +#define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */ +#define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */ #define AH_ESP_V6_FLOW 0x08 /* hash only */ #define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ #define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ -#define AH_V6_FLOW 0x0b /* hash only */ -#define ESP_V6_FLOW 0x0c /* hash only */ -#define IP_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ +#define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */ +#define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */ +#define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ +#define IP_USER_FLOW IPV4_USER_FLOW +#define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */ #define IPV4_FLOW 0x10 /* hash only */ #define IPV6_FLOW 0x11 /* hash only */ #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index c3363ba1ae05..5512c90af7e3 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -21,6 +21,7 @@ struct genlmsghdr { #define GENL_CMD_CAP_DO 0x02 #define GENL_CMD_CAP_DUMP 0x04 #define GENL_CMD_CAP_HASPOL 0x08 +#define GENL_UNS_ADMIN_PERM 0x10 /* * List of reserved static generic netlink identifiers: diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 18db14477bdd..0890b217580d 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -137,7 +137,10 @@ struct bridge_vlan_info { /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { - * [MDBA_MDB_ENTRY_INFO] + * [MDBA_MDB_ENTRY_INFO] { + * struct br_mdb_entry + * [MDBA_MDB_EATTR attributes] + * } * } * } * [MDBA_ROUTER] = { @@ -166,6 +169,14 @@ enum { }; #define MDBA_MDB_ENTRY_MAX (__MDBA_MDB_ENTRY_MAX - 1) +/* per mdb entry additional attributes */ +enum { + MDBA_MDB_EATTR_UNSPEC, + MDBA_MDB_EATTR_TIMER, + __MDBA_MDB_EATTR_MAX +}; +#define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) + enum { MDBA_ROUTER_UNSPEC, MDBA_ROUTER_PORT, @@ -183,6 +194,8 @@ struct br_mdb_entry { #define MDB_TEMPORARY 0 #define MDB_PERMANENT 1 __u8 state; +#define MDB_FLAGS_OFFLOAD (1 << 0) + __u8 flags; __u16 vid; struct { union { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a30b78090594..d452cea59020 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -35,6 +35,8 @@ struct rtnl_link_stats { /* for cslip etc */ __u32 rx_compressed; __u32 tx_compressed; + + __u32 rx_nohandler; /* dropped, no handler found */ }; /* The main device statistics structure */ @@ -68,6 +70,8 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; + + __u64 rx_nohandler; /* dropped, no handler found */ }; /* The struct should be in sync with struct ifmap */ @@ -401,6 +405,14 @@ enum { #define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) +enum { + IFLA_VRF_PORT_UNSPEC, + IFLA_VRF_PORT_TABLE, + __IFLA_VRF_PORT_MAX +}; + +#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1) + /* IPVLAN section */ enum { IFLA_IPVLAN_UNSPEC, diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 08f894d2ddbd..f291569768dd 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -165,6 +165,8 @@ enum IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, + IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, + IPV4_DEVCONF_DROP_GRATUITOUS_ARP, __IPV4_DEVCONF_MAX }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 38b4fef20219..ec117b65d5a5 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -174,6 +174,8 @@ enum { DEVCONF_USE_OIF_ADDRS_ONLY, DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, + DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, + DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_MAX }; diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index f095155d8749..0dba4e4ed2be 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -107,8 +107,10 @@ struct nlmsgerr { #define NETLINK_PKTINFO 3 #define NETLINK_BROADCAST_ERROR 4 #define NETLINK_NO_ENOBUFS 5 +#ifndef __KERNEL__ #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#endif #define NETLINK_LISTEN_ALL_NSID 8 #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 @@ -134,6 +136,7 @@ struct nl_mmap_hdr { __u32 nm_gid; }; +#ifndef __KERNEL__ enum nl_mmap_status { NL_MMAP_STATUS_UNUSED, NL_MMAP_STATUS_RESERVED, @@ -145,6 +148,7 @@ enum nl_mmap_status { #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) +#endif #define NET_MAJOR 36 /* Major 36 is reserved for networking */ diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index f2159d30d1f5..d79399394b46 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -48,6 +48,8 @@ enum { #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ +#ifndef __KERNEL__ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ +#endif #endif diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 65a77b071e22..fe95446e9abf 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -196,6 +196,9 @@ struct tcp_info { __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */ __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */ + + __u32 tcpi_notsent_bytes; + __u32 tcpi_min_rtt; }; /* for TCP_MD5SIG socket option */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 13272582eee0..8a932d079c24 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -2,3 +2,6 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o +ifeq ($(CONFIG_PERF_EVENTS),y) +obj-$(CONFIG_BPF_SYSCALL) += stackmap.o +endif diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 89ebbc4d1164..bd3bdf2486a7 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -17,11 +17,39 @@ #include <linux/filter.h> #include <linux/perf_event.h> +static void bpf_array_free_percpu(struct bpf_array *array) +{ + int i; + + for (i = 0; i < array->map.max_entries; i++) + free_percpu(array->pptrs[i]); +} + +static int bpf_array_alloc_percpu(struct bpf_array *array) +{ + void __percpu *ptr; + int i; + + for (i = 0; i < array->map.max_entries; i++) { + ptr = __alloc_percpu_gfp(array->elem_size, 8, + GFP_USER | __GFP_NOWARN); + if (!ptr) { + bpf_array_free_percpu(array); + return -ENOMEM; + } + array->pptrs[i] = ptr; + } + + return 0; +} + /* Called from syscall */ static struct bpf_map *array_map_alloc(union bpf_attr *attr) { + bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; struct bpf_array *array; - u32 elem_size, array_size; + u64 array_size; + u32 elem_size; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -36,12 +64,16 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) elem_size = round_up(attr->value_size, 8); - /* check round_up into zero and u32 overflow */ - if (elem_size == 0 || - attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) + array_size = sizeof(*array); + if (percpu) + array_size += (u64) attr->max_entries * sizeof(void *); + else + array_size += (u64) attr->max_entries * elem_size; + + /* make sure there is no u32 overflow later in round_up() */ + if (array_size >= U32_MAX - PAGE_SIZE) return ERR_PTR(-ENOMEM); - array_size = sizeof(*array) + attr->max_entries * elem_size; /* allocate all map elements and zero-initialize them */ array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); @@ -52,12 +84,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) } /* copy mandatory map attributes */ + array->map.map_type = attr->map_type; array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; - array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; array->elem_size = elem_size; + if (!percpu) + goto out; + + array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); + + if (array_size >= U32_MAX - PAGE_SIZE || + elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { + kvfree(array); + return ERR_PTR(-ENOMEM); + } +out: + array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; + return &array->map; } @@ -67,12 +112,50 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; - if (index >= array->map.max_entries) + if (unlikely(index >= array->map.max_entries)) return NULL; return array->value + array->elem_size * index; } +/* Called from eBPF program */ +static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + + if (unlikely(index >= array->map.max_entries)) + return NULL; + + return this_cpu_ptr(array->pptrs[index]); +} + +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + void __percpu *pptr; + int cpu, off = 0; + u32 size; + + if (unlikely(index >= array->map.max_entries)) + return -ENOENT; + + /* per_cpu areas are zero-filled and bpf programs can only + * access 'value_size' of them, so copying rounded areas + * will not leak any kernel data + */ + size = round_up(map->value_size, 8); + rcu_read_lock(); + pptr = array->pptrs[index]; + for_each_possible_cpu(cpu) { + bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); + off += size; + } + rcu_read_unlock(); + return 0; +} + /* Called from syscall */ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { @@ -99,19 +182,62 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; - if (map_flags > BPF_EXIST) + if (unlikely(map_flags > BPF_EXIST)) /* unknown flags */ return -EINVAL; - if (index >= array->map.max_entries) + if (unlikely(index >= array->map.max_entries)) /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; - if (map_flags == BPF_NOEXIST) + if (unlikely(map_flags == BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; - memcpy(array->value + array->elem_size * index, value, map->value_size); + if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + memcpy(this_cpu_ptr(array->pptrs[index]), + value, map->value_size); + else + memcpy(array->value + array->elem_size * index, + value, map->value_size); + return 0; +} + +int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + void __percpu *pptr; + int cpu, off = 0; + u32 size; + + if (unlikely(map_flags > BPF_EXIST)) + /* unknown flags */ + return -EINVAL; + + if (unlikely(index >= array->map.max_entries)) + /* all elements were pre-allocated, cannot insert a new one */ + return -E2BIG; + + if (unlikely(map_flags == BPF_NOEXIST)) + /* all elements already exist */ + return -EEXIST; + + /* the user space will provide round_up(value_size, 8) bytes that + * will be copied into per-cpu area. bpf programs can only access + * value_size of it. During lookup the same extra bytes will be + * returned or zeros which were zero-filled by percpu_alloc, + * so no kernel data leaks possible + */ + size = round_up(map->value_size, 8); + rcu_read_lock(); + pptr = array->pptrs[index]; + for_each_possible_cpu(cpu) { + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); + off += size; + } + rcu_read_unlock(); return 0; } @@ -133,6 +259,9 @@ static void array_map_free(struct bpf_map *map) */ synchronize_rcu(); + if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + bpf_array_free_percpu(array); + kvfree(array); } @@ -150,9 +279,24 @@ static struct bpf_map_type_list array_type __read_mostly = { .type = BPF_MAP_TYPE_ARRAY, }; +static const struct bpf_map_ops percpu_array_ops = { + .map_alloc = array_map_alloc, + .map_free = array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = percpu_array_map_lookup_elem, + .map_update_elem = array_map_update_elem, + .map_delete_elem = array_map_delete_elem, +}; + +static struct bpf_map_type_list percpu_array_type __read_mostly = { + .ops = &percpu_array_ops, + .type = BPF_MAP_TYPE_PERCPU_ARRAY, +}; + static int __init register_array_map(void) { bpf_register_map_type(&array_type); + bpf_register_map_type(&percpu_array_type); return 0; } late_initcall(register_array_map); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index c5b30fd8a315..a68e95133fcd 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -31,21 +31,27 @@ struct bpf_htab { struct htab_elem { struct hlist_node hash_node; struct rcu_head rcu; - u32 hash; + union { + u32 hash; + u32 key_size; + }; char key[0] __aligned(8); }; /* Called from syscall */ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) { + bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_HASH; struct bpf_htab *htab; int err, i; + u64 cost; htab = kzalloc(sizeof(*htab), GFP_USER); if (!htab) return ERR_PTR(-ENOMEM); /* mandatory map attributes */ + htab->map.map_type = attr->map_type; htab->map.key_size = attr->key_size; htab->map.value_size = attr->value_size; htab->map.max_entries = attr->max_entries; @@ -77,24 +83,34 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) */ goto free_htab; + if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE) + /* make sure the size for pcpu_alloc() is reasonable */ + goto free_htab; + htab->elem_size = sizeof(struct htab_elem) + - round_up(htab->map.key_size, 8) + - htab->map.value_size; + round_up(htab->map.key_size, 8); + if (percpu) + htab->elem_size += sizeof(void *); + else + htab->elem_size += htab->map.value_size; /* prevent zero size kmalloc and check for u32 overflow */ if (htab->n_buckets == 0 || htab->n_buckets > U32_MAX / sizeof(struct bucket)) goto free_htab; - if ((u64) htab->n_buckets * sizeof(struct bucket) + - (u64) htab->elem_size * htab->map.max_entries >= - U32_MAX - PAGE_SIZE) + cost = (u64) htab->n_buckets * sizeof(struct bucket) + + (u64) htab->elem_size * htab->map.max_entries; + + if (percpu) + cost += (u64) round_up(htab->map.value_size, 8) * + num_possible_cpus() * htab->map.max_entries; + + if (cost >= U32_MAX - PAGE_SIZE) /* make sure page count doesn't overflow */ goto free_htab; - htab->map.pages = round_up(htab->n_buckets * sizeof(struct bucket) + - htab->elem_size * htab->map.max_entries, - PAGE_SIZE) >> PAGE_SHIFT; + htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; err = -ENOMEM; htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket), @@ -148,7 +164,7 @@ static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, } /* Called from syscall or from eBPF program */ -static void *htab_map_lookup_elem(struct bpf_map *map, void *key) +static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct hlist_head *head; @@ -166,6 +182,13 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key) l = lookup_elem_raw(head, hash, key, key_size); + return l; +} + +static void *htab_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct htab_elem *l = __htab_map_lookup_elem(map, key); + if (l) return l->key + round_up(map->key_size, 8); @@ -230,65 +253,149 @@ find_first_elem: return -ENOENT; } + +static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, + void __percpu *pptr) +{ + *(void __percpu **)(l->key + key_size) = pptr; +} + +static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size) +{ + return *(void __percpu **)(l->key + key_size); +} + +static void htab_percpu_elem_free(struct htab_elem *l) +{ + free_percpu(htab_elem_get_ptr(l, l->key_size)); + kfree(l); +} + +static void htab_percpu_elem_free_rcu(struct rcu_head *head) +{ + struct htab_elem *l = container_of(head, struct htab_elem, rcu); + + htab_percpu_elem_free(l); +} + +static void free_htab_elem(struct htab_elem *l, bool percpu, u32 key_size) +{ + if (percpu) { + l->key_size = key_size; + call_rcu(&l->rcu, htab_percpu_elem_free_rcu); + } else { + kfree_rcu(l, rcu); + } +} + +static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, + void *value, u32 key_size, u32 hash, + bool percpu, bool onallcpus) +{ + u32 size = htab->map.value_size; + struct htab_elem *l_new; + void __percpu *pptr; + + l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); + if (!l_new) + return NULL; + + memcpy(l_new->key, key, key_size); + if (percpu) { + /* round up value_size to 8 bytes */ + size = round_up(size, 8); + + /* alloc_percpu zero-fills */ + pptr = __alloc_percpu_gfp(size, 8, GFP_ATOMIC | __GFP_NOWARN); + if (!pptr) { + kfree(l_new); + return NULL; + } + + if (!onallcpus) { + /* copy true value_size bytes */ + memcpy(this_cpu_ptr(pptr), value, htab->map.value_size); + } else { + int off = 0, cpu; + + for_each_possible_cpu(cpu) { + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), + value + off, size); + off += size; + } + } + htab_elem_set_ptr(l_new, key_size, pptr); + } else { + memcpy(l_new->key + round_up(key_size, 8), value, size); + } + + l_new->hash = hash; + return l_new; +} + +static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, + u64 map_flags) +{ + if (!l_old && unlikely(atomic_read(&htab->count) >= htab->map.max_entries)) + /* if elem with this 'key' doesn't exist and we've reached + * max_entries limit, fail insertion of new elem + */ + return -E2BIG; + + if (l_old && map_flags == BPF_NOEXIST) + /* elem already exists */ + return -EEXIST; + + if (!l_old && map_flags == BPF_EXIST) + /* elem doesn't exist, cannot update it */ + return -ENOENT; + + return 0; +} + /* Called from syscall or from eBPF program */ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct htab_elem *l_new, *l_old; + struct htab_elem *l_new = NULL, *l_old; struct hlist_head *head; - struct bucket *b; unsigned long flags; - u32 key_size; + struct bucket *b; + u32 key_size, hash; int ret; - if (map_flags > BPF_EXIST) + if (unlikely(map_flags > BPF_EXIST)) /* unknown flags */ return -EINVAL; WARN_ON_ONCE(!rcu_read_lock_held()); - /* allocate new element outside of lock */ - l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); - if (!l_new) - return -ENOMEM; - key_size = map->key_size; - memcpy(l_new->key, key, key_size); - memcpy(l_new->key + round_up(key_size, 8), value, map->value_size); + hash = htab_map_hash(key, key_size); - l_new->hash = htab_map_hash(l_new->key, key_size); - b = __select_bucket(htab, l_new->hash); + /* allocate new element outside of the lock, since + * we're most likley going to insert it + */ + l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false); + if (!l_new) + return -ENOMEM; + + b = __select_bucket(htab, hash); head = &b->head; /* bpf_map_update_elem() can be called in_irq() */ raw_spin_lock_irqsave(&b->lock, flags); - l_old = lookup_elem_raw(head, l_new->hash, key, key_size); + l_old = lookup_elem_raw(head, hash, key, key_size); - if (!l_old && unlikely(atomic_read(&htab->count) >= map->max_entries)) { - /* if elem with this 'key' doesn't exist and we've reached - * max_entries limit, fail insertion of new elem - */ - ret = -E2BIG; + ret = check_flags(htab, l_old, map_flags); + if (ret) goto err; - } - if (l_old && map_flags == BPF_NOEXIST) { - /* elem already exists */ - ret = -EEXIST; - goto err; - } - - if (!l_old && map_flags == BPF_EXIST) { - /* elem doesn't exist, cannot update it */ - ret = -ENOENT; - goto err; - } - - /* add new element to the head of the list, so that concurrent - * search will find it before old elem + /* add new element to the head of the list, so that + * concurrent search will find it before old elem */ hlist_add_head_rcu(&l_new->hash_node, head); if (l_old) { @@ -298,7 +405,6 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, atomic_inc(&htab->count); } raw_spin_unlock_irqrestore(&b->lock, flags); - return 0; err: raw_spin_unlock_irqrestore(&b->lock, flags); @@ -306,10 +412,84 @@ err: return ret; } +static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags, + bool onallcpus) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct htab_elem *l_new = NULL, *l_old; + struct hlist_head *head; + unsigned long flags; + struct bucket *b; + u32 key_size, hash; + int ret; + + if (unlikely(map_flags > BPF_EXIST)) + /* unknown flags */ + return -EINVAL; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + key_size = map->key_size; + + hash = htab_map_hash(key, key_size); + + b = __select_bucket(htab, hash); + head = &b->head; + + /* bpf_map_update_elem() can be called in_irq() */ + raw_spin_lock_irqsave(&b->lock, flags); + + l_old = lookup_elem_raw(head, hash, key, key_size); + + ret = check_flags(htab, l_old, map_flags); + if (ret) + goto err; + + if (l_old) { + void __percpu *pptr = htab_elem_get_ptr(l_old, key_size); + u32 size = htab->map.value_size; + + /* per-cpu hash map can update value in-place */ + if (!onallcpus) { + memcpy(this_cpu_ptr(pptr), value, size); + } else { + int off = 0, cpu; + + size = round_up(size, 8); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), + value + off, size); + off += size; + } + } + } else { + l_new = alloc_htab_elem(htab, key, value, key_size, + hash, true, onallcpus); + if (!l_new) { + ret = -ENOMEM; + goto err; + } + hlist_add_head_rcu(&l_new->hash_node, head); + atomic_inc(&htab->count); + } + ret = 0; +err: + raw_spin_unlock_irqrestore(&b->lock, flags); + return ret; +} + +static int htab_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + return __htab_percpu_map_update_elem(map, key, value, map_flags, false); +} + /* Called from syscall or from eBPF program */ static int htab_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_HASH; struct hlist_head *head; struct bucket *b; struct htab_elem *l; @@ -332,7 +512,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) if (l) { hlist_del_rcu(&l->hash_node); atomic_dec(&htab->count); - kfree_rcu(l, rcu); + free_htab_elem(l, percpu, key_size); ret = 0; } @@ -352,7 +532,12 @@ static void delete_all_elements(struct bpf_htab *htab) hlist_for_each_entry_safe(l, n, head, hash_node) { hlist_del_rcu(&l->hash_node); atomic_dec(&htab->count); - kfree(l); + if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) { + l->key_size = htab->map.key_size; + htab_percpu_elem_free(l); + } else { + kfree(l); + } } } } @@ -391,9 +576,76 @@ static struct bpf_map_type_list htab_type __read_mostly = { .type = BPF_MAP_TYPE_HASH, }; +/* Called from eBPF program */ +static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct htab_elem *l = __htab_map_lookup_elem(map, key); + + if (l) + return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); + else + return NULL; +} + +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) +{ + struct htab_elem *l; + void __percpu *pptr; + int ret = -ENOENT; + int cpu, off = 0; + u32 size; + + /* per_cpu areas are zero-filled and bpf programs can only + * access 'value_size' of them, so copying rounded areas + * will not leak any kernel data + */ + size = round_up(map->value_size, 8); + rcu_read_lock(); + l = __htab_map_lookup_elem(map, key); + if (!l) + goto out; + pptr = htab_elem_get_ptr(l, map->key_size); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(value + off, + per_cpu_ptr(pptr, cpu), size); + off += size; + } + ret = 0; +out: + rcu_read_unlock(); + return ret; +} + +int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + int ret; + + rcu_read_lock(); + ret = __htab_percpu_map_update_elem(map, key, value, map_flags, true); + rcu_read_unlock(); + + return ret; +} + +static const struct bpf_map_ops htab_percpu_ops = { + .map_alloc = htab_map_alloc, + .map_free = htab_map_free, + .map_get_next_key = htab_map_get_next_key, + .map_lookup_elem = htab_percpu_map_lookup_elem, + .map_update_elem = htab_percpu_map_update_elem, + .map_delete_elem = htab_map_delete_elem, +}; + +static struct bpf_map_type_list htab_percpu_type __read_mostly = { + .ops = &htab_percpu_ops, + .type = BPF_MAP_TYPE_PERCPU_HASH, +}; + static int __init register_htab_map(void) { bpf_register_map_type(&htab_type); + bpf_register_map_type(&htab_percpu_type); return 0; } late_initcall(register_htab_map); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c new file mode 100644 index 000000000000..8a60ee14a977 --- /dev/null +++ b/kernel/bpf/stackmap.c @@ -0,0 +1,237 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/jhash.h> +#include <linux/filter.h> +#include <linux/vmalloc.h> +#include <linux/stacktrace.h> +#include <linux/perf_event.h> + +struct stack_map_bucket { + struct rcu_head rcu; + u32 hash; + u32 nr; + u64 ip[]; +}; + +struct bpf_stack_map { + struct bpf_map map; + u32 n_buckets; + struct stack_map_bucket __rcu *buckets[]; +}; + +/* Called from syscall */ +static struct bpf_map *stack_map_alloc(union bpf_attr *attr) +{ + u32 value_size = attr->value_size; + struct bpf_stack_map *smap; + u64 cost, n_buckets; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || + value_size < 8 || value_size % 8 || + value_size / 8 > PERF_MAX_STACK_DEPTH) + return ERR_PTR(-EINVAL); + + /* hash table size must be power of 2 */ + n_buckets = roundup_pow_of_two(attr->max_entries); + + cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); + if (cost >= U32_MAX - PAGE_SIZE) + return ERR_PTR(-E2BIG); + + smap = kzalloc(cost, GFP_USER | __GFP_NOWARN); + if (!smap) { + smap = vzalloc(cost); + if (!smap) + return ERR_PTR(-ENOMEM); + } + + err = -E2BIG; + cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); + if (cost >= U32_MAX - PAGE_SIZE) + goto free_smap; + + smap->map.map_type = attr->map_type; + smap->map.key_size = attr->key_size; + smap->map.value_size = value_size; + smap->map.max_entries = attr->max_entries; + smap->n_buckets = n_buckets; + smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + err = get_callchain_buffers(); + if (err) + goto free_smap; + + return &smap->map; + +free_smap: + kvfree(smap); + return ERR_PTR(err); +} + +static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +{ + struct pt_regs *regs = (struct pt_regs *) (long) r1; + struct bpf_map *map = (struct bpf_map *) (long) r2; + struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); + struct perf_callchain_entry *trace; + struct stack_map_bucket *bucket, *new_bucket, *old_bucket; + u32 max_depth = map->value_size / 8; + /* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */ + u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth; + u32 skip = flags & BPF_F_SKIP_FIELD_MASK; + u32 hash, id, trace_nr, trace_len; + bool user = flags & BPF_F_USER_STACK; + bool kernel = !user; + u64 *ips; + + if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | + BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) + return -EINVAL; + + trace = get_perf_callchain(regs, init_nr, kernel, user, false, false); + + if (unlikely(!trace)) + /* couldn't fetch the stack trace */ + return -EFAULT; + + /* get_perf_callchain() guarantees that trace->nr >= init_nr + * and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth + */ + trace_nr = trace->nr - init_nr; + + if (trace_nr <= skip) + /* skipping more than usable stack trace */ + return -EFAULT; + + trace_nr -= skip; + trace_len = trace_nr * sizeof(u64); + ips = trace->ip + skip + init_nr; + hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); + id = hash & (smap->n_buckets - 1); + bucket = rcu_dereference(smap->buckets[id]); + + if (bucket && bucket->hash == hash) { + if (flags & BPF_F_FAST_STACK_CMP) + return id; + if (bucket->nr == trace_nr && + memcmp(bucket->ip, ips, trace_len) == 0) + return id; + } + + /* this call stack is not in the map, try to add it */ + if (bucket && !(flags & BPF_F_REUSE_STACKID)) + return -EEXIST; + + new_bucket = kmalloc(sizeof(struct stack_map_bucket) + map->value_size, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!new_bucket)) + return -ENOMEM; + + memcpy(new_bucket->ip, ips, trace_len); + memset(new_bucket->ip + trace_len / 8, 0, map->value_size - trace_len); + new_bucket->hash = hash; + new_bucket->nr = trace_nr; + + old_bucket = xchg(&smap->buckets[id], new_bucket); + if (old_bucket) + kfree_rcu(old_bucket, rcu); + return id; +} + +const struct bpf_func_proto bpf_get_stackid_proto = { + .func = bpf_get_stackid, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + +/* Called from syscall or from eBPF program */ +static void *stack_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); + struct stack_map_bucket *bucket; + u32 id = *(u32 *)key; + + if (unlikely(id >= smap->n_buckets)) + return NULL; + bucket = rcu_dereference(smap->buckets[id]); + return bucket ? bucket->ip : NULL; +} + +static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -EINVAL; +} + +static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + return -EINVAL; +} + +/* Called from syscall or from eBPF program */ +static int stack_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); + struct stack_map_bucket *old_bucket; + u32 id = *(u32 *)key; + + if (unlikely(id >= smap->n_buckets)) + return -E2BIG; + + old_bucket = xchg(&smap->buckets[id], NULL); + if (old_bucket) { + kfree_rcu(old_bucket, rcu); + return 0; + } else { + return -ENOENT; + } +} + +/* Called when map->refcnt goes to zero, either from workqueue or from syscall */ +static void stack_map_free(struct bpf_map *map) +{ + struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); + int i; + + synchronize_rcu(); + + for (i = 0; i < smap->n_buckets; i++) + if (smap->buckets[i]) + kfree_rcu(smap->buckets[i], rcu); + kvfree(smap); + put_callchain_buffers(); +} + +static const struct bpf_map_ops stack_map_ops = { + .map_alloc = stack_map_alloc, + .map_free = stack_map_free, + .map_get_next_key = stack_map_get_next_key, + .map_lookup_elem = stack_map_lookup_elem, + .map_update_elem = stack_map_update_elem, + .map_delete_elem = stack_map_delete_elem, +}; + +static struct bpf_map_type_list stack_map_type __read_mostly = { + .ops = &stack_map_ops, + .type = BPF_MAP_TYPE_STACK_TRACE, +}; + +static int __init register_stack_map(void) +{ + bpf_register_map_type(&stack_map_type); + return 0; +} +late_initcall(register_stack_map); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 637397059f76..c95a753c2007 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -239,6 +239,7 @@ static int map_lookup_elem(union bpf_attr *attr) int ufd = attr->map_fd; struct bpf_map *map; void *key, *value, *ptr; + u32 value_size; struct fd f; int err; @@ -259,23 +260,35 @@ static int map_lookup_elem(union bpf_attr *attr) if (copy_from_user(key, ukey, map->key_size) != 0) goto free_key; + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + value_size = round_up(map->value_size, 8) * num_possible_cpus(); + else + value_size = map->value_size; + err = -ENOMEM; - value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; - rcu_read_lock(); - ptr = map->ops->map_lookup_elem(map, key); - if (ptr) - memcpy(value, ptr, map->value_size); - rcu_read_unlock(); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) { + err = bpf_percpu_hash_copy(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + err = bpf_percpu_array_copy(map, key, value); + } else { + rcu_read_lock(); + ptr = map->ops->map_lookup_elem(map, key); + if (ptr) + memcpy(value, ptr, value_size); + rcu_read_unlock(); + err = ptr ? 0 : -ENOENT; + } - err = -ENOENT; - if (!ptr) + if (err) goto free_value; err = -EFAULT; - if (copy_to_user(uvalue, value, map->value_size) != 0) + if (copy_to_user(uvalue, value, value_size) != 0) goto free_value; err = 0; @@ -298,6 +311,7 @@ static int map_update_elem(union bpf_attr *attr) int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; + u32 value_size; struct fd f; int err; @@ -318,21 +332,30 @@ static int map_update_elem(union bpf_attr *attr) if (copy_from_user(key, ukey, map->key_size) != 0) goto free_key; + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + value_size = round_up(map->value_size, 8) * num_possible_cpus(); + else + value_size = map->value_size; + err = -ENOMEM; - value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; err = -EFAULT; - if (copy_from_user(value, uvalue, map->value_size) != 0) + if (copy_from_user(value, uvalue, value_size) != 0) goto free_value; - /* eBPF program that use maps are running under rcu_read_lock(), - * therefore all map accessors rely on this fact, so do the same here - */ - rcu_read_lock(); - err = map->ops->map_update_elem(map, key, value, attr->flags); - rcu_read_unlock(); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) { + err = bpf_percpu_hash_update(map, key, value, attr->flags); + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + err = bpf_percpu_array_update(map, key, value, attr->flags); + } else { + rcu_read_lock(); + err = map->ops->map_update_elem(map, key, value, attr->flags); + rcu_read_unlock(); + } free_value: kfree(value); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2e7f7ab739e4..2e08f8e9b771 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -246,6 +246,7 @@ static const struct { {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, + {BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid}, }; static void print_verifier_state(struct verifier_env *env) @@ -778,15 +779,24 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized */ -static int check_stack_boundary(struct verifier_env *env, - int regno, int access_size) +static int check_stack_boundary(struct verifier_env *env, int regno, + int access_size, bool zero_size_allowed) { struct verifier_state *state = &env->cur_state; struct reg_state *regs = state->regs; int off, i; - if (regs[regno].type != PTR_TO_STACK) + if (regs[regno].type != PTR_TO_STACK) { + if (zero_size_allowed && access_size == 0 && + regs[regno].type == CONST_IMM && + regs[regno].imm == 0) + return 0; + + verbose("R%d type=%s expected=%s\n", regno, + reg_type_str[regs[regno].type], + reg_type_str[PTR_TO_STACK]); return -EACCES; + } off = regs[regno].imm; if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || @@ -829,15 +839,24 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return 0; } - if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY || + if (arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { expected_type = PTR_TO_STACK; - } else if (arg_type == ARG_CONST_STACK_SIZE) { + } else if (arg_type == ARG_CONST_STACK_SIZE || + arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { expected_type = CONST_IMM; } else if (arg_type == ARG_CONST_MAP_PTR) { expected_type = CONST_PTR_TO_MAP; } else if (arg_type == ARG_PTR_TO_CTX) { expected_type = PTR_TO_CTX; + } else if (arg_type == ARG_PTR_TO_STACK) { + expected_type = PTR_TO_STACK; + /* One exception here. In case function allows for NULL to be + * passed in as argument, it's a CONST_IMM type. Final test + * happens during stack boundary checking. + */ + if (reg->type == CONST_IMM && reg->imm == 0) + expected_type = CONST_IMM; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; @@ -867,8 +886,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->key\n"); return -EACCES; } - err = check_stack_boundary(env, regno, (*mapp)->key_size); - + err = check_stack_boundary(env, regno, (*mapp)->key_size, + false); } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity @@ -878,9 +897,12 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->value\n"); return -EACCES; } - err = check_stack_boundary(env, regno, (*mapp)->value_size); + err = check_stack_boundary(env, regno, (*mapp)->value_size, + false); + } else if (arg_type == ARG_CONST_STACK_SIZE || + arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { + bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO); - } else if (arg_type == ARG_CONST_STACK_SIZE) { /* bpf_xxx(..., buf, len) call will access 'len' bytes * from stack pointer 'buf'. Check it * note: regno == len, regno - 1 == buf @@ -890,7 +912,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("ARG_CONST_STACK_SIZE cannot be first argument\n"); return -EACCES; } - err = check_stack_boundary(env, regno - 1, reg->imm); + err = check_stack_boundary(env, regno - 1, reg->imm, + zero_size_allowed); } return err; @@ -911,8 +934,11 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) * don't allow any other map type to be passed into * the special func; */ - if (bool_func && bool_map != bool_func) + if (bool_func && bool_map != bool_func) { + verbose("cannot pass map_type %d into func %d\n", + map->map_type, func_id); return -EINVAL; + } } return 0; diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 9c418002b8c1..343c22f5e867 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -159,15 +159,24 @@ put_callchain_entry(int rctx) struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs) { - int rctx; - struct perf_callchain_entry *entry; - - int kernel = !event->attr.exclude_callchain_kernel; - int user = !event->attr.exclude_callchain_user; + bool kernel = !event->attr.exclude_callchain_kernel; + bool user = !event->attr.exclude_callchain_user; + /* Disallow cross-task user callchains. */ + bool crosstask = event->ctx->task && event->ctx->task != current; if (!kernel && !user) return NULL; + return get_perf_callchain(regs, 0, kernel, user, crosstask, true); +} + +struct perf_callchain_entry * +get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, + bool crosstask, bool add_mark) +{ + struct perf_callchain_entry *entry; + int rctx; + entry = get_callchain_entry(&rctx); if (rctx == -1) return NULL; @@ -175,10 +184,11 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) if (!entry) goto exit_put; - entry->nr = 0; + entry->nr = init_nr; if (kernel && !user_mode(regs)) { - perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + if (add_mark) + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_kernel(entry, regs); } @@ -191,13 +201,11 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) } if (regs) { - /* - * Disallow cross-task user callchains. - */ - if (event->ctx->task && event->ctx->task != current) + if (crosstask) goto exit_put; - perf_callchain_store(entry, PERF_CONTEXT_USER); + if (add_mark) + perf_callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_user(entry, regs); } } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 2bbad9c1274c..4199b6d193f5 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -182,8 +182,6 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) /* Callchain handling */ extern struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs); -extern int get_callchain_buffers(void); -extern void put_callchain_buffers(void); static inline int get_recursion_context(int *recursion) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 326a75e884db..4b8caa392b86 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -299,6 +299,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_perf_event_read_proto; case BPF_FUNC_perf_event_output: return &bpf_perf_event_output_proto; + case BPF_FUNC_get_stackid: + return &bpf_get_stackid_proto; default: return NULL; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8bfd1aca7a3d..60d09e9e8e8b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1753,6 +1753,14 @@ config TEST_KSTRTOX config TEST_PRINTF tristate "Test printf() family of functions at runtime" +config TEST_BITMAP + tristate "Test bitmap_*() family of functions at runtime" + default n + help + Enable this option to test the bitmap functions at boot. + + If unsure, say N. + config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" default n diff --git a/lib/Makefile b/lib/Makefile index a7c26a41a738..dda4039588b1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o +obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/bitmap.c b/lib/bitmap.c index 814814397cce..c66da508cbf7 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -12,6 +12,8 @@ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/bug.h> +#include <linux/kernel.h> +#include <linux/string.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -1060,6 +1062,93 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) EXPORT_SYMBOL(bitmap_allocate_region); /** + * bitmap_from_u32array - copy the contents of a u32 array of bits to bitmap + * @bitmap: array of unsigned longs, the destination bitmap, non NULL + * @nbits: number of bits in @bitmap + * @buf: array of u32 (in host byte order), the source bitmap, non NULL + * @nwords: number of u32 words in @buf + * + * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining + * bits between nword and nbits in @bitmap (if any) are cleared. In + * last word of @bitmap, the bits beyond nbits (if any) are kept + * unchanged. + * + * Return the number of bits effectively copied. + */ +unsigned int +bitmap_from_u32array(unsigned long *bitmap, unsigned int nbits, + const u32 *buf, unsigned int nwords) +{ + unsigned int dst_idx, src_idx; + + for (src_idx = dst_idx = 0; dst_idx < BITS_TO_LONGS(nbits); ++dst_idx) { + unsigned long part = 0; + + if (src_idx < nwords) + part = buf[src_idx++]; + +#if BITS_PER_LONG == 64 + if (src_idx < nwords) + part |= ((unsigned long) buf[src_idx++]) << 32; +#endif + + if (dst_idx < nbits/BITS_PER_LONG) + bitmap[dst_idx] = part; + else { + unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); + + bitmap[dst_idx] = (bitmap[dst_idx] & ~mask) + | (part & mask); + } + } + + return min_t(unsigned int, nbits, 32*nwords); +} +EXPORT_SYMBOL(bitmap_from_u32array); + +/** + * bitmap_to_u32array - copy the contents of bitmap to a u32 array of bits + * @buf: array of u32 (in host byte order), the dest bitmap, non NULL + * @nwords: number of u32 words in @buf + * @bitmap: array of unsigned longs, the source bitmap, non NULL + * @nbits: number of bits in @bitmap + * + * copy min(nbits, 32*nwords) bits from @bitmap to @buf. Remaining + * bits after nbits in @buf (if any) are cleared. + * + * Return the number of bits effectively copied. + */ +unsigned int +bitmap_to_u32array(u32 *buf, unsigned int nwords, + const unsigned long *bitmap, unsigned int nbits) +{ + unsigned int dst_idx = 0, src_idx = 0; + + while (dst_idx < nwords) { + unsigned long part = 0; + + if (src_idx < BITS_TO_LONGS(nbits)) { + part = bitmap[src_idx]; + if (src_idx >= nbits/BITS_PER_LONG) + part &= BITMAP_LAST_WORD_MASK(nbits); + src_idx++; + } + + buf[dst_idx++] = part & 0xffffffffUL; + +#if BITS_PER_LONG == 64 + if (dst_idx < nwords) { + part >>= 32; + buf[dst_idx++] = part & 0xffffffffUL; + } +#endif + } + + return min_t(unsigned int, nbits, 32*nwords); +} +EXPORT_SYMBOL(bitmap_to_u32array); + +/** * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. * @dst: destination buffer * @src: bitmap to copy diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c new file mode 100644 index 000000000000..e2cbd43d193c --- /dev/null +++ b/lib/test_bitmap.c @@ -0,0 +1,358 @@ +/* + * Test cases for printf facility. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/bitmap.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/string.h> + +static unsigned total_tests __initdata; +static unsigned failed_tests __initdata; + +static char pbl_buffer[PAGE_SIZE] __initdata; + + +static bool __init +__check_eq_uint(const char *srcfile, unsigned int line, + const unsigned int exp_uint, unsigned int x) +{ + if (exp_uint != x) { + pr_warn("[%s:%u] expected %u, got %u\n", + srcfile, line, exp_uint, x); + return false; + } + return true; +} + + +static bool __init +__check_eq_bitmap(const char *srcfile, unsigned int line, + const unsigned long *exp_bmap, unsigned int exp_nbits, + const unsigned long *bmap, unsigned int nbits) +{ + if (exp_nbits != nbits) { + pr_warn("[%s:%u] bitmap length mismatch: expected %u, got %u\n", + srcfile, line, exp_nbits, nbits); + return false; + } + + if (!bitmap_equal(exp_bmap, bmap, nbits)) { + pr_warn("[%s:%u] bitmaps contents differ: expected \"%*pbl\", got \"%*pbl\"\n", + srcfile, line, + exp_nbits, exp_bmap, nbits, bmap); + return false; + } + return true; +} + +static bool __init +__check_eq_pbl(const char *srcfile, unsigned int line, + const char *expected_pbl, + const unsigned long *bitmap, unsigned int nbits) +{ + snprintf(pbl_buffer, sizeof(pbl_buffer), "%*pbl", nbits, bitmap); + if (strcmp(expected_pbl, pbl_buffer)) { + pr_warn("[%s:%u] expected \"%s\", got \"%s\"\n", + srcfile, line, + expected_pbl, pbl_buffer); + return false; + } + return true; +} + +static bool __init +__check_eq_u32_array(const char *srcfile, unsigned int line, + const u32 *exp_arr, unsigned int exp_len, + const u32 *arr, unsigned int len) +{ + if (exp_len != len) { + pr_warn("[%s:%u] array length differ: expected %u, got %u\n", + srcfile, line, + exp_len, len); + return false; + } + + if (memcmp(exp_arr, arr, len*sizeof(*arr))) { + pr_warn("[%s:%u] array contents differ\n", srcfile, line); + print_hex_dump(KERN_WARNING, " exp: ", DUMP_PREFIX_OFFSET, + 32, 4, exp_arr, exp_len*sizeof(*exp_arr), false); + print_hex_dump(KERN_WARNING, " got: ", DUMP_PREFIX_OFFSET, + 32, 4, arr, len*sizeof(*arr), false); + return false; + } + + return true; +} + +#define __expect_eq(suffix, ...) \ + ({ \ + int result = 0; \ + total_tests++; \ + if (!__check_eq_ ## suffix(__FILE__, __LINE__, \ + ##__VA_ARGS__)) { \ + failed_tests++; \ + result = 1; \ + } \ + result; \ + }) + +#define expect_eq_uint(...) __expect_eq(uint, ##__VA_ARGS__) +#define expect_eq_bitmap(...) __expect_eq(bitmap, ##__VA_ARGS__) +#define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__) +#define expect_eq_u32_array(...) __expect_eq(u32_array, ##__VA_ARGS__) + +static void __init test_zero_fill_copy(void) +{ + DECLARE_BITMAP(bmap1, 1024); + DECLARE_BITMAP(bmap2, 1024); + + bitmap_zero(bmap1, 1024); + bitmap_zero(bmap2, 1024); + + /* single-word bitmaps */ + expect_eq_pbl("", bmap1, 23); + + bitmap_fill(bmap1, 19); + expect_eq_pbl("0-18", bmap1, 1024); + + bitmap_copy(bmap2, bmap1, 23); + expect_eq_pbl("0-18", bmap2, 1024); + + bitmap_fill(bmap2, 23); + expect_eq_pbl("0-22", bmap2, 1024); + + bitmap_copy(bmap2, bmap1, 23); + expect_eq_pbl("0-18", bmap2, 1024); + + bitmap_zero(bmap1, 23); + expect_eq_pbl("", bmap1, 1024); + + /* multi-word bitmaps */ + bitmap_zero(bmap1, 1024); + expect_eq_pbl("", bmap1, 1024); + + bitmap_fill(bmap1, 109); + expect_eq_pbl("0-108", bmap1, 1024); + + bitmap_copy(bmap2, bmap1, 1024); + expect_eq_pbl("0-108", bmap2, 1024); + + bitmap_fill(bmap2, 1024); + expect_eq_pbl("0-1023", bmap2, 1024); + + bitmap_copy(bmap2, bmap1, 1024); + expect_eq_pbl("0-108", bmap2, 1024); + + /* the following tests assume a 32- or 64-bit arch (even 128b + * if we care) + */ + + bitmap_fill(bmap2, 1024); + bitmap_copy(bmap2, bmap1, 109); /* ... but 0-padded til word length */ + expect_eq_pbl("0-108,128-1023", bmap2, 1024); + + bitmap_fill(bmap2, 1024); + bitmap_copy(bmap2, bmap1, 97); /* ... but aligned on word length */ + expect_eq_pbl("0-108,128-1023", bmap2, 1024); + + bitmap_zero(bmap2, 97); /* ... but 0-padded til word length */ + expect_eq_pbl("128-1023", bmap2, 1024); +} + +static void __init test_bitmap_u32_array_conversions(void) +{ + DECLARE_BITMAP(bmap1, 1024); + DECLARE_BITMAP(bmap2, 1024); + u32 exp_arr[32], arr[32]; + unsigned nbits; + + for (nbits = 0 ; nbits < 257 ; ++nbits) { + const unsigned int used_u32s = DIV_ROUND_UP(nbits, 32); + unsigned int i, rv; + + bitmap_zero(bmap1, nbits); + bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ + + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); + expect_eq_uint(nbits, rv); + + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, nbits, arr, used_u32s); + expect_eq_uint(nbits, rv); + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + for (i = 0 ; i < nbits ; ++i) { + /* + * test conversion bitmap -> u32[] + */ + + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ + + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); + expect_eq_uint(nbits, rv); + + /* 1st used u32 words contain expected bit set, the + * remaining words are left unchanged (0xff) + */ + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + + /* same, with longer array to fill + */ + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, 32, bmap1, nbits); + expect_eq_uint(nbits, rv); + + /* 1st used u32 words contain expected bit set, the + * remaining words are all 0s + */ + memset(exp_arr, 0, sizeof(exp_arr)); + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + /* + * test conversion u32[] -> bitmap + */ + + /* the 1st nbits of bmap2 are identical to + * bmap1, the remaining bits of bmap2 are left + * unchanged (all 1s) + */ + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, nbits, + exp_arr, used_u32s); + expect_eq_uint(nbits, rv); + + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + /* same, with more bits to fill + */ + memset(arr, 0xff, sizeof(arr)); /* garbage */ + memset(arr, 0, used_u32s*sizeof(u32)); + arr[i/32] = (1U<<(i%32)); + + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, 1024, arr, used_u32s); + expect_eq_uint(used_u32s*32, rv); + + /* the 1st nbits of bmap2 are identical to + * bmap1, the remaining bits of bmap2 are cleared + */ + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + + /* + * test short conversion bitmap -> u32[] (1 + * word too short) + */ + if (used_u32s > 1) { + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + bitmap_set(bmap1, nbits, + 1024 - nbits); /* garbage */ + memset(arr, 0xff, sizeof(arr)); + + rv = bitmap_to_u32array(arr, used_u32s - 1, + bmap1, nbits); + expect_eq_uint((used_u32s - 1)*32, rv); + + /* 1st used u32 words contain expected + * bit set, the remaining words are + * left unchanged (0xff) + */ + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, + (used_u32s-1)*sizeof(*exp_arr)); + if ((i/32) < (used_u32s - 1)) + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + } + + /* + * test short conversion u32[] -> bitmap (3 + * bits too short) + */ + if (nbits > 3) { + memset(arr, 0xff, sizeof(arr)); /* garbage */ + memset(arr, 0, used_u32s*sizeof(*arr)); + arr[i/32] = (1U<<(i%32)); + + bitmap_zero(bmap1, 1024); + rv = bitmap_from_u32array(bmap1, nbits - 3, + arr, used_u32s); + expect_eq_uint(nbits - 3, rv); + + /* we are expecting the bit < nbits - + * 3 (none otherwise), and the rest of + * bmap1 unchanged (0-filled) + */ + bitmap_zero(bmap2, 1024); + if (i < nbits - 3) + __set_bit(i, bmap2); + expect_eq_bitmap(bmap2, 1024, bmap1, 1024); + + /* do the same with bmap1 initially + * 1-filled + */ + + bitmap_fill(bmap1, 1024); + rv = bitmap_from_u32array(bmap1, nbits - 3, + arr, used_u32s); + expect_eq_uint(nbits - 3, rv); + + /* we are expecting the bit < nbits - + * 3 (none otherwise), and the rest of + * bmap1 unchanged (1-filled) + */ + bitmap_zero(bmap2, 1024); + if (i < nbits - 3) + __set_bit(i, bmap2); + bitmap_set(bmap2, nbits-3, 1024 - nbits + 3); + expect_eq_bitmap(bmap2, 1024, bmap1, 1024); + } + } + } +} + +static int __init test_bitmap_init(void) +{ + test_zero_fill_copy(); + test_bitmap_u32_array_conversions(); + + if (failed_tests == 0) + pr_info("all %u tests passed\n", total_tests); + else + pr_warn("failed %u out of %u tests\n", + failed_tests, total_tests); + + return failed_tests ? -EINVAL : 0; +} + +static void __exit test_bitmap_cleanup(void) +{ +} + +module_init(test_bitmap_init); +module_exit(test_bitmap_cleanup); + +MODULE_AUTHOR("david decotigny <david.decotigny@googlers.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index d2cd9de4b724..69929c05c843 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -261,7 +261,6 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) * hope the underlying device can handle it. */ new_dev->mtu = real_dev->mtu; - new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT); vlan = vlan_dev_priv(new_dev); vlan->vlan_proto = htons(ETH_P_8021Q); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ad5e2fd1012c..055f0e989e90 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -799,6 +799,7 @@ void vlan_setup(struct net_device *dev) ether_setup(dev); dev->priv_flags |= IFF_802_1Q_VLAN | IFF_NO_QUEUE; + dev->priv_flags |= IFF_UNICAST_FLT; dev->priv_flags &= ~IFF_TX_SKB_SHARING; netif_keep_dst(dev); diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index ae63cf72a953..5f1446c9f098 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -184,12 +184,11 @@ int vlan_proc_add_dev(struct net_device *vlandev) /* * Delete directory entry for VLAN device. */ -int vlan_proc_rem_dev(struct net_device *vlandev) +void vlan_proc_rem_dev(struct net_device *vlandev) { /** NOTE: This will consume the memory pointed to by dent, it seems. */ proc_remove(vlan_dev_priv(vlandev)->dent); vlan_dev_priv(vlandev)->dent = NULL; - return 0; } /****** Proc filesystem entry points ****************************************/ diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h index 063f60a3d5cc..8838a2e92eb6 100644 --- a/net/8021q/vlanproc.h +++ b/net/8021q/vlanproc.h @@ -5,7 +5,7 @@ struct net; int vlan_proc_init(struct net *net); -int vlan_proc_rem_dev(struct net_device *vlandev); +void vlan_proc_rem_dev(struct net_device *vlandev); int vlan_proc_add_dev(struct net_device *vlandev); void vlan_proc_cleanup(struct net *net); @@ -14,7 +14,7 @@ void vlan_proc_cleanup(struct net *net); #define vlan_proc_init(net) (0) #define vlan_proc_cleanup(net) do {} while (0) #define vlan_proc_add_dev(dev) ({(void)(dev), 0; }) -#define vlan_proc_rem_dev(dev) ({(void)(dev), 0; }) +#define vlan_proc_rem_dev(dev) do {} while (0) #endif #endif /* !(__BEN_VLAN_PROC_INC__) */ diff --git a/net/Kconfig b/net/Kconfig index 174354618f8a..b80efecfc1a0 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -392,6 +392,10 @@ config LWTUNNEL weight tunnel endpoint. Tunnel encapsulation parameters are stored with light weight tunnel state associated with fib routes. +config DST_CACHE + bool "dst cache" + default n + endif # if NET # Used by archs to tell that they support BPF_JIT diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index c6fc8f756c9a..2dd40e5ea030 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -12,7 +12,7 @@ config BATMAN_ADV B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is a routing protocol for multi-hop ad-hoc mesh networks. The networks may be wired or wireless. See - http://www.open-mesh.org/ for more information and user space + https://www.open-mesh.org/ for more information and user space tools. config BATMAN_ADV_BLA diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 21434ab79d2c..207e2af316c7 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # -# Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 4e59cf3eb079..a7485d676088 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index df625de55ef2..bf0e7d6f12bb 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -31,6 +31,7 @@ #include <linux/init.h> #include <linux/jiffies.h> #include <linux/list.h> +#include <linux/kref.h> #include <linux/netdevice.h> #include <linux/pkt_sched.h> #include <linux/printk.h> @@ -88,7 +89,7 @@ static void batadv_ring_buffer_set(u8 lq_recv[], u8 *lq_index, u8 value) * in the given ring buffer * @lq_recv: pointer to the ring buffer * - * Returns computed average value. + * Return: computed average value. */ static u8 batadv_ring_buffer_avg(const u8 lq_recv[]) { @@ -132,7 +133,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node) * @orig_node: the orig_node that has to be changed * @max_if_num: the current amount of interfaces * - * Returns 0 on success, a negative error code otherwise. + * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, int max_if_num) @@ -180,7 +181,7 @@ unlock: * @max_if_num: the current amount of interfaces * @del_if_num: the index of the interface being removed * - * Returns 0 on success, a negative error code otherwise. + * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, int max_if_num, int del_if_num) @@ -246,7 +247,7 @@ unlock: * @bat_priv: the bat priv with all the soft interface information * @addr: mac address of the originator * - * Returns the originator object corresponding to the passed mac address or NULL + * Return: the originator object corresponding to the passed mac address or NULL * on failure. * If the object does not exists it is created an initialised. */ @@ -396,7 +397,14 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv) return new_tq; } -/* is there another aggregated packet here? */ +/** + * batadv_iv_ogm_aggr_packet - checks if there is another OGM attached + * @buff_pos: current position in the skb + * @packet_len: total length of the skb + * @tvlv_len: tvlv length of the previously considered OGM + * + * Return: true if there is enough space for another OGM, false otherwise. + */ static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, __be16 tvlv_len) { @@ -522,7 +530,7 @@ out: * @if_outgoing: interface for which the retransmission should be considered * @forw_packet: the forwarded packet which should be checked * - * Returns true if new_packet can be aggregated with forw_packet + * Return: true if new_packet can be aggregated with forw_packet */ static bool batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, @@ -636,10 +644,10 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, unsigned char *skb_buff; unsigned int skb_size; - if (!atomic_inc_not_zero(&if_incoming->refcount)) + if (!kref_get_unless_zero(&if_incoming->refcount)) return; - if (!atomic_inc_not_zero(&if_outgoing->refcount)) + if (!kref_get_unless_zero(&if_outgoing->refcount)) goto out_free_incoming; /* own packet should always be scheduled */ @@ -995,7 +1003,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, neigh_addr = tmp_neigh_node->addr; if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && tmp_neigh_node->if_incoming == if_incoming && - atomic_inc_not_zero(&tmp_neigh_node->refcount)) { + kref_get_unless_zero(&tmp_neigh_node->refcount)) { if (WARN(neigh_node, "too many matching neigh_nodes")) batadv_neigh_node_free_ref(neigh_node); neigh_node = tmp_neigh_node; @@ -1125,7 +1133,7 @@ out: * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * - * Returns 1 if the link can be considered bidirectional, 0 otherwise + * Return: 1 if the link can be considered bidirectional, 0 otherwise */ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, @@ -1154,7 +1162,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, if (tmp_neigh_node->if_incoming != if_incoming) continue; - if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + if (!kref_get_unless_zero(&tmp_neigh_node->refcount)) continue; neigh_node = tmp_neigh_node; @@ -1269,7 +1277,7 @@ out: * @if_incoming: interface on which the OGM packet was received * @if_outgoing: interface for which the retransmission should be considered * - * Returns duplicate status as enum batadv_dup_status + * Return: duplicate status as enum batadv_dup_status */ static enum batadv_dup_status batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, @@ -1308,7 +1316,8 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, /* signalize caller that the packet is to be dropped. */ if (!hlist_empty(&orig_node->neigh_list) && batadv_window_protected(bat_priv, seq_diff, - &orig_ifinfo->batman_seqno_reset)) { + BATADV_TQ_LOCAL_WINDOW_SIZE, + &orig_ifinfo->batman_seqno_reset, NULL)) { ret = BATADV_PROTECTED; goto out; } @@ -1929,7 +1938,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv, * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor * - * Returns a value less, equal to or greater than 0 if the metric via neigh1 is + * Return: a value less, equal to or greater than 0 if the metric via neigh1 is * lower, the same as or higher than the metric via neigh2 */ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, @@ -1970,7 +1979,7 @@ out: * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor * - * Returns true if the metric via neigh1 is equally good or better than + * Return: true if the metric via neigh1 is equally good or better than * the metric via neigh2, false otherwise. */ static bool diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 25cbc36e997a..b56bb000a0ab 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -29,10 +29,16 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n) bitmap_shift_left(seq_bits, seq_bits, n, BATADV_TQ_LOCAL_WINDOW_SIZE); } -/* receive and process one packet within the sequence number window. +/** + * batadv_bit_get_packet - receive and process one packet within the sequence + * number window + * @priv: the bat priv with all the soft interface information + * @seq_bits: pointer to the sequence number receive packet + * @seq_num_diff: difference between the current/received sequence number and + * the last sequence number + * @set_mark: whether this packet should be marked in seq_bits * - * returns: - * 1 if the window was moved (either new or very old) + * Return: 1 if the window was moved (either new or very old), * 0 if the window was not moved/shifted. */ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index 0226b220fe5b..3e41bb80eb81 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -24,7 +24,14 @@ #include <linux/compiler.h> #include <linux/types.h> -/* Returns 1 if the corresponding bit in the given seq_bits indicates true +/** + * batadv_test_bit - check if bit is set in the current window + * + * @seq_bits: pointer to the sequence number receive packet + * @last_seqno: latest sequence number in seq_bits + * @curr_seqno: sequence number to test for + * + * Return: 1 if the corresponding bit in the given seq_bits indicates true * and curr_seqno is within range of last_seqno. Otherwise returns 0. */ static inline int batadv_test_bit(const unsigned long *seq_bits, @@ -48,9 +55,6 @@ static inline void batadv_set_bit(unsigned long *seq_bits, s32 n) set_bit(n, seq_bits); /* turn the position on */ } -/* receive and process one packet, returns 1 if received seq_num is considered - * new, 0 if old - */ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, int set_mark); diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index c24c481b666f..7781f39c174d 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors: * * Simon Wunderlich * @@ -31,6 +31,7 @@ #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -58,7 +59,13 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, struct batadv_bla_backbone_gw *backbone_gw); -/* return the index of the claim */ +/** + * batadv_choose_claim - choose the right bucket for a claim. + * @data: data to hash + * @size: size of the hash table + * + * Return: the hash index of the claim + */ static inline u32 batadv_choose_claim(const void *data, u32 size) { struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; @@ -70,7 +77,13 @@ static inline u32 batadv_choose_claim(const void *data, u32 size) return hash % size; } -/* return the index of the backbone gateway */ +/** + * batadv_choose_backbone_gw - choose the right bucket for a backbone gateway. + * @data: data to hash + * @size: size of the hash table + * + * Return: the hash index of the backbone gateway + */ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) { const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; @@ -82,7 +95,13 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) return hash % size; } -/* compares address and vid of two backbone gws */ +/** + * batadv_compare_backbone_gw - compare address and vid of two backbone gws + * @node: list node of the first entry to compare + * @data2: pointer to the second backbone gateway + * + * Return: 1 if the backbones have the same data, 0 otherwise + */ static int batadv_compare_backbone_gw(const struct hlist_node *node, const void *data2) { @@ -100,7 +119,13 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node, return 1; } -/* compares address and vid of two claims */ +/** + * batadv_compare_backbone_gw - compare address and vid of two claims + * @node: list node of the first entry to compare + * @data2: pointer to the second claims + * + * Return: 1 if the claim have the same data, 0 otherwise + */ static int batadv_compare_claim(const struct hlist_node *node, const void *data2) { @@ -118,35 +143,63 @@ static int batadv_compare_claim(const struct hlist_node *node, return 1; } -/* free a backbone gw */ +/** + * batadv_backbone_gw_release - release backbone gw from lists and queue for + * free after rcu grace period + * @ref: kref pointer of the backbone gw + */ +static void batadv_backbone_gw_release(struct kref *ref) +{ + struct batadv_bla_backbone_gw *backbone_gw; + + backbone_gw = container_of(ref, struct batadv_bla_backbone_gw, + refcount); + + kfree_rcu(backbone_gw, rcu); +} + +/** + * batadv_backbone_gw_free_ref - decrement the backbone gw refcounter and + * possibly release it + * @backbone_gw: backbone gateway to be free'd + */ static void batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw) { - if (atomic_dec_and_test(&backbone_gw->refcount)) - kfree_rcu(backbone_gw, rcu); + kref_put(&backbone_gw->refcount, batadv_backbone_gw_release); } -/* finally deinitialize the claim */ -static void batadv_claim_release(struct batadv_bla_claim *claim) +/** + * batadv_claim_release - release claim from lists and queue for free after rcu + * grace period + * @ref: kref pointer of the claim + */ +static void batadv_claim_release(struct kref *ref) { + struct batadv_bla_claim *claim; + + claim = container_of(ref, struct batadv_bla_claim, refcount); + batadv_backbone_gw_free_ref(claim->backbone_gw); kfree_rcu(claim, rcu); } -/* free a claim, call claim_free_rcu if its the last reference */ +/** + * batadv_claim_free_ref - decrement the claim refcounter and possibly + * release it + * @claim: claim to be free'd + */ static void batadv_claim_free_ref(struct batadv_bla_claim *claim) { - if (atomic_dec_and_test(&claim->refcount)) - batadv_claim_release(claim); + kref_put(&claim->refcount, batadv_claim_release); } /** - * batadv_claim_hash_find + * batadv_claim_hash_find - looks for a claim in the claim hash * @bat_priv: the bat priv with all the soft interface information * @data: search data (may be local/static data) * - * looks for a claim in the hash, and returns it if found - * or NULL otherwise. + * Return: claim if found or NULL otherwise. */ static struct batadv_bla_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, @@ -169,7 +222,7 @@ static struct batadv_bla_claim if (!batadv_compare_claim(&claim->hash_entry, data)) continue; - if (!atomic_inc_not_zero(&claim->refcount)) + if (!kref_get_unless_zero(&claim->refcount)) continue; claim_tmp = claim; @@ -181,12 +234,12 @@ static struct batadv_bla_claim } /** - * batadv_backbone_hash_find - looks for a claim in the hash + * batadv_backbone_hash_find - looks for a backbone gateway in the hash * @bat_priv: the bat priv with all the soft interface information * @addr: the address of the originator * @vid: the VLAN ID * - * Returns claim if found or NULL otherwise. + * Return: backbone gateway if found or NULL otherwise */ static struct batadv_bla_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr, @@ -213,7 +266,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr, &search_entry)) continue; - if (!atomic_inc_not_zero(&backbone_gw->refcount)) + if (!kref_get_unless_zero(&backbone_gw->refcount)) continue; backbone_gw_tmp = backbone_gw; @@ -224,7 +277,10 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr, return backbone_gw_tmp; } -/* delete all claims for a backbone */ +/** + * batadv_bla_del_backbone_claims - delete all claims for a backbone + * @backbone_gw: backbone gateway where the claims should be removed + */ static void batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) { @@ -372,14 +428,13 @@ out: } /** - * batadv_bla_get_backbone_gw + * batadv_bla_get_backbone_gw - finds or creates a backbone gateway * @bat_priv: the bat priv with all the soft interface information * @orig: the mac address of the originator * @vid: the VLAN ID * @own_backbone: set if the requested backbone is local * - * searches for the backbone gw or creates a new one if it could not - * be found. + * Return: the (possibly created) backbone gateway or NULL on error */ static struct batadv_bla_backbone_gw * batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, @@ -412,7 +467,8 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, ether_addr_copy(entry->orig, orig); /* one for the hash, one for returning */ - atomic_set(&entry->refcount, 2); + kref_init(&entry->refcount); + kref_get(&entry->refcount); hash_added = batadv_hash_add(bat_priv->bla.backbone_hash, batadv_compare_backbone_gw, @@ -445,7 +501,13 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, return entry; } -/* update or add the own backbone gw to make sure we announce +/** + * batadv_bla_update_own_backbone_gw - updates the own backbone gw for a VLAN + * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the selected primary interface + * @vid: VLAN identifier + * + * update or add the own backbone gw to make sure we announce * where we receive other backbone gws */ static void @@ -542,12 +604,9 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) } /** - * batadv_bla_send_announce + * batadv_bla_send_announce - Send an announcement frame * @bat_priv: the bat priv with all the soft interface information * @backbone_gw: our backbone gateway which should be announced - * - * This function sends an announcement. It is called from multiple - * places. */ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, struct batadv_bla_backbone_gw *backbone_gw) @@ -595,7 +654,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, claim->lasttime = jiffies; claim->backbone_gw = backbone_gw; - atomic_set(&claim->refcount, 2); + kref_init(&claim->refcount); + kref_get(&claim->refcount); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", mac, BATADV_PRINT_VID(vid)); @@ -625,7 +685,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_backbone_gw_free_ref(claim->backbone_gw); } /* set (new) backbone gw */ - atomic_inc(&backbone_gw->refcount); + kref_get(&backbone_gw->refcount); claim->backbone_gw = backbone_gw; spin_lock_bh(&backbone_gw->crc_lock); @@ -637,8 +697,11 @@ claim_free_ref: batadv_claim_free_ref(claim); } -/* Delete a claim from the claim hash which has the - * given mac address and vid. +/** + * batadv_bla_del_claim - delete a claim from the claim hash + * @bat_priv: the bat priv with all the soft interface information + * @mac: mac address of the claim to be removed + * @vid: VLAN id for the claim to be removed */ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const u8 *mac, const unsigned short vid) @@ -666,7 +729,15 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_claim_free_ref(claim); } -/* check for ANNOUNCE frame, return 1 if handled */ +/** + * batadv_handle_announce - check for ANNOUNCE frame + * @bat_priv: the bat priv with all the soft interface information + * @an_addr: announcement mac address (ARP Sender HW address) + * @backbone_addr: originator address of the sender (Ethernet source MAC) + * @vid: the VLAN ID of the frame + * + * Return: 1 if handled + */ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, u8 *backbone_addr, unsigned short vid) { @@ -716,7 +787,16 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, return 1; } -/* check for REQUEST frame, return 1 if handled */ +/** + * batadv_handle_request - check for REQUEST frame + * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary hard interface of this batman soft interface + * @backbone_addr: backbone address to be requested (ARP sender HW MAC) + * @ethhdr: ethernet header of a packet + * @vid: the VLAN ID of the frame + * + * Return: 1 if handled + */ static int batadv_handle_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, u8 *backbone_addr, struct ethhdr *ethhdr, @@ -740,7 +820,16 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, return 1; } -/* check for UNCLAIM frame, return 1 if handled */ +/** + * batadv_handle_unclaim - check for UNCLAIM frame + * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary hard interface of this batman soft interface + * @backbone_addr: originator address of the backbone (Ethernet source) + * @claim_addr: Client to be unclaimed (ARP sender HW MAC) + * @vid: the VLAN ID of the frame + * + * Return: 1 if handled + */ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, u8 *backbone_addr, u8 *claim_addr, @@ -769,7 +858,16 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, return 1; } -/* check for CLAIM frame, return 1 if handled */ +/** + * batadv_handle_claim - check for CLAIM frame + * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary hard interface of this batman soft interface + * @backbone_addr: originator address of the backbone (Ethernet Source) + * @claim_addr: client mac address to be claimed (ARP sender HW MAC) + * @vid: the VLAN ID of the frame + * + * Return: 1 if handled + */ static int batadv_handle_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, u8 *backbone_addr, u8 *claim_addr, @@ -798,7 +896,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, } /** - * batadv_check_claim_group + * batadv_check_claim_group - check for claim group membership * @bat_priv: the bat priv with all the soft interface information * @primary_if: the primary interface of this batman interface * @hw_src: the Hardware source in the ARP Header @@ -809,7 +907,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, * This function also applies the group ID of the sender * if it is in the same mesh. * - * returns: + * Return: * 2 - if it is a claim packet and on the same group * 1 - if is a claim packet from another group * 0 - if it is not a claim packet @@ -873,14 +971,12 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, } /** - * batadv_bla_process_claim + * batadv_bla_process_claim - Check if this is a claim frame, and process it * @bat_priv: the bat priv with all the soft interface information * @primary_if: the primary hard interface of this batman soft interface * @skb: the frame to be checked * - * Check if this is a claim frame, and process it accordingly. - * - * returns 1 if it was a claim frame, otherwise return 0 to + * Return: 1 if it was a claim frame, otherwise return 0 to * tell the callee that it can use the frame on its own. */ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, @@ -1011,7 +1107,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, return 1; } -/* Check when we last heard from other nodes, and remove them in case of +/** + * batadv_bla_purge_backbone_gw - Remove backbone gateways after a timeout or + * immediately + * @bat_priv: the bat priv with all the soft interface information + * @now: whether the whole hash shall be wiped now + * + * Check when we last heard from other nodes, and remove them in case of * a time out, or clean all backbone gws if now is set. */ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) @@ -1059,7 +1161,7 @@ purge_now: } /** - * batadv_bla_purge_claims + * batadv_bla_purge_claims - Remove claims after a timeout or immediately * @bat_priv: the bat priv with all the soft interface information * @primary_if: the selected primary interface, may be NULL if now is set * @now: whether the whole hash shall be wiped now @@ -1108,12 +1210,11 @@ purge_now: } /** - * batadv_bla_update_orig_address + * batadv_bla_update_orig_address - Update the backbone gateways when the own + * originator address changes * @bat_priv: the bat priv with all the soft interface information * @primary_if: the new selected primary_if * @oldif: the old primary interface, may be NULL - * - * Update the backbone gateways when the own orig address changes. */ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, @@ -1184,7 +1285,11 @@ void batadv_bla_status_update(struct net_device *net_dev) batadv_hardif_free_ref(primary_if); } -/* periodic work to do: +/** + * batadv_bla_periodic_work - performs periodic bla work + * @work: kernel work struct + * + * periodic work to do: * * purge structures when they are too old * * send announcements */ @@ -1265,7 +1370,12 @@ out: static struct lock_class_key batadv_claim_hash_lock_class_key; static struct lock_class_key batadv_backbone_hash_lock_class_key; -/* initialize all bla structures */ +/** + * batadv_bla_init - initialize all bla structures + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success, < 0 on error. + */ int batadv_bla_init(struct batadv_priv *bat_priv) { int i; @@ -1320,7 +1430,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv) } /** - * batadv_bla_check_bcast_duplist + * batadv_bla_check_bcast_duplist - Check if a frame is in the broadcast dup. * @bat_priv: the bat priv with all the soft interface information * @skb: contains the bcast_packet to be checked * @@ -1332,6 +1442,8 @@ int batadv_bla_init(struct batadv_priv *bat_priv) * with a good chance that it is the same packet. If it is furthermore * sent by another host, drop it. We allow equal packets from * the same host however as this might be intended. + * + * Return: 1 if a packet is in the duplicate list, 0 otherwise. */ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, struct sk_buff *skb) @@ -1390,14 +1502,13 @@ out: } /** - * batadv_bla_is_backbone_gw_orig + * batadv_bla_is_backbone_gw_orig - Check if the originator is a gateway for + * the VLAN identified by vid. * @bat_priv: the bat priv with all the soft interface information * @orig: originator mac address * @vid: VLAN identifier * - * Check if the originator is a gateway for the VLAN identified by vid. - * - * Returns true if orig is a backbone for this vid, false otherwise. + * Return: true if orig is a backbone for this vid, false otherwise. */ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid) @@ -1431,14 +1542,13 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, } /** - * batadv_bla_is_backbone_gw + * batadv_bla_is_backbone_gw - check if originator is a backbone gw for a VLAN. * @skb: the frame to be checked * @orig_node: the orig_node of the frame * @hdr_size: maximum length of the frame * - * bla_is_backbone_gw inspects the skb for the VLAN ID and returns 1 - * if the orig_node is also a gateway on the soft interface, otherwise it - * returns 0. + * Return: 1 if the orig_node is also a gateway on the soft interface, otherwise + * it returns 0. */ int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size) @@ -1465,7 +1575,12 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, return 1; } -/* free all bla structures (for softinterface free or module unload) */ +/** + * batadv_bla_init - free all bla structures + * @bat_priv: the bat priv with all the soft interface information + * + * for softinterface free or module unload + */ void batadv_bla_free(struct batadv_priv *bat_priv) { struct batadv_hard_iface *primary_if; @@ -1488,18 +1603,19 @@ void batadv_bla_free(struct batadv_priv *bat_priv) } /** - * batadv_bla_rx + * batadv_bla_rx - check packets coming from the mesh. * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame * @is_bcast: the packet came in a broadcast packet type. * - * bla_rx avoidance checks if: + * batadv_bla_rx avoidance checks if: * * we have to race for a claim * * if the frame is allowed on the LAN * - * in these cases, the skb is further handled by this function and - * returns 1, otherwise it returns 0 and the caller shall further + * in these cases, the skb is further handled by this function + * + * Return: 1 if handled, otherwise it returns 0 and the caller shall further * process the skb. */ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -1583,20 +1699,21 @@ out: } /** - * batadv_bla_tx + * batadv_bla_tx - check packets going into the mesh * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame * - * bla_tx checks if: + * batadv_bla_tx checks if: * * a claim was received which has to be processed * * the frame is allowed on the mesh * - * in these cases, the skb is further handled by this function and - * returns 1, otherwise it returns 0 and the caller shall further - * process the skb. + * in these cases, the skb is further handled by this function. * * This call might reallocate skb data. + * + * Return: 1 if handled, otherwise it returns 0 and the caller shall further + * process the skb. */ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) @@ -1670,6 +1787,13 @@ out: return ret; } +/** + * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file + * @seq: seq file to print on + * @offset: not used + * + * Return: always 0 + */ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -1719,6 +1843,14 @@ out: return 0; } +/** + * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq + * file + * @seq: seq file to print on + * @offset: not used + * + * Return: always 0 + */ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 7ea199b8b5ab..579f0fa6fe6a 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors: * * Simon Wunderlich * diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 037ad0a5f485..48253cf8341b 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -281,6 +281,8 @@ static int batadv_originators_open(struct inode *inode, struct file *file) * originator table of an hard interface * @inode: inode pointer to debugfs file * @file: pointer to the seq_file + * + * Return: 0 on success or negative error number in case of failure */ static int batadv_originators_hardif_open(struct inode *inode, struct file *file) @@ -329,6 +331,8 @@ static int batadv_bla_backbone_table_open(struct inode *inode, * batadv_dat_cache_open - Prepare file handler for reads from dat_chache * @inode: inode which was opened * @file: file handle to be initialized + * + * Return: 0 on success or negative error number in case of failure */ static int batadv_dat_cache_open(struct inode *inode, struct file *file) { @@ -483,6 +487,8 @@ void batadv_debugfs_destroy(void) * batadv_debugfs_add_hardif - creates the base directory for a hard interface * in debugfs. * @hard_iface: hard interface which should be added. + * + * Return: 0 on success or negative error number in case of failure */ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) { diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 80ab8d6f0ab3..1ab4e2e63afc 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index a49c705fb86b..e3261118130a 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -30,6 +30,7 @@ #include <linux/in.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/rculist.h> #include <linux/rcupdate.h> @@ -62,21 +63,34 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) } /** + * batadv_dat_entry_release - release dat_entry from lists and queue for free + * after rcu grace period + * @ref: kref pointer of the dat_entry + */ +static void batadv_dat_entry_release(struct kref *ref) +{ + struct batadv_dat_entry *dat_entry; + + dat_entry = container_of(ref, struct batadv_dat_entry, refcount); + + kfree_rcu(dat_entry, rcu); +} + +/** * batadv_dat_entry_free_ref - decrement the dat_entry refcounter and possibly - * free it - * @dat_entry: the entry to free + * release it + * @dat_entry: dat_entry to be free'd */ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) { - if (atomic_dec_and_test(&dat_entry->refcount)) - kfree_rcu(dat_entry, rcu); + kref_put(&dat_entry->refcount, batadv_dat_entry_release); } /** * batadv_dat_to_purge - check whether a dat_entry has to be purged or not * @dat_entry: the entry to check * - * Returns true if the entry has to be purged now, false otherwise. + * Return: true if the entry has to be purged now, false otherwise. */ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) { @@ -151,7 +165,7 @@ static void batadv_dat_purge(struct work_struct *work) * @node: node in the local table * @data2: second object to compare the node to * - * Returns 1 if the two entries are the same, 0 otherwise. + * Return: 1 if the two entries are the same, 0 otherwise. */ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) { @@ -166,7 +180,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_src field in the ARP packet. + * Return: the value of the hw_src field in the ARP packet. */ static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) { @@ -183,7 +197,7 @@ static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_src field in the ARP packet. + * Return: the value of the ip_src field in the ARP packet. */ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) { @@ -195,7 +209,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_dst field in the ARP packet. + * Return: the value of the hw_dst field in the ARP packet. */ static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) { @@ -207,7 +221,7 @@ static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_dst field in the ARP packet. + * Return: the value of the ip_dst field in the ARP packet. */ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) { @@ -219,7 +233,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) * @data: data to hash * @size: size of the hash table * - * Returns the selected index in the hash table for the given data. + * Return: the selected index in the hash table for the given data. */ static u32 batadv_hash_dat(const void *data, u32 size) { @@ -256,7 +270,7 @@ static u32 batadv_hash_dat(const void *data, u32 size) * @ip: search key * @vid: VLAN identifier * - * Returns the dat_entry if found, NULL otherwise. + * Return: the dat_entry if found, NULL otherwise. */ static struct batadv_dat_entry * batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, @@ -281,7 +295,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, if (dat_entry->ip != ip) continue; - if (!atomic_inc_not_zero(&dat_entry->refcount)) + if (!kref_get_unless_zero(&dat_entry->refcount)) continue; dat_entry_tmp = dat_entry; @@ -326,7 +340,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, dat_entry->vid = vid; ether_addr_copy(dat_entry->mac_addr, mac_addr); dat_entry->last_update = jiffies; - atomic_set(&dat_entry->refcount, 2); + kref_init(&dat_entry->refcount); + kref_get(&dat_entry->refcount); hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat, batadv_hash_dat, dat_entry, @@ -440,7 +455,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, * @candidate: orig_node under evaluation * @max_orig_node: last selected candidate * - * Returns true if the node has been elected as next candidate or false + * Return: true if the node has been elected as next candidate or false * otherwise. */ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, @@ -527,7 +542,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, max_orig_node)) continue; - if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!kref_get_unless_zero(&orig_node->refcount)) continue; max = tmp_max; @@ -558,7 +573,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * closest values (from the LEFT, with wrap around if needed) then the hash * value of the key. ip_dst is the key. * - * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. + * Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM. */ static struct batadv_dat_candidate * batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) @@ -602,7 +617,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * This function copies the skb with pskb_copy() and is sent as unicast packet * to each of the selected candidates. * - * Returns true if the packet is sent to at least one candidate, false + * Return: true if the packet is sent to at least one candidate, false * otherwise. */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, @@ -741,6 +756,8 @@ static void batadv_dat_hash_free(struct batadv_priv *bat_priv) /** * batadv_dat_init - initialise the DAT internals * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 in case of success, a negative error code otherwise */ int batadv_dat_init(struct batadv_priv *bat_priv) { @@ -779,6 +796,8 @@ void batadv_dat_free(struct batadv_priv *bat_priv) * batadv_dat_cache_seq_print_text - print the local DAT hash table * @seq: seq file to print on * @offset: not used + * + * Return: always 0 */ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) { @@ -831,7 +850,7 @@ out: * @skb: packet to analyse * @hdr_size: size of the possible header before the ARP packet in the skb * - * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise. + * Return: the ARP type if the skb contains a valid ARP packet, 0 otherwise. */ static u16 batadv_arp_get_type(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -904,8 +923,9 @@ out: * @skb: the buffer containing the packet to extract the VID from * @hdr_size: the size of the batman-adv header encapsulating the packet * - * If the packet embedded in the skb is vlan tagged this function returns the - * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned. + * Return: If the packet embedded in the skb is vlan tagged this function + * returns the VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS + * is returned. */ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) { @@ -930,7 +950,7 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check * - * Returns true if the message has been sent to the dht candidates, false + * Return: true if the message has been sent to the dht candidates, false * otherwise. In case of a positive return value the message has to be enqueued * to permit the fallback. */ @@ -1020,7 +1040,7 @@ out: * @skb: packet to check * @hdr_size: size of the encapsulation header * - * Returns true if the request has been answered, false otherwise. + * Return: true if the request has been answered, false otherwise. */ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -1143,7 +1163,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, * @skb: packet to check * @hdr_size: size of the encapsulation header * - * Returns true if the packet was snooped and consumed by DAT. False if the + * Return: true if the packet was snooped and consumed by DAT. False if the * packet has to be delivered to the interface */ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, @@ -1200,7 +1220,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the broadcast packet * - * Returns true if the node can drop the packet, false otherwise. + * Return: true if the node can drop the packet, false otherwise. */ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet) diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index 26d4a525a798..813ecea96cf9 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors: * * Antonio Quartulli * diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 20d9282f895b..55656e84bc7e 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> * @@ -85,7 +85,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, /** * batadv_frag_size_limit - maximum possible size of packet to be fragmented * - * Returns the maximum size of payload that can be fragmented. + * Return: the maximum size of payload that can be fragmented. */ static int batadv_frag_size_limit(void) { @@ -107,7 +107,7 @@ static int batadv_frag_size_limit(void) * * Caller must hold chain->lock. * - * Returns true if chain is empty and caller can just insert the new fragment + * Return: true if chain is empty and caller can just insert the new fragment * without searching for the right position. */ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain, @@ -136,7 +136,7 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain, * Insert a new fragment into the reverse ordered chain in the right table * entry. The hash table entry is cleared if "old" fragments exist in it. * - * Returns true if skb is buffered, false on error. If the chain has all the + * Return: true if skb is buffered, false on error. If the chain has all the * fragments needed to merge the packet, the chain is moved to the passed head * to avoid locking the chain in the table. */ @@ -242,12 +242,11 @@ err: /** * batadv_frag_merge_packets - merge a chain of fragments * @chain: head of chain with fragments - * @skb: packet with total size of skb after merging * * Expand the first skb in the chain and copy the content of the remaining * skb's into the expanded one. After doing so, clear the chain. * - * Returns the merged skb or NULL on error. + * Return: the merged skb or NULL on error. */ static struct sk_buff * batadv_frag_merge_packets(struct hlist_head *chain) @@ -307,6 +306,9 @@ free: * There are three possible outcomes: 1) Packet is merged: Return true and * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb * to NULL; 3) Error: Return false and leave skb as is. + * + * Return: true when packet is merged or buffered, false when skb is not not + * used. */ bool batadv_frag_skb_buffer(struct sk_buff **skb, struct batadv_orig_node *orig_node_src) @@ -344,7 +346,7 @@ out_err: * will exceed the MTU towards the next-hop. If so, the fragment is forwarded * without merging it. * - * Returns true if the fragment is consumed/forwarded, false otherwise. + * Return: true if the fragment is consumed/forwarded, false otherwise. */ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_hard_iface *recv_if, @@ -399,7 +401,7 @@ out: * passed mtu and the old one with the rest. The new skb contains data from the * tail of the old skb. * - * Returns the new fragment, NULL on error. + * Return: the new fragment, NULL on error. */ static struct sk_buff *batadv_frag_create(struct sk_buff *skb, struct batadv_frag_packet *frag_head, @@ -433,7 +435,7 @@ err: * @orig_node: final destination of the created fragments * @neigh_node: next-hop of the created fragments * - * Returns true on success, false otherwise. + * Return: true on success, false otherwise. */ bool batadv_frag_send_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 8b9877e70b95..9ff77c7ef7c7 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> * @@ -42,7 +42,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb, * batadv_frag_check_entry - check if a list of fragments has timed out * @frags_entry: table entry to check * - * Returns true if the frags entry has timed out, false otherwise. + * Return: true if the frags entry has timed out, false otherwise. */ static inline bool batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index ccf70bed0d0c..4b598bd531ea 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -28,6 +28,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/rculist.h> @@ -59,12 +60,29 @@ */ #define BATADV_DHCP_CHADDR_OFFSET 28 +/** + * batadv_gw_node_release - release gw_node from lists and queue for free after + * rcu grace period + * @ref: kref pointer of the gw_node + */ +static void batadv_gw_node_release(struct kref *ref) +{ + struct batadv_gw_node *gw_node; + + gw_node = container_of(ref, struct batadv_gw_node, refcount); + + batadv_orig_node_free_ref(gw_node->orig_node); + kfree_rcu(gw_node, rcu); +} + +/** + * batadv_gw_node_free_ref - decrement the gw_node refcounter and possibly + * release it + * @gw_node: gateway node to free + */ static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node) { - if (atomic_dec_and_test(&gw_node->refcount)) { - batadv_orig_node_free_ref(gw_node->orig_node); - kfree_rcu(gw_node, rcu); - } + kref_put(&gw_node->refcount, batadv_gw_node_release); } static struct batadv_gw_node * @@ -77,7 +95,7 @@ batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv) if (!gw_node) goto out; - if (!atomic_inc_not_zero(&gw_node->refcount)) + if (!kref_get_unless_zero(&gw_node->refcount)) gw_node = NULL; out: @@ -100,7 +118,7 @@ batadv_gw_get_selected_orig(struct batadv_priv *bat_priv) if (!orig_node) goto unlock; - if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!kref_get_unless_zero(&orig_node->refcount)) orig_node = NULL; unlock: @@ -118,7 +136,7 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->gw.list_lock); - if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) + if (new_gw_node && !kref_get_unless_zero(&new_gw_node->refcount)) new_gw_node = NULL; curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1); @@ -170,7 +188,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (!router_ifinfo) goto next; - if (!atomic_inc_not_zero(&gw_node->refcount)) + if (!kref_get_unless_zero(&gw_node->refcount)) goto next; tq_avg = router_ifinfo->bat_iv.tq_avg; @@ -188,7 +206,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (curr_gw) batadv_gw_node_free_ref(curr_gw); curr_gw = gw_node; - atomic_inc(&curr_gw->refcount); + kref_get(&curr_gw->refcount); } break; @@ -203,7 +221,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (curr_gw) batadv_gw_node_free_ref(curr_gw); curr_gw = gw_node; - atomic_inc(&curr_gw->refcount); + kref_get(&curr_gw->refcount); } break; } @@ -423,7 +441,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, if (gateway->bandwidth_down == 0) return; - if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!kref_get_unless_zero(&orig_node->refcount)) return; gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); @@ -436,7 +454,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, gw_node->orig_node = orig_node; gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); - atomic_set(&gw_node->refcount, 1); + kref_init(&gw_node->refcount); spin_lock_bh(&bat_priv->gw.list_lock); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); @@ -456,7 +474,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @orig_node: originator announcing gateway capabilities * - * Returns gateway node if found or NULL otherwise. + * Return: gateway node if found or NULL otherwise. */ static struct batadv_gw_node * batadv_gw_node_get(struct batadv_priv *bat_priv, @@ -469,7 +487,7 @@ batadv_gw_node_get(struct batadv_priv *bat_priv, if (gw_node_tmp->orig_node != orig_node) continue; - if (!atomic_inc_not_zero(&gw_node_tmp->refcount)) + if (!kref_get_unless_zero(&gw_node_tmp->refcount)) continue; gw_node = gw_node_tmp; @@ -656,13 +674,13 @@ out: * @chaddr: buffer where the client address will be stored. Valid * only if the function returns BATADV_DHCP_TO_CLIENT * - * Returns: + * This function may re-allocate the data buffer of the skb passed as argument. + * + * Return: * - BATADV_DHCP_NO if the packet is not a dhcp message or if there was an error * while parsing it * - BATADV_DHCP_TO_SERVER if this is a message going to the DHCP server * - BATADV_DHCP_TO_CLIENT if this is a message going to a DHCP client - * - * This function may re-allocate the data buffer of the skb passed as argument. */ enum batadv_dhcp_recipient batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, @@ -777,11 +795,11 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, * server. Due to topology changes it may be the case that the GW server * previously selected is not the best one anymore. * - * Returns true if the packet destination is unicast and it is not the best gw, - * false otherwise. - * * This call might reallocate skb data. * Must be invoked only when the DHCP packet is going TO a DHCP SERVER. + * + * Return: true if the packet destination is unicast and it is not the best gw, + * false otherwise. */ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb) diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index fa9527785ed3..582dd8c413c8 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index b51bface8bdd..5ee04f7140af 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -38,7 +38,7 @@ * @description: text shown when throughput string cannot be parsed * @throughput: pointer holding the returned throughput information * - * Returns false on parse error and true otherwise. + * Return: false on parse error and true otherwise. */ static bool batadv_parse_throughput(struct net_device *net_dev, char *buff, const char *description, u32 *throughput) diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index ab893e318229..b58346350024 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 57f7107169f5..e2aaa4cc39a7 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -18,6 +18,7 @@ #include "hard-interface.h" #include "main.h" +#include <linux/atomic.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/errno.h> @@ -26,6 +27,7 @@ #include <linux/if_ether.h> #include <linux/if.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/printk.h> @@ -47,13 +49,19 @@ #include "sysfs.h" #include "translation-table.h" -void batadv_hardif_free_rcu(struct rcu_head *rcu) +/** + * batadv_hardif_release - release hard interface from lists and queue for + * free after rcu grace period + * @ref: kref pointer of the hard interface + */ +void batadv_hardif_release(struct kref *ref) { struct batadv_hard_iface *hard_iface; - hard_iface = container_of(rcu, struct batadv_hard_iface, rcu); + hard_iface = container_of(ref, struct batadv_hard_iface, refcount); dev_put(hard_iface->net_dev); - kfree(hard_iface); + + kfree_rcu(hard_iface, rcu); } struct batadv_hard_iface * @@ -64,7 +72,7 @@ batadv_hardif_get_by_netdev(const struct net_device *net_dev) rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->net_dev == net_dev && - atomic_inc_not_zero(&hard_iface->refcount)) + kref_get_unless_zero(&hard_iface->refcount)) goto out; } @@ -107,7 +115,7 @@ static bool batadv_mutual_parents(const struct net_device *dev1, * This function recursively checks all the fathers of the device passed as * argument looking for a batman-adv soft interface. * - * Returns true if the device is descendant of a batman-adv mesh interface (or + * Return: true if the device is descendant of a batman-adv mesh interface (or * if it is a batman-adv interface itself), false otherwise */ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) @@ -161,7 +169,7 @@ static int batadv_is_valid_iface(const struct net_device *net_dev) * interface * @net_device: the device to check * - * Returns true if the net device is a 802.11 wireless device, false otherwise. + * Return: true if the net device is a 802.11 wireless device, false otherwise. */ bool batadv_is_wifi_netdev(struct net_device *net_device) { @@ -194,7 +202,7 @@ batadv_hardif_get_active(const struct net_device *soft_iface) continue; if (hard_iface->if_status == BATADV_IF_ACTIVE && - atomic_inc_not_zero(&hard_iface->refcount)) + kref_get_unless_zero(&hard_iface->refcount)) goto out; } @@ -228,7 +236,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv, ASSERT_RTNL(); - if (new_hard_iface && !atomic_inc_not_zero(&new_hard_iface->refcount)) + if (new_hard_iface && !kref_get_unless_zero(&new_hard_iface->refcount)) new_hard_iface = NULL; curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1); @@ -426,7 +434,8 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) * * Invoke ndo_del_slave on master passing slave as argument. In this way slave * is free'd and master can correctly change its internal state. - * Return 0 on success, a negative value representing the error otherwise + * + * Return: 0 on success, a negative value representing the error otherwise */ static int batadv_master_del_slave(struct batadv_hard_iface *slave, struct net_device *master) @@ -455,7 +464,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) goto out; - if (!atomic_inc_not_zero(&hard_iface->refcount)) + if (!kref_get_unless_zero(&hard_iface->refcount)) goto out; soft_iface = dev_get_by_name(&init_net, iface_name); @@ -676,7 +685,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; /* extra reference for return */ - atomic_set(&hard_iface->refcount, 2); + kref_init(&hard_iface->refcount); + kref_get(&hard_iface->refcount); batadv_check_known_mac_addr(hard_iface->net_dev); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 7b12ea8ea29d..5cecc6bc1b1e 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -20,8 +20,8 @@ #include "main.h" -#include <linux/atomic.h> #include <linux/compiler.h> +#include <linux/kref.h> #include <linux/notifier.h> #include <linux/rcupdate.h> #include <linux/stddef.h> @@ -61,18 +61,16 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, void batadv_hardif_remove_interfaces(void); int batadv_hardif_min_mtu(struct net_device *soft_iface); void batadv_update_min_mtu(struct net_device *soft_iface); -void batadv_hardif_free_rcu(struct rcu_head *rcu); +void batadv_hardif_release(struct kref *ref); /** - * batadv_hardif_free_ref - decrement the hard interface refcounter and - * possibly free it + * batadv_hardif_free_ref - decrement the hard interface refcounter and possibly + * release it * @hard_iface: the hard interface to free */ -static inline void -batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface) +static inline void batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface) { - if (atomic_dec_and_test(&hard_iface->refcount)) - call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu); + kref_put(&hard_iface->refcount, batadv_hardif_release); } static inline struct batadv_hard_iface * @@ -85,7 +83,7 @@ batadv_primary_if_get_selected(struct batadv_priv *bat_priv) if (!hard_iface) goto out; - if (!atomic_inc_not_zero(&hard_iface->refcount)) + if (!kref_get_unless_zero(&hard_iface->refcount)) hard_iface = NULL; out: diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 2ea6a18d793f..a0a0fdb85805 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 377626250ac7..9bb57b87447c 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -30,14 +30,17 @@ struct lock_class_key; /* callback to a compare function. should compare 2 element datas for their - * keys, return 0 if same and not 0 if not same + * keys + * + * Return: 0 if same and not 0 if not same */ typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *, const void *); -/* the hashfunction, should return an index - * based on the key in the data of the first - * argument and the size the second +/* the hashfunction + * + * Return: an index based on the key in the data of the first argument and the + * size the second */ typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32); typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *); @@ -96,7 +99,7 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash, * @data: data passed to the aforementioned callbacks as argument * @data_node: to be added element * - * Returns 0 on success, 1 if the element already is in the hash + * Return: 0 on success, 1 if the element already is in the hash * and -1 on error. */ static inline int batadv_hash_add(struct batadv_hashtable *hash, @@ -139,10 +142,11 @@ out: return ret; } -/* removes data from hash, if found. returns pointer do data on success, so you - * can remove the used structure yourself, or NULL on error . data could be the - * structure you use with just the key filled, we just need the key for - * comparing. +/* removes data from hash, if found. data could be the structure you use with + * just the key filled, we just need the key for comparing. + * + * Return: returns pointer do data on success, so you can remove the used + * structure yourself, or NULL on error */ static inline void *batadv_hash_remove(struct batadv_hashtable *hash, batadv_hashdata_compare_cb compare, diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index bcabb5e3f4d3..a69da37bbad5 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index e937143f0b10..618d5de06f20 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 4b5d61fbadb1..568c5503f637 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -29,6 +29,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/module.h> @@ -233,7 +234,7 @@ void batadv_mesh_free(struct net_device *soft_iface) * @bat_priv: the bat priv with all the soft interface information * @addr: the address to check * - * Returns 'true' if the mac address was found, false otherwise. + * Return: 'true' if the mac address was found, false otherwise. */ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) { @@ -262,7 +263,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) * function that requires the primary interface * @seq: debugfs table seq_file struct * - * Returns primary interface if found or NULL otherwise. + * Return: primary interface if found or NULL otherwise. */ struct batadv_hard_iface * batadv_seq_print_text_primary_if_get(struct seq_file *seq) @@ -297,7 +298,7 @@ out: * batadv_max_header_len - calculate maximum encapsulation overhead for a * payload packet * - * Return the maximum encapsulation overhead in bytes. + * Return: the maximum encapsulation overhead in bytes. */ int batadv_max_header_len(void) { @@ -599,6 +600,8 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) * * payload_ptr must always point to an address in the skb head buffer and not to * a fragment. + * + * Return: big endian crc32c of the checksummed data */ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) { @@ -622,15 +625,27 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) } /** - * batadv_tvlv_handler_free_ref - decrement the tvlv handler refcounter and - * possibly free it + * batadv_tvlv_handler_release - release tvlv handler from lists and queue for + * free after rcu grace period + * @ref: kref pointer of the tvlv + */ +static void batadv_tvlv_handler_release(struct kref *ref) +{ + struct batadv_tvlv_handler *tvlv_handler; + + tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount); + kfree_rcu(tvlv_handler, rcu); +} + +/** + * batadv_tvlv_handler_free_ref - decrement the tvlv container refcounter and + * possibly release it * @tvlv_handler: the tvlv handler to free */ static void batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler) { - if (atomic_dec_and_test(&tvlv_handler->refcount)) - kfree_rcu(tvlv_handler, rcu); + kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release); } /** @@ -640,7 +655,7 @@ batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler) * @type: tvlv handler type to look for * @version: tvlv handler version to look for * - * Returns tvlv handler if found or NULL otherwise. + * Return: tvlv handler if found or NULL otherwise. */ static struct batadv_tvlv_handler *batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) @@ -656,7 +671,7 @@ static struct batadv_tvlv_handler if (tvlv_handler_tmp->version != version) continue; - if (!atomic_inc_not_zero(&tvlv_handler_tmp->refcount)) + if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount)) continue; tvlv_handler = tvlv_handler_tmp; @@ -668,14 +683,25 @@ static struct batadv_tvlv_handler } /** + * batadv_tvlv_container_release - release tvlv from lists and free + * @ref: kref pointer of the tvlv + */ +static void batadv_tvlv_container_release(struct kref *ref) +{ + struct batadv_tvlv_container *tvlv; + + tvlv = container_of(ref, struct batadv_tvlv_container, refcount); + kfree(tvlv); +} + +/** * batadv_tvlv_container_free_ref - decrement the tvlv container refcounter and - * possibly free it + * possibly release it * @tvlv: the tvlv container to free */ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv) { - if (atomic_dec_and_test(&tvlv->refcount)) - kfree(tvlv); + kref_put(&tvlv->refcount, batadv_tvlv_container_release); } /** @@ -688,13 +714,15 @@ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv) * Has to be called with the appropriate locks being acquired * (tvlv.container_list_lock). * - * Returns tvlv container if found or NULL otherwise. + * Return: tvlv container if found or NULL otherwise. */ static struct batadv_tvlv_container *batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) { struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL; + lockdep_assert_held(&bat_priv->tvlv.container_list_lock); + hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) { if (tvlv_tmp->tvlv_hdr.type != type) continue; @@ -702,7 +730,7 @@ static struct batadv_tvlv_container if (tvlv_tmp->tvlv_hdr.version != version) continue; - if (!atomic_inc_not_zero(&tvlv_tmp->refcount)) + if (!kref_get_unless_zero(&tvlv_tmp->refcount)) continue; tvlv = tvlv_tmp; @@ -720,13 +748,15 @@ static struct batadv_tvlv_container * Has to be called with the appropriate locks being acquired * (tvlv.container_list_lock). * - * Returns size of all currently registered tvlv containers in bytes. + * Return: size of all currently registered tvlv containers in bytes. */ static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) { struct batadv_tvlv_container *tvlv; u16 tvlv_len = 0; + lockdep_assert_held(&bat_priv->tvlv.container_list_lock); + hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { tvlv_len += sizeof(struct batadv_tvlv_hdr); tvlv_len += ntohs(tvlv->tvlv_hdr.len); @@ -808,7 +838,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len)); INIT_HLIST_NODE(&tvlv_new->list); - atomic_set(&tvlv_new->refcount, 1); + kref_init(&tvlv_new->refcount); spin_lock_bh(&bat_priv->tvlv.container_list_lock); tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); @@ -826,7 +856,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, * @additional_packet_len: requested additional packet size on top of minimum * size * - * Returns true of the packet buffer could be changed to the requested size, + * Return: true of the packet buffer could be changed to the requested size, * false otherwise. */ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, @@ -862,7 +892,7 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, * The ogm packet might be enlarged or shrunk depending on the current size * and the size of the to-be-appended tvlv containers. * - * Returns size of all appended tvlv containers in bytes. + * Return: size of all appended tvlv containers in bytes. */ u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, unsigned char **packet_buff, @@ -915,7 +945,7 @@ end: * @tvlv_value: tvlv content * @tvlv_value_len: tvlv content length * - * Returns success if handler was not found or the return value of the handler + * Return: success if handler was not found or the return value of the handler * callback. */ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, @@ -968,7 +998,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, * @tvlv_value: tvlv content * @tvlv_value_len: tvlv content length * - * Returns success when processing an OGM or the return value of all called + * Return: success when processing an OGM or the return value of all called * handler callbacks. */ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, @@ -1094,7 +1124,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, tvlv_handler->type = type; tvlv_handler->version = version; tvlv_handler->flags = flags; - atomic_set(&tvlv_handler->refcount, 1); + kref_init(&tvlv_handler->refcount); INIT_HLIST_NODE(&tvlv_handler->list); spin_lock_bh(&bat_priv->tvlv.handler_list_lock); @@ -1190,8 +1220,8 @@ out: * @skb: the buffer containing the packet * @header_len: length of the batman header preceding the ethernet header * - * If the packet embedded in the skb is vlan tagged this function returns the - * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned. + * Return: VID with the BATADV_VLAN_HAS_TAG flag when the packet embedded in the + * skb is vlan tagged. Otherwise BATADV_NO_FLAGS. */ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) { @@ -1218,7 +1248,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) * @vid: the VLAN identifier for which the AP isolation attributed as to be * looked up * - * Returns true if AP isolation is on for the VLAN idenfied by vid, false + * Return: true if AP isolation is on for the VLAN idenfied by vid, false * otherwise */ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 9dbd9107e7e1..32dfc9e578af 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -35,6 +35,9 @@ /* Time To Live of broadcast messages */ #define BATADV_TTL 50 +/* maximum sequence number age of broadcast messages */ +#define BATADV_BCAST_MAX_AGE 64 + /* purge originators after time in seconds if no valid packet comes in * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE */ @@ -273,9 +276,14 @@ static inline void _batadv_dbg(int type __always_unused, pr_err("%s: " fmt, _netdev->name, ## arg); \ } while (0) -/* returns 1 if they are the same ethernet addr +/** + * batadv_compare_eth - Compare two not u16 aligned Ethernet addresses + * @data1: Pointer to a six-byte array containing the Ethernet address + * @data2: Pointer other six-byte array containing the Ethernet address * * note: can't use ether_addr_equal() as it requires aligned memory + * + * Return: 1 if they are the same ethernet addr */ static inline bool batadv_compare_eth(const void *data1, const void *data2) { @@ -287,7 +295,7 @@ static inline bool batadv_compare_eth(const void *data1, const void *data2) * @timestamp: base value to compare with (in jiffies) * @timeout: added to base value before comparing (in milliseconds) * - * Returns true if current time is after timestamp + timeout + * Return: true if current time is after timestamp + timeout */ static inline bool batadv_has_timed_out(unsigned long timestamp, unsigned int timeout) @@ -326,7 +334,13 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx, #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) -/* Sum and return the cpu-local counters for index 'idx' */ +/** + * batadv_sum_counter - Sum the cpu-local counters for index 'idx' + * @bat_priv: the bat priv with all the soft interface information + * @idx: index of counter to sum up + * + * Return: sum of all cpu-local counters + */ static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) { u64 *counters, sum = 0; diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 75fa5013af72..8caa2c72efa3 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2014-2016 B.A.T.M.A.N. contributors: * * Linus Lüssing * @@ -30,6 +30,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -55,7 +56,7 @@ * Collect multicast addresses of the local multicast listeners * on the given soft interface, dev, in the given mcast_list. * - * Returns -ENOMEM on memory allocation error or the number of + * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. */ static int batadv_mcast_mla_softif_get(struct net_device *dev, @@ -87,7 +88,7 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev, * @mcast_addr: the multicast address to check * @mcast_list: the list with multicast addresses to search in * - * Returns true if the given address is already in the given list. + * Return: true if the given address is already in the given list. * Otherwise returns false. */ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, @@ -195,8 +196,9 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, * batadv_mcast_has_bridge - check whether the soft-iface is bridged * @bat_priv: the bat priv with all the soft interface information * - * Checks whether there is a bridge on top of our soft interface. Returns - * true if so, false otherwise. + * Checks whether there is a bridge on top of our soft interface. + * + * Return: true if there is a bridge, false otherwise. */ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv) { @@ -218,7 +220,7 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv) * Updates the own multicast tvlv with our current multicast related settings, * capabilities and inabilities. * - * Returns true if the tvlv container is registered afterwards. Otherwise + * Return: true if the tvlv container is registered afterwards. Otherwise * returns false. */ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv) @@ -289,8 +291,8 @@ out: * Checks whether the given IPv4 packet has the potential to be forwarded with a * mode more optimal than classic flooding. * - * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM in case of - * memory allocation failure. + * Return: If so then 0. Otherwise -EINVAL or -ENOMEM in case of memory + * allocation failure. */ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -327,8 +329,7 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, * Checks whether the given IPv6 packet has the potential to be forwarded with a * mode more optimal than classic flooding. * - * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out - * of memory. + * Return: If so then 0. Otherwise -EINVAL is or -ENOMEM if we are out of memory */ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -366,8 +367,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, * Checks whether the given multicast ethernet frame has the potential to be * forwarded with a mode more optimal than classic flooding. * - * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out - * of memory. + * Return: If so then 0. Otherwise -EINVAL is or -ENOMEM if we are out of memory */ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -398,7 +398,7 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @ethhdr: ethernet header of a packet * - * Returns the number of nodes which want all IPv4 multicast traffic if the + * Return: the number of nodes which want all IPv4 multicast traffic if the * given ethhdr is from an IPv4 packet or the number of nodes which want all * IPv6 traffic if it matches an IPv6 packet. */ @@ -421,7 +421,7 @@ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @ethhdr: the ether header containing the multicast destination * - * Returns an orig_node matching the multicast address provided by ethhdr + * Return: an orig_node matching the multicast address provided by ethhdr * via a translation table lookup. This increases the returned nodes refcount. */ static struct batadv_orig_node * @@ -436,7 +436,7 @@ batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv, * batadv_mcast_want_forw_ipv4_node_get - get a node with an ipv4 flag * @bat_priv: the bat priv with all the soft interface information * - * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and + * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and * increases its refcount. */ static struct batadv_orig_node * @@ -448,7 +448,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv) hlist_for_each_entry_rcu(tmp_orig_node, &bat_priv->mcast.want_all_ipv4_list, mcast_want_all_ipv4_node) { - if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) + if (!kref_get_unless_zero(&tmp_orig_node->refcount)) continue; orig_node = tmp_orig_node; @@ -463,7 +463,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv) * batadv_mcast_want_forw_ipv6_node_get - get a node with an ipv6 flag * @bat_priv: the bat priv with all the soft interface information * - * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set + * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set * and increases its refcount. */ static struct batadv_orig_node * @@ -475,7 +475,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv) hlist_for_each_entry_rcu(tmp_orig_node, &bat_priv->mcast.want_all_ipv6_list, mcast_want_all_ipv6_node) { - if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) + if (!kref_get_unless_zero(&tmp_orig_node->refcount)) continue; orig_node = tmp_orig_node; @@ -491,7 +491,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv) * @bat_priv: the bat priv with all the soft interface information * @ethhdr: an ethernet header to determine the protocol family from * - * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or + * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and * increases its refcount. */ @@ -514,7 +514,7 @@ batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv, * batadv_mcast_want_forw_unsnoop_node_get - get a node with an unsnoopable flag * @bat_priv: the bat priv with all the soft interface information * - * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag + * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag * set and increases its refcount. */ static struct batadv_orig_node * @@ -526,7 +526,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv) hlist_for_each_entry_rcu(tmp_orig_node, &bat_priv->mcast.want_all_unsnoopables_list, mcast_want_all_unsnoopables_node) { - if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) + if (!kref_get_unless_zero(&tmp_orig_node->refcount)) continue; orig_node = tmp_orig_node; @@ -543,7 +543,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv) * @skb: The multicast packet to check * @orig: an originator to be set to forward the skb to * - * Returns the forwarding mode as enum batadv_forw_mode and in case of + * Return: the forwarding mode as enum batadv_forw_mode and in case of * BATADV_FORW_SINGLE set the orig to the single originator the skb * should be forwarded to. */ diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 8f3cb04b9f13..80bceec55592 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2014-2016 B.A.T.M.A.N. contributors: * * Linus Lüssing * @@ -23,7 +23,7 @@ struct sk_buff; /** - * batadv_forw_mode - the way a packet should be forwarded as + * enum batadv_forw_mode - the way a packet should be forwarded as * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic * flooding) * @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index cc63b44f0d2e..a4eb8ee4abb1 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen * @@ -32,6 +32,7 @@ #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -64,6 +65,8 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, /** * batadv_nc_init - one-time initialization for network coding + * + * Return: 0 on success or negative error number in case of failure */ int __init batadv_nc_init(void) { @@ -142,6 +145,8 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /** * batadv_nc_mesh_init - initialise coding hash table and start house keeping * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure */ int batadv_nc_mesh_init(struct batadv_priv *bat_priv) { @@ -205,34 +210,50 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node) /** * batadv_nc_node_release - release nc_node from lists and queue for free after * rcu grace period - * @nc_node: the nc node to free + * @ref: kref pointer of the nc_node */ -static void batadv_nc_node_release(struct batadv_nc_node *nc_node) +static void batadv_nc_node_release(struct kref *ref) { + struct batadv_nc_node *nc_node; + + nc_node = container_of(ref, struct batadv_nc_node, refcount); + batadv_orig_node_free_ref(nc_node->orig_node); kfree_rcu(nc_node, rcu); } /** - * batadv_nc_node_free_ref - decrement the nc node refcounter and possibly + * batadv_nc_node_free_ref - decrement the nc_node refcounter and possibly * release it - * @nc_node: the nc node to free + * @nc_node: nc_node to be free'd */ static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) { - if (atomic_dec_and_test(&nc_node->refcount)) - batadv_nc_node_release(nc_node); + kref_put(&nc_node->refcount, batadv_nc_node_release); } /** - * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly - * frees it - * @nc_path: the nc node to free + * batadv_nc_path_release - release nc_path from lists and queue for free after + * rcu grace period + * @ref: kref pointer of the nc_path + */ +static void batadv_nc_path_release(struct kref *ref) +{ + struct batadv_nc_path *nc_path; + + nc_path = container_of(ref, struct batadv_nc_path, refcount); + + kfree_rcu(nc_path, rcu); +} + +/** + * batadv_nc_path_free_ref - decrement the nc_path refcounter and possibly + * release it + * @nc_path: nc_path to be free'd */ static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path) { - if (atomic_dec_and_test(&nc_path->refcount)) - kfree_rcu(nc_path, rcu); + kref_put(&nc_path->refcount, batadv_nc_path_release); } /** @@ -251,7 +272,7 @@ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) * @bat_priv: the bat priv with all the soft interface information * @nc_node: the nc node to check * - * Returns true if the entry has to be purged now, false otherwise + * Return: true if the entry has to be purged now, false otherwise */ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, struct batadv_nc_node *nc_node) @@ -267,7 +288,7 @@ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @nc_path: the nc path to check * - * Returns true if the entry has to be purged now, false otherwise + * Return: true if the entry has to be purged now, false otherwise */ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, struct batadv_nc_path *nc_path) @@ -287,7 +308,7 @@ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @nc_path: the nc path to check * - * Returns true if the entry has to be purged now, false otherwise + * Return: true if the entry has to be purged now, false otherwise */ static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, struct batadv_nc_path *nc_path) @@ -470,7 +491,7 @@ static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src, * @data: data to hash * @size: size of the hash table * - * Returns the selected index in the hash table for the given data. + * Return: the selected index in the hash table for the given data. */ static u32 batadv_nc_hash_choose(const void *data, u32 size) { @@ -489,7 +510,7 @@ static u32 batadv_nc_hash_choose(const void *data, u32 size) * @node: node in the local table * @data2: second object to compare the node to * - * Returns 1 if the two entry are the same, 0 otherwise + * Return: 1 if the two entry are the same, 0 otherwise */ static int batadv_nc_hash_compare(const struct hlist_node *node, const void *data2) @@ -516,7 +537,7 @@ static int batadv_nc_hash_compare(const struct hlist_node *node, * @hash: hash table containing the nc path * @data: search key * - * Returns the nc_path if found, NULL otherwise. + * Return: the nc_path if found, NULL otherwise. */ static struct batadv_nc_path * batadv_nc_hash_find(struct batadv_hashtable *hash, @@ -537,7 +558,7 @@ batadv_nc_hash_find(struct batadv_hashtable *hash, if (!batadv_nc_hash_compare(&nc_path->hash_entry, data)) continue; - if (!atomic_inc_not_zero(&nc_path->refcount)) + if (!kref_get_unless_zero(&nc_path->refcount)) continue; nc_path_tmp = nc_path; @@ -571,7 +592,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) * timeout. If so, the packet is no longer kept and the entry deleted from the * queue. Has to be called with the appropriate locks. * - * Returns false as soon as the entry in the fifo queue has not been timed out + * Return: false as soon as the entry in the fifo queue has not been timed out * yet and true otherwise. */ static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv, @@ -610,7 +631,7 @@ out: * packet is no longer delayed, immediately sent and the entry deleted from the * queue. Has to be called with the appropriate locks. * - * Returns false as soon as the entry in the fifo queue has not been timed out + * Return: false as soon as the entry in the fifo queue has not been timed out * yet and true otherwise. */ static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, @@ -731,7 +752,7 @@ static void batadv_nc_worker(struct work_struct *work) * @orig_node: neighboring orig node which may be used as nc candidate * @ogm_packet: incoming ogm packet also used for the checks * - * Returns true if: + * Return: true if: * 1) The OGM must have the most recent sequence number. * 2) The TTL must be decremented by one and only one. * 3) The OGM must be received from the first hop from orig_node. @@ -772,7 +793,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, * (can be equal to orig_node) * @in_coding: traverse incoming or outgoing network coding list * - * Returns the nc_node if found, NULL otherwise. + * Return: the nc_node if found, NULL otherwise. */ static struct batadv_nc_node *batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, @@ -793,7 +814,7 @@ static struct batadv_nc_node if (!batadv_compare_eth(nc_node->addr, orig_node->orig)) continue; - if (!atomic_inc_not_zero(&nc_node->refcount)) + if (!kref_get_unless_zero(&nc_node->refcount)) continue; /* Found a match */ @@ -814,7 +835,7 @@ static struct batadv_nc_node * (can be equal to orig_node) * @in_coding: traverse incoming or outgoing network coding list * - * Returns the nc_node if found or created, NULL in case of an error. + * Return: the nc_node if found or created, NULL in case of an error. */ static struct batadv_nc_node *batadv_nc_get_nc_node(struct batadv_priv *bat_priv, @@ -837,14 +858,15 @@ static struct batadv_nc_node if (!nc_node) return NULL; - if (!atomic_inc_not_zero(&orig_neigh_node->refcount)) + if (!kref_get_unless_zero(&orig_neigh_node->refcount)) goto free; /* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); ether_addr_copy(nc_node->addr, orig_node->orig); nc_node->orig_node = orig_neigh_node; - atomic_set(&nc_node->refcount, 2); + kref_init(&nc_node->refcount); + kref_get(&nc_node->refcount); /* Select ingoing or outgoing coding node */ if (in_coding) { @@ -932,7 +954,7 @@ out: * @src: ethernet source address - first half of the nc path search key * @dst: ethernet destination address - second half of the nc path search key * - * Returns pointer to nc_path if the path was found or created, returns NULL + * Return: pointer to nc_path if the path was found or created, returns NULL * on error. */ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, @@ -963,7 +985,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, /* Initialize nc_path */ INIT_LIST_HEAD(&nc_path->packet_list); spin_lock_init(&nc_path->packet_list_lock); - atomic_set(&nc_path->refcount, 2); + kref_init(&nc_path->refcount); + kref_get(&nc_path->refcount); nc_path->last_valid = jiffies; ether_addr_copy(nc_path->next_hop, dst); ether_addr_copy(nc_path->prev_hop, src); @@ -989,6 +1012,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, * batadv_nc_random_weight_tq - scale the receivers TQ-value to avoid unfair * selection of a receiver with slightly lower TQ than the other * @tq: to be weighted tq value + * + * Return: scaled tq value */ static u8 batadv_nc_random_weight_tq(u8 tq) { @@ -1029,7 +1054,7 @@ static void batadv_nc_memxor(char *dst, const char *src, unsigned int len) * @nc_packet: structure containing the packet to the skb can be coded with * @neigh_node: next hop to forward packet to * - * Returns true if both packets are consumed, false otherwise. + * Return: true if both packets are consumed, false otherwise. */ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -1228,7 +1253,7 @@ out: * Since the source encoded the packet we can be certain it has all necessary * decode information. * - * Returns true if coding of a decoded packet is allowed. + * Return: true if coding of a decoded packet is allowed. */ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src) { @@ -1246,7 +1271,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src) * @skb: data skb to forward * @eth_dst: next hop mac address of skb * - * Returns true if coding of a decoded skb is allowed. + * Return: true if coding of a decoded skb is allowed. */ static struct batadv_nc_packet * batadv_nc_path_search(struct batadv_priv *bat_priv, @@ -1314,7 +1339,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv, * @eth_src: source mac address of skb * @in_nc_node: pointer to skb next hop's neighbor nc node * - * Returns an nc packet if a suitable coding packet was found, NULL otherwise. + * Return: an nc packet if a suitable coding packet was found, NULL otherwise. */ static struct batadv_nc_packet * batadv_nc_skb_src_search(struct batadv_priv *bat_priv, @@ -1397,7 +1422,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, * next hop that potentially sent a packet which our next hop also received * (overheard) and has stored for later decoding. * - * Returns true if the skb was consumed (encoded packet sent) or false otherwise + * Return: true if the skb was consumed (encoded packet sent) or false otherwise */ static bool batadv_nc_skb_dst_search(struct sk_buff *skb, struct batadv_neigh_node *neigh_node, @@ -1451,7 +1476,7 @@ static bool batadv_nc_skb_dst_search(struct sk_buff *skb, * @neigh_node: next hop to forward packet to * @packet_id: checksum to identify packet * - * Returns true if the packet was buffered or false in case of an error. + * Return: true if the packet was buffered or false in case of an error. */ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, struct batadv_nc_path *nc_path, @@ -1485,7 +1510,7 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, * @skb: data skb to forward * @neigh_node: next hop to forward packet to * - * Returns true if the skb was consumed (encoded packet sent) or false otherwise + * Return: true if the skb was consumed (encoded packet sent) or false otherwise */ bool batadv_nc_skb_forward(struct sk_buff *skb, struct batadv_neigh_node *neigh_node) @@ -1624,7 +1649,7 @@ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, * @skb: unicast skb to decode * @nc_packet: decode data needed to decode the skb * - * Returns pointer to decoded unicast packet if the packet was decoded or NULL + * Return: pointer to decoded unicast packet if the packet was decoded or NULL * in case of an error. */ static struct batadv_unicast_packet * @@ -1718,7 +1743,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, * @ethhdr: pointer to the ethernet header inside the coded packet * @coded: coded packet we try to find decode data for * - * Returns pointer to nc packet if the needed data was found or NULL otherwise. + * Return: pointer to nc packet if the needed data was found or NULL otherwise. */ static struct batadv_nc_packet * batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, @@ -1781,6 +1806,9 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, * resulting unicast packet * @skb: incoming coded packet * @recv_if: pointer to interface this packet was received on + * + * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP + * otherwise. */ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) @@ -1865,6 +1893,8 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv) * batadv_nc_nodes_seq_print_text - print the nc node information * @seq: seq file to print on * @offset: not used + * + * Return: always 0 */ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) { @@ -1927,6 +1957,8 @@ out: /** * batadv_nc_init_debugfs - create nc folder and related files in debugfs * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure */ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 8f6d4ad8778a..d6d7fb4ec5d5 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen * diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index fe578f75c391..eacd0e5a0238 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -18,11 +18,13 @@ #include "originator.h" #include "main.h" +#include <linux/atomic.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -47,7 +49,13 @@ static struct lock_class_key batadv_orig_hash_lock_class_key; static void batadv_purge_orig(struct work_struct *work); -/* returns 1 if they are the same originator */ +/** + * batadv_compare_orig - comparing function used in the originator hash table + * @node: node in the local table + * @data2: second object to compare the node to + * + * Return: 1 if they are the same originator + */ int batadv_compare_orig(const struct hlist_node *node, const void *data2) { const void *data1 = container_of(node, struct batadv_orig_node, @@ -61,7 +69,7 @@ int batadv_compare_orig(const struct hlist_node *node, const void *data2) * @orig_node: the originator serving the VLAN * @vid: the VLAN identifier * - * Returns the vlan object identified by vid and belonging to orig_node or NULL + * Return: the vlan object identified by vid and belonging to orig_node or NULL * if it does not exist. */ struct batadv_orig_node_vlan * @@ -75,7 +83,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, if (tmp->vid != vid) continue; - if (!atomic_inc_not_zero(&tmp->refcount)) + if (!kref_get_unless_zero(&tmp->refcount)) continue; vlan = tmp; @@ -93,7 +101,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, * @orig_node: the originator serving the VLAN * @vid: the VLAN identifier * - * Returns NULL in case of failure or the vlan object identified by vid and + * Return: NULL in case of failure or the vlan object identified by vid and * belonging to orig_node otherwise. The object is created and added to the list * if it does not exist. * @@ -116,7 +124,8 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, if (!vlan) goto out; - atomic_set(&vlan->refcount, 2); + kref_init(&vlan->refcount); + kref_get(&vlan->refcount); vlan->vid = vid; hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list); @@ -128,14 +137,27 @@ out: } /** - * batadv_orig_node_vlan_free_ref - decrement the refcounter and possibly free - * the originator-vlan object + * batadv_orig_node_vlan_release - release originator-vlan object from lists + * and queue for free after rcu grace period + * @ref: kref pointer of the originator-vlan object + */ +static void batadv_orig_node_vlan_release(struct kref *ref) +{ + struct batadv_orig_node_vlan *orig_vlan; + + orig_vlan = container_of(ref, struct batadv_orig_node_vlan, refcount); + + kfree_rcu(orig_vlan, rcu); +} + +/** + * batadv_orig_node_vlan_free_ref - decrement the refcounter and possibly + * release the originator-vlan object * @orig_vlan: the originator-vlan object to release */ void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan) { - if (atomic_dec_and_test(&orig_vlan->refcount)) - kfree_rcu(orig_vlan, rcu); + kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release); } int batadv_originator_init(struct batadv_priv *bat_priv) @@ -165,11 +187,14 @@ err: /** * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for * free after rcu grace period - * @neigh_ifinfo: the neigh_ifinfo object to release + * @ref: kref pointer of the neigh_ifinfo */ -static void -batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo) +static void batadv_neigh_ifinfo_release(struct kref *ref) { + struct batadv_neigh_ifinfo *neigh_ifinfo; + + neigh_ifinfo = container_of(ref, struct batadv_neigh_ifinfo, refcount); + if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) batadv_hardif_free_ref(neigh_ifinfo->if_outgoing); @@ -183,18 +208,21 @@ batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo) */ void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) { - if (atomic_dec_and_test(&neigh_ifinfo->refcount)) - batadv_neigh_ifinfo_release(neigh_ifinfo); + kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release); } /** * batadv_hardif_neigh_release - release hardif neigh node from lists and * queue for free after rcu grace period - * @hardif_neigh: hardif neigh neighbor to free + * @ref: kref pointer of the neigh_node */ -static void -batadv_hardif_neigh_release(struct batadv_hardif_neigh_node *hardif_neigh) +static void batadv_hardif_neigh_release(struct kref *ref) { + struct batadv_hardif_neigh_node *hardif_neigh; + + hardif_neigh = container_of(ref, struct batadv_hardif_neigh_node, + refcount); + spin_lock_bh(&hardif_neigh->if_incoming->neigh_list_lock); hlist_del_init_rcu(&hardif_neigh->list); spin_unlock_bh(&hardif_neigh->if_incoming->neigh_list_lock); @@ -210,22 +238,23 @@ batadv_hardif_neigh_release(struct batadv_hardif_neigh_node *hardif_neigh) */ void batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh) { - if (atomic_dec_and_test(&hardif_neigh->refcount)) - batadv_hardif_neigh_release(hardif_neigh); + kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release); } /** * batadv_neigh_node_release - release neigh_node from lists and queue for * free after rcu grace period - * @neigh_node: neigh neighbor to free + * @ref: kref pointer of the neigh_node */ -static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node) +static void batadv_neigh_node_release(struct kref *ref) { struct hlist_node *node_tmp; + struct batadv_neigh_node *neigh_node; struct batadv_hardif_neigh_node *hardif_neigh; struct batadv_neigh_ifinfo *neigh_ifinfo; struct batadv_algo_ops *bao; + neigh_node = container_of(ref, struct batadv_neigh_node, refcount); bao = neigh_node->orig_node->bat_priv->bat_algo_ops; hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, @@ -250,14 +279,13 @@ static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node) } /** - * batadv_neigh_node_free_ref - decrement the neighbors refcounter - * and possibly release it + * batadv_neigh_node_free_ref - decrement the neighbors refcounter and possibly + * release it * @neigh_node: neigh neighbor to free */ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) { - if (atomic_dec_and_test(&neigh_node->refcount)) - batadv_neigh_node_release(neigh_node); + kref_put(&neigh_node->refcount, batadv_neigh_node_release); } /** @@ -266,7 +294,7 @@ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) * @if_outgoing: the interface where the payload packet has been received or * the OGM should be sent to * - * Returns the neighbor which should be router for this orig_node/iface. + * Return: the neighbor which should be router for this orig_node/iface. * * The object is returned with refcounter increased by 1. */ @@ -286,7 +314,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node, break; } - if (router && !atomic_inc_not_zero(&router->refcount)) + if (router && !kref_get_unless_zero(&router->refcount)) router = NULL; rcu_read_unlock(); @@ -298,7 +326,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node, * @orig_node: the orig node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * - * Returns the requested orig_ifinfo or NULL if not found. + * Return: the requested orig_ifinfo or NULL if not found. * * The object is returned with refcounter increased by 1. */ @@ -314,7 +342,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, if (tmp->if_outgoing != if_outgoing) continue; - if (!atomic_inc_not_zero(&tmp->refcount)) + if (!kref_get_unless_zero(&tmp->refcount)) continue; orig_ifinfo = tmp; @@ -330,7 +358,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, * @orig_node: the orig node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * - * Returns NULL in case of failure or the orig_ifinfo object for the if_outgoing + * Return: NULL in case of failure or the orig_ifinfo object for the if_outgoing * interface otherwise. The object is created and added to the list * if it does not exist. * @@ -354,7 +382,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, goto out; if (if_outgoing != BATADV_IF_DEFAULT && - !atomic_inc_not_zero(&if_outgoing->refcount)) { + !kref_get_unless_zero(&if_outgoing->refcount)) { kfree(orig_ifinfo); orig_ifinfo = NULL; goto out; @@ -365,7 +393,8 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, orig_ifinfo->batman_seqno_reset = reset_time; orig_ifinfo->if_outgoing = if_outgoing; INIT_HLIST_NODE(&orig_ifinfo->list); - atomic_set(&orig_ifinfo->refcount, 2); + kref_init(&orig_ifinfo->refcount); + kref_get(&orig_ifinfo->refcount); hlist_add_head_rcu(&orig_ifinfo->list, &orig_node->ifinfo_list); out: @@ -375,12 +404,12 @@ out: /** * batadv_neigh_ifinfo_get - find the ifinfo from an neigh_node - * @neigh_node: the neigh node to be queried + * @neigh: the neigh node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * * The object is returned with refcounter increased by 1. * - * Returns the requested neigh_ifinfo or NULL if not found + * Return: the requested neigh_ifinfo or NULL if not found */ struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, @@ -395,7 +424,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, if (tmp_neigh_ifinfo->if_outgoing != if_outgoing) continue; - if (!atomic_inc_not_zero(&tmp_neigh_ifinfo->refcount)) + if (!kref_get_unless_zero(&tmp_neigh_ifinfo->refcount)) continue; neigh_ifinfo = tmp_neigh_ifinfo; @@ -408,10 +437,10 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, /** * batadv_neigh_ifinfo_new - search and possibly create an neigh_ifinfo object - * @neigh_node: the neigh node to be queried + * @neigh: the neigh node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * - * Returns NULL in case of failure or the neigh_ifinfo object for the + * Return: NULL in case of failure or the neigh_ifinfo object for the * if_outgoing interface otherwise. The object is created and added to the list * if it does not exist. * @@ -433,14 +462,15 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, if (!neigh_ifinfo) goto out; - if (if_outgoing && !atomic_inc_not_zero(&if_outgoing->refcount)) { + if (if_outgoing && !kref_get_unless_zero(&if_outgoing->refcount)) { kfree(neigh_ifinfo); neigh_ifinfo = NULL; goto out; } INIT_HLIST_NODE(&neigh_ifinfo->list); - atomic_set(&neigh_ifinfo->refcount, 2); + kref_init(&neigh_ifinfo->refcount); + kref_get(&neigh_ifinfo->refcount); neigh_ifinfo->if_outgoing = if_outgoing; hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list); @@ -459,7 +489,8 @@ out: * * Looks for and possibly returns a neighbour belonging to this originator list * which is connected through the provided hard interface. - * Returns NULL if the neighbour is not found. + * + * Return: neighbor when found. Othwerwise NULL */ static struct batadv_neigh_node * batadv_neigh_node_get(const struct batadv_orig_node *orig_node, @@ -476,7 +507,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, if (tmp_neigh_node->if_incoming != hard_iface) continue; - if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + if (!kref_get_unless_zero(&tmp_neigh_node->refcount)) continue; res = tmp_neigh_node; @@ -492,7 +523,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, * @hard_iface: the interface this neighbour is connected to * @neigh_addr: the interface address of the neighbour to retrieve * - * Returns the hardif neighbour node if found or created or NULL otherwise. + * Return: the hardif neighbour node if found or created or NULL otherwise. */ static struct batadv_hardif_neigh_node * batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, @@ -508,7 +539,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, if (hardif_neigh) goto out; - if (!atomic_inc_not_zero(&hard_iface->refcount)) + if (!kref_get_unless_zero(&hard_iface->refcount)) goto out; hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC); @@ -522,7 +553,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, hardif_neigh->if_incoming = hard_iface; hardif_neigh->last_seen = jiffies; - atomic_set(&hardif_neigh->refcount, 1); + kref_init(&hardif_neigh->refcount); if (bat_priv->bat_algo_ops->bat_hardif_neigh_init) bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh); @@ -540,7 +571,7 @@ out: * @hard_iface: the interface this neighbour is connected to * @neigh_addr: the interface address of the neighbour to retrieve * - * Returns the hardif neighbour node if found or created or NULL otherwise. + * Return: the hardif neighbour node if found or created or NULL otherwise. */ static struct batadv_hardif_neigh_node * batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, @@ -562,7 +593,8 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, * @neigh_addr: the address of the neighbour * * Looks for and possibly returns a neighbour belonging to this hard interface. - * Returns NULL if the neighbour is not found. + * + * Return: neighbor when found. Othwerwise NULL */ struct batadv_hardif_neigh_node * batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, @@ -576,7 +608,7 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, if (!batadv_compare_eth(tmp_hardif_neigh->addr, neigh_addr)) continue; - if (!atomic_inc_not_zero(&tmp_hardif_neigh->refcount)) + if (!kref_get_unless_zero(&tmp_hardif_neigh->refcount)) continue; hardif_neigh = tmp_hardif_neigh; @@ -594,7 +626,8 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, * @neigh_addr: the mac address of the neighbour interface * * Allocates a new neigh_node object and initialises all the generic fields. - * Returns the new object or NULL on failure. + * + * Return: neighbor when found. Othwerwise NULL */ struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_orig_node *orig_node, @@ -617,7 +650,7 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, if (!neigh_node) goto out; - if (!atomic_inc_not_zero(&hard_iface->refcount)) { + if (!kref_get_unless_zero(&hard_iface->refcount)) { kfree(neigh_node); neigh_node = NULL; goto out; @@ -632,14 +665,15 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, neigh_node->orig_node = orig_node; /* extra reference for return */ - atomic_set(&neigh_node->refcount, 2); + kref_init(&neigh_node->refcount); + kref_get(&neigh_node->refcount); spin_lock_bh(&orig_node->neigh_list_lock); hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); spin_unlock_bh(&orig_node->neigh_list_lock); /* increment unique neighbor refcount */ - atomic_inc(&hardif_neigh->refcount); + kref_get(&hardif_neigh->refcount); batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", @@ -656,7 +690,7 @@ out: * @seq: neighbour table seq_file struct * @offset: not used * - * Always returns 0. + * Return: always 0 */ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) { @@ -688,12 +722,15 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) /** * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for * free after rcu grace period - * @orig_ifinfo: the orig_ifinfo object to release + * @ref: kref pointer of the orig_ifinfo */ -static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo) +static void batadv_orig_ifinfo_release(struct kref *ref) { + struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *router; + orig_ifinfo = container_of(ref, struct batadv_orig_ifinfo, refcount); + if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) batadv_hardif_free_ref(orig_ifinfo->if_outgoing); @@ -712,8 +749,7 @@ static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo) */ void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) { - if (atomic_dec_and_test(&orig_ifinfo->refcount)) - batadv_orig_ifinfo_release(orig_ifinfo); + kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release); } /** @@ -740,14 +776,17 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) /** * batadv_orig_node_release - release orig_node from lists and queue for * free after rcu grace period - * @orig_node: the orig node to free + * @ref: kref pointer of the orig_node */ -static void batadv_orig_node_release(struct batadv_orig_node *orig_node) +static void batadv_orig_node_release(struct kref *ref) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; + struct batadv_orig_node *orig_node; struct batadv_orig_ifinfo *orig_ifinfo; + orig_node = container_of(ref, struct batadv_orig_node, refcount); + spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... */ @@ -777,8 +816,7 @@ static void batadv_orig_node_release(struct batadv_orig_node *orig_node) */ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) { - if (atomic_dec_and_test(&orig_node->refcount)) - batadv_orig_node_release(orig_node); + kref_put(&orig_node->refcount, batadv_orig_node_release); } void batadv_originator_free(struct batadv_priv *bat_priv) @@ -820,7 +858,8 @@ void batadv_originator_free(struct batadv_priv *bat_priv) * * Creates a new originator object and initialise all the generic fields. * The new object is not added to the originator list. - * Returns the newly created object or NULL on failure. + * + * Return: the newly created object or NULL on failure. */ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const u8 *addr) @@ -849,7 +888,8 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, batadv_nc_init_orig(orig_node); /* extra reference for return */ - atomic_set(&orig_node->refcount, 2); + kref_init(&orig_node->refcount); + kref_get(&orig_node->refcount); orig_node->bat_priv = bat_priv; ether_addr_copy(orig_node->orig, addr); @@ -937,7 +977,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked * - * Returns true if any ifinfo entry was purged, false otherwise. + * Return: true if any ifinfo entry was purged, false otherwise. */ static bool batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, @@ -989,7 +1029,7 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked * - * Returns true if any neighbor was purged, false otherwise + * Return: true if any neighbor was purged, false otherwise */ static bool batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, @@ -1048,7 +1088,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, * @orig_node: orig node which is to be checked * @if_outgoing: the interface for which the metric should be compared * - * Returns the current best neighbor, with refcount increased. + * Return: the current best neighbor, with refcount increased. */ static struct batadv_neigh_node * batadv_find_best_neighbor(struct batadv_priv *bat_priv, @@ -1064,7 +1104,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv, best, if_outgoing) <= 0)) continue; - if (!atomic_inc_not_zero(&neigh->refcount)) + if (!kref_get_unless_zero(&neigh->refcount)) continue; if (best) @@ -1085,7 +1125,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv, * This function checks if the orig_node or substructures of it have become * obsolete, and purges this information if that's the case. * - * Returns true if the orig_node is to be removed, false otherwise. + * Return: true if the orig_node is to be removed, false otherwise. */ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) @@ -1230,7 +1270,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) * @seq: debugfs table seq_file struct * @offset: not used * - * Returns 0 + * Return: 0 */ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) { diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index cf0730414ed2..99507408f4cf 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -20,10 +20,10 @@ #include "main.h" -#include <linux/atomic.h> #include <linux/compiler.h> #include <linux/if_ether.h> #include <linux/jhash.h> +#include <linux/kref.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/stddef.h> @@ -115,7 +115,7 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) if (!batadv_compare_eth(orig_node, data)) continue; - if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!kref_get_unless_zero(&orig_node->refcount)) continue; orig_node_tmp = orig_node; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 0558e3237e0e..e7f915181aba 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -158,7 +158,7 @@ enum batadv_tt_client_flags { }; /** - * batadv_vlan_flags - flags for the four MSB of any vlan ID field + * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not */ enum batadv_vlan_flags { @@ -209,6 +209,11 @@ struct batadv_bla_claim_dst { * @version: batman-adv protocol version, part of the genereal header * @ttl: time to live for this packet, part of the genereal header * @flags: contains routing relevant flags - see enum batadv_iv_flags + * @seqno: sequence identification + * @orig: address of the source node + * @prev_sender: address of the previous sender + * @reserved: reserved byte for alignment + * @tq: transmission quality * @tvlv_len: length of tvlv data following the ogm header */ struct batadv_ogm_packet { @@ -230,7 +235,7 @@ struct batadv_ogm_packet { #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) /** - * batadv_icmp_header - common members among all the ICMP packets + * struct batadv_icmp_header - common members among all the ICMP packets * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the genereal header * @ttl: time to live for this packet, part of the genereal header @@ -256,7 +261,7 @@ struct batadv_icmp_header { }; /** - * batadv_icmp_packet - ICMP packet + * struct batadv_icmp_packet - ICMP packet * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the genereal header * @ttl: time to live for this packet, part of the genereal header @@ -282,7 +287,7 @@ struct batadv_icmp_packet { #define BATADV_RR_LEN 16 /** - * batadv_icmp_packet_rr - ICMP RouteRecord packet + * struct batadv_icmp_packet_rr - ICMP RouteRecord packet * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the genereal header * @ttl: time to live for this packet, part of the genereal header @@ -345,6 +350,7 @@ struct batadv_unicast_packet { * @u: common unicast packet header * @src: address of the source * @subtype: packet subtype + * @reserved: reserved byte for alignment */ struct batadv_unicast_4addr_packet { struct batadv_unicast_packet u; @@ -413,7 +419,6 @@ struct batadv_bcast_packet { * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the genereal header * @ttl: time to live for this packet, part of the genereal header - * @reserved: Align following fields to 2-byte boundaries * @first_source: original source of first included packet * @first_orig_dest: original destinal of first included packet * @first_crc: checksum of first included packet @@ -495,7 +500,7 @@ struct batadv_tvlv_gateway_data { * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container * @flags: translation table flags (see batadv_tt_data_flags) * @ttvn: translation table version number - * @vlan_num: number of announced VLANs. In the TVLV this struct is followed by + * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by * one batadv_tvlv_tt_vlan_data object per announced vlan */ struct batadv_tvlv_tt_data { diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index e4f2646d9246..205310b56c2b 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -25,6 +25,7 @@ #include <linux/etherdevice.h> #include <linux/if_ether.h> #include <linux/jiffies.h> +#include <linux/kref.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/rculist.h> @@ -72,7 +73,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, rcu_read_lock(); curr_router = rcu_dereference(orig_ifinfo->router); - if (curr_router && !atomic_inc_not_zero(&curr_router->refcount)) + if (curr_router && !kref_get_unless_zero(&curr_router->refcount)) curr_router = NULL; rcu_read_unlock(); @@ -100,7 +101,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, batadv_neigh_node_free_ref(curr_router); /* increase refcount of new best neighbor */ - if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount)) + if (neigh_node && !kref_get_unless_zero(&neigh_node->refcount)) neigh_node = NULL; spin_lock_bh(&orig_node->neigh_list_lock); @@ -140,21 +141,35 @@ out: batadv_neigh_node_free_ref(router); } -/* checks whether the host restarted and is in the protection time. - * returns: - * 0 if the packet is to be accepted +/** + * batadv_window_protected - checks whether the host restarted and is in the + * protection time. + * @bat_priv: the bat priv with all the soft interface information + * @seq_num_diff: difference between the current/received sequence number and + * the last sequence number + * @seq_old_max_diff: maximum age of sequence number not considered as restart + * @last_reset: jiffies timestamp of the last reset, will be updated when reset + * is detected + * @protection_started: is set to true if the protection window was started, + * doesn't change otherwise. + * + * Return: + * 0 if the packet is to be accepted. * 1 if the packet is to be ignored. */ int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, - unsigned long *last_reset) + s32 seq_old_max_diff, unsigned long *last_reset, + bool *protection_started) { - if (seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE || + if (seq_num_diff <= -seq_old_max_diff || seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) { if (!batadv_has_timed_out(*last_reset, BATADV_RESET_PROTECTION_MS)) return 1; *last_reset = jiffies; + if (protection_started) + *protection_started = true; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "old packet received, start protection\n"); } @@ -198,7 +213,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, * @bat_priv: the bat priv with all the soft interface information * @skb: icmp packet to process * - * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP + * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP * otherwise. */ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, @@ -398,10 +413,11 @@ out: * @skb: packet to check * @hdr_size: size of header to pull * - * Check for short header and bad addresses in given packet. Returns negative - * value when check fails and 0 otherwise. The negative value depends on the - * reason: -ENODATA for bad header, -EBADR for broadcast destination or source, - * and -EREMOTE for non-local (other host) destination. + * Check for short header and bad addresses in given packet. + * + * Return: negative value when check fails and 0 otherwise. The negative value + * depends on the reason: -ENODATA for bad header, -EBADR for broadcast + * destination or source, and -EREMOTE for non-local (other host) destination. */ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -435,7 +451,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, * @orig_node: the destination node * @recv_if: pointer to interface this packet was received on * - * Returns the router which should be used for this orig_node on + * Return: the router which should be used for this orig_node on * this interface, or NULL if not available. */ struct batadv_neigh_node * @@ -482,14 +498,14 @@ batadv_find_router(struct batadv_priv *bat_priv, hlist_for_each_entry_rcu(cand, &orig_node->ifinfo_list, list) { /* acquire some structures and references ... */ - if (!atomic_inc_not_zero(&cand->refcount)) + if (!kref_get_unless_zero(&cand->refcount)) continue; cand_router = rcu_dereference(cand->router); if (!cand_router) goto next; - if (!atomic_inc_not_zero(&cand_router->refcount)) { + if (!kref_get_unless_zero(&cand_router->refcount)) { cand_router = NULL; goto next; } @@ -508,8 +524,8 @@ batadv_find_router(struct batadv_priv *bat_priv, /* mark the first possible candidate */ if (!first_candidate) { - atomic_inc(&cand_router->refcount); - atomic_inc(&cand->refcount); + kref_get(&cand_router->refcount); + kref_get(&cand->refcount); first_candidate = cand; first_candidate_router = cand_router; } @@ -648,7 +664,7 @@ out: * the new corresponding information (originator address where the destination * client currently is and its known TTVN) * - * Returns true if the packet header has been updated, false otherwise + * Return: true if the packet header has been updated, false otherwise */ static bool batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, @@ -805,7 +821,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * @skb: unicast tvlv packet to process * @recv_if: pointer to interface this packet was received on * - * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP + * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP * otherwise. */ int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, @@ -904,9 +920,8 @@ rx_success: * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets * @skb: unicast tvlv packet to process * @recv_if: pointer to interface this packet was received on - * @dst_addr: the payload destination * - * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP + * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP * otherwise. */ int batadv_recv_unicast_tvlv(struct sk_buff *skb, @@ -960,7 +975,7 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb, * the assembled packet will exceed our MTU; 2) Buffer fragment, if we till * lack further fragments; 3) Merge fragments, if we have all needed parts. * - * Return NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. + * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. */ int batadv_recv_frag_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) @@ -1065,7 +1080,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, /* check whether the packet is old and the host just restarted. */ if (batadv_window_protected(bat_priv, seq_diff, - &orig_node->bcast_seqno_reset)) + BATADV_BCAST_MAX_AGE, + &orig_node->bcast_seqno_reset, NULL)) goto spin_unlock; /* mark broadcast in flood history, update window position diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 204bbe4952a6..02a5caa84127 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -52,6 +52,7 @@ batadv_find_router(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if); int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, - unsigned long *last_reset); + s32 seq_old_max_diff, unsigned long *last_reset, + bool *protection_started); #endif /* _NET_BATMAN_ADV_ROUTING_H_ */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 782fa33ec296..d8b03fd604e0 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -111,7 +111,7 @@ send_skb_err: * host, NULL can be passed as recv_if and no interface alternating is * attempted. * - * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or + * Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or * NET_XMIT_POLICED if the skb is buffered for later transmit. */ int batadv_send_skb_to_orig(struct sk_buff *skb, @@ -165,7 +165,7 @@ out: * @hdr_size: amount of bytes to push at the beginning of the skb * @orig_node: the destination node * - * Returns false if the buffer extension was not possible or true otherwise. + * Return: false if the buffer extension was not possible or true otherwise. */ static bool batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, @@ -196,7 +196,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, * @skb: the skb containing the payload to encapsulate * @orig_node: the destination node * - * Returns false if the payload could not be encapsulated or true otherwise. + * Return: false if the payload could not be encapsulated or true otherwise. */ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb, struct batadv_orig_node *orig_node) @@ -211,10 +211,10 @@ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb, * unicast 4addr header * @bat_priv: the bat priv with all the soft interface information * @skb: the skb containing the payload to encapsulate - * @orig_node: the destination node + * @orig: the destination node * @packet_subtype: the unicast 4addr packet subtype to use * - * Returns false if the payload could not be encapsulated or true otherwise. + * Return: false if the payload could not be encapsulated or true otherwise. */ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -265,7 +265,7 @@ out: * as packet_type. Then send this frame to the given orig_node and release a * reference to this orig_node. * - * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. + * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, @@ -339,7 +339,7 @@ out: * BATADV_UNICAST_4ADDR was supplied as packet_type. Then send this frame * to the according destination node. * - * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. + * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, @@ -373,7 +373,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, * Look up the currently selected gateway. Wrap the given skb into a batman-adv * unicast header and send this frame to this gateway node. * - * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. + * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) @@ -430,14 +430,19 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, send_time); } -/* add a broadcast packet to the queue and setup timers. broadcast packets - * are sent multiple times to increase probability for being received. +/** + * batadv_add_bcast_packet_to_list - queue broadcast packet for multiple sends + * @bat_priv: the bat priv with all the soft interface information + * @skb: broadcast packet to add + * @delay: number of jiffies to wait before sending * - * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on - * errors. + * add a broadcast packet to the queue and setup timers. broadcast packets + * are sent multiple times to increase probability for being received. * * The skb is not consumed, so the caller should make sure that the * skb is freed. + * + * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors. */ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 82059f259e46..7ff95cada2e7 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -69,7 +69,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, * header via the translation table. Wrap the given skb into a batman-adv * unicast header. Then send this frame to the according destination node. * - * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. + * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, struct sk_buff *skb, u8 *dst_hint, @@ -92,7 +92,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, * unicast-4addr header. Then send this frame to the according destination * node. * - * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. + * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv, struct sk_buff *skb, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index ac4d08de5df4..d4490ff75edd 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -30,6 +30,7 @@ #include <linux/if_vlan.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -478,22 +479,34 @@ out: } /** + * batadv_softif_vlan_release - release vlan from lists and queue for free after + * rcu grace period + * @ref: kref pointer of the vlan object + */ +static void batadv_softif_vlan_release(struct kref *ref) +{ + struct batadv_softif_vlan *vlan; + + vlan = container_of(ref, struct batadv_softif_vlan, refcount); + + spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock); + hlist_del_rcu(&vlan->list); + spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock); + + kfree_rcu(vlan, rcu); +} + +/** * batadv_softif_vlan_free_ref - decrease the vlan object refcounter and - * possibly free it - * @softif_vlan: the vlan object to release + * possibly release it + * @vlan: the vlan object to release */ void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan) { if (!vlan) return; - if (atomic_dec_and_test(&vlan->refcount)) { - spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock); - hlist_del_rcu(&vlan->list); - spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock); - - kfree_rcu(vlan, rcu); - } + kref_put(&vlan->refcount, batadv_softif_vlan_release); } /** @@ -501,7 +514,7 @@ void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan) * @bat_priv: the bat priv with all the soft interface information * @vid: the identifier of the vlan object to retrieve * - * Returns the private data of the vlan matching the vid passed as argument or + * Return: the private data of the vlan matching the vid passed as argument or * NULL otherwise. The refcounter of the returned object is incremented by 1. */ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, @@ -514,7 +527,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, if (vlan_tmp->vid != vid) continue; - if (!atomic_inc_not_zero(&vlan_tmp->refcount)) + if (!kref_get_unless_zero(&vlan_tmp->refcount)) continue; vlan = vlan_tmp; @@ -530,7 +543,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier * - * Returns 0 on success, a negative error otherwise. + * Return: 0 on success, a negative error otherwise. */ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) { @@ -549,7 +562,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) vlan->bat_priv = bat_priv; vlan->vid = vid; - atomic_set(&vlan->refcount, 1); + kref_init(&vlan->refcount); atomic_set(&vlan->ap_isolation, 0); @@ -594,12 +607,13 @@ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv, /** * batadv_interface_add_vid - ndo_add_vid API implementation * @dev: the netdev of the mesh interface + * @proto: protocol of the the vlan id * @vid: identifier of the new vlan * * Set up all the internal structures for handling the new vlan on top of the * mesh interface * - * Returns 0 on success or a negative error code in case of failure. + * Return: 0 on success or a negative error code in case of failure. */ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, unsigned short vid) @@ -651,12 +665,13 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, /** * batadv_interface_kill_vid - ndo_kill_vid API implementation * @dev: the netdev of the mesh interface + * @proto: protocol of the the vlan id * @vid: identifier of the deleted vlan * * Destroy all the internal structures used to handle the vlan identified by vid * on top of the mesh interface * - * Returns 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q + * Return: 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q * or -ENOENT if the specified vlan id wasn't registered. */ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, @@ -745,7 +760,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work) * batadv_softif_init_late - late stage initialization of soft interface * @dev: registered network device to modify * - * Returns error code on failures + * Return: error code on failures */ static int batadv_softif_init_late(struct net_device *dev) { @@ -847,7 +862,7 @@ free_bat_counters: * @dev: batadv_soft_interface used as master interface * @slave_dev: net_device which should become the slave interface * - * Return 0 if successful or error otherwise. + * Return: 0 if successful or error otherwise. */ static int batadv_softif_slave_add(struct net_device *dev, struct net_device *slave_dev) @@ -872,7 +887,7 @@ out: * @dev: batadv_soft_interface used as master interface * @slave_dev: net_device which should be removed from the master interface * - * Return 0 if successful or error otherwise. + * Return: 0 if successful or error otherwise. */ static int batadv_softif_slave_del(struct net_device *dev, struct net_device *slave_dev) diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 8e82176f40b1..d17cfbacf809 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index fe87777fda8a..ab4382ba3855 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -25,6 +25,7 @@ #include <linux/fs.h> #include <linux/if.h> #include <linux/if_vlan.h> +#include <linux/kref.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/printk.h> @@ -64,7 +65,7 @@ static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj) * batadv_vlan_kobj_to_batpriv - convert a vlan kobj in the associated batpriv * @obj: kobject to covert * - * Returns the associated batadv_priv struct. + * Return: the associated batadv_priv struct. */ static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj) { @@ -82,9 +83,10 @@ static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj) /** * batadv_kobj_to_vlan - convert a kobj in the associated softif_vlan struct + * @bat_priv: the bat priv with all the soft interface information * @obj: kobject to covert * - * Returns the associated softif_vlan struct if found, NULL otherwise. + * Return: the associated softif_vlan struct if found, NULL otherwise. */ static struct batadv_softif_vlan * batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj) @@ -96,7 +98,7 @@ batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj) if (vlan_tmp->kobj != obj) continue; - if (!atomic_inc_not_zero(&vlan_tmp->refcount)) + if (!kref_get_unless_zero(&vlan_tmp->refcount)) continue; vlan = vlan_tmp; @@ -491,7 +493,7 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj, * @attr: the batman-adv attribute the user is interacting with * @buff: the buffer that will contain the data to send back to the user * - * Returns the number of bytes written into 'buff' on success or a negative + * Return: the number of bytes written into 'buff' on success or a negative * error code in case of failure */ static ssize_t batadv_show_isolation_mark(struct kobject *kobj, @@ -511,7 +513,7 @@ static ssize_t batadv_show_isolation_mark(struct kobject *kobj, * @buff: the buffer containing the user data * @count: number of bytes in the buffer * - * Returns 'count' on success or a negative error code in case of failure + * Return: 'count' on success or a negative error code in case of failure */ static ssize_t batadv_store_isolation_mark(struct kobject *kobj, struct attribute *attr, char *buff, @@ -620,9 +622,7 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); -/** - * batadv_vlan_attrs - array of vlan specific sysfs attributes - */ +/* array of vlan specific sysfs attributes */ static struct batadv_attribute *batadv_vlan_attrs[] = { &batadv_attr_vlan_ap_isolation, NULL, @@ -683,7 +683,7 @@ void batadv_sysfs_del_meshif(struct net_device *dev) * @dev: netdev of the mesh interface * @vlan: private data of the newly added VLAN interface * - * Returns 0 on success and -ENOMEM if any of the structure allocations fails. + * Return: 0 on success and -ENOMEM if any of the structure allocations fails. */ int batadv_sysfs_add_vlan(struct net_device *dev, struct batadv_softif_vlan *vlan) diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index 61974428a7af..c76021b4e198 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 0e80fd1461ab..5c7fa02ea57b 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * @@ -31,6 +31,7 @@ #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -68,7 +69,15 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, unsigned short vid, const char *message, bool roaming); -/* returns 1 if they are the same mac addr and vid */ +/** + * batadv_compare_tt - check if two TT entries are the same + * @node: the list element pointer of the first TT entry + * @data2: pointer to the tt_common_entry of the second TT entry + * + * Compare the MAC address and the VLAN ID of the two TT entries and check if + * they are the same TT client. + * Return: 1 if the two TT clients are the same, 0 otherwise + */ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) { const void *data1 = container_of(node, struct batadv_tt_common_entry, @@ -84,7 +93,7 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) * @data: pointer to the tt_common_entry object to map * @size: the size of the hash table * - * Returns the hash index where the object represented by 'data' should be + * Return: the hash index where the object represented by 'data' should be * stored at. */ static inline u32 batadv_choose_tt(const void *data, u32 size) @@ -105,7 +114,7 @@ static inline u32 batadv_choose_tt(const void *data, u32 size) * @addr: the mac address of the client to look for * @vid: VLAN identifier * - * Returns a pointer to the tt_common struct belonging to the searched client if + * Return: a pointer to the tt_common struct belonging to the searched client if * found, NULL otherwise. */ static struct batadv_tt_common_entry * @@ -133,7 +142,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr, if (tt->vid != vid) continue; - if (!atomic_inc_not_zero(&tt->refcount)) + if (!kref_get_unless_zero(&tt->refcount)) continue; tt_tmp = tt; @@ -150,7 +159,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr, * @addr: the mac address of the client to look for * @vid: VLAN identifier * - * Returns a pointer to the corresponding tt_local_entry struct if the client is + * Return: a pointer to the corresponding tt_local_entry struct if the client is * found, NULL otherwise. */ static struct batadv_tt_local_entry * @@ -175,7 +184,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr, * @addr: the mac address of the client to look for * @vid: VLAN identifier * - * Returns a pointer to the corresponding tt_global_entry struct if the client + * Return: a pointer to the corresponding tt_global_entry struct if the client * is found, NULL otherwise. */ static struct batadv_tt_global_entry * @@ -194,34 +203,68 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, return tt_global_entry; } +/** + * batadv_tt_local_entry_release - release tt_local_entry from lists and queue + * for free after rcu grace period + * @ref: kref pointer of the nc_node + */ +static void batadv_tt_local_entry_release(struct kref *ref) +{ + struct batadv_tt_local_entry *tt_local_entry; + + tt_local_entry = container_of(ref, struct batadv_tt_local_entry, + common.refcount); + + kfree_rcu(tt_local_entry, common.rcu); +} + +/** + * batadv_tt_local_entry_free_ref - decrement the tt_local_entry refcounter and + * possibly release it + * @tt_local_entry: tt_local_entry to be free'd + */ static void batadv_tt_local_entry_free_ref(struct batadv_tt_local_entry *tt_local_entry) { - if (atomic_dec_and_test(&tt_local_entry->common.refcount)) - kfree_rcu(tt_local_entry, common.rcu); + kref_put(&tt_local_entry->common.refcount, + batadv_tt_local_entry_release); } /** - * batadv_tt_global_entry_free_ref - decrement the refcounter for a - * tt_global_entry and possibly free it - * @tt_global_entry: the object to free + * batadv_tt_global_entry_release - release tt_global_entry from lists and queue + * for free after rcu grace period + * @ref: kref pointer of the nc_node + */ +static void batadv_tt_global_entry_release(struct kref *ref) +{ + struct batadv_tt_global_entry *tt_global_entry; + + tt_global_entry = container_of(ref, struct batadv_tt_global_entry, + common.refcount); + + batadv_tt_global_del_orig_list(tt_global_entry); + kfree_rcu(tt_global_entry, common.rcu); +} + +/** + * batadv_tt_global_entry_free_ref - decrement the tt_global_entry refcounter + * and possibly release it + * @tt_global_entry: tt_global_entry to be free'd */ static void batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry) { - if (atomic_dec_and_test(&tt_global_entry->common.refcount)) { - batadv_tt_global_del_orig_list(tt_global_entry); - kfree_rcu(tt_global_entry, common.rcu); - } + kref_put(&tt_global_entry->common.refcount, + batadv_tt_global_entry_release); } /** * batadv_tt_global_hash_count - count the number of orig entries - * @hash: hash table containing the tt entries + * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to count entries for * @vid: VLAN identifier * - * Return the number of originators advertising the given address/data + * Return: the number of originators advertising the given address/data * (excluding ourself). */ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, @@ -286,9 +329,9 @@ static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv, } /** - * batadv_tt_global_size_mod - change the size by v of the local table - * identified by vid - * @bat_priv: the bat priv with all the soft interface information + * batadv_tt_global_size_mod - change the size by v of the global table + * for orig_node identified by vid + * @orig_node: the originator for which the table has to be modified * @vid: the VLAN identifier * @v: the amount to sum to the global table size */ @@ -340,22 +383,28 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, /** * batadv_tt_orig_list_entry_release - release tt orig entry from lists and * queue for free after rcu grace period - * @orig_entry: tt orig entry to be free'd + * @ref: kref pointer of the tt orig entry */ -static void -batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry) +static void batadv_tt_orig_list_entry_release(struct kref *ref) { + struct batadv_tt_orig_list_entry *orig_entry; + + orig_entry = container_of(ref, struct batadv_tt_orig_list_entry, + refcount); + batadv_orig_node_free_ref(orig_entry->orig_node); kfree_rcu(orig_entry, rcu); } +/** + * batadv_tt_orig_list_entry_free_ref - decrement the tt orig entry refcounter + * and possibly release it + * @orig_entry: tt orig entry to be free'd + */ static void batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) { - if (!atomic_dec_and_test(&orig_entry->refcount)) - return; - - batadv_tt_orig_list_entry_release(orig_entry); + kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release); } /** @@ -437,7 +486,7 @@ unlock: * batadv_tt_len - compute length in bytes of given number of tt changes * @changes_num: number of tt changes * - * Returns computed length in bytes. + * Return: computed length in bytes. */ static int batadv_tt_len(int changes_num) { @@ -448,7 +497,7 @@ static int batadv_tt_len(int changes_num) * batadv_tt_entries - compute the number of entries fitting in tt_len bytes * @tt_len: available space * - * Returns the number of entries. + * Return: the number of entries. */ static u16 batadv_tt_entries(u16 tt_len) { @@ -460,7 +509,7 @@ static u16 batadv_tt_entries(u16 tt_len) * size when transmitted over the air * @bat_priv: the bat priv with all the soft interface information * - * Returns local translation table size in bytes. + * Return: local translation table size in bytes. */ static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv) { @@ -526,7 +575,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, * @mark: the value contained in the skb->mark field of the received packet (if * any) * - * Returns true if the client was successfully added, false otherwise. + * Return: true if the client was successfully added, false otherwise. */ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, unsigned short vid, int ifindex, u32 mark) @@ -620,7 +669,8 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, tt_local->common.vid = vid; if (batadv_is_wifi_netdev(in_dev)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; - atomic_set(&tt_local->common.refcount, 2); + kref_init(&tt_local->common.refcount); + kref_get(&tt_local->common.refcount); tt_local->last_seen = jiffies; tt_local->common.added_at = tt_local->last_seen; @@ -721,12 +771,11 @@ out: * function reserves the amount of space needed to send the entire global TT * table. In case of success the value is updated with the real amount of * reserved bytes - * Allocate the needed amount of memory for the entire TT TVLV and write its * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data * objects, one per active VLAN served by the originator node. * - * Return the size of the allocated buffer or 0 in case of failure. + * Return: the size of the allocated buffer or 0 in case of failure. */ static u16 batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, @@ -800,7 +849,7 @@ out: * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data * objects, one per active VLAN. * - * Return the size of the allocated buffer or 0 in case of failure. + * Return: the size of the allocated buffer or 0 in case of failure. */ static u16 batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, @@ -1042,7 +1091,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, * @message: message to append to the log on deletion * @roaming: true if the deletion is due to a roaming event * - * Returns the flags assigned to the local entry before being deleted + * Return: the flags assigned to the local entry before being deleted */ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, const char *message, @@ -1242,10 +1291,16 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->tt.changes_list_lock); } -/* retrieves the orig_tt_list_entry belonging to orig_node from the +/** + * batadv_tt_global_orig_entry_find - find a TT orig_list_entry + * @entry: the TT global entry where the orig_list_entry has to be + * extracted from + * @orig_node: the originator for which the orig_list_entry has to be found + * + * retrieve the orig_tt_list_entry belonging to orig_node from the * batadv_tt_global_entry list * - * returns it with an increased refcounter, NULL if not found + * Return: it with an increased refcounter, NULL if not found */ static struct batadv_tt_orig_list_entry * batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, @@ -1259,7 +1314,7 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, hlist_for_each_entry_rcu(tmp_orig_entry, head, list) { if (tmp_orig_entry->orig_node != orig_node) continue; - if (!atomic_inc_not_zero(&tmp_orig_entry->refcount)) + if (!kref_get_unless_zero(&tmp_orig_entry->refcount)) continue; orig_entry = tmp_orig_entry; @@ -1270,8 +1325,15 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, return orig_entry; } -/* find out if an orig_node is already in the list of a tt_global_entry. - * returns true if found, false otherwise +/** + * batadv_tt_global_entry_has_orig - check if a TT global entry is also handled + * by a given originator + * @entry: the TT global entry to check + * @orig_node: the originator to search in the list + * + * find out if an orig_node is already in the list of a tt_global_entry. + * + * Return: true if found, false otherwise */ static bool batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, @@ -1309,11 +1371,12 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, goto out; INIT_HLIST_NODE(&orig_entry->list); - atomic_inc(&orig_node->refcount); + kref_get(&orig_node->refcount); batadv_tt_global_size_inc(orig_node, tt_global->common.vid); orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; - atomic_set(&orig_entry->refcount, 2); + kref_init(&orig_entry->refcount); + kref_get(&orig_entry->refcount); spin_lock_bh(&tt_global->list_lock); hlist_add_head_rcu(&orig_entry->list, @@ -1343,7 +1406,7 @@ out: * * The caller must hold orig_node refcount. * - * Return true if the new entry has been added, false otherwise + * Return: true if the new entry has been added, false otherwise */ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, @@ -1389,7 +1452,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, */ if (flags & BATADV_TT_CLIENT_ROAM) tt_global_entry->roam_at = jiffies; - atomic_set(&common->refcount, 2); + kref_init(&common->refcount); + kref_get(&common->refcount); common->added_at = jiffies; INIT_HLIST_HEAD(&tt_global_entry->orig_list); @@ -1501,7 +1565,7 @@ out: * @tt_global_entry: global translation table entry to be analyzed * * This functon assumes the caller holds rcu_read_lock(). - * Returns best originator list entry or NULL on errors. + * Return: best originator list entry or NULL on errors. */ static struct batadv_tt_orig_list_entry * batadv_transtable_best_orig(struct batadv_priv *bat_priv, @@ -2031,7 +2095,7 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry, * @addr: mac address of the destination client * @vid: VLAN identifier * - * Returns a pointer to the originator that was selected as destination in the + * Return: a pointer to the originator that was selected as destination in the * mesh for contacting the client 'addr', NULL otherwise. * In case of multiple originators serving the same client, the function returns * the best one (best in terms of metric towards the destination node). @@ -2071,7 +2135,7 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, /* found anything? */ if (best_entry) orig_node = best_entry->orig_node; - if (orig_node && !atomic_inc_not_zero(&orig_node->refcount)) + if (orig_node && !kref_get_unless_zero(&orig_node->refcount)) orig_node = NULL; rcu_read_unlock(); @@ -2106,7 +2170,7 @@ out: * because the XOR operation can combine them all while trying to reduce the * noise as much as possible. * - * Returns the checksum of the global table of a given originator. + * Return: the checksum of the global table of a given originator. */ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, @@ -2183,7 +2247,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, * For details about the computation, please refer to the documentation for * batadv_tt_global_crc(). * - * Returns the checksum of the local table + * Return: the checksum of the local table */ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, unsigned short vid) @@ -2289,7 +2353,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv) * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node this request is being issued for * - * Returns the pointer to the new tt_req_node struct if no request + * Return: the pointer to the new tt_req_node struct if no request * has already been issued for this orig_node, NULL otherwise. */ static struct batadv_tt_req_node * @@ -2324,7 +2388,7 @@ unlock: * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype * - * Returns 1 if the entry is a valid, 0 otherwise. + * Return: 1 if the entry is a valid, 0 otherwise. */ static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) { @@ -2408,9 +2472,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, * @orig_node: originator for which the CRCs have to be checked * @tt_vlan: pointer to the first tvlv VLAN entry * @num_vlan: number of tvlv VLAN entries - * @create: if true, create VLAN objects if not found * - * Return true if all the received CRCs match the locally stored ones, false + * Return: true if all the received CRCs match the locally stored ones, false * otherwise */ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, @@ -2513,6 +2576,8 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, * @num_vlan: number of tvlv VLAN entries * @full_table: ask for the entire translation table if true, while only for the * last TT diff otherwise + * + * Return: true if the TT Request was sent, false otherwise */ static int batadv_send_tt_request(struct batadv_priv *bat_priv, struct batadv_orig_node *dst_orig_node, @@ -2593,7 +2658,7 @@ out: * @req_src: mac address of tt request sender * @req_dst: mac address of tt request recipient * - * Returns true if tt request reply was sent, false otherwise. + * Return: true if tt request reply was sent, false otherwise. */ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, @@ -2725,7 +2790,7 @@ out: * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * - * Returns true if tt request reply was sent, false otherwise. + * Return: true if tt request reply was sent, false otherwise. */ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, @@ -2843,7 +2908,7 @@ out: * @req_src: mac address of tt request sender * @req_dst: mac address of tt request recipient * - * Returns true if tt request reply was sent, false otherwise. + * Return: true if tt request reply was sent, false otherwise. */ static bool batadv_send_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, @@ -2938,7 +3003,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, * @addr: the mac address of the client to check * @vid: VLAN identifier * - * Returns true if the client is served by this node, false otherwise. + * Return: true if the client is served by this node, false otherwise. */ bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) @@ -3055,11 +3120,16 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->tt.roam_list_lock); } -/* This function checks whether the client already reached the +/** + * batadv_tt_check_roam_count - check if a client has roamed too frequently + * @bat_priv: the bat priv with all the soft interface information + * @client: mac address of the roaming client + * + * This function checks whether the client already reached the * maximum number of possible roaming phases. In this case the ROAMING_ADV * will not be sent. * - * returns true if the ROAMING_ADV can be sent, false otherwise + * Return: true if the ROAMING_ADV can be sent, false otherwise */ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) { @@ -3371,13 +3441,12 @@ out: * batadv_tt_update_orig - update global translation table with new tt * information received via ogms * @bat_priv: the bat priv with all the soft interface information - * @orig: the orig_node of the ogm - * @tt_vlan: pointer to the first tvlv VLAN entry + * @orig_node: the orig_node of the ogm + * @tt_buff: pointer to the first tvlv VLAN entry * @tt_num_vlan: number of tvlv VLAN entries * @tt_change: pointer to the first entry in the TT buffer * @tt_num_changes: number of tt changes inside the tt buffer * @ttvn: translation table version number of this changeset - * @tt_crc: crc32 checksum of orig node's translation table */ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, @@ -3459,7 +3528,7 @@ request_table: * @addr: the mac address of the client to check * @vid: VLAN identifier * - * Returns true if we know that the client has moved from its old originator + * Return: true if we know that the client has moved from its old originator * to another one. This entry is still kept for consistency purposes and will be * deleted later by a DEL or because of timeout */ @@ -3485,7 +3554,7 @@ out: * @addr: the mac address of the local client to query * @vid: VLAN identifier * - * Returns true if the local client is known to be roaming (it is not served by + * Return: true if the local client is known to be roaming (it is not served by * this node anymore) or not. If yes, the client is still present in the table * to keep the latter consistent with the node TTVN */ @@ -3614,7 +3683,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, * @tvlv_value: tvlv buffer containing the tt data * @tvlv_value_len: tvlv buffer length * - * Returns NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS + * Return: NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS * otherwise. */ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, @@ -3695,7 +3764,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, * @tvlv_value: tvlv buffer containing the tt data * @tvlv_value_len: tvlv buffer length * - * Returns NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS + * Return: NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS * otherwise. */ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, @@ -3741,7 +3810,7 @@ out: * batadv_tt_init - initialise the translation table internals * @bat_priv: the bat priv with all the soft interface information * - * Return 0 on success or negative error number in case of failure. + * Return: 0 on success or negative error number in case of failure. */ int batadv_tt_init(struct batadv_priv *bat_priv) { @@ -3779,7 +3848,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv) * @addr: the mac address of the client * @vid: the identifier of the VLAN where this client is connected * - * Returns true if the client is marked with the TT_CLIENT_ISOLA flag, false + * Return: true if the client is marked with the TT_CLIENT_ISOLA flag, false * otherwise */ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index abd8e116e5fb..7c7e2c006bfe 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 3437b667a2cd..612de23178e6 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -25,6 +25,7 @@ #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/if_ether.h> +#include <linux/kref.h> #include <linux/netdevice.h> #include <linux/sched.h> /* for linux/wait.h */ #include <linux/spinlock.h> @@ -73,7 +74,7 @@ enum batadv_dhcp_recipient { #define BATADV_TT_SYNC_MASK 0x00F0 /** - * struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data + * struct batadv_hard_iface_bat_iv - per hard-interface B.A.T.M.A.N. IV data * @ogm_buff: buffer holding the OGM packet * @ogm_buff_len: length of the OGM packet buffer * @ogm_seqno: OGM sequence number - used to identify each OGM @@ -97,8 +98,8 @@ struct batadv_hard_iface_bat_iv { * batman-adv for this interface * @soft_iface: the batman-adv interface which uses this network interface * @rcu: struct used for freeing in an RCU-safe manner - * @bat_iv: BATMAN IV specific per hard interface data - * @cleanup_work: work queue callback item for hard interface deinit + * @bat_iv: per hard-interface B.A.T.M.A.N. IV data + * @cleanup_work: work queue callback item for hard-interface deinit * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs * @neigh_list: list of unique single hop neighbors via this interface * @neigh_list_lock: lock protecting neigh_list @@ -110,7 +111,7 @@ struct batadv_hard_iface { struct net_device *net_dev; u8 num_bcasts; struct kobject *hardif_obj; - atomic_t refcount; + struct kref refcount; struct packet_type batman_adv_ptype; struct net_device *soft_iface; struct rcu_head rcu; @@ -125,7 +126,7 @@ struct batadv_hard_iface { /** * struct batadv_orig_ifinfo - originator info per outgoing interface * @list: list node for orig_node::ifinfo_list - * @if_outgoing: pointer to outgoing hard interface + * @if_outgoing: pointer to outgoing hard-interface * @router: router that should be used to reach this originator * @last_real_seqno: last and best known sequence number * @last_ttl: ttl of last received packet @@ -140,7 +141,7 @@ struct batadv_orig_ifinfo { u32 last_real_seqno; u8 last_ttl; unsigned long batman_seqno_reset; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; @@ -196,13 +197,13 @@ struct batadv_orig_node_vlan { unsigned short vid; struct batadv_vlan_tt tt; struct hlist_node list; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; /** * struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members - * @bcast_own: set of bitfields (one per hard interface) where each one counts + * @bcast_own: set of bitfields (one per hard-interface) where each one counts * the number of our OGMs this orig_node rebroadcasted "back" to us (relative * to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long. * @bcast_own_sum: sum of bcast_own @@ -298,7 +299,7 @@ struct batadv_orig_node { struct batadv_priv *bat_priv; /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ spinlock_t bcast_seqno_lock; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; #ifdef CONFIG_BATMAN_ADV_NC struct list_head in_coding_list; @@ -341,15 +342,16 @@ struct batadv_gw_node { struct batadv_orig_node *orig_node; u32 bandwidth_down; u32 bandwidth_up; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; /** - * batadv_hardif_neigh_node - unique neighbor per hard interface + * struct batadv_hardif_neigh_node - unique neighbor per hard-interface * @list: list node for batadv_hard_iface::neigh_list * @addr: the MAC address of the neighboring interface - * @if_incoming: pointer to incoming hard interface + * @if_incoming: pointer to incoming hard-interface + * @last_seen: when last packet via this neighbor was received * @refcount: number of contexts the object is used * @rcu: struct used for freeing in a RCU-safe manner */ @@ -358,7 +360,7 @@ struct batadv_hardif_neigh_node { u8 addr[ETH_ALEN]; struct batadv_hard_iface *if_incoming; unsigned long last_seen; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; @@ -369,7 +371,7 @@ struct batadv_hardif_neigh_node { * @addr: the MAC address of the neighboring interface * @ifinfo_list: list for routing metrics per outgoing interface * @ifinfo_lock: lock protecting private ifinfo members and list - * @if_incoming: pointer to incoming hard interface + * @if_incoming: pointer to incoming hard-interface * @last_seen: when last packet via this neighbor was received * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner @@ -382,13 +384,13 @@ struct batadv_neigh_node { spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ struct batadv_hard_iface *if_incoming; unsigned long last_seen; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; /** * struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing - * interface for BATMAN IV + * interface for B.A.T.M.A.N. IV * @tq_recv: ring buffer of received TQ values from this neigh node * @tq_index: ring buffer index * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) @@ -407,7 +409,7 @@ struct batadv_neigh_ifinfo_bat_iv { /** * struct batadv_neigh_ifinfo - neighbor information per outgoing interface * @list: list node for batadv_neigh_node::ifinfo_list - * @if_outgoing: pointer to outgoing hard interface + * @if_outgoing: pointer to outgoing hard-interface * @bat_iv: B.A.T.M.A.N. IV private structure * @last_ttl: last received ttl from this neigh node * @refcount: number of contexts the object is used @@ -418,7 +420,7 @@ struct batadv_neigh_ifinfo { struct batadv_hard_iface *if_outgoing; struct batadv_neigh_ifinfo_bat_iv bat_iv; u8 last_ttl; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; @@ -744,7 +746,7 @@ struct batadv_softif_vlan { atomic_t ap_isolation; /* boolean */ struct batadv_vlan_tt tt; struct hlist_node list; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; @@ -771,6 +773,9 @@ struct batadv_softif_vlan { * @orig_interval: OGM broadcast interval in milliseconds * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop * @log_level: configured log level (see batadv_dbg_level) + * @isolation_mark: the skb->mark value used to match packets for AP isolation + * @isolation_mark_mask: bitmask identifying the bits in skb->mark to be used + * for the isolation mark * @bcast_seqno: last sent broadcast packet sequence number * @bcast_queue_left: number of remaining buffered broadcast packet slots * @batman_queue_left: number of remaining OGM packet slots @@ -783,8 +788,8 @@ struct batadv_softif_vlan { * @forw_bat_list_lock: lock protecting forw_bat_list * @forw_bcast_list_lock: lock protecting forw_bcast_list * @orig_work: work queue callback item for orig node purging - * @cleanup_work: work queue callback item for soft interface deinit - * @primary_if: one of the hard interfaces assigned to this mesh interface + * @cleanup_work: work queue callback item for soft-interface deinit + * @primary_if: one of the hard-interfaces assigned to this mesh interface * becomes the primary interface * @bat_algo_ops: routing algorithm used by this mesh interface * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top @@ -925,7 +930,7 @@ struct batadv_bla_backbone_gw { atomic_t request_sent; u16 crc; spinlock_t crc_lock; /* protects crc */ - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; @@ -946,7 +951,7 @@ struct batadv_bla_claim { unsigned long lasttime; struct hlist_node hash_entry; struct rcu_head rcu; - atomic_t refcount; + struct kref refcount; }; #endif @@ -967,7 +972,7 @@ struct batadv_tt_common_entry { struct hlist_node hash_entry; u16 flags; unsigned long added_at; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; @@ -1009,7 +1014,7 @@ struct batadv_tt_orig_list_entry { struct batadv_orig_node *orig_node; u8 ttvn; struct hlist_node list; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; @@ -1062,7 +1067,7 @@ struct batadv_tt_roam_node { struct batadv_nc_node { struct list_head list; u8 addr[ETH_ALEN]; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; struct batadv_orig_node *orig_node; unsigned long last_seen; @@ -1082,7 +1087,7 @@ struct batadv_nc_node { struct batadv_nc_path { struct hlist_node hash_entry; struct rcu_head rcu; - atomic_t refcount; + struct kref refcount; struct list_head packet_list; spinlock_t packet_list_lock; /* Protects packet_list */ u8 next_hop[ETH_ALEN]; @@ -1225,7 +1230,7 @@ struct batadv_dat_entry { unsigned short vid; unsigned long last_update; struct hlist_node hash_entry; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; @@ -1261,7 +1266,7 @@ struct batadv_dat_candidate { struct batadv_tvlv_container { struct hlist_node list; struct batadv_tvlv_hdr tvlv_hdr; - atomic_t refcount; + struct kref refcount; }; /** @@ -1288,7 +1293,7 @@ struct batadv_tvlv_handler { u8 type; u8 version; u8 flags; - atomic_t refcount; + struct kref refcount; struct rcu_head rcu; }; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 74c278e00225..73786e2fe065 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -41,6 +41,14 @@ fail: return -EMSGSIZE; } +static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags) +{ + e->state = flags & MDB_PG_FLAGS_PERMANENT; + e->flags = 0; + if (flags & MDB_PG_FLAGS_OFFLOAD) + e->flags |= MDB_FLAGS_OFFLOAD; +} + static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev) { @@ -80,26 +88,41 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; pp = &p->next) { + struct nlattr *nest_ent; + struct br_mdb_entry e; + port = p->port; - if (port) { - struct br_mdb_entry e; - memset(&e, 0, sizeof(e)); - e.ifindex = port->dev->ifindex; - e.state = p->state; - e.vid = p->addr.vid; - if (p->addr.proto == htons(ETH_P_IP)) - e.addr.u.ip4 = p->addr.u.ip4; + if (!port) + continue; + + memset(&e, 0, sizeof(e)); + e.ifindex = port->dev->ifindex; + e.vid = p->addr.vid; + __mdb_entry_fill_flags(&e, p->flags); + if (p->addr.proto == htons(ETH_P_IP)) + e.addr.u.ip4 = p->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - if (p->addr.proto == htons(ETH_P_IPV6)) - e.addr.u.ip6 = p->addr.u.ip6; + if (p->addr.proto == htons(ETH_P_IPV6)) + e.addr.u.ip6 = p->addr.u.ip6; #endif - e.addr.proto = p->addr.proto; - if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) { - nla_nest_cancel(skb, nest2); - err = -EMSGSIZE; - goto out; - } + e.addr.proto = p->addr.proto; + nest_ent = nla_nest_start(skb, + MDBA_MDB_ENTRY_INFO); + if (!nest_ent) { + nla_nest_cancel(skb, nest2); + err = -EMSGSIZE; + goto out; } + if (nla_put_nohdr(skb, sizeof(e), &e) || + nla_put_u32(skb, + MDBA_MDB_EATTR_TIMER, + br_timer_value(&p->timer))) { + nla_nest_cancel(skb, nest_ent); + nla_nest_cancel(skb, nest2); + err = -EMSGSIZE; + goto out; + } + nla_nest_end(skb, nest_ent); } nla_nest_end(skb, nest2); skip: @@ -209,7 +232,7 @@ static inline size_t rtnl_mdb_nlmsg_size(void) } static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, - int type) + int type, struct net_bridge_port_group *pg) { struct switchdev_obj_port_mdb mdb = { .obj = { @@ -232,10 +255,13 @@ static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, #endif mdb.obj.orig_dev = port_dev; - if (port_dev && type == RTM_NEWMDB) - switchdev_port_obj_add(port_dev, &mdb.obj); - else if (port_dev && type == RTM_DELMDB) + if (port_dev && type == RTM_NEWMDB) { + err = switchdev_port_obj_add(port_dev, &mdb.obj); + if (!err && pg) + pg->flags |= MDB_PG_FLAGS_OFFLOAD; + } else if (port_dev && type == RTM_DELMDB) { switchdev_port_obj_del(port_dev, &mdb.obj); + } skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); if (!skb) @@ -253,21 +279,21 @@ errout: rtnl_set_sk_err(net, RTNLGRP_MDB, err); } -void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, - struct br_ip *group, int type, u8 state) +void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg, + int type) { struct br_mdb_entry entry; memset(&entry, 0, sizeof(entry)); - entry.ifindex = port->dev->ifindex; - entry.addr.proto = group->proto; - entry.addr.u.ip4 = group->u.ip4; + entry.ifindex = pg->port->dev->ifindex; + entry.addr.proto = pg->addr.proto; + entry.addr.u.ip4 = pg->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - entry.addr.u.ip6 = group->u.ip6; + entry.addr.u.ip6 = pg->addr.u.ip6; #endif - entry.state = state; - entry.vid = group->vid; - __br_mdb_notify(dev, &entry, type); + entry.vid = pg->addr.vid; + __mdb_entry_fill_flags(&entry, pg->flags); + __br_mdb_notify(dev, &entry, type, pg); } static int nlmsg_populate_rtr_fill(struct sk_buff *skb, @@ -412,7 +438,8 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, } static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group, unsigned char state) + struct br_ip *group, unsigned char state, + struct net_bridge_port_group **pg) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -443,6 +470,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); + *pg = p; if (state == MDB_TEMPORARY) mod_timer(&p->timer, now + br->multicast_membership_interval); @@ -450,7 +478,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, } static int __br_mdb_add(struct net *net, struct net_bridge *br, - struct br_mdb_entry *entry) + struct br_mdb_entry *entry, + struct net_bridge_port_group **pg) { struct br_ip ip; struct net_device *dev; @@ -479,7 +508,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, #endif spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, &ip, entry->state); + ret = br_mdb_add_group(br, p, &ip, entry->state, pg); spin_unlock_bh(&br->multicast_lock); return ret; } @@ -487,6 +516,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); + struct net_bridge_port_group *pg; struct net_bridge_vlan_group *vg; struct net_device *dev, *pdev; struct br_mdb_entry *entry; @@ -516,15 +546,15 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) if (br_vlan_enabled(br) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; - err = __br_mdb_add(net, br, entry); + err = __br_mdb_add(net, br, entry, &pg); if (err) break; - __br_mdb_notify(dev, entry, RTM_NEWMDB); + __br_mdb_notify(dev, entry, RTM_NEWMDB, pg); } } else { - err = __br_mdb_add(net, br, entry); + err = __br_mdb_add(net, br, entry, &pg); if (!err) - __br_mdb_notify(dev, entry, RTM_NEWMDB); + __br_mdb_notify(dev, entry, RTM_NEWMDB, pg); } return err; @@ -568,7 +598,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (p->port->state == BR_STATE_DISABLED) goto unlock; - entry->state = p->state; + __mdb_entry_fill_flags(entry, p->flags); rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); @@ -620,12 +650,12 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) entry->vid = v->vid; err = __br_mdb_del(br, entry); if (!err) - __br_mdb_notify(dev, entry, RTM_DELMDB); + __br_mdb_notify(dev, entry, RTM_DELMDB, NULL); } } else { err = __br_mdb_del(br, entry); if (!err) - __br_mdb_notify(dev, entry, RTM_DELMDB); + __br_mdb_notify(dev, entry, RTM_DELMDB, NULL); } return err; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 03661d97463c..8b6e4249be1b 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -283,8 +283,7 @@ static void br_multicast_del_pg(struct net_bridge *br, rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); - br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB, - p->state); + br_mdb_notify(br->dev, p, RTM_DELMDB); call_rcu_bh(&p->rcu, br_multicast_free_pg); if (!mp->ports && !mp->mglist && @@ -304,7 +303,7 @@ static void br_multicast_port_group_expired(unsigned long data) spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || timer_pending(&pg->timer) || - hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT) + hlist_unhashed(&pg->mglist) || pg->flags & MDB_PG_FLAGS_PERMANENT) goto out; br_multicast_del_pg(br, pg); @@ -649,7 +648,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, struct net_bridge_port_group __rcu *next, - unsigned char state) + unsigned char flags) { struct net_bridge_port_group *p; @@ -659,7 +658,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->addr = *group; p->port = port; - p->state = state; + p->flags = flags; rcu_assign_pointer(p->next, next); hlist_add_head(&p->mglist, &port->mglist); setup_timer(&p->timer, br_multicast_port_group_expired, @@ -702,11 +701,11 @@ static int br_multicast_add_group(struct net_bridge *br, break; } - p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY); + p = br_multicast_new_port_group(port, group, *pp, 0); if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB, MDB_TEMPORARY); + br_mdb_notify(br->dev, p, RTM_NEWMDB); found: mod_timer(&p->timer, now + br->multicast_membership_interval); @@ -975,7 +974,7 @@ void br_multicast_disable_port(struct net_bridge_port *port) spin_lock(&br->multicast_lock); hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) - if (pg->state == MDB_TEMPORARY) + if (!(pg->flags & MDB_PG_FLAGS_PERMANENT)) br_multicast_del_pg(br, pg); if (!hlist_unhashed(&port->rlist)) { @@ -1453,8 +1452,7 @@ br_multicast_leave_group(struct net_bridge *br, hlist_del_init(&p->mglist); del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - br_mdb_notify(br->dev, port, group, RTM_DELMDB, - p->state); + br_mdb_notify(br->dev, p, RTM_DELMDB); if (!mp->ports && !mp->mglist && netif_running(br->dev)) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 40197ff8918a..e9c635eae24d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -598,7 +598,6 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state) return -ENETDOWN; br_set_state(p, state); - br_log_state(p); br_port_state_selection(p->br); return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 216018c76018..1b5d145dfcbf 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -150,6 +150,9 @@ struct net_bridge_fdb_entry struct rcu_head rcu; }; +#define MDB_PG_FLAGS_PERMANENT BIT(0) +#define MDB_PG_FLAGS_OFFLOAD BIT(1) + struct net_bridge_port_group { struct net_bridge_port *port; struct net_bridge_port_group __rcu *next; @@ -157,7 +160,7 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct br_ip addr; - unsigned char state; + unsigned char flags; }; struct net_bridge_mdb_entry @@ -554,11 +557,11 @@ void br_multicast_free_pg(struct rcu_head *head); struct net_bridge_port_group * br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, struct net_bridge_port_group __rcu *next, - unsigned char state); + unsigned char flags); void br_mdb_init(void); void br_mdb_uninit(void); -void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, - struct br_ip *group, int type, u8 state); +void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg, + int type); void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, int type); @@ -897,7 +900,6 @@ static inline void br_nf_core_fini(void) {} #endif /* br_stp.c */ -void br_log_state(const struct net_bridge_port *p); void br_set_state(struct net_bridge_port *p, unsigned int state); struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); void br_init_port(struct net_bridge_port *p); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index b3cca126b103..c22816a0b1b1 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -30,13 +30,6 @@ static const char *const br_port_state_names[] = { [BR_STATE_BLOCKING] = "blocking", }; -void br_log_state(const struct net_bridge_port *p) -{ - br_info(p->br, "port %u(%s) entered %s state\n", - (unsigned int) p->port_no, p->dev->name, - br_port_state_names[p->state]); -} - void br_set_state(struct net_bridge_port *p, unsigned int state) { struct switchdev_attr attr = { @@ -52,6 +45,10 @@ void br_set_state(struct net_bridge_port *p, unsigned int state) if (err && err != -EOPNOTSUPP) br_warn(p->br, "error setting offload STP state on port %u(%s)\n", (unsigned int) p->port_no, p->dev->name); + else + br_info(p->br, "port %u(%s) entered %s state\n", + (unsigned int) p->port_no, p->dev->name, + br_port_state_names[p->state]); } /* called under bridge lock */ @@ -126,7 +123,6 @@ static void br_root_port_block(const struct net_bridge *br, (unsigned int) p->port_no, p->dev->name); br_set_state(p, BR_STATE_LISTENING); - br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); if (br->forward_delay > 0) @@ -407,7 +403,6 @@ static void br_make_blocking(struct net_bridge_port *p) br_topology_change_detection(p->br); br_set_state(p, BR_STATE_BLOCKING); - br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); del_timer(&p->forward_delay_timer); @@ -431,7 +426,6 @@ static void br_make_forwarding(struct net_bridge_port *p) else br_set_state(p, BR_STATE_LEARNING); - br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); if (br->forward_delay != 0) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a31ac6ad76a2..984d46263007 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -102,7 +102,6 @@ void br_stp_enable_port(struct net_bridge_port *p) { br_init_port(p); br_port_state_selection(p->br); - br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); } @@ -118,7 +117,6 @@ void br_stp_disable_port(struct net_bridge_port *p) p->topology_change_ack = 0; p->config_pending = 0; - br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); del_timer(&p->message_age_timer); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 5f0f5af0ec35..da058b85aa22 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -98,7 +98,6 @@ static void br_forward_delay_timer_expired(unsigned long arg) br_topology_change_detection(br); netif_carrier_on(br->dev); } - br_log_state(p); rcu_read_lock(); br_ifinfo_notify(RTM_NEWLINK, p); rcu_read_unlock(); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 85e43af4af7a..9309bb4f2a5b 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -955,6 +955,13 @@ err_rhtbl: */ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { + struct switchdev_obj_port_vlan v = { + .obj.orig_dev = port->dev, + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .flags = flags, + .vid_begin = vid, + .vid_end = vid, + }; struct net_bridge_vlan *vlan; int ret; @@ -962,6 +969,10 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) vlan = br_vlan_find(nbp_vlan_group(port), vid); if (vlan) { + /* Pass the flags to the hardware bridge */ + ret = switchdev_port_obj_add(port->dev, &v.obj); + if (ret && ret != -EOPNOTSUPP) + return ret; __vlan_add_flags(vlan, flags); return 0; } diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index fdba3d9fbff3..adc8d7221dbb 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -48,6 +48,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _oth; + struct net *net = sock_net(oldskb->sk); if (!nft_bridge_iphdr_validate(oldskb)) return; @@ -63,9 +64,9 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, - sysctl_ip_default_ttl); + net->ipv4.sysctl_ip_default_ttl); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); - niph->ttl = sysctl_ip_default_ttl; + niph->ttl = net->ipv4.sysctl_ip_default_ttl; niph->tot_len = htons(nskb->len); ip_send_check(niph); @@ -85,6 +86,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, void *payload; __wsum csum; u8 proto; + struct net *net = sock_net(oldskb->sk); if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) return; @@ -119,7 +121,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, - sysctl_ip_default_ttl); + net->ipv4.sysctl_ip_default_ttl); skb_reset_transport_header(nskb); icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr)); diff --git a/net/core/Makefile b/net/core/Makefile index 0b835de04de3..7a8fb8aef992 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o obj-$(CONFIG_LWTUNNEL) += lwtunnel.o +obj-$(CONFIG_DST_CACHE) += dst_cache.o diff --git a/net/core/dev.c b/net/core/dev.c index 0ef061b2badc..edb7179bc051 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3829,8 +3829,14 @@ static void net_tx_action(struct softirq_action *h) trace_consume_skb(skb); else trace_kfree_skb(skb, net_tx_action); - __kfree_skb(skb); + + if (skb->fclone != SKB_FCLONE_UNAVAILABLE) + __kfree_skb(skb); + else + __kfree_skb_defer(skb); } + + __kfree_skb_flush(); } if (sd->output_queue) { @@ -4154,7 +4160,10 @@ ncls: ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { drop: - atomic_long_inc(&skb->dev->rx_dropped); + if (!deliver_exact) + atomic_long_inc(&skb->dev->rx_dropped); + else + atomic_long_inc(&skb->dev->rx_nohandler); kfree_skb(skb); /* Jamal, now you will not able to escape explaining * me how you were going to use this. :-) @@ -5152,6 +5161,7 @@ static void net_rx_action(struct softirq_action *h) } } + __kfree_skb_flush(); local_irq_disable(); list_splice_tail_init(&sd->poll_list, &list); @@ -7253,24 +7263,31 @@ void netdev_run_todo(void) } } -/* Convert net_device_stats to rtnl_link_stats64. They have the same - * fields in the same order, with only the type differing. +/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has + * all the same fields in the same order as net_device_stats, with only + * the type differing, but rtnl_link_stats64 may have additional fields + * at the end for newer counters. */ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats) { #if BITS_PER_LONG == 64 - BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); + BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats)); memcpy(stats64, netdev_stats, sizeof(*stats64)); + /* zero out counters that only exist in rtnl_link_stats64 */ + memset((char *)stats64 + sizeof(*netdev_stats), 0, + sizeof(*stats64) - sizeof(*netdev_stats)); #else - size_t i, n = sizeof(*stats64) / sizeof(u64); + size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long); const unsigned long *src = (const unsigned long *)netdev_stats; u64 *dst = (u64 *)stats64; - BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != - sizeof(*stats64) / sizeof(u64)); + BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) dst[i] = src[i]; + /* zero out counters that only exist in rtnl_link_stats64 */ + memset((char *)stats64 + n * sizeof(u64), 0, + sizeof(*stats64) - n * sizeof(u64)); #endif } EXPORT_SYMBOL(netdev_stats_to_stats64); @@ -7300,6 +7317,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); storage->tx_dropped += atomic_long_read(&dev->tx_dropped); + storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler); return storage; } EXPORT_SYMBOL(dev_get_stats); diff --git a/net/core/dst.c b/net/core/dst.c index a1656e3b8d72..b5cbbe07f786 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -265,7 +265,7 @@ again: lwtstate_put(dst->lwtstate); if (dst->flags & DST_METADATA) - kfree(dst); + metadata_dst_free((struct metadata_dst *)dst); else kmem_cache_free(dst->ops->kmem_cachep, dst); @@ -395,6 +395,14 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags) } EXPORT_SYMBOL_GPL(metadata_dst_alloc); +void metadata_dst_free(struct metadata_dst *md_dst) +{ +#ifdef CONFIG_DST_CACHE + dst_cache_destroy(&md_dst->u.tun_info.dst_cache); +#endif + kfree(md_dst); +} + struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags) { int cpu; diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c new file mode 100644 index 000000000000..3938f3f38d69 --- /dev/null +++ b/net/core/dst_cache.c @@ -0,0 +1,168 @@ +/* + * net/core/dst_cache.c - dst entry cache + * + * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <net/dst_cache.h> +#include <net/route.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_fib.h> +#endif +#include <uapi/linux/in.h> + +struct dst_cache_pcpu { + unsigned long refresh_ts; + struct dst_entry *dst; + u32 cookie; + union { + struct in_addr in_saddr; + struct in6_addr in6_saddr; + }; +}; + +void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache, + struct dst_entry *dst, u32 cookie) +{ + dst_release(dst_cache->dst); + if (dst) + dst_hold(dst); + + dst_cache->cookie = cookie; + dst_cache->dst = dst; +} + +struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache, + struct dst_cache_pcpu *idst) +{ + struct dst_entry *dst; + + dst = idst->dst; + if (!dst) + goto fail; + + /* the cache already hold a dst reference; it can't go away */ + dst_hold(dst); + + if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) || + (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) { + dst_cache_per_cpu_dst_set(idst, NULL, 0); + dst_release(dst); + goto fail; + } + return dst; + +fail: + idst->refresh_ts = jiffies; + return NULL; +} + +struct dst_entry *dst_cache_get(struct dst_cache *dst_cache) +{ + if (!dst_cache->cache) + return NULL; + + return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache)); +} +EXPORT_SYMBOL_GPL(dst_cache_get); + +struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr) +{ + struct dst_cache_pcpu *idst; + struct dst_entry *dst; + + if (!dst_cache->cache) + return NULL; + + idst = this_cpu_ptr(dst_cache->cache); + dst = dst_cache_per_cpu_get(dst_cache, idst); + if (!dst) + return NULL; + + *saddr = idst->in_saddr.s_addr; + return container_of(dst, struct rtable, dst); +} +EXPORT_SYMBOL_GPL(dst_cache_get_ip4); + +void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst, + __be32 saddr) +{ + struct dst_cache_pcpu *idst; + + if (!dst_cache->cache) + return; + + idst = this_cpu_ptr(dst_cache->cache); + dst_cache_per_cpu_dst_set(idst, dst, 0); + idst->in_saddr.s_addr = saddr; +} +EXPORT_SYMBOL_GPL(dst_cache_set_ip4); + +#if IS_ENABLED(CONFIG_IPV6) +void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, + const struct in6_addr *addr) +{ + struct dst_cache_pcpu *idst; + + if (!dst_cache->cache) + return; + + idst = this_cpu_ptr(dst_cache->cache); + dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst, + rt6_get_cookie((struct rt6_info *)dst)); + idst->in6_saddr = *addr; +} +EXPORT_SYMBOL_GPL(dst_cache_set_ip6); + +struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, + struct in6_addr *saddr) +{ + struct dst_cache_pcpu *idst; + struct dst_entry *dst; + + if (!dst_cache->cache) + return NULL; + + idst = this_cpu_ptr(dst_cache->cache); + dst = dst_cache_per_cpu_get(dst_cache, idst); + if (!dst) + return NULL; + + *saddr = idst->in6_saddr; + return dst; +} +EXPORT_SYMBOL_GPL(dst_cache_get_ip6); +#endif + +int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp) +{ + dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu, + gfp | __GFP_ZERO); + if (!dst_cache->cache) + return -ENOMEM; + + dst_cache_reset(dst_cache); + return 0; +} +EXPORT_SYMBOL_GPL(dst_cache_init); + +void dst_cache_destroy(struct dst_cache *dst_cache) +{ + int i; + + if (!dst_cache->cache) + return; + + for_each_possible_cpu(i) + dst_release(per_cpu_ptr(dst_cache->cache, i)->dst); + + free_percpu(dst_cache->cache); +} +EXPORT_SYMBOL_GPL(dst_cache_destroy); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index daf04709dd3c..2406101002b1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_BUSY_POLL_BIT] = "busy-poll", + [NETIF_F_HW_TC_BIT] = "hw-tc-offload", }; static const char @@ -632,7 +633,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, return 0; } -u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; +u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len) { @@ -642,6 +643,37 @@ void netdev_rss_key_fill(void *buffer, size_t len) } EXPORT_SYMBOL(netdev_rss_key_fill); +static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) +{ + u32 dev_size, current_max = 0; + u32 *indir; + int ret; + + if (!dev->ethtool_ops->get_rxfh_indir_size || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + if (dev_size == 0) + return -EOPNOTSUPP; + + indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); + if (!indir) + return -ENOMEM; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); + if (ret) + goto out; + + while (dev_size--) + current_max = max(current_max, indir[dev_size]); + + *max = current_max; + +out: + kfree(indir); + return ret; +} + static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { @@ -738,6 +770,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, } ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); + if (ret) + goto out; + + /* indicate whether rxfh was set to default */ + if (user_size == 0) + dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + else + dev->priv_flags |= IFF_RXFH_CONFIGURED; out: kfree(indir); @@ -897,6 +937,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); + if (ret) + goto out; + + /* indicate whether rxfh was set to default */ + if (rxfh.indir_size == 0) + dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) + dev->priv_flags |= IFF_RXFH_CONFIGURED; out: kfree(rss_config); @@ -1227,14 +1275,31 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev, static noinline_for_stack int ethtool_set_channels(struct net_device *dev, void __user *useraddr) { - struct ethtool_channels channels; + struct ethtool_channels channels, max; + u32 max_rx_in_use = 0; - if (!dev->ethtool_ops->set_channels) + if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels) return -EOPNOTSUPP; if (copy_from_user(&channels, useraddr, sizeof(channels))) return -EFAULT; + dev->ethtool_ops->get_channels(dev, &max); + + /* ensure new counts are within the maximums */ + if ((channels.rx_count > max.max_rx) || + (channels.tx_count > max.max_tx) || + (channels.combined_count > max.max_combined) || + (channels.other_count > max.max_other)) + return -EINVAL; + + /* ensure the new Rx count fits within the configured Rx flow + * indirection table settings */ + if (netif_is_rxfh_configured(dev) && + !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && + (channels.combined_count + channels.rx_count) <= max_rx_in_use) + return -EINVAL; + return dev->ethtool_ops->set_channels(dev, &channels); } @@ -1823,13 +1888,121 @@ out: return ret; } +static int ethtool_get_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) +{ + u32 bit; + int ret; + DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); + + if (!dev->ethtool_ops->get_per_queue_coalesce) + return -EOPNOTSUPP; + + useraddr += sizeof(*per_queue_opt); + + bitmap_from_u32array(queue_mask, + MAX_NUM_QUEUE, + per_queue_opt->queue_mask, + DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); + + for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { + struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; + + ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce); + if (ret != 0) + return ret; + if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) + return -EFAULT; + useraddr += sizeof(coalesce); + } + + return 0; +} + +static int ethtool_set_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) +{ + u32 bit; + int i, ret = 0; + int n_queue; + struct ethtool_coalesce *backup = NULL, *tmp = NULL; + DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); + + if ((!dev->ethtool_ops->set_per_queue_coalesce) || + (!dev->ethtool_ops->get_per_queue_coalesce)) + return -EOPNOTSUPP; + + useraddr += sizeof(*per_queue_opt); + + bitmap_from_u32array(queue_mask, + MAX_NUM_QUEUE, + per_queue_opt->queue_mask, + DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); + n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE); + tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL); + if (!backup) + return -ENOMEM; + + for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { + struct ethtool_coalesce coalesce; + + ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp); + if (ret != 0) + goto roll_back; + + tmp++; + + if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) { + ret = -EFAULT; + goto roll_back; + } + + ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce); + if (ret != 0) + goto roll_back; + + useraddr += sizeof(coalesce); + } + +roll_back: + if (ret != 0) { + tmp = backup; + for_each_set_bit(i, queue_mask, bit) { + dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp); + tmp++; + } + } + kfree(backup); + + return ret; +} + +static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_per_queue_op per_queue_opt; + + if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) + return -EFAULT; + + switch (per_queue_opt.sub_command) { + case ETHTOOL_GCOALESCE: + return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); + case ETHTOOL_SCOALESCE: + return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt); + default: + return -EOPNOTSUPP; + }; +} + /* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) { struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); void __user *useraddr = ifr->ifr_data; - u32 ethcmd; + u32 ethcmd, sub_cmd; int rc; netdev_features_t old_features; @@ -1839,8 +2012,14 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) return -EFAULT; + if (ethcmd == ETHTOOL_PERQUEUE) { + if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd))) + return -EFAULT; + } else { + sub_cmd = ethcmd; + } /* Allow some commands to be done by anyone */ - switch (ethcmd) { + switch (sub_cmd) { case ETHTOOL_GSET: case ETHTOOL_GDRVINFO: case ETHTOOL_GMSGLVL: @@ -2070,6 +2249,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPHYSTATS: rc = ethtool_get_phy_stats(dev, useraddr); break; + case ETHTOOL_PERQUEUE: + rc = ethtool_set_per_queue(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/filter.c b/net/core/filter.c index 94d26201080d..a3aba15a8025 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -530,12 +530,14 @@ do_pass: *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); break; - /* RET_K, RET_A are remaped into 2 insns. */ + /* RET_K is remaped into 2 insns. RET_A case doesn't need an + * extra mov as BPF_REG_0 is already mapped into BPF_REG_A. + */ case BPF_RET | BPF_A: case BPF_RET | BPF_K: - *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? - BPF_K : BPF_X, BPF_REG_0, - BPF_REG_A, fp->k); + if (BPF_RVAL(fp->code) == BPF_K) + *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0, + 0, fp->k); *insn = BPF_EXIT_INSN(); break; @@ -1181,7 +1183,7 @@ static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk) if (bpf_prog_size(prog->len) > sysctl_optmem_max) return -ENOMEM; - if (sk_unhashed(sk)) { + if (sk_unhashed(sk) && sk->sk_reuseport) { err = reuseport_alloc(sk); if (err) return err; @@ -1333,15 +1335,22 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) return 0; } -#define BPF_LDST_LEN 16U +struct bpf_scratchpad { + union { + __be32 diff[MAX_BPF_STACK / sizeof(__be32)]; + u8 buff[MAX_BPF_STACK]; + }; +}; + +static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { + struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); struct sk_buff *skb = (struct sk_buff *) (long) r1; int offset = (int) r2; void *from = (void *) (long) r3; unsigned int len = (unsigned int) r4; - char buf[BPF_LDST_LEN]; void *ptr; if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM))) @@ -1355,14 +1364,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) * * so check for invalid 'offset' and too large 'len' */ - if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) + if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff))) return -EFAULT; - - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + len))) + if (unlikely(skb_try_make_writable(skb, offset + len))) return -EFAULT; - ptr = skb_header_pointer(skb, offset, len, buf); + ptr = skb_header_pointer(skb, offset, len, sp->buff); if (unlikely(!ptr)) return -EFAULT; @@ -1371,7 +1378,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) memcpy(ptr, from, len); - if (ptr == buf) + if (ptr == sp->buff) /* skb_store_bits cannot return -EFAULT here */ skb_store_bits(skb, offset, ptr, len); @@ -1400,7 +1407,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) unsigned int len = (unsigned int) r4; void *ptr; - if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN)) + if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK)) return -EFAULT; ptr = skb_header_pointer(skb, offset, len, to); @@ -1432,9 +1439,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; - - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(sum)))) + if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1474,23 +1479,31 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; bool is_pseudo = flags & BPF_F_PSEUDO_HDR; + bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; int offset = (int) r2; __sum16 sum, *ptr; - if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) + if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR | + BPF_F_HDR_FIELD_MASK))) return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; - - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(sum)))) + if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); if (unlikely(!ptr)) return -EFAULT; + if (is_mmzero && !*ptr) + return 0; switch (flags & BPF_F_HDR_FIELD_MASK) { + case 0: + if (unlikely(from != 0)) + return -EINVAL; + + inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); + break; case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); break; @@ -1501,6 +1514,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) return -EINVAL; } + if (is_mmzero && !*ptr) + *ptr = CSUM_MANGLED_0; if (ptr == &sum) /* skb_store_bits guaranteed to not return -EFAULT here */ skb_store_bits(skb, offset, ptr, sizeof(sum)); @@ -1519,6 +1534,45 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; +static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed) +{ + struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); + u64 diff_size = from_size + to_size; + __be32 *from = (__be32 *) (long) r1; + __be32 *to = (__be32 *) (long) r3; + int i, j = 0; + + /* This is quite flexible, some examples: + * + * from_size == 0, to_size > 0, seed := csum --> pushing data + * from_size > 0, to_size == 0, seed := csum --> pulling data + * from_size > 0, to_size > 0, seed := 0 --> diffing data + * + * Even for diffing, from_size and to_size don't need to be equal. + */ + if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) || + diff_size > sizeof(sp->diff))) + return -EINVAL; + + for (i = 0; i < from_size / sizeof(__be32); i++, j++) + sp->diff[j] = ~from[i]; + for (i = 0; i < to_size / sizeof(__be32); i++, j++) + sp->diff[j] = to[i]; + + return csum_partial(sp->diff, diff_size, seed); +} + +const struct bpf_func_proto bpf_csum_diff_proto = { + .func = bpf_csum_diff, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO, + .arg3_type = ARG_PTR_TO_STACK, + .arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO, + .arg5_type = ARG_ANYTHING, +}; + static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; @@ -1682,6 +1736,13 @@ bool bpf_helper_changes_skb_data(void *func) return true; if (func == bpf_skb_vlan_pop) return true; + if (func == bpf_skb_store_bytes) + return true; + if (func == bpf_l3_csum_replace) + return true; + if (func == bpf_l4_csum_replace) + return true; + return false; } @@ -1849,6 +1910,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_store_bytes_proto; case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; + case BPF_FUNC_csum_diff: + return &bpf_csum_diff_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 299cfc24d888..669ecc9f884e 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -27,6 +27,31 @@ #include <net/rtnetlink.h> #include <net/ip6_fib.h> +#ifdef CONFIG_MODULES + +static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) +{ + /* Only lwt encaps implemented without using an interface for + * the encap need to return a string here. + */ + switch (encap_type) { + case LWTUNNEL_ENCAP_MPLS: + return "MPLS"; + case LWTUNNEL_ENCAP_ILA: + return "ILA"; + case LWTUNNEL_ENCAP_IP6: + case LWTUNNEL_ENCAP_IP: + case LWTUNNEL_ENCAP_NONE: + case __LWTUNNEL_ENCAP_MAX: + /* should not have got here */ + WARN_ON(1); + break; + } + return NULL; +} + +#endif /* CONFIG_MODULES */ + struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) { struct lwtunnel_state *lws; @@ -85,6 +110,18 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); +#ifdef CONFIG_MODULES + if (!ops) { + const char *encap_type_str = lwtunnel_encap_str(encap_type); + + if (encap_type_str) { + rcu_read_unlock(); + request_module("rtnl-lwt-%s", encap_type_str); + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[encap_type]); + } + } +#endif if (likely(ops && ops->build_state)) ret = ops->build_state(dev, encap, family, cfg, lws); rcu_read_unlock(); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b6c8a6629b39..4ae17c3166fc 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -29,7 +29,6 @@ #ifdef CONFIG_SYSFS static const char fmt_hex[] = "%#x\n"; -static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; @@ -574,6 +573,7 @@ NETSTAT_ENTRY(tx_heartbeat_errors); NETSTAT_ENTRY(tx_window_errors); NETSTAT_ENTRY(rx_compressed); NETSTAT_ENTRY(tx_compressed); +NETSTAT_ENTRY(rx_nohandler); static struct attribute *netstat_attrs[] = { &dev_attr_rx_packets.attr, @@ -599,6 +599,7 @@ static struct attribute *netstat_attrs[] = { &dev_attr_tx_window_errors.attr, &dev_attr_rx_compressed.attr, &dev_attr_tx_compressed.attr, + &dev_attr_rx_nohandler.attr, NULL }; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 0260c84ed83c..11fce17274f6 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -9,7 +9,6 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ -#include <linux/module.h> #include <linux/slab.h> #include <linux/cgroup.h> #include <linux/fdtable.h> diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index f1efbc39ef6b..2ec86fc552df 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -11,7 +11,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d735e854f916..62737f437c8e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -804,6 +804,8 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->rx_compressed = b->rx_compressed; a->tx_compressed = b->tx_compressed; + + a->rx_nohandler = b->rx_nohandler; } static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) @@ -1412,6 +1414,58 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, }; +static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) +{ + const struct rtnl_link_ops *ops = NULL; + struct nlattr *linfo[IFLA_INFO_MAX + 1]; + + if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0) + return NULL; + + if (linfo[IFLA_INFO_KIND]) { + char kind[MODULE_NAME_LEN]; + + nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind)); + ops = rtnl_link_ops_get(kind); + } + + return ops; +} + +static bool link_master_filtered(struct net_device *dev, int master_idx) +{ + struct net_device *master; + + if (!master_idx) + return false; + + master = netdev_master_upper_dev_get(dev); + if (!master || master->ifindex != master_idx) + return true; + + return false; +} + +static bool link_kind_filtered(const struct net_device *dev, + const struct rtnl_link_ops *kind_ops) +{ + if (kind_ops && dev->rtnl_link_ops != kind_ops) + return true; + + return false; +} + +static bool link_dump_filtered(struct net_device *dev, + int master_idx, + const struct rtnl_link_ops *kind_ops) +{ + if (link_master_filtered(dev, master_idx) || + link_kind_filtered(dev, kind_ops)) + return true; + + return false; +} + static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -1421,6 +1475,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_head *head; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; + const struct rtnl_link_ops *kind_ops = NULL; + unsigned int flags = NLM_F_MULTI; + int master_idx = 0; int err; int hdrlen; @@ -1443,18 +1500,29 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + + if (tb[IFLA_MASTER]) + master_idx = nla_get_u32(tb[IFLA_MASTER]); + + if (tb[IFLA_LINKINFO]) + kind_ops = linkinfo_to_kind_ops(tb[IFLA_LINKINFO]); + + if (master_idx || kind_ops) + flags |= NLM_F_DUMP_FILTERED; } for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; hlist_for_each_entry(dev, head, index_hlist) { + if (link_dump_filtered(dev, master_idx, kind_ops)) + continue; if (idx < s_idx) goto cont; err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI, + flags, ext_filter_mask); /* If we ran out of room on the first message, * we're in trouble diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5bf88f58bee7..488566b09c6d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -349,8 +349,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) } EXPORT_SYMBOL(build_skb); +#define NAPI_SKB_CACHE_SIZE 64 + +struct napi_alloc_cache { + struct page_frag_cache page; + size_t skb_count; + void *skb_cache[NAPI_SKB_CACHE_SIZE]; +}; + static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); -static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); +static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { @@ -380,9 +388,9 @@ EXPORT_SYMBOL(netdev_alloc_frag); static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); - return __alloc_page_frag(nc, fragsz, gfp_mask); + return __alloc_page_frag(&nc->page, fragsz, gfp_mask); } void *napi_alloc_frag(unsigned int fragsz) @@ -476,7 +484,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; void *data; @@ -496,7 +504,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; - data = __alloc_page_frag(nc, len, gfp_mask); + data = __alloc_page_frag(&nc->page, len, gfp_mask); if (unlikely(!data)) return NULL; @@ -507,7 +515,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, } /* use OR instead of assignment to avoid clearing of bits in mask */ - if (nc->pfmemalloc) + if (nc->page.pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -749,6 +757,73 @@ void consume_skb(struct sk_buff *skb) } EXPORT_SYMBOL(consume_skb); +void __kfree_skb_flush(void) +{ + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + + /* flush skb_cache if containing objects */ + if (nc->skb_count) { + kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count, + nc->skb_cache); + nc->skb_count = 0; + } +} + +static inline void _kfree_skb_defer(struct sk_buff *skb) +{ + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + + /* drop skb->head and call any destructors for packet */ + skb_release_all(skb); + + /* record skb to CPU local list */ + nc->skb_cache[nc->skb_count++] = skb; + +#ifdef CONFIG_SLUB + /* SLUB writes into objects when freeing */ + prefetchw(skb); +#endif + + /* flush skb_cache if it is filled */ + if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { + kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE, + nc->skb_cache); + nc->skb_count = 0; + } +} +void __kfree_skb_defer(struct sk_buff *skb) +{ + _kfree_skb_defer(skb); +} + +void napi_consume_skb(struct sk_buff *skb, int budget) +{ + if (unlikely(!skb)) + return; + + /* if budget is 0 assume netpoll w/ IRQs disabled */ + if (unlikely(!budget)) { + dev_consume_skb_irq(skb); + return; + } + + if (likely(atomic_read(&skb->users) == 1)) + smp_rmb(); + else if (likely(!atomic_dec_and_test(&skb->users))) + return; + /* if reaching here SKB is ready to free */ + trace_consume_skb(skb); + + /* if SKB is a clone, don't handle this case */ + if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) { + __kfree_skb(skb); + return; + } + + _kfree_skb_defer(skb); +} +EXPORT_SYMBOL(napi_consume_skb); + /* Make sure a field is enclosed inside headers_start/headers_end section */ #define CHECK_SKB_FIELD(field) \ BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ @@ -3006,8 +3081,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (unlikely(!proto)) return ERR_PTR(-EINVAL); - csum = !head_skb->encap_hdr_csum && - !!can_checksum_protocol(features, proto); + csum = !!can_checksum_protocol(features, proto); headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -3100,13 +3174,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (nskb->len == len + doffset) goto perform_csum_check; - if (!sg && !nskb->remcsum_offload) { - nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(head_skb, offset, - skb_put(nskb, len), - len, 0); + if (!sg) { + if (!nskb->remcsum_offload) + nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum = + skb_copy_and_csum_bits(head_skb, offset, + skb_put(nskb, len), + len, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + skb_headroom(nskb) + doffset; continue; } @@ -3172,12 +3248,19 @@ skip_fraglist: nskb->truesize += nskb->data_len; perform_csum_check: - if (!csum && !nskb->remcsum_offload) { - nskb->csum = skb_checksum(nskb, doffset, - nskb->len - doffset, 0); - nskb->ip_summed = CHECKSUM_NONE; + if (!csum) { + if (skb_has_shared_frag(nskb)) { + err = __skb_linearize(nskb); + if (err) + goto err; + } + if (!nskb->remcsum_offload) + nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum = + skb_checksum(nskb, doffset, + nskb->len - doffset, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); @@ -4415,9 +4498,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) skb->mac_len += VLAN_HLEN; __skb_pull(skb, offset); - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(skb->data - + (2 * ETH_ALEN), VLAN_HLEN, 0)); + skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; diff --git a/net/core/sock.c b/net/core/sock.c index 6c1c8bc93412..46dc8ad7d050 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1531,6 +1531,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk = NULL; goto out; } + RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); newsk->sk_err = 0; newsk->sk_priority = 0; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 902d606324a0..b5672e5fe649 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -802,7 +802,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) } lookup: - sk = __inet_lookup_skb(&dccp_hashinfo, skb, + sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), dh->dccph_sport, dh->dccph_dport); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b8608b71a66d..4663a01d5039 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -668,7 +668,7 @@ static int dccp_v6_rcv(struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); lookup: - sk = __inet6_lookup_skb(&dccp_hashinfo, skb, + sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), dh->dccph_sport, dh->dccph_dport, inet6_iif(skb)); if (!sk) { @@ -993,7 +993,7 @@ static struct proto dccp_v6_prot = { .sendmsg = dccp_sendmsg, .recvmsg = dccp_recvmsg, .backlog_rcv = dccp_v6_do_rcv, - .hash = inet_hash, + .hash = inet6_hash, .unhash = inet_unhash, .accept = inet_csk_accept, .get_port = inet_csk_get_port, diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a548be247e15..e0bd013a1e5e 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -182,12 +182,14 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, static HLIST_HEAD(raw_head); static DEFINE_RWLOCK(raw_lock); -static void raw_hash(struct sock *sk) +static int raw_hash(struct sock *sk) { write_lock_bh(&raw_lock); sk_add_node(sk, &raw_head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock_bh(&raw_lock); + + return 0; } static void raw_unhash(struct sock *sk) @@ -462,12 +464,14 @@ static inline struct dgram_sock *dgram_sk(const struct sock *sk) return container_of(sk, struct dgram_sock, sk); } -static void dgram_hash(struct sock *sk) +static int dgram_hash(struct sock *sk) { write_lock_bh(&dgram_lock); sk_add_node(sk, &dgram_head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock_bh(&dgram_lock); + + return 0; } static void dgram_unhash(struct sock *sk) @@ -1026,8 +1030,13 @@ static int ieee802154_create(struct net *net, struct socket *sock, /* Checksums on by default */ sock_set_flag(sk, SOCK_ZAPPED); - if (sk->sk_prot->hash) - sk->sk_prot->hash(sk); + if (sk->sk_prot->hash) { + rc = sk->sk_prot->hash(sk); + if (rc) { + sk_common_release(sk); + goto out; + } + } if (sk->sk_prot->init) { rc = sk->sk_prot->init(sk); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 775824720b6b..238225b0c970 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -186,6 +186,7 @@ config NET_IPGRE_DEMUX config NET_IP_TUNNEL tristate + select DST_CACHE default n config NET_IPGRE @@ -405,14 +406,6 @@ config INET_XFRM_MODE_BEET If unsure, say Y. -config INET_LRO - tristate "Large Receive Offload (ipv4/tcp)" - default y - ---help--- - Support for Large Receive Offload (ipv4/tcp). - - If unsure, say Y. - config INET_DIAG tristate "INET: socket monitoring interface" default y diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 62c049b647e9..bfa133691cde 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -32,7 +32,6 @@ obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o -obj-$(CONFIG_INET_LRO) += inet_lro.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5c5db6636704..209d1ed28954 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -370,7 +370,11 @@ lookup_protocol: */ inet->inet_sport = htons(inet->inet_num); /* Add to protocol hash chains. */ - sk->sk_prot->hash(sk); + err = sk->sk_prot->hash(sk); + if (err) { + sk_common_release(sk); + goto out; + } } if (sk->sk_prot->init) { @@ -1091,12 +1095,6 @@ void inet_unregister_protosw(struct inet_protosw *p) } EXPORT_SYMBOL(inet_unregister_protosw); -/* - * Shall we try to damage output packets if routing dev changes? - */ - -int sysctl_ip_dynaddr __read_mostly; - static int inet_sk_reselect_saddr(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); @@ -1127,7 +1125,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (new_saddr == old_saddr) return 0; - if (sysctl_ip_dynaddr > 1) { + if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } @@ -1142,8 +1140,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) * Besides that, it does not check for connection * uniqueness. Wait for troubles. */ - __sk_prot_rehash(sk); - return 0; + return __sk_prot_rehash(sk); } int inet_sk_rebuild_header(struct sock *sk) @@ -1183,7 +1180,7 @@ int inet_sk_rebuild_header(struct sock *sk) * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sysctl_ip_dynaddr || + if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 59b3e0e8fd51..c102eb5ac55c 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -735,6 +735,14 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip))) goto out; + /* + * For some 802.11 wireless deployments (and possibly other networks), + * there will be an ARP proxy and gratuitous ARP frames are attacks + * and thus should not be accepted. + */ + if (sip == tip && IN_DEV_ORCONF(in_dev, DROP_GRATUITOUS_ARP)) + goto out; + /* * Special case: We must set Frame Relay source Q.922 address */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f6303b17546b..29b8d3a7b19b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2185,6 +2185,8 @@ static struct devinet_sysctl_table { "igmpv3_unsolicited_report_interval"), DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, "ignore_routes_with_linkdown"), + DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP, + "drop_gratuitous_arp"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), @@ -2192,6 +2194,8 @@ static struct devinet_sysctl_table { "promote_secondaries"), DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, "route_localnet"), + DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST, + "drop_unicast_in_l2_multicast"), }, }; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 976f0dcf6991..88dab0c1670c 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -774,7 +774,6 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, uh->dest = e->dport; uh->source = sport; uh->len = htons(skb->len); - uh->check = 0; udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, fl4->saddr, fl4->daddr, skb->len); @@ -784,11 +783,11 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4) { - bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM); - int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : + SKB_GSO_UDP_TUNNEL; __be16 sport; - skb = iptunnel_handle_offloads(skb, csum, type); + skb = iptunnel_handle_offloads(skb, type); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -804,8 +803,8 @@ EXPORT_SYMBOL(fou_build_header); int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4) { - bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM); - int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : + SKB_GSO_UDP_TUNNEL; struct guehdr *guehdr; size_t hdrlen, optlen = 0; __be16 sport; @@ -814,7 +813,6 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && skb->ip_summed == CHECKSUM_PARTIAL) { - csum = false; optlen += GUE_PLEN_REMCSUM; type |= SKB_GSO_TUNNEL_REMCSUM; need_priv = true; @@ -822,7 +820,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, optlen += need_priv ? GUE_LEN_PRIV : 0; - skb = iptunnel_handle_offloads(skb, csum, type); + skb = iptunnel_handle_offloads(skb, type); if (IS_ERR(skb)) return PTR_ERR(skb); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 5a8ee3282550..003b0ebbcfdd 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -18,15 +18,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) { + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); - netdev_features_t enc_features; - int ghl; - struct gre_base_hdr *greh; u16 mac_offset = skb->mac_header; - int mac_len = skb->mac_len; __be16 protocol = skb->protocol; - int tnl_hlen; - bool csum; + u16 mac_len = skb->mac_len; + int gre_offset, outer_hlen; + bool need_csum; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | @@ -43,74 +41,59 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, if (!skb->encapsulation) goto out; - if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + if (unlikely(tnl_hlen < sizeof(struct gre_base_hdr))) goto out; - greh = (struct gre_base_hdr *)skb_transport_header(skb); - - ghl = skb_inner_mac_header(skb) - skb_transport_header(skb); - if (unlikely(ghl < sizeof(*greh))) + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; - csum = !!(greh->flags & GRE_CSUM); - if (csum) - skb->encap_hdr_csum = 1; - /* setup inner skb. */ - skb->protocol = greh->protocol; skb->encapsulation = 0; - - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - - __skb_pull(skb, ghl); + __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); + skb->protocol = skb->inner_protocol; + + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); + skb->encap_hdr_csum = need_csum; + + features &= skb->dev->hw_enc_features; /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & features; - segs = skb_mac_gso_segment(skb, enc_features); + segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { - skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, + mac_len); goto out; } + outer_hlen = skb_tnl_header_len(skb); + gre_offset = outer_hlen - tnl_hlen; skb = segs; - tnl_hlen = skb_tnl_header_len(skb); do { - __skb_push(skb, ghl); - if (csum) { - __be32 *pcsum; - - if (skb_has_shared_frag(skb)) { - int err; - - err = __skb_linearize(skb); - if (err) { - kfree_skb_list(segs); - segs = ERR_PTR(err); - goto out; - } - } - - skb_reset_transport_header(skb); - - greh = (struct gre_base_hdr *) - skb_transport_header(skb); - pcsum = (__be32 *)(greh + 1); - *pcsum = 0; - *(__sum16 *)pcsum = gso_make_checksum(skb, 0); - } - __skb_push(skb, tnl_hlen - ghl); + struct gre_base_hdr *greh; + __be32 *pcsum; skb_reset_inner_headers(skb); skb->encapsulation = 1; - skb_reset_mac_header(skb); - skb_set_network_header(skb, mac_len); skb->mac_len = mac_len; skb->protocol = protocol; + + __skb_push(skb, outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_set_transport_header(skb, gre_offset); + + if (!need_csum) + continue; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + pcsum = (__be32 *)(greh + 1); + + *pcsum = 0; + *(__sum16 *)pcsum = gso_make_checksum(skb, 0); } while ((skb = skb->next)); out: return segs; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 05e4cba14162..2aea9f1a2a31 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -107,12 +107,6 @@ #include <linux/seq_file.h> #endif -#define IP_MAX_MEMBERSHIPS 20 -#define IP_MAX_MSF 10 - -/* IGMP reports for link-local multicast groups are enabled by default */ -int sysctl_igmp_llm_reports __read_mostly = 1; - #ifdef CONFIG_IP_MULTICAST /* Parameter names and values are taken from igmp-v2-06 draft */ @@ -433,6 +427,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) { struct net_device *dev = pmc->interface->dev; + struct net *net = dev_net(dev); struct igmpv3_report *pih; struct igmpv3_grec *pgr = NULL; struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; @@ -440,7 +435,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; - if (ipv4_is_local_multicast(pmc->multiaddr) && !sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) return skb; isquery = type == IGMPV3_MODE_IS_INCLUDE || @@ -543,6 +538,7 @@ empty_source: static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) { struct sk_buff *skb = NULL; + struct net *net = dev_net(in_dev->dev); int type; if (!pmc) { @@ -551,7 +547,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(pmc->multiaddr) && - !sysctl_igmp_llm_reports) + !net->ipv4.sysctl_igmp_llm_reports) continue; spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -687,7 +683,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); - if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) return 0; if (type == IGMP_HOST_LEAVE_MESSAGE) @@ -766,9 +762,10 @@ static void igmp_ifc_timer_expire(unsigned long data) static void igmp_ifc_event(struct in_device *in_dev) { + struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv; + in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; igmp_ifc_start_timer(in_dev, 1); } @@ -858,12 +855,13 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) static bool igmp_heard_report(struct in_device *in_dev, __be32 group) { struct ip_mc_list *im; + struct net *net = dev_net(in_dev->dev); /* Timers are only set for non-local groups */ if (group == IGMP_ALL_HOSTS) return false; - if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) return false; rcu_read_lock(); @@ -887,6 +885,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, __be32 group = ih->group; int max_delay; int mark = 0; + struct net *net = dev_net(in_dev->dev); if (len == 8) { @@ -972,7 +971,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !sysctl_igmp_llm_reports) + !net->ipv4.sysctl_igmp_llm_reports) continue; spin_lock_bh(&im->lock); if (im->tm_running) @@ -1088,6 +1087,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) { struct ip_mc_list *pmc; + struct net *net = dev_net(in_dev->dev); /* this is an "ip_mc_list" for convenience; only the fields below * are actually used. In particular, the refcnt and users are not @@ -1102,7 +1102,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1187,6 +1187,7 @@ static void igmp_group_dropped(struct ip_mc_list *im) { struct in_device *in_dev = im->interface; #ifdef CONFIG_IP_MULTICAST + struct net *net = dev_net(in_dev->dev); int reporter; #endif @@ -1198,7 +1199,7 @@ static void igmp_group_dropped(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) return; reporter = im->reporter; @@ -1223,6 +1224,9 @@ static void igmp_group_dropped(struct ip_mc_list *im) static void igmp_group_added(struct ip_mc_list *im) { struct in_device *in_dev = im->interface; +#ifdef CONFIG_IP_MULTICAST + struct net *net = dev_net(in_dev->dev); +#endif if (im->loaded == 0) { im->loaded = 1; @@ -1232,7 +1236,7 @@ static void igmp_group_added(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) return; if (in_dev->dead) @@ -1245,7 +1249,7 @@ static void igmp_group_added(struct ip_mc_list *im) } /* else, v3 */ - im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; igmp_ifc_event(in_dev); #endif } @@ -1314,6 +1318,9 @@ static void ip_mc_hash_remove(struct in_device *in_dev, void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { struct ip_mc_list *im; +#ifdef CONFIG_IP_MULTICAST + struct net *net = dev_net(in_dev->dev); +#endif ASSERT_RTNL(); @@ -1340,7 +1347,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); - im->unsolicit_count = sysctl_igmp_qrv; + im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; #endif im->next_rcu = in_dev->mc_list; @@ -1533,6 +1540,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) #ifdef CONFIG_IP_MULTICAST struct ip_mc_list *im; int type; + struct net *net = dev_net(in_dev->dev); ASSERT_RTNL(); @@ -1540,7 +1548,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !sysctl_igmp_llm_reports) + !net->ipv4.sysctl_igmp_llm_reports) continue; /* a failover is happening and switches @@ -1639,6 +1647,9 @@ void ip_mc_down(struct in_device *in_dev) void ip_mc_init_dev(struct in_device *in_dev) { +#ifdef CONFIG_IP_MULTICAST + struct net *net = dev_net(in_dev->dev); +#endif ASSERT_RTNL(); #ifdef CONFIG_IP_MULTICAST @@ -1646,7 +1657,7 @@ void ip_mc_init_dev(struct in_device *in_dev) (unsigned long)in_dev); setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); - in_dev->mr_qrv = sysctl_igmp_qrv; + in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; #endif spin_lock_init(&in_dev->mc_tomb_lock); @@ -1657,11 +1668,14 @@ void ip_mc_init_dev(struct in_device *in_dev) void ip_mc_up(struct in_device *in_dev) { struct ip_mc_list *pmc; +#ifdef CONFIG_IP_MULTICAST + struct net *net = dev_net(in_dev->dev); +#endif ASSERT_RTNL(); #ifdef CONFIG_IP_MULTICAST - in_dev->mr_qrv = sysctl_igmp_qrv; + in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; #endif ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); @@ -1727,11 +1741,6 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) /* * Join a socket to a group */ -int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; -int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; -#ifdef CONFIG_IP_MULTICAST -int sysctl_igmp_qrv __read_mostly = IGMP_QUERY_ROBUSTNESS_VARIABLE; -#endif static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, __be32 *psfsrc) @@ -1756,6 +1765,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) { #ifdef CONFIG_IP_MULTICAST struct in_device *in_dev = pmc->interface; + struct net *net = dev_net(in_dev->dev); #endif /* no more filters for this source */ @@ -1766,7 +1776,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1824,12 +1834,13 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[MCAST_INCLUDE]) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; + struct net *net = dev_net(in_dev->dev); #endif /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -1996,6 +2007,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; + struct net *net = dev_net(pmc->interface->dev); in_dev = pmc->interface; #endif @@ -2007,7 +2019,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2074,7 +2086,7 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) count++; } err = -ENOBUFS; - if (count >= sysctl_igmp_max_memberships) + if (count >= net->ipv4.sysctl_igmp_max_memberships) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (!iml) @@ -2246,7 +2258,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } /* else, add a new source to the filter */ - if (psl && psl->sl_count >= sysctl_igmp_max_msf) { + if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) { err = -ENOBUFS; goto done; } @@ -2919,6 +2931,12 @@ static int __net_init igmp_net_init(struct net *net) goto out_sock; } + /* Sysctl initialization */ + net->ipv4.sysctl_igmp_max_memberships = 20; + net->ipv4.sysctl_igmp_max_msf = 10; + /* IGMP reports for link-local multicast groups are enabled by default */ + net->ipv4.sysctl_igmp_llm_reports = 1; + net->ipv4.sysctl_igmp_qrv = 2; return 0; out_sock: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 64148914803a..d7682306370b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -24,6 +24,7 @@ #include <net/tcp_states.h> #include <net/xfrm.h> #include <net/tcp.h> +#include <net/sock_reuseport.h> #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -67,7 +68,8 @@ int inet_csk_bind_conflict(const struct sock *sk, if ((!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) && (!reuseport || !sk2->sk_reuseport || - (sk2->sk_state != TCP_TIME_WAIT && + rcu_access_pointer(sk->sk_reuseport_cb) || + (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid(sk2))))) { if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || @@ -89,161 +91,153 @@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. + * We try to allocate an odd port (and leave even ports for connect()) */ int inet_csk_get_port(struct sock *sk, unsigned short snum) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; + struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; + int ret = 1, attempts = 5, port = snum; + int smallest_size = -1, smallest_port; struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret, attempts = 5; struct net *net = sock_net(sk); - int smallest_size = -1, smallest_rover; + int i, low, high, attempt_half; + struct inet_bind_bucket *tb; kuid_t uid = sock_i_uid(sk); - int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; + u32 remaining, offset; - local_bh_disable(); - if (!snum) { - int remaining, rover, low, high; + if (port) { +have_port: + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + spin_lock_bh(&head->lock); + inet_bind_bucket_for_each(tb, &head->chain) + if (net_eq(ib_net(tb), net) && tb->port == port) + goto tb_found; + goto tb_not_found; + } again: - inet_get_local_port_range(net, &low, &high); - if (attempt_half) { - int half = low + ((high - low) >> 1); - - if (attempt_half == 1) - high = half; - else - low = half; - } - remaining = (high - low) + 1; - smallest_rover = rover = prandom_u32() % remaining + low; - - smallest_size = -1; - do { - if (inet_is_local_reserved_port(net, rover)) - goto next_nolock; - head = &hashinfo->bhash[inet_bhashfn(net, rover, - hashinfo->bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, &head->chain) - if (net_eq(ib_net(tb), net) && tb->port == rover) { - if (((tb->fastreuse > 0 && - sk->sk_reuse && - sk->sk_state != TCP_LISTEN) || - (tb->fastreuseport > 0 && - sk->sk_reuseport && - uid_eq(tb->fastuid, uid))) && - (tb->num_owners < smallest_size || smallest_size == -1)) { - smallest_size = tb->num_owners; - smallest_rover = rover; - } - if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { - snum = rover; - goto tb_found; - } - goto next; + attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; +other_half_scan: + inet_get_local_port_range(net, &low, &high); + high++; /* [32768, 60999] -> [32768, 61000[ */ + if (high - low < 4) + attempt_half = 0; + if (attempt_half) { + int half = low + (((high - low) >> 2) << 1); + + if (attempt_half == 1) + high = half; + else + low = half; + } + remaining = high - low; + if (likely(remaining > 1)) + remaining &= ~1U; + + offset = prandom_u32() % remaining; + /* __inet_hash_connect() favors ports having @low parity + * We do the opposite to not pollute connect() users. + */ + offset |= 1U; + smallest_size = -1; + smallest_port = low; /* avoid compiler warning */ + +other_parity_scan: + port = low + offset; + for (i = 0; i < remaining; i += 2, port += 2) { + if (unlikely(port >= high)) + port -= remaining; + if (inet_is_local_reserved_port(net, port)) + continue; + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + spin_lock_bh(&head->lock); + inet_bind_bucket_for_each(tb, &head->chain) + if (net_eq(ib_net(tb), net) && tb->port == port) { + if (((tb->fastreuse > 0 && reuse) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && + !rcu_access_pointer(sk->sk_reuseport_cb) && + uid_eq(tb->fastuid, uid))) && + (tb->num_owners < smallest_size || smallest_size == -1)) { + smallest_size = tb->num_owners; + smallest_port = port; } - break; - next: - spin_unlock(&head->lock); - next_nolock: - if (++rover > high) - rover = low; - } while (--remaining > 0); - - /* Exhausted local port range during search? It is not - * possible for us to be holding one of the bind hash - * locks if this test triggers, because if 'remaining' - * drops to zero, we broke out of the do/while loop at - * the top level, not from the 'break;' statement. - */ - ret = 1; - if (remaining <= 0) { - if (smallest_size != -1) { - snum = smallest_rover; - goto have_snum; - } - if (attempt_half == 1) { - /* OK we now try the upper half of the range */ - attempt_half = 2; - goto again; + if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) + goto tb_found; + goto next_port; } - goto fail; - } - /* OK, here is the one we will use. HEAD is - * non-NULL and we hold it's mutex. - */ - snum = rover; - } else { -have_snum: - head = &hashinfo->bhash[inet_bhashfn(net, snum, - hashinfo->bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, &head->chain) - if (net_eq(ib_net(tb), net) && tb->port == snum) - goto tb_found; + goto tb_not_found; +next_port: + spin_unlock_bh(&head->lock); + cond_resched(); + } + + if (smallest_size != -1) { + port = smallest_port; + goto have_port; } - tb = NULL; - goto tb_not_found; + offset--; + if (!(offset & 1)) + goto other_parity_scan; + + if (attempt_half == 1) { + /* OK we now try the upper half of the range */ + attempt_half = 2; + goto other_half_scan; + } + return ret; + +tb_not_found: + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + net, head, port); + if (!tb) + goto fail_unlock; tb_found: if (!hlist_empty(&tb->owners)) { if (sk->sk_reuse == SK_FORCE_REUSE) goto success; - if (((tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + if (((tb->fastreuse > 0 && reuse) || (tb->fastreuseport > 0 && sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && - smallest_size == -1) { + smallest_size == -1) goto success; - } else { - ret = 1; - if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { - if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || - (tb->fastreuseport > 0 && - sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && - smallest_size != -1 && --attempts >= 0) { - spin_unlock(&head->lock); - goto again; - } - - goto fail_unlock; + if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { + if ((reuse || + (tb->fastreuseport > 0 && + sk->sk_reuseport && + !rcu_access_pointer(sk->sk_reuseport_cb) && + uid_eq(tb->fastuid, uid))) && + smallest_size != -1 && --attempts >= 0) { + spin_unlock_bh(&head->lock); + goto again; } + goto fail_unlock; } - } -tb_not_found: - ret = 1; - if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, - net, head, snum)) == NULL) - goto fail_unlock; - if (hlist_empty(&tb->owners)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) - tb->fastreuse = 1; - else + if (!reuse) tb->fastreuse = 0; + if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)) + tb->fastreuseport = 0; + } else { + tb->fastreuse = reuse; if (sk->sk_reuseport) { tb->fastreuseport = 1; tb->fastuid = uid; - } else - tb->fastreuseport = 0; - } else { - if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; - if (tb->fastreuseport && - (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) + } else { tb->fastreuseport = 0; + } } success: if (!inet_csk(sk)->icsk_bind_hash) - inet_bind_hash(sk, tb, snum); + inet_bind_hash(sk, tb, port); WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); ret = 0; fail_unlock: - spin_unlock(&head->lock); -fail: - local_bh_enable(); + spin_unlock_bh(&head->lock); return ret; } EXPORT_SYMBOL_GPL(inet_csk_get_port); @@ -482,10 +476,6 @@ EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); #define AF_INET_FAMILY(fam) true #endif -/* Only thing we need from tcp.h */ -extern int sysctl_tcp_synack_retries; - - /* Decide when to expire the request and when to resend SYN-ACK */ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, const int max_retries, @@ -557,6 +547,7 @@ static void reqsk_timer_handler(unsigned long data) { struct request_sock *req = (struct request_sock *)data; struct sock *sk_listener = req->rsk_listener; + struct net *net = sock_net(sk_listener); struct inet_connection_sock *icsk = inet_csk(sk_listener); struct request_sock_queue *queue = &icsk->icsk_accept_queue; int qlen, expire = 0, resend = 0; @@ -566,7 +557,7 @@ static void reqsk_timer_handler(unsigned long data) if (sk_state_load(sk_listener) != TCP_LISTEN) goto drop; - max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; thresh = max_retries; /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. @@ -737,6 +728,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); + int err = -EADDRINUSE; reqsk_queue_alloc(&icsk->icsk_accept_queue); @@ -754,13 +746,14 @@ int inet_csk_listen_start(struct sock *sk, int backlog) inet->inet_sport = htons(inet->inet_num); sk_dst_reset(sk); - sk->sk_prot->hash(sk); + err = sk->sk_prot->hash(sk); - return 0; + if (likely(!err)) + return 0; } sk->sk_state = TCP_CLOSE; - return -EADDRINUSE; + return err; } EXPORT_SYMBOL_GPL(inet_csk_listen_start); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 6029157a19ed..50c0d96b8441 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -357,18 +357,18 @@ struct sock *inet_diag_find_one_icsk(struct net *net, struct sock *sk; if (req->sdiag_family == AF_INET) - sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], + sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) - sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3], + sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3], req->id.idiag_dport, req->id.idiag_src[3], req->id.idiag_sport, req->id.idiag_if); else - sk = inet6_lookup(net, hashinfo, + sk = inet6_lookup(net, hashinfo, NULL, 0, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index ccc5980797fc..bc68eced0105 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -20,10 +20,12 @@ #include <linux/wait.h> #include <linux/vmalloc.h> +#include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/secure_seq.h> #include <net/ip.h> +#include <net/sock_reuseport.h> static u32 inet_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, @@ -205,6 +207,7 @@ static inline int compute_score(struct sock *sk, struct net *net, struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif) @@ -214,6 +217,7 @@ struct sock *__inet_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; int score, hiscore, matches = 0, reuseport = 0; + bool select_ok = true; u32 phash = 0; rcu_read_lock(); @@ -229,6 +233,15 @@ begin: if (reuseport) { phash = inet_ehashfn(net, daddr, hnum, saddr, sport); + if (select_ok) { + struct sock *sk2; + sk2 = reuseport_select_sock(sk, phash, + skb, doff); + if (sk2) { + result = sk2; + goto found; + } + } matches = 1; } } else if (score == hiscore && reuseport) { @@ -246,11 +259,13 @@ begin: if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, hnum, daddr, dif) < hiscore)) { sock_put(result); + select_ok = false; goto begin; } } @@ -449,32 +464,74 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) } EXPORT_SYMBOL_GPL(inet_ehash_nolisten); -void __inet_hash(struct sock *sk, struct sock *osk) +static int inet_reuseport_add_sock(struct sock *sk, + struct inet_listen_hashbucket *ilb, + int (*saddr_same)(const struct sock *sk1, + const struct sock *sk2, + bool match_wildcard)) +{ + struct sock *sk2; + struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); + + sk_nulls_for_each_rcu(sk2, node, &ilb->head) { + if (sk2 != sk && + sk2->sk_family == sk->sk_family && + ipv6_only_sock(sk2) == ipv6_only_sock(sk) && + sk2->sk_bound_dev_if == sk->sk_bound_dev_if && + sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && + saddr_same(sk, sk2, false)) + return reuseport_add_sock(sk, sk2); + } + + /* Initial allocation may have already happened via setsockopt */ + if (!rcu_access_pointer(sk->sk_reuseport_cb)) + return reuseport_alloc(sk); + return 0; +} + +int __inet_hash(struct sock *sk, struct sock *osk, + int (*saddr_same)(const struct sock *sk1, + const struct sock *sk2, + bool match_wildcard)) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_listen_hashbucket *ilb; + int err = 0; if (sk->sk_state != TCP_LISTEN) { inet_ehash_nolisten(sk, osk); - return; + return 0; } WARN_ON(!sk_unhashed(sk)); ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; spin_lock(&ilb->lock); + if (sk->sk_reuseport) { + err = inet_reuseport_add_sock(sk, ilb, saddr_same); + if (err) + goto unlock; + } __sk_nulls_add_node_rcu(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); +unlock: spin_unlock(&ilb->lock); + + return err; } EXPORT_SYMBOL(__inet_hash); -void inet_hash(struct sock *sk) +int inet_hash(struct sock *sk) { + int err = 0; + if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - __inet_hash(sk, NULL); + err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal); local_bh_enable(); } + + return err; } EXPORT_SYMBOL_GPL(inet_hash); @@ -493,6 +550,8 @@ void inet_unhash(struct sock *sk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock_bh(lock); + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); @@ -506,106 +565,106 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *, __u16, struct inet_timewait_sock **)) { struct inet_hashinfo *hinfo = death_row->hashinfo; - const unsigned short snum = inet_sk(sk)->inet_num; + struct inet_timewait_sock *tw = NULL; struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; + int port = inet_sk(sk)->inet_num; struct net *net = sock_net(sk); + struct inet_bind_bucket *tb; + u32 remaining, offset; + int ret, i, low, high; + static u32 hint; + + if (port) { + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { + inet_ehash_nolisten(sk, NULL); + spin_unlock_bh(&head->lock); + return 0; + } + spin_unlock(&head->lock); + /* No definite answer... Walk to established hash table */ + ret = check_established(death_row, sk, port, NULL); + local_bh_enable(); + return ret; + } - if (!snum) { - int i, remaining, low, high, port; - static u32 hint; - u32 offset = hint + port_offset; - struct inet_timewait_sock *tw = NULL; + inet_get_local_port_range(net, &low, &high); + high++; /* [32768, 60999] -> [32768, 61000[ */ + remaining = high - low; + if (likely(remaining > 1)) + remaining &= ~1U; - inet_get_local_port_range(net, &low, &high); - remaining = (high - low) + 1; + offset = (hint + port_offset) % remaining; + /* In first pass we try ports of @low parity. + * inet_csk_get_port() does the opposite choice. + */ + offset &= ~1U; +other_parity_scan: + port = low + offset; + for (i = 0; i < remaining; i += 2, port += 2) { + if (unlikely(port >= high)) + port -= remaining; + if (inet_is_local_reserved_port(net, port)) + continue; + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + spin_lock_bh(&head->lock); - /* By starting with offset being an even number, - * we tend to leave about 50% of ports for other uses, - * like bind(0). + /* Does not bother with rcv_saddr checks, because + * the established check is already unique enough. */ - offset &= ~1; - - local_bh_disable(); - for (i = 0; i < remaining; i++) { - port = low + (i + offset) % remaining; - if (inet_is_local_reserved_port(net, port)) - continue; - head = &hinfo->bhash[inet_bhashfn(net, port, - hinfo->bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, &head->chain) { - if (net_eq(ib_net(tb), net) && - tb->port == port) { - if (tb->fastreuse >= 0 || - tb->fastreuseport >= 0) - goto next_port; - WARN_ON(hlist_empty(&tb->owners)); - if (!check_established(death_row, sk, - port, &tw)) - goto ok; + inet_bind_bucket_for_each(tb, &head->chain) { + if (net_eq(ib_net(tb), net) && tb->port == port) { + if (tb->fastreuse >= 0 || + tb->fastreuseport >= 0) goto next_port; - } + WARN_ON(hlist_empty(&tb->owners)); + if (!check_established(death_row, sk, + port, &tw)) + goto ok; + goto next_port; } - - tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, - net, head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - tb->fastreuseport = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); } - local_bh_enable(); - - return -EADDRNOTAVAIL; -ok: - hint += (i + 2) & ~1; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->inet_sport = htons(port); - inet_ehash_nolisten(sk, (struct sock *)tw); + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + net, head, port); + if (!tb) { + spin_unlock_bh(&head->lock); + return -ENOMEM; } - if (tw) - inet_twsk_bind_unhash(tw, hinfo); - spin_unlock(&head->lock); + tb->fastreuse = -1; + tb->fastreuseport = -1; + goto ok; +next_port: + spin_unlock_bh(&head->lock); + cond_resched(); + } - if (tw) - inet_twsk_deschedule_put(tw); + offset++; + if ((offset & 1) && remaining > 1) + goto other_parity_scan; - ret = 0; - goto out; - } + return -EADDRNOTAVAIL; - head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - inet_ehash_nolisten(sk, NULL); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = check_established(death_row, sk, snum, NULL); -out: - local_bh_enable(); - return ret; +ok: + hint += i + 2; + + /* Head lock still held and bh's disabled */ + inet_bind_hash(sk, tb, port); + if (sk_unhashed(sk)) { + inet_sk(sk)->inet_sport = htons(port); + inet_ehash_nolisten(sk, (struct sock *)tw); } + if (tw) + inet_twsk_bind_unhash(tw, hinfo); + spin_unlock(&head->lock); + if (tw) + inet_twsk_deschedule_put(tw); + local_bh_enable(); + return 0; } /* diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c deleted file mode 100644 index f17ea49b28fb..000000000000 --- a/net/ipv4/inet_lro.c +++ /dev/null @@ -1,374 +0,0 @@ -/* - * linux/net/ipv4/inet_lro.c - * - * Large Receive Offload (ipv4 / tcp) - * - * (C) Copyright IBM Corp. 2007 - * - * Authors: - * Jan-Bernd Themann <themann@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -#include <linux/module.h> -#include <linux/if_vlan.h> -#include <linux/inet_lro.h> -#include <net/checksum.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); -MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)"); - -#define TCP_HDR_LEN(tcph) (tcph->doff << 2) -#define IP_HDR_LEN(iph) (iph->ihl << 2) -#define TCP_PAYLOAD_LENGTH(iph, tcph) \ - (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph)) - -#define IPH_LEN_WO_OPTIONS 5 -#define TCPH_LEN_WO_OPTIONS 5 -#define TCPH_LEN_W_TIMESTAMP 8 - -#define LRO_MAX_PG_HLEN 64 - -#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; } - -/* - * Basic tcp checks whether packet is suitable for LRO - */ - -static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph, - int len, const struct net_lro_desc *lro_desc) -{ - /* check ip header: don't aggregate padded frames */ - if (ntohs(iph->tot_len) != len) - return -1; - - if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0) - return -1; - - if (iph->ihl != IPH_LEN_WO_OPTIONS) - return -1; - - if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack || - tcph->rst || tcph->syn || tcph->fin) - return -1; - - if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) - return -1; - - if (tcph->doff != TCPH_LEN_WO_OPTIONS && - tcph->doff != TCPH_LEN_W_TIMESTAMP) - return -1; - - /* check tcp options (only timestamp allowed) */ - if (tcph->doff == TCPH_LEN_W_TIMESTAMP) { - __be32 *topt = (__be32 *)(tcph + 1); - - if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) - | TCPOLEN_TIMESTAMP)) - return -1; - - /* timestamp should be in right order */ - topt++; - if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval), - ntohl(*topt))) - return -1; - - /* timestamp reply should not be zero */ - topt++; - if (*topt == 0) - return -1; - } - - return 0; -} - -static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) -{ - struct iphdr *iph = lro_desc->iph; - struct tcphdr *tcph = lro_desc->tcph; - __be32 *p; - __wsum tcp_hdr_csum; - - tcph->ack_seq = lro_desc->tcp_ack; - tcph->window = lro_desc->tcp_window; - - if (lro_desc->tcp_saw_tstamp) { - p = (__be32 *)(tcph + 1); - *(p+2) = lro_desc->tcp_rcv_tsecr; - } - - csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len)); - iph->tot_len = htons(lro_desc->ip_tot_len); - - tcph->check = 0; - tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); - lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); - tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, - lro_desc->ip_tot_len - - IP_HDR_LEN(iph), IPPROTO_TCP, - lro_desc->data_csum); -} - -static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) -{ - __wsum tcp_csum; - __wsum tcp_hdr_csum; - __wsum tcp_ps_hdr_csum; - - tcp_csum = ~csum_unfold(tcph->check); - tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum); - - tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - len + TCP_HDR_LEN(tcph), - IPPROTO_TCP, 0); - - return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum), - tcp_ps_hdr_csum); -} - -static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, - struct iphdr *iph, struct tcphdr *tcph) -{ - int nr_frags; - __be32 *ptr; - u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); - - nr_frags = skb_shinfo(skb)->nr_frags; - lro_desc->parent = skb; - lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]); - lro_desc->iph = iph; - lro_desc->tcph = tcph; - lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len; - lro_desc->tcp_ack = tcph->ack_seq; - lro_desc->tcp_window = tcph->window; - - lro_desc->pkt_aggr_cnt = 1; - lro_desc->ip_tot_len = ntohs(iph->tot_len); - - if (tcph->doff == 8) { - ptr = (__be32 *)(tcph+1); - lro_desc->tcp_saw_tstamp = 1; - lro_desc->tcp_rcv_tsval = *(ptr+1); - lro_desc->tcp_rcv_tsecr = *(ptr+2); - } - - lro_desc->mss = tcp_data_len; - lro_desc->active = 1; - - lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, - tcp_data_len); -} - -static inline void lro_clear_desc(struct net_lro_desc *lro_desc) -{ - memset(lro_desc, 0, sizeof(struct net_lro_desc)); -} - -static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph, - struct tcphdr *tcph, int tcp_data_len) -{ - struct sk_buff *parent = lro_desc->parent; - __be32 *topt; - - lro_desc->pkt_aggr_cnt++; - lro_desc->ip_tot_len += tcp_data_len; - lro_desc->tcp_next_seq += tcp_data_len; - lro_desc->tcp_window = tcph->window; - lro_desc->tcp_ack = tcph->ack_seq; - - /* don't update tcp_rcv_tsval, would not work with PAWS */ - if (lro_desc->tcp_saw_tstamp) { - topt = (__be32 *) (tcph + 1); - lro_desc->tcp_rcv_tsecr = *(topt + 2); - } - - lro_desc->data_csum = csum_block_add(lro_desc->data_csum, - lro_tcp_data_csum(iph, tcph, - tcp_data_len), - parent->len); - - parent->len += tcp_data_len; - parent->data_len += tcp_data_len; - if (tcp_data_len > lro_desc->mss) - lro_desc->mss = tcp_data_len; -} - -static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb, - struct iphdr *iph, struct tcphdr *tcph) -{ - struct sk_buff *parent = lro_desc->parent; - int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); - - lro_add_common(lro_desc, iph, tcph, tcp_data_len); - - skb_pull(skb, (skb->len - tcp_data_len)); - parent->truesize += skb->truesize; - - if (lro_desc->last_skb) - lro_desc->last_skb->next = skb; - else - skb_shinfo(parent)->frag_list = skb; - - lro_desc->last_skb = skb; -} - - -static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, - struct iphdr *iph, - struct tcphdr *tcph) -{ - if ((lro_desc->iph->saddr != iph->saddr) || - (lro_desc->iph->daddr != iph->daddr) || - (lro_desc->tcph->source != tcph->source) || - (lro_desc->tcph->dest != tcph->dest)) - return -1; - return 0; -} - -static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr, - struct net_lro_desc *lro_arr, - struct iphdr *iph, - struct tcphdr *tcph) -{ - struct net_lro_desc *lro_desc = NULL; - struct net_lro_desc *tmp; - int max_desc = lro_mgr->max_desc; - int i; - - for (i = 0; i < max_desc; i++) { - tmp = &lro_arr[i]; - if (tmp->active) - if (!lro_check_tcp_conn(tmp, iph, tcph)) { - lro_desc = tmp; - goto out; - } - } - - for (i = 0; i < max_desc; i++) { - if (!lro_arr[i].active) { - lro_desc = &lro_arr[i]; - goto out; - } - } - - LRO_INC_STATS(lro_mgr, no_desc); -out: - return lro_desc; -} - -static void lro_flush(struct net_lro_mgr *lro_mgr, - struct net_lro_desc *lro_desc) -{ - if (lro_desc->pkt_aggr_cnt > 1) - lro_update_tcp_ip_header(lro_desc); - - skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; - - if (lro_mgr->features & LRO_F_NAPI) - netif_receive_skb(lro_desc->parent); - else - netif_rx(lro_desc->parent); - - LRO_INC_STATS(lro_mgr, flushed); - lro_clear_desc(lro_desc); -} - -static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, - void *priv) -{ - struct net_lro_desc *lro_desc; - struct iphdr *iph; - struct tcphdr *tcph; - u64 flags; - int vlan_hdr_len = 0; - - if (!lro_mgr->get_skb_header || - lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, - &flags, priv)) - goto out; - - if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) - goto out; - - lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); - if (!lro_desc) - goto out; - - if ((skb->protocol == htons(ETH_P_8021Q)) && - !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) - vlan_hdr_len = VLAN_HLEN; - - if (!lro_desc->active) { /* start new lro session */ - if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL)) - goto out; - - skb->ip_summed = lro_mgr->ip_summed_aggr; - lro_init_desc(lro_desc, skb, iph, tcph); - LRO_INC_STATS(lro_mgr, aggregated); - return 0; - } - - if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) - goto out2; - - if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc)) - goto out2; - - lro_add_packet(lro_desc, skb, iph, tcph); - LRO_INC_STATS(lro_mgr, aggregated); - - if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) || - lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) - lro_flush(lro_mgr, lro_desc); - - return 0; - -out2: /* send aggregated SKBs to stack */ - lro_flush(lro_mgr, lro_desc); - -out: - return 1; -} - -void lro_receive_skb(struct net_lro_mgr *lro_mgr, - struct sk_buff *skb, - void *priv) -{ - if (__lro_proc_skb(lro_mgr, skb, priv)) { - if (lro_mgr->features & LRO_F_NAPI) - netif_receive_skb(skb); - else - netif_rx(skb); - } -} -EXPORT_SYMBOL(lro_receive_skb); - -void lro_flush_all(struct net_lro_mgr *lro_mgr) -{ - int i; - struct net_lro_desc *lro_desc = lro_mgr->lro_arr; - - for (i = 0; i < lro_mgr->max_desc; i++) { - if (lro_desc[i].active) - lro_flush(lro_mgr, &lro_desc[i]); - } -} -EXPORT_SYMBOL(lro_flush_all); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 187c6fcc3027..efbd47d1a531 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -54,8 +54,6 @@ * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c * as well. Or notify me, at least. --ANK */ - -static int sysctl_ipfrag_max_dist __read_mostly = 64; static const char ip_frag_cache_name[] = "ip4-frags"; struct ipfrag_skb_cb @@ -150,7 +148,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a) qp->daddr = arg->iph->daddr; qp->vif = arg->vif; qp->user = arg->user; - qp->peer = sysctl_ipfrag_max_dist ? + qp->peer = q->net->max_dist ? inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) : NULL; } @@ -275,7 +273,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph, static int ip_frag_too_far(struct ipq *qp) { struct inet_peer *peer = qp->peer; - unsigned int max = sysctl_ipfrag_max_dist; + unsigned int max = qp->q.net->max_dist; unsigned int start, end; int rc; @@ -749,6 +747,14 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "ipfrag_max_dist", + .data = &init_net.ipv4.frags.max_dist, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero + }, { } }; @@ -762,14 +768,6 @@ static struct ctl_table ip4_frags_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "ipfrag_max_dist", - .data = &sysctl_ipfrag_max_dist, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero - }, { } }; @@ -790,10 +788,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) table[1].data = &net->ipv4.frags.low_thresh; table[1].extra2 = &net->ipv4.frags.high_thresh; table[2].data = &net->ipv4.frags.timeout; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; + table[3].data = &net->ipv4.frags.max_dist; } hdr = register_net_sysctl(net, "net/ipv4", table); @@ -865,6 +860,8 @@ static int __net_init ipv4_frags_init_net(struct net *net) */ net->ipv4.frags.timeout = IP_FRAG_TIME; + net->ipv4.frags.max_dist = 64; + res = inet_frags_init_net(&net->ipv4.frags); if (res) return res; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 41ba68de46d8..202437d6087b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -238,7 +238,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return iptunnel_pull_header(skb, hdr_len, tpi->proto); + return iptunnel_pull_header(skb, hdr_len, tpi->proto, false); } static void ipgre_err(struct sk_buff *skb, u32 info, @@ -440,6 +440,17 @@ drop: return 0; } +static __sum16 gre_checksum(struct sk_buff *skb) +{ + __wsum csum; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + csum = lco_csum(skb); + else + csum = skb_checksum(skb, 0, skb->len, 0); + return csum_fold(csum); +} + static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, __be16 proto, __be32 key, __be32 seq) { @@ -467,8 +478,7 @@ static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, !(skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, - skb->len, 0)); + *(__sum16 *)ptr = gre_checksum(skb); } } } @@ -493,8 +503,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool csum) { - return iptunnel_handle_offloads(skb, csum, - csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); + return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } static struct rtable *gre_get_rt(struct sk_buff *skb, @@ -531,9 +540,16 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) goto err_free_skb; key = &tun_info->key; - rt = gre_get_rt(skb, dev, &fl, key); - if (IS_ERR(rt)) - goto err_free_skb; + rt = !skb->mark ? dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr) : + NULL; + if (!rt) { + rt = gre_get_rt(skb, dev, &fl, key); + if (IS_ERR(rt)) + goto err_free_skb; + if (!skb->mark) + dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, + fl.saddr); + } tunnel_hlen = ip_gre_calc_hlen(key->tun_flags); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d77eb0c3b684..e3d782746d9d 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -308,15 +308,12 @@ drop: return true; } -int sysctl_ip_early_demux __read_mostly = 1; -EXPORT_SYMBOL(sysctl_ip_early_demux); - static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - if (sysctl_ip_early_demux && + if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk && !ip_is_fragment(iph)) { @@ -362,8 +359,31 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len); - } else if (rt->rt_type == RTN_BROADCAST) + } else if (rt->rt_type == RTN_BROADCAST) { IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len); + } else if (skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) { + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + + /* RFC 1122 3.3.6: + * + * When a host sends a datagram to a link-layer broadcast + * address, the IP destination address MUST be a legal IP + * broadcast or IP multicast address. + * + * A host SHOULD silently discard a datagram that is received + * via a link-layer broadcast (see Section 2.4) but does not + * specify an IP multicast or broadcast destination address. + * + * This doesn't explicitly say L2 *broadcast*, but broadcast is + * in a way a form of multicast and the most common use case for + * this is 802.11 protecting against cross-station spoofing (the + * so-called "hole-196" attack) so do it for both. + */ + if (in_dev && + IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) + goto drop; + } return dst_input(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 64878efa045c..f734c42acdaf 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -79,9 +79,6 @@ #include <linux/netlink.h> #include <linux/tcp.h> -int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; -EXPORT_SYMBOL(sysctl_ip_default_ttl); - static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index a50124260f5a..035ad645a8d9 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -573,6 +573,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); int val = 0, err; bool needs_rtnl = setsockopt_needs_rtnl(optname); @@ -912,7 +913,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, } /* numsrc >= (1G-4) overflow in 32 bits */ if (msf->imsf_numsrc >= 0x3ffffffcU || - msf->imsf_numsrc > sysctl_igmp_max_msf) { + msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) { kfree(msf); err = -ENOBUFS; break; @@ -1067,7 +1068,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, /* numsrc >= (4G-140)/128 overflow in 32 bits */ if (gsf->gf_numsrc >= 0x1ffffff || - gsf->gf_numsrc > sysctl_igmp_max_msf) { + gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) { err = -ENOBUFS; goto mc_msf_out; } @@ -1342,10 +1343,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, val = inet->tos; break; case IP_TTL: + { + struct net *net = sock_net(sk); val = (inet->uc_ttl == -1 ? - sysctl_ip_default_ttl : + net->ipv4.sysctl_ip_default_ttl : inet->uc_ttl); break; + } case IP_HDRINCL: val = inet->hdrincl; break; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 89e8861e05fc..dff8a05739a2 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -68,61 +68,6 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) IP_TNL_HASH_BITS); } -static void __tunnel_dst_set(struct ip_tunnel_dst *idst, - struct dst_entry *dst, __be32 saddr) -{ - struct dst_entry *old_dst; - - dst_clone(dst); - old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); - dst_release(old_dst); - idst->saddr = saddr; -} - -static noinline void tunnel_dst_set(struct ip_tunnel *t, - struct dst_entry *dst, __be32 saddr) -{ - __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr); -} - -static void tunnel_dst_reset(struct ip_tunnel *t) -{ - tunnel_dst_set(t, NULL, 0); -} - -void ip_tunnel_dst_reset_all(struct ip_tunnel *t) -{ - int i; - - for_each_possible_cpu(i) - __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0); -} -EXPORT_SYMBOL(ip_tunnel_dst_reset_all); - -static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, - u32 cookie, __be32 *saddr) -{ - struct ip_tunnel_dst *idst; - struct dst_entry *dst; - - rcu_read_lock(); - idst = raw_cpu_ptr(t->dst_cache); - dst = rcu_dereference(idst->dst); - if (dst && !atomic_inc_not_zero(&dst->__refcnt)) - dst = NULL; - if (dst) { - if (!dst->obsolete || dst->ops->check(dst, cookie)) { - *saddr = idst->saddr; - } else { - tunnel_dst_reset(t); - dst_release(dst); - dst = NULL; - } - } - rcu_read_unlock(); - return (struct rtable *)dst; -} - static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, __be16 flags, __be32 key) { @@ -381,7 +326,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev) if (!IS_ERR(rt)) { tdev = rt->dst.dev; - tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); + dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, + fl4.saddr); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) @@ -729,7 +675,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; - rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; + rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) : + NULL; if (!rt) { rt = ip_route_output_key(tunnel->net, &fl4); @@ -739,7 +686,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } if (connected) - tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); + dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, + fl4.saddr); } if (rt->dst.dev == dev) { @@ -836,7 +784,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, if (set_mtu) dev->mtu = mtu; } - ip_tunnel_dst_reset_all(t); + dst_cache_reset(&t->dst_cache); netdev_state_change(dev); } @@ -975,7 +923,7 @@ static void ip_tunnel_dev_free(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); gro_cells_destroy(&tunnel->gro_cells); - free_percpu(tunnel->dst_cache); + dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -1169,15 +1117,15 @@ int ip_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; - tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); - if (!tunnel->dst_cache) { + err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); + if (err) { free_percpu(dev->tstats); - return -ENOMEM; + return err; } err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { - free_percpu(tunnel->dst_cache); + dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); return err; } @@ -1207,7 +1155,7 @@ void ip_tunnel_uninit(struct net_device *dev) if (itn->fb_tunnel_dev != dev) ip_tunnel_del(itn, netdev_priv(dev)); - ip_tunnel_dst_reset_all(tunnel); + dst_cache_reset(&tunnel->dst_cache); } EXPORT_SYMBOL_GPL(ip_tunnel_uninit); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 859d415c0b2d..eaca2449a09a 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -86,7 +86,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(iptunnel_xmit); -int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, + bool xnet) { if (unlikely(!pskb_may_pull(skb, hdr_len))) return -ENOMEM; @@ -109,13 +110,10 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) skb->protocol = inner_proto; } - nf_reset(skb); - secpath_reset(skb); skb_clear_hash_if_not_l4(skb); - skb_dst_drop(skb); skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); - skb->pkt_type = PACKET_HOST; + skb_scrub_packet(skb, xnet); return 0; } EXPORT_SYMBOL_GPL(iptunnel_pull_header); @@ -148,7 +146,6 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, EXPORT_SYMBOL_GPL(iptunnel_metadata_reply); struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, - bool csum_help, int gso_type_mask) { int err; @@ -166,20 +163,15 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, return skb; } - /* If packet is not gso and we are resolving any partial checksum, - * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL - * on the outer header without confusing devices that implement - * NETIF_F_IP_CSUM with encapsulation. - */ - if (csum_help) - skb->encapsulation = 0; - - if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { - err = skb_checksum_help(skb); - if (unlikely(err)) - goto error; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_NONE; + /* We clear encapsulation here to prevent badly-written + * drivers potentially deciding to offload an inner checksum + * if we set CHECKSUM_PARTIAL on the outer header. + * This should go away when the drivers are all fixed. + */ + skb->encapsulation = 0; + } return skb; error: diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 4044da61e747..ec51d02166de 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -195,7 +195,7 @@ static int ipip_rcv(struct sk_buff *skb) if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - if (iptunnel_pull_header(skb, 0, tpi.proto)) + if (iptunnel_pull_header(skb, 0, tpi.proto, false)) goto drop; return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); } @@ -219,7 +219,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP); if (IS_ERR(skb)) goto out; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 5fdc556514ba..7b8fbb352877 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -21,6 +21,7 @@ static struct iphdr * synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct iphdr *iph; + struct net *net = sock_net(skb->sk); skb_reset_network_header(skb); iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); @@ -29,7 +30,7 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->tos = 0; iph->id = 0; iph->frag_off = htons(IP_DF); - iph->ttl = sysctl_ip_default_ttl; + iph->ttl = net->ipv4.sysctl_ip_default_ttl; iph->protocol = IPPROTO_TCP; iph->check = 0; iph->saddr = saddr; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index d3a27165f9cc..76dce90c4581 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -145,10 +145,12 @@ fail: } EXPORT_SYMBOL_GPL(ping_get_port); -void ping_hash(struct sock *sk) +int ping_hash(struct sock *sk) { pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); BUG(); /* "Please do not press this button again." */ + + return 0; } void ping_unhash(struct sock *sk) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3abd9d7a3adf..9f665b63a927 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -390,7 +390,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\nIp: %d %d", IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, - sysctl_ip_default_ttl); + net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7113bae4e6a0..8d22de74080c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -93,7 +93,7 @@ static struct raw_hashinfo raw_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), }; -void raw_hash_sk(struct sock *sk) +int raw_hash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; struct hlist_head *head; @@ -104,6 +104,8 @@ void raw_hash_sk(struct sock *sk) sk_add_node(sk, head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock_bh(&h->lock); + + return 0; } EXPORT_SYMBOL_GPL(raw_hash_sk); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 643a86c49020..ba0dcffada3b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -19,8 +19,6 @@ #include <net/tcp.h> #include <net/route.h> -extern int sysctl_tcp_syncookies; - static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; #define COOKIEBITS 24 /* Upper bits store count */ @@ -307,7 +305,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; struct flowi4 fl4; - if (!sysctl_tcp_syncookies || !th->ack || th->rst) + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4d367b4139a3..1e1fe6086dd9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -283,31 +283,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "ip_default_ttl", - .data = &sysctl_ip_default_ttl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &ip_ttl_min, - .extra2 = &ip_ttl_max, - }, - { - .procname = "tcp_syn_retries", - .data = &sysctl_tcp_syn_retries, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &tcp_syn_retries_min, - .extra2 = &tcp_syn_retries_max - }, - { - .procname = "tcp_synack_retries", - .data = &sysctl_tcp_synack_retries, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_max_orphans", .data = &sysctl_tcp_max_orphans, .maxlen = sizeof(int), @@ -322,51 +297,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "ip_early_demux", - .data = &sysctl_ip_early_demux, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "ip_dynaddr", - .data = &sysctl_ip_dynaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_retries1", - .data = &sysctl_tcp_retries1, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra2 = &tcp_retr1_max - }, - { - .procname = "tcp_retries2", - .data = &sysctl_tcp_retries2, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_fin_timeout", - .data = &sysctl_tcp_fin_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, -#ifdef CONFIG_SYN_COOKIES - { - .procname = "tcp_syncookies", - .data = &sysctl_tcp_syncookies, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, -#endif - { .procname = "tcp_fastopen", .data = &sysctl_tcp_fastopen, .maxlen = sizeof(int), @@ -415,30 +345,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "igmp_max_memberships", - .data = &sysctl_igmp_max_memberships, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "igmp_max_msf", - .data = &sysctl_igmp_max_msf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, -#ifdef CONFIG_IP_MULTICAST - { - .procname = "igmp_qrv", - .data = &sysctl_igmp_qrv, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one - }, -#endif - { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, .maxlen = sizeof(int), @@ -460,13 +366,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .procname = "tcp_orphan_retries", - .data = &sysctl_tcp_orphan_retries, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_fack", .data = &sysctl_tcp_fack, .maxlen = sizeof(int), @@ -481,13 +380,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .procname = "tcp_reordering", - .data = &sysctl_tcp_reordering, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_max_reordering", .data = &sysctl_tcp_max_reordering, .maxlen = sizeof(int), @@ -517,13 +409,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &one, }, { - .procname = "tcp_notsent_lowat", - .data = &sysctl_tcp_notsent_lowat, - .maxlen = sizeof(sysctl_tcp_notsent_lowat), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "tcp_rmem", .data = &sysctl_tcp_rmem, .maxlen = sizeof(sysctl_tcp_rmem), @@ -845,6 +730,29 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { + .procname = "ip_dynaddr", + .data = &init_net.ipv4.sysctl_ip_dynaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "ip_early_demux", + .data = &init_net.ipv4.sysctl_ip_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "ip_default_ttl", + .data = &init_net.ipv4.sysctl_ip_default_ttl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &ip_ttl_min, + .extra2 = &ip_ttl_max, + }, + { .procname = "ip_local_port_range", .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), .data = &init_net.ipv4.ip_local_ports.range, @@ -934,12 +842,36 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "igmp_link_local_mcast_reports", - .data = &sysctl_igmp_llm_reports, + .data = &init_net.ipv4.sysctl_igmp_llm_reports, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "igmp_max_memberships", + .data = &init_net.ipv4.sysctl_igmp_max_memberships, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { + .procname = "igmp_max_msf", + .data = &init_net.ipv4.sysctl_igmp_max_msf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#ifdef CONFIG_IP_MULTICAST + { + .procname = "igmp_qrv", + .data = &init_net.ipv4.sysctl_igmp_qrv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, +#endif + { .procname = "tcp_keepalive_time", .data = &init_net.ipv4.sysctl_tcp_keepalive_time, .maxlen = sizeof(int), @@ -960,6 +892,74 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "tcp_syn_retries", + .data = &init_net.ipv4.sysctl_tcp_syn_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_syn_retries_min, + .extra2 = &tcp_syn_retries_max + }, + { + .procname = "tcp_synack_retries", + .data = &init_net.ipv4.sysctl_tcp_synack_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#ifdef CONFIG_SYN_COOKIES + { + .procname = "tcp_syncookies", + .data = &init_net.ipv4.sysctl_tcp_syncookies, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#endif + { + .procname = "tcp_reordering", + .data = &init_net.ipv4.sysctl_tcp_reordering, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_retries1", + .data = &init_net.ipv4.sysctl_tcp_retries1, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra2 = &tcp_retr1_max + }, + { + .procname = "tcp_retries2", + .data = &init_net.ipv4.sysctl_tcp_retries2, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_orphan_retries", + .data = &init_net.ipv4.sysctl_tcp_orphan_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_fin_timeout", + .data = &init_net.ipv4.sysctl_tcp_fin_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "tcp_notsent_lowat", + .data = &init_net.ipv4.sysctl_tcp_notsent_lowat, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; @@ -988,6 +988,10 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!net->ipv4.sysctl_local_reserved_ports) goto err_ports; + net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; + net->ipv4.sysctl_ip_dynaddr = 0; + net->ipv4.sysctl_ip_early_demux = 1; + return 0; err_ports: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 483ffdf5aa4d..f9faadb42485 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,8 +282,6 @@ #include <asm/unaligned.h> #include <net/busy_poll.h> -int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; - int sysctl_tcp_min_tso_segs __read_mostly = 2; int sysctl_tcp_autocorking __read_mostly = 1; @@ -406,7 +404,7 @@ void tcp_init_sock(struct sock *sk) tp->mss_cache = TCP_MSS_DEFAULT; u64_stats_init(&tp->syncp); - tp->reordering = sysctl_tcp_reordering; + tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; tcp_enable_early_retrans(tp); tcp_assign_congestion_control(sk); @@ -1466,8 +1464,10 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { offset = seq - TCP_SKB_CB(skb)->seq; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) + if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + pr_err_once("%s: found a SYN, please report !\n", __func__); offset--; + } if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { *off = offset; return skb; @@ -1657,8 +1657,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, break; offset = *seq - TCP_SKB_CB(skb)->seq; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) + if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + pr_err_once("%s: found a SYN, please report !\n", __func__); offset--; + } if (offset < skb->len) goto found_ok_skb; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) @@ -2326,6 +2328,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); int val; int err = 0; @@ -2522,7 +2525,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_LINGER2: if (val < 0) tp->linger2 = -1; - else if (val > sysctl_tcp_fin_timeout / HZ) + else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ) tp->linger2 = 0; else tp->linger2 = val * HZ; @@ -2639,6 +2642,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; + int notsent_bytes; u64 rate64; u32 rate; @@ -2719,6 +2723,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; + + notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt); + info->tcpi_notsent_bytes = max(0, notsent_bytes); + + info->tcpi_min_rtt = tcp_min_rtt(tp); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -2727,6 +2736,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); int val, len; if (get_user(len, optlen)) @@ -2761,12 +2771,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? : sysctl_tcp_fin_timeout) / HZ; + val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 55be6ac70cff..fdb286ddba04 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -124,6 +124,41 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, return false; } + +/* If an incoming SYN or SYNACK frame contains a payload and/or FIN, + * queue this additional data / FIN. + */ +void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt) + return; + + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) + return; + + skb_dst_drop(skb); + __skb_pull(skb, tcp_hdrlen(skb)); + skb_set_owner_r(skb, sk); + + TCP_SKB_CB(skb)->seq++; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; + + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + __skb_queue_tail(&sk->sk_receive_queue, skb); + tp->syn_data_acked = 1; + + /* u64_stats_update_begin(&tp->syncp) not needed here, + * as we certainly are not changing upper 32bit value (0) + */ + tp->bytes_received = skb->len; + + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + tcp_fin(sk); +} + static struct sock *tcp_fastopen_create_child(struct sock *sk, struct sk_buff *skb, struct dst_entry *dst, @@ -132,7 +167,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, struct tcp_sock *tp; struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; struct sock *child; - u32 end_seq; bool own_req; req->num_retrans = 0; @@ -178,35 +212,11 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, tcp_init_metrics(child); tcp_init_buffer_space(child); - /* Queue the data carried in the SYN packet. - * We used to play tricky games with skb_get(). - * With lockless listener, it is a dead end. - * Do not think about it. - * - * XXX (TFO) - we honor a zero-payload TFO request for now, - * (any reason not to?) but no need to queue the skb since - * there is no data. How about SYN+FIN? - */ - end_seq = TCP_SKB_CB(skb)->end_seq; - if (end_seq != TCP_SKB_CB(skb)->seq + 1) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - - if (likely(skb2)) { - skb_dst_drop(skb2); - __skb_pull(skb2, tcp_hdrlen(skb)); - skb_set_owner_r(skb2, child); - __skb_queue_tail(&child->sk_receive_queue, skb2); - tp->syn_data_acked = 1; - - /* u64_stats_update_begin(&tp->syncp) not needed here, - * as we certainly are not changing upper 32bit value (0) - */ - tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1; - } else { - end_seq = TCP_SKB_CB(skb)->seq + 1; - } - } - tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq; + tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + + tcp_fastopen_add_skb(child, skb); + + tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; /* tcp_conn_request() is sending the SYNACK, * and queues the child into listener accept queue. */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3b2c8e90a475..e6e65f79ade8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -80,9 +80,7 @@ int sysctl_tcp_timestamps __read_mostly = 1; int sysctl_tcp_window_scaling __read_mostly = 1; int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly = 1; -int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; int sysctl_tcp_max_reordering __read_mostly = 300; -EXPORT_SYMBOL(sysctl_tcp_reordering); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; @@ -126,6 +124,10 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) +#define REXMIT_NONE 0 /* no loss recovery to do */ +#define REXMIT_LOST 1 /* retransmit packets marked lost */ +#define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ + /* Adapt the MSS value used to make delayed ack decision to the * real world. */ @@ -1210,6 +1212,7 @@ static u8 tcp_sacktag_one(struct sock *sk, sacked |= TCPCB_SACKED_ACKED; state->flag |= FLAG_DATA_SACKED; tp->sacked_out += pcount; + tp->delivered += pcount; /* Out-of-order packets delivered */ fack_count += pcount; @@ -1821,8 +1824,12 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) static void tcp_add_reno_sack(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 prior_sacked = tp->sacked_out; + tp->sacked_out++; tcp_check_reno_reordering(sk, 0); + if (tp->sacked_out > prior_sacked) + tp->delivered++; /* Some out-of-order packet is delivered */ tcp_verify_left_out(tp); } @@ -1834,6 +1841,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked) if (acked > 0) { /* One ACK acked hole. The rest eat duplicate ACKs. */ + tp->delivered += max_t(int, acked - tp->sacked_out, 1); if (acked - 1 >= tp->sacked_out) tp->sacked_out = 0; else @@ -1873,6 +1881,7 @@ void tcp_enter_loss(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct sk_buff *skb; bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ @@ -1923,9 +1932,9 @@ void tcp_enter_loss(struct sock *sk) * suggests that the degree of reordering is over-estimated. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder && - tp->sacked_out >= sysctl_tcp_reordering) + tp->sacked_out >= net->ipv4.sysctl_tcp_reordering) tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); + net->ipv4.sysctl_tcp_reordering); tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); @@ -2109,6 +2118,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; + int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; /* Trick#1: The loss is proven. */ if (tp->lost_out) @@ -2123,7 +2133,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) */ packets_out = tp->packets_out; if (packets_out <= tp->reordering && - tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && + tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) && !tcp_may_send_now(sk)) { /* We have nothing to send. This connection is limited * either by receiver window or by application. @@ -2467,14 +2477,12 @@ static void tcp_init_cwnd_reduction(struct sock *sk) tcp_ecn_queue_cwr(tp); } -static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, - int fast_rexmit, int flag) +static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, + int flag) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); - int newly_acked_sacked = prior_unsacked - - (tp->packets_out - tp->sacked_out); if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd)) return; @@ -2492,7 +2500,8 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, } else { sndcnt = min(delta, newly_acked_sacked); } - sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + /* Force a fast retransmit upon entering fast recovery */ + sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1)); tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; } @@ -2537,7 +2546,7 @@ static void tcp_try_keep_open(struct sock *sk) } } -static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) +static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -2551,8 +2560,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { tcp_try_keep_open(sk); - } else { - tcp_cwnd_reduction(sk, prior_unsacked, 0, flag); } } @@ -2662,7 +2669,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are * recovered or spurious. Otherwise retransmits more on partial ACKs. */ -static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) +static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, + int *rexmit) { struct tcp_sock *tp = tcp_sk(sk); bool recovered = !before(tp->snd_una, tp->high_seq); @@ -2684,10 +2692,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) tp->frto = 0; /* Step 3.a. loss was real */ } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { tp->high_seq = tp->snd_nxt; - __tcp_push_pending_frames(sk, tcp_current_mss(sk), - TCP_NAGLE_OFF); - if (after(tp->snd_nxt, tp->high_seq)) - return; /* Step 2.b */ + /* Step 2.b. Try send new data (but deferred until cwnd + * is updated in tcp_ack()). Otherwise fall back to + * the conventional recovery. + */ + if (tcp_send_head(sk) && + after(tcp_wnd_end(tp), tp->snd_nxt)) { + *rexmit = REXMIT_NEW; + return; + } tp->frto = 0; } } @@ -2706,12 +2719,11 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) else if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); } - tcp_xmit_retransmit_queue(sk); + *rexmit = REXMIT_LOST; } /* Undo during fast recovery after partial ACK. */ -static bool tcp_try_undo_partial(struct sock *sk, const int acked, - const int prior_unsacked, int flag) +static bool tcp_try_undo_partial(struct sock *sk, const int acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -2726,10 +2738,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked, * can undo. Otherwise we clock out new packets but do not * mark more packets lost or retransmit more. */ - if (tp->retrans_out) { - tcp_cwnd_reduction(sk, prior_unsacked, 0, flag); + if (tp->retrans_out) return true; - } if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; @@ -2748,21 +2758,21 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked, * taking into account both packets sitting in receiver's buffer and * packets lost by network. * - * Besides that it does CWND reduction, when packet loss is detected - * and changes state of machine. + * Besides that it updates the congestion state when packet loss or ECN + * is detected. But it does not reduce the cwnd, it is done by the + * congestion control later. * * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, const int acked, - const int prior_unsacked, - bool is_dupack, int flag) + bool is_dupack, int *ack_flag, int *rexmit) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + int fast_rexmit = 0, flag = *ack_flag; bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) tp->sacked_out = 0; @@ -2809,8 +2819,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, /* Use RACK to detect loss */ if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS && - tcp_rack_mark_lost(sk)) + tcp_rack_mark_lost(sk)) { flag |= FLAG_LOST_RETRANS; + *ack_flag |= FLAG_LOST_RETRANS; + } /* E. Process state. */ switch (icsk->icsk_ca_state) { @@ -2819,7 +2831,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); } else { - if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag)) + if (tcp_try_undo_partial(sk, acked)) return; /* Partial ACK arrived. Force fast retransmit. */ do_lost = tcp_is_reno(tp) || @@ -2831,7 +2843,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, } break; case TCP_CA_Loss: - tcp_process_loss(sk, flag, is_dupack); + tcp_process_loss(sk, flag, is_dupack, rexmit); if (icsk->icsk_ca_state != TCP_CA_Open && !(flag & FLAG_LOST_RETRANS)) return; @@ -2848,7 +2860,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk, flag)) { - tcp_try_to_open(sk, flag, prior_unsacked); + tcp_try_to_open(sk, flag); return; } @@ -2870,8 +2882,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag); - tcp_xmit_retransmit_queue(sk); + *rexmit = REXMIT_LOST; } /* Kathleen Nichols' algorithm for tracking the minimum value of @@ -3096,7 +3107,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, * arrived at the other end. */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, - u32 prior_snd_una, + u32 prior_snd_una, int *acked, struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -3154,10 +3165,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_ORIG_SACK_ACKED; } - if (sacked & TCPCB_SACKED_ACKED) + if (sacked & TCPCB_SACKED_ACKED) { tp->sacked_out -= acked_pcount; - else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb)) - tcp_rack_advance(tp, &skb->skb_mstamp, sacked); + } else if (tcp_is_sack(tp)) { + tp->delivered += acked_pcount; + if (!tcp_skb_spurious_retrans(tp, skb)) + tcp_rack_advance(tp, &skb->skb_mstamp, sacked); + } if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; @@ -3266,6 +3280,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } } #endif + *acked = pkts_acked; return flag; } @@ -3299,21 +3314,36 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag) /* Decide wheather to run the increase function of congestion control. */ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) { - if (tcp_in_cwnd_reduction(sk)) - return false; - /* If reordering is high then always grow cwnd whenever data is * delivered regardless of its ordering. Otherwise stay conservative * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/ * new SACK or ECE mark may first advance cwnd here and later reduce * cwnd in tcp_fastretrans_alert() based on more states. */ - if (tcp_sk(sk)->reordering > sysctl_tcp_reordering) + if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering) return flag & FLAG_FORWARD_PROGRESS; return flag & FLAG_DATA_ACKED; } +/* The "ultimate" congestion control function that aims to replace the rigid + * cwnd increase and decrease control (tcp_cong_avoid,tcp_*cwnd_reduction). + * It's called toward the end of processing an ACK with precise rate + * information. All transmission or retransmission are delayed afterwards. + */ +static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, + int flag) +{ + if (tcp_in_cwnd_reduction(sk)) { + /* Reduce cwnd if state mandates */ + tcp_cwnd_reduction(sk, acked_sacked, flag); + } else if (tcp_may_raise_cwnd(sk, flag)) { + /* Advance cwnd if state allows */ + tcp_cong_avoid(sk, ack, acked_sacked); + } + tcp_update_pacing_rate(sk); +} + /* Check that window update is acceptable. * The function assumes that snd_una<=ack<=snd_next. */ @@ -3509,6 +3539,27 @@ static inline void tcp_in_ack_event(struct sock *sk, u32 flags) icsk->icsk_ca_ops->in_ack_event(sk, flags); } +/* Congestion control has updated the cwnd already. So if we're in + * loss recovery then now we do any new sends (for FRTO) or + * retransmits (for CA_Loss or CA_recovery) that make sense. + */ +static void tcp_xmit_recovery(struct sock *sk, int rexmit) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (rexmit == REXMIT_NONE) + return; + + if (unlikely(rexmit == 2)) { + __tcp_push_pending_frames(sk, tcp_current_mss(sk), + TCP_NAGLE_OFF); + if (after(tp->snd_nxt, tp->high_seq)) + return; + tp->frto = 0; + } + tcp_xmit_retransmit_queue(sk); +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3521,8 +3572,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) bool is_dupack = false; u32 prior_fackets; int prior_packets = tp->packets_out; - const int prior_unsacked = tp->packets_out - tp->sacked_out; + u32 prior_delivered = tp->delivered; int acked = 0; /* Number of packets newly acked */ + int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ sack_state.first_sackt.v64 = 0; @@ -3611,23 +3663,16 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - acked = tp->packets_out; - flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, &sack_state); - acked -= tp->packets_out; if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, acked, prior_unsacked, - is_dupack, flag); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); } if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); - /* Advance cwnd if state allows */ - if (tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, acked); - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); if (dst) @@ -3636,14 +3681,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); - tcp_update_pacing_rate(sk); + tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag); + tcp_xmit_recovery(sk, rexmit); return 1; no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, acked, prior_unsacked, - is_dupack, flag); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3666,8 +3711,8 @@ old_ack: if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - tcp_fastretrans_alert(sk, acked, prior_unsacked, - is_dupack, flag); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_xmit_recovery(sk, rexmit); } SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@ -3998,7 +4043,7 @@ void tcp_reset(struct sock *sk) * * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. */ -static void tcp_fin(struct sock *sk) +void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -5512,6 +5557,9 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, tp->syn_data_acked = tp->syn_data; if (tp->syn_data_acked) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); + + tcp_fastopen_add_skb(sk, synack); + return false; } @@ -6118,9 +6166,10 @@ static bool tcp_syn_flood_action(const struct sock *sk, struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; bool want_cookie = false; + struct net *net = sock_net(sk); #ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { + if (net->ipv4.sysctl_tcp_syncookies) { msg = "Sending cookies"; want_cookie = true; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); @@ -6129,7 +6178,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); if (!queue->synflood_warned && - sysctl_tcp_syncookies != 2 && + net->ipv4.sysctl_tcp_syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, ntohs(tcp_hdr(skb)->dest), msg); @@ -6162,6 +6211,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct sock *fastopen_sk = NULL; struct dst_entry *dst = NULL; struct request_sock *req; @@ -6172,7 +6222,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, * limitations, they conserve resources and peer is * evidently real one. */ - if ((sysctl_tcp_syncookies == 2 || + if ((net->ipv4.sysctl_tcp_syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); if (!want_cookie) @@ -6238,7 +6288,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } } /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && + else if (!net->ipv4.sysctl_tcp_syncookies && (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst, false, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 487ac67059e2..4c8d58dfac9b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -642,8 +642,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) * Incoming packet is checked with md5 hash with finding key, * no RST generated if md5 hash doesn't match. */ - sk1 = __inet_lookup_listener(net, - &tcp_hashinfo, ip_hdr(skb)->saddr, + sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0, + ip_hdr(skb)->saddr, th->source, ip_hdr(skb)->daddr, ntohs(th->source), inet_iif(skb)); /* don't send rst if it can't find key */ @@ -865,7 +865,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } - #ifdef CONFIG_TCP_MD5SIG /* * RFC2385 MD5 checksumming requires a mapping of @@ -1587,7 +1586,8 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->sacked = 0; lookup: - sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); + sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, + th->dest); if (!sk) goto no_tcp_socket; @@ -1703,7 +1703,8 @@ do_time_wait: switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), - &tcp_hashinfo, + &tcp_hashinfo, skb, + __tcp_hdrlen(th), iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); @@ -2395,6 +2396,16 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; + net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES; + net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; + net->ipv4.sysctl_tcp_syncookies = 1; + net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; + net->ipv4.sysctl_tcp_retries1 = TCP_RETR1; + net->ipv4.sysctl_tcp_retries2 = TCP_RETR2; + net->ipv4.sysctl_tcp_orphan_retries = 0; + net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; + net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + return 0; fail: tcp_sk_exit(net); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index c8cbc2b4b792..c26241f3057b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -369,6 +369,7 @@ void tcp_update_metrics(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct tcp_metrics_block *tm; unsigned long rtt; u32 val; @@ -473,7 +474,7 @@ void tcp_update_metrics(struct sock *sk) if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val < tp->reordering && - tp->reordering != sysctl_tcp_reordering) + tp->reordering != net->ipv4.sysctl_tcp_reordering) tcp_metric_set(tm, TCP_METRIC_REORDERING, tp->reordering); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 75632a925824..fadd8b978951 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -27,9 +27,6 @@ #include <net/inet_common.h> #include <net/xfrm.h> -int sysctl_tcp_syncookies __read_mostly = 1; -EXPORT_SYMBOL(sysctl_tcp_syncookies); - int sysctl_tcp_abort_on_overflow __read_mostly; struct inet_timewait_death_row tcp_death_row = { diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 9864a2dbadce..773083b7f1e9 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -135,7 +135,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->fin = th->psh = 0; th->check = newcheck; - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_PARTIAL) + gso_reset_checksum(skb, ~th->check); + else th->check = gso_make_checksum(skb, ~th->check); seq += mss; @@ -169,7 +171,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_PARTIAL) + gso_reset_checksum(skb, ~th->check); + else th->check = gso_make_checksum(skb, ~th->check); out: return segs; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fda379cd600d..7d2c7a400456 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -62,9 +62,6 @@ int sysctl_tcp_tso_win_divisor __read_mostly = 3; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX; -EXPORT_SYMBOL(sysctl_tcp_notsent_lowat); - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -3476,6 +3473,7 @@ void tcp_send_probe0(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); unsigned long probe_max; int err; @@ -3489,7 +3487,7 @@ void tcp_send_probe0(struct sock *sk) } if (err <= 0) { - if (icsk->icsk_backoff < sysctl_tcp_retries2) + if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) icsk->icsk_backoff++; icsk->icsk_probes_out++; probe_max = TCP_RTO_MAX; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a4730a28b220..49bc474f8e35 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,11 +22,6 @@ #include <linux/gfp.h> #include <net/tcp.h> -int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; -int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; -int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; -int sysctl_tcp_orphan_retries __read_mostly; int sysctl_tcp_thin_linear_timeouts __read_mostly; static void tcp_write_err(struct sock *sk) @@ -82,7 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) /* Calculate maximal number or retries on an orphaned socket. */ static int tcp_orphan_retries(struct sock *sk, bool alive) { - int retries = sysctl_tcp_orphan_retries; /* May be zero. */ + int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */ /* We know from an ICMP that something is wrong. */ if (sk->sk_err_soft && !alive) @@ -157,6 +152,7 @@ static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); int retry_until; bool do_reset, syn_set = false; @@ -169,10 +165,10 @@ static int tcp_write_timeout(struct sock *sk) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); } - retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; + retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; syn_set = true; } else { - if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { + if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) { /* Some middle-boxes may black-hole Fast Open _after_ * the handshake. Therefore we conservatively disable * Fast Open on this path on recurring timeouts with @@ -181,7 +177,7 @@ static int tcp_write_timeout(struct sock *sk) if (tp->syn_data_acked && tp->bytes_acked <= tp->rx_opt.mss_clamp) { tcp_fastopen_cache_set(sk, 0, NULL, true, 0); - if (icsk->icsk_retransmits == sysctl_tcp_retries1) + if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); } @@ -191,7 +187,7 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); } - retry_until = sysctl_tcp_retries2; + retry_until = net->ipv4.sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < TCP_RTO_MAX; @@ -305,7 +301,7 @@ static void tcp_probe_timer(struct sock *sk) (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout) goto abort; - max_probes = sysctl_tcp_retries2; + max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; @@ -332,7 +328,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); int max_retries = icsk->icsk_syn_retries ? : - sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ + sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ struct request_sock *req; req = tcp_sk(sk)->fastopen_rsk; @@ -360,6 +356,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) void tcp_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); if (tp->fastopen_rsk) { @@ -490,7 +487,7 @@ out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0)) + if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0)) __sk_dst_reset(sk); out:; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 95d2f198017e..836abe58a9c5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -356,8 +356,8 @@ EXPORT_SYMBOL(udp_lib_get_port); * match_wildcard == false: addresses must be exactly the same, i.e. * 0.0.0.0 only equals to 0.0.0.0 */ -static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2, - bool match_wildcard) +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2, + bool match_wildcard) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -848,32 +848,20 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, { struct udphdr *uh = udp_hdr(skb); - if (nocheck) + if (nocheck) { uh->check = 0; - else if (skb_is_gso(skb)) + } else if (skb_is_gso(skb)) { uh->check = ~udp_v4_check(len, saddr, daddr, 0); - else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & - (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { - - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + uh->check = 0; + uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~udp_v4_check(len, saddr, daddr, 0); - } else { - __wsum csum; - - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - - uh->check = 0; - csum = skb_checksum(skb, 0, len, 0); - uh->check = udp_v4_check(len, saddr, daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; } } EXPORT_SYMBOL(udp_set_csum); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 4c519c1dc161..56c4c8b88b28 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -32,42 +32,56 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features), __be16 new_protocol, bool is_ipv6) { + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); + bool remcsum, need_csum, offload_csum; + struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; - int mac_len = skb->mac_len; - int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; - netdev_features_t enc_features; + u16 mac_len = skb->mac_len; int udp_offset, outer_hlen; - unsigned int oldlen; - bool need_csum = !!(skb_shinfo(skb)->gso_type & - SKB_GSO_UDP_TUNNEL_CSUM); - bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); - bool offload_csum = false, dont_encap = (need_csum || remcsum); - - oldlen = (u16)~skb->len; + u32 partial; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; + /* adjust partial header checksum to negate old length */ + partial = (__force u32)uh->check + (__force u16)~uh->len; + + /* setup inner skb. */ skb->encapsulation = 0; __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); skb->protocol = new_protocol; + + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); skb->encap_hdr_csum = need_csum; + + remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; /* Try to offload checksum if possible */ offload_csum = !!(need_csum && - ((skb->dev->features & NETIF_F_HW_CSUM) || - (skb->dev->features & (is_ipv6 ? - NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM)))); + (skb->dev->features & + (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) : + (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); + + features &= skb->dev->hw_enc_features; + + /* The only checksum offload we care about from here on out is the + * outer one so strip the existing checksum feature flags and + * instead set the flag based on our outer checksum offload value. + */ + if (remcsum) { + features &= ~NETIF_F_CSUM_MASK; + if (offload_csum) + features |= NETIF_F_HW_CSUM; + } /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & features; - segs = gso_inner_segment(skb, enc_features); + segs = gso_inner_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, mac_len); @@ -78,17 +92,13 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, udp_offset = outer_hlen - tnl_hlen; skb = segs; do { - struct udphdr *uh; - int len; - __be32 delta; + __be16 len; - if (dont_encap) { - skb->encapsulation = 0; + if (remcsum) skb->ip_summed = CHECKSUM_NONE; - } else { - /* Only set up inner headers if we might be offloading - * inner checksum. - */ + + /* Set up inner headers if we are offloading inner checksum */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_reset_inner_headers(skb); skb->encapsulation = 1; } @@ -96,43 +106,28 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, skb->mac_len = mac_len; skb->protocol = protocol; - skb_push(skb, outer_hlen); + __skb_push(skb, outer_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_set_transport_header(skb, udp_offset); - len = skb->len - udp_offset; + len = htons(skb->len - udp_offset); uh = udp_hdr(skb); - uh->len = htons(len); + uh->len = len; if (!need_csum) continue; - delta = htonl(oldlen + len); - uh->check = ~csum_fold((__force __wsum) - ((__force u32)uh->check + - (__force u32)delta)); - if (offload_csum) { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - } else if (remcsum) { - /* Need to calculate checksum from scratch, - * inner checksums are never when doing - * remote_checksum_offload. - */ - - skb->csum = skb_checksum(skb, udp_offset, - skb->len - udp_offset, - 0); - uh->check = csum_fold(skb->csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - uh->check = gso_make_checksum(skb, ~uh->check); + ((__force u32)len + partial)); + if (skb->encapsulation || !offload_csum) { + uh->check = gso_make_checksum(skb, ~uh->check); if (uh->check == 0) uh->check = CSUM_MANGLED_0; + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); } } while ((skb = skb->next)); out: diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 40c897515ddc..11e875ffd7ac 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -207,6 +207,7 @@ config IPV6_NDISC_NODETYPE config IPV6_TUNNEL tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)" select INET6_TUNNEL + select DST_CACHE ---help--- Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in RFC 2473. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bdd7eac4307a..4751f8922362 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4714,6 +4714,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown; /* we omit DEVCONF_STABLE_SECRET for now */ array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; + array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; + array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; } static inline size_t inet6_ifla6_size(void) @@ -5788,6 +5790,20 @@ static struct addrconf_sysctl_table .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, }, { + .procname = "drop_unicast_in_l2_multicast", + .data = &ipv6_devconf.drop_unicast_in_l2_multicast, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "drop_unsolicited_na", + .data = &ipv6_devconf.drop_unsolicited_na, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { /* sentinel */ } }, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9f5137cd604e..b11c37cfd67c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -235,7 +235,11 @@ lookup_protocol: * creation time automatically shares. */ inet->inet_sport = htons(inet->inet_num); - sk->sk_prot->hash(sk); + err = sk->sk_prot->hash(sk); + if (err) { + sk_common_release(sk); + goto out; + } } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 32dc9aab7297..30613050e4ca 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -99,5 +99,6 @@ static void __exit ila_fini(void) module_init(ila_init); module_exit(ila_fini); +MODULE_ALIAS_RTNL_LWT(ILA); MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); MODULE_LICENSE("GPL"); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 36c3f0155010..532c3ef282c5 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -26,6 +26,7 @@ #include <net/ip6_route.h> #include <net/sock.h> #include <net/inet6_connection_sock.h> +#include <net/sock_reuseport.h> int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax) @@ -48,6 +49,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, if ((!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) && (!reuseport || !sk2->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid((struct sock *)sk2))))) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 21ace5a2bf7c..70f2628be6fa 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -17,11 +17,13 @@ #include <linux/module.h> #include <linux/random.h> +#include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> #include <net/secure_seq.h> #include <net/ip.h> +#include <net/sock_reuseport.h> u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -121,7 +123,9 @@ static inline int compute_score(struct sock *sk, struct net *net, } struct sock *inet6_lookup_listener(struct net *net, - struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { @@ -129,6 +133,7 @@ struct sock *inet6_lookup_listener(struct net *net, const struct hlist_nulls_node *node; struct sock *result; int score, hiscore, matches = 0, reuseport = 0; + bool select_ok = true; u32 phash = 0; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; @@ -146,6 +151,15 @@ begin: if (reuseport) { phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); + if (select_ok) { + struct sock *sk2; + sk2 = reuseport_select_sock(sk, phash, + skb, doff); + if (sk2) { + result = sk2; + goto found; + } + } matches = 1; } } else if (score == hiscore && reuseport) { @@ -163,11 +177,13 @@ begin: if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, hnum, daddr, dif) < hiscore)) { sock_put(result); + select_ok = false; goto begin; } } @@ -177,6 +193,7 @@ begin: EXPORT_SYMBOL_GPL(inet6_lookup_listener); struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif) @@ -184,7 +201,8 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, struct sock *sk; local_bh_disable(); - sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif); + sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, + ntohs(dport), dif); local_bh_enable(); return sk; @@ -274,3 +292,59 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); + +int inet6_hash(struct sock *sk) +{ + if (sk->sk_state != TCP_CLOSE) { + local_bh_disable(); + __inet_hash(sk, NULL, ipv6_rcv_saddr_equal); + local_bh_enable(); + } + + return 0; +} +EXPORT_SYMBOL_GPL(inet6_hash); + +/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 + * only, and any IPv4 addresses if not IPv6 only + * match_wildcard == false: addresses must be exactly the same, i.e. + * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, + * and 0.0.0.0 equals to 0.0.0.0 only + */ +int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) +{ + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); + int sk2_ipv6only = inet_v6_ipv6only(sk2); + int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; + + /* if both are mapped, treat as IPv4 */ + if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { + if (!sk2_ipv6only) { + if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) + return 1; + if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) + return match_wildcard; + } + return 0; + } + + if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) + return 1; + + if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && + !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) + return 1; + + if (addr_type == IPV6_ADDR_ANY && match_wildcard && + !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) + return 1; + + if (sk2_rcv_saddr6 && + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal); diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 9a4d7322fb22..8f920580976f 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -98,27 +98,16 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb, uh->check = 0; else if (skb_is_gso(skb)) uh->check = ~udp_v6_check(len, saddr, daddr, 0); - else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { - - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - + else if (skb->ip_summed == CHECKSUM_PARTIAL) { + uh->check = 0; + uh->check = udp_v6_check(len, saddr, daddr, lco_csum(skb)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~udp_v6_check(len, saddr, daddr, 0); - } else { - __wsum csum; - - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - - uh->check = 0; - csum = skb_checksum(skb, 0, len, 0); - uh->check = udp_v6_check(len, saddr, daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; } } EXPORT_SYMBOL(udp6_set_csum); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a69aad1e29d1..f7c9560b75fa 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -360,7 +360,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); ip6gre_tunnel_unlink(ign, t); - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); dev_put(dev); } @@ -633,7 +633,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } if (!fl6->flowi6_mark) - dst = ip6_tnl_dst_get(tunnel); + dst = dst_cache_get(&tunnel->dst_cache); if (!dst) { dst = ip6_route_output(net, NULL, fl6); @@ -702,7 +702,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } if (!fl6->flowi6_mark && ndst) - ip6_tnl_dst_set(tunnel, ndst); + dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr); skb_dst_set(skb, dst); proto = NEXTHDR_GRE; @@ -1009,7 +1009,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t, t->parms.o_key = p->o_key; t->parms.i_flags = p->i_flags; t->parms.o_flags = p->o_flags; - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); ip6gre_tnl_link_config(t, set_mtu); return 0; } @@ -1219,7 +1219,7 @@ static void ip6gre_dev_free(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - ip6_tnl_dst_destroy(t); + dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -1257,7 +1257,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) if (!dev->tstats) return -ENOMEM; - ret = ip6_tnl_dst_init(tunnel); + ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (ret) { free_percpu(dev->tstats); dev->tstats = NULL; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 9075acf081dd..c05c425c2389 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,7 +49,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { + if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); @@ -134,6 +134,16 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) goto err; + /* If enabled, drop unicast packets that were encapsulated in link-layer + * multicast or broadcast to protected against the so-called "hole-196" + * attack in 802.11 wireless. + */ + if (!ipv6_addr_is_multicast(&hdr->daddr) && + (skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && + idev->cnf.drop_unicast_in_l2_multicast) + goto err; + /* RFC4291 2.7 * Nodes must not originate a packet to a multicast address whose scope * field contains the reserved value 0; if such a packet is received, it diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 137fca42aaa6..3f3aabd2f07b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -122,97 +122,6 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) return &dev->stats; } -/* - * Locking : hash tables are protected by RCU and RTNL - */ - -static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, - struct dst_entry *dst) -{ - write_seqlock_bh(&idst->lock); - dst_release(rcu_dereference_protected( - idst->dst, - lockdep_is_held(&idst->lock.lock))); - if (dst) { - dst_hold(dst); - idst->cookie = rt6_get_cookie((struct rt6_info *)dst); - } else { - idst->cookie = 0; - } - rcu_assign_pointer(idst->dst, dst); - write_sequnlock_bh(&idst->lock); -} - -struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t) -{ - struct ip6_tnl_dst *idst; - struct dst_entry *dst; - unsigned int seq; - u32 cookie; - - idst = raw_cpu_ptr(t->dst_cache); - - rcu_read_lock(); - do { - seq = read_seqbegin(&idst->lock); - dst = rcu_dereference(idst->dst); - cookie = idst->cookie; - } while (read_seqretry(&idst->lock, seq)); - - if (dst && !atomic_inc_not_zero(&dst->__refcnt)) - dst = NULL; - rcu_read_unlock(); - - if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) { - ip6_tnl_per_cpu_dst_set(idst, NULL); - dst_release(dst); - dst = NULL; - } - return dst; -} -EXPORT_SYMBOL_GPL(ip6_tnl_dst_get); - -void ip6_tnl_dst_reset(struct ip6_tnl *t) -{ - int i; - - for_each_possible_cpu(i) - ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); -} -EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); - -void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst) -{ - ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst); - -} -EXPORT_SYMBOL_GPL(ip6_tnl_dst_set); - -void ip6_tnl_dst_destroy(struct ip6_tnl *t) -{ - if (!t->dst_cache) - return; - - ip6_tnl_dst_reset(t); - free_percpu(t->dst_cache); -} -EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy); - -int ip6_tnl_dst_init(struct ip6_tnl *t) -{ - int i; - - t->dst_cache = alloc_percpu(struct ip6_tnl_dst); - if (!t->dst_cache) - return -ENOMEM; - - for_each_possible_cpu(i) - seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock); - - return 0; -} -EXPORT_SYMBOL_GPL(ip6_tnl_dst_init); - /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses * @remote: the address of the tunnel exit-point @@ -329,7 +238,7 @@ static void ip6_dev_free(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - ip6_tnl_dst_destroy(t); + dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -462,7 +371,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else ip6_tnl_unlink(ip6n, t); - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); dev_put(dev); } @@ -1069,7 +978,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } else if (!fl6->flowi6_mark) - dst = ip6_tnl_dst_get(t); + dst = dst_cache_get(&t->dst_cache); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) goto tx_err_link_failure; @@ -1133,7 +1042,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, } if (!fl6->flowi6_mark && ndst) - ip6_tnl_dst_set(t, ndst); + dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); skb_dst_set(skb, dst); skb->transport_header = skb->network_header; @@ -1366,7 +1275,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) t->parms.flowinfo = p->flowinfo; t->parms.link = p->link; t->parms.proto = p->proto; - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); ip6_tnl_link_config(t); return 0; } @@ -1637,7 +1546,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) if (!dev->tstats) return -ENOMEM; - ret = ip6_tnl_dst_init(t); + ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); if (ret) { free_percpu(dev->tstats); dev->tstats = NULL; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 0a8610b33d79..d90a11f14040 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -640,7 +640,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; t->parms.proto = p->proto; - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); vti6_link_config(t); return 0; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 84afb9a77278..c245895a3d41 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -883,6 +883,7 @@ static void ndisc_recv_na(struct sk_buff *skb) offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; + struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; @@ -902,6 +903,14 @@ static void ndisc_recv_na(struct sk_buff *skb) return; } + /* For some 802.11 wireless deployments (and possibly other networks), + * there will be a NA proxy and unsolicitd packets are attacks + * and thus should not be accepted. + */ + if (!msg->icmph.icmp6_solicited && idev && + idev->cnf.drop_unsolicited_na) + return; + if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { ND_PRINTK(2, warn, "NS: invalid ND option\n"); return; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 18f3498a6c80..e2ea31175ef9 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + skb_postpush_rcsum(head, skb_network_header(head), + skb_network_header_len(head)); rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 2066d1c25a11..f45b8ffc2840 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -475,7 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ipip6_tunnel_unlink(sitn, tunnel); ipip6_tunnel_del_prl(tunnel, NULL); } - ip_tunnel_dst_reset_all(tunnel); + dst_cache_reset(&tunnel->dst_cache); dev_put(dev); } @@ -740,7 +740,7 @@ static int ipip_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - if (iptunnel_pull_header(skb, 0, tpi.proto)) + if (iptunnel_pull_header(skb, 0, tpi.proto, false)) goto drop; return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); } @@ -911,7 +911,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); + skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT); if (IS_ERR(skb)) { ip_rt_put(rt); goto out; @@ -1000,7 +1000,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP); if (IS_ERR(skb)) goto out; @@ -1093,7 +1093,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) t->parms.link = p->link; ipip6_tunnel_bind_dev(t->dev); } - ip_tunnel_dst_reset_all(t); + dst_cache_reset(&t->dst_cache); netdev_state_change(t->dev); } @@ -1124,7 +1124,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, t->ip6rd.relay_prefix = relay_prefix; t->ip6rd.prefixlen = ip6rd->prefixlen; t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; - ip_tunnel_dst_reset_all(t); + dst_cache_reset(&t->dst_cache); netdev_state_change(t->dev); return 0; } @@ -1278,7 +1278,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } - ip_tunnel_dst_reset_all(t); + dst_cache_reset(&t->dst_cache); netdev_state_change(dev); break; @@ -1339,7 +1339,7 @@ static void ipip6_dev_free(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - free_percpu(tunnel->dst_cache); + dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -1372,6 +1372,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); + int err; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1382,10 +1383,10 @@ static int ipip6_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; - tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); - if (!tunnel->dst_cache) { + err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); + if (err) { free_percpu(dev->tstats); - return -ENOMEM; + return err; } return 0; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2906ef20795e..0e393ff7f5d0 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -148,7 +148,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; __u8 rcv_wscale; - if (!sysctl_tcp_syncookies || !th->ack || th->rst) + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5c8c84273028..33f2820181f9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -867,7 +867,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. */ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, &ipv6h->saddr, + &tcp_hashinfo, NULL, 0, + &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->source), tcp_v6_iif(skb)); if (!sk1) @@ -1376,8 +1377,8 @@ static int tcp_v6_rcv(struct sk_buff *skb) hdr = ipv6_hdr(skb); lookup: - sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, - inet6_iif(skb)); + sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), + th->source, th->dest, inet6_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1501,6 +1502,7 @@ do_time_wait: struct sock *sk2; sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), tcp_v6_iif(skb)); @@ -1866,7 +1868,7 @@ struct proto tcpv6_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, - .hash = inet_hash, + .hash = inet6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 22e28a44e3c8..0711f8fe4d44 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -37,6 +37,7 @@ #include <linux/slab.h> #include <asm/uaccess.h> +#include <net/addrconf.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/transp_v6.h> @@ -77,49 +78,6 @@ static u32 udp6_ehashfn(const struct net *net, udp_ipv6_hash_secret + net_hash_mix(net)); } -/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 - * only, and any IPv4 addresses if not IPv6 only - * match_wildcard == false: addresses must be exactly the same, i.e. - * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, - * and 0.0.0.0 equals to 0.0.0.0 only - */ -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard) -{ - const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); - int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; - - /* if both are mapped, treat as IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { - if (!sk2_ipv6only) { - if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) - return 1; - if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) - return match_wildcard; - } - return 0; - } - - if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) - return 1; - - if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && - !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) - return 1; - - if (addr_type == IPV6_ADDR_ANY && match_wildcard && - !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) - return 1; - - if (sk2_rcv_saddr6 && - ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) - return 1; - - return 0; -} - static u32 udp6_portaddr_hash(const struct net *net, const struct in6_addr *addr6, unsigned int port) @@ -590,6 +548,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, const struct in6_addr *daddr = &hdr->daddr; struct udphdr *uh = (struct udphdr *)(skb->data+offset); struct sock *sk; + int harderr; int err; struct net *net = dev_net(skb->dev); @@ -601,26 +560,27 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; } + harderr = icmpv6_err_convert(type, code, &err); + np = inet6_sk(sk); + if (type == ICMPV6_PKT_TOOBIG) { if (!ip6_sk_accept_pmtu(sk)) goto out; ip6_sk_update_pmtu(skb, sk, info); + if (np->pmtudisc != IPV6_PMTUDISC_DONT) + harderr = 1; } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); goto out; } - np = inet6_sk(sk); - - if (!icmpv6_err_convert(type, code, &err) && !np->recverr) - goto out; - - if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) - goto out; - - if (np->recverr) + if (!np->recverr) { + if (!harderr || sk->sk_state != TCP_ESTABLISHED) + goto out; + } else { ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); + } sk->sk_err = err; sk->sk_error_report(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index a2c8747d2936..6b54ff3ff4cb 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -25,6 +25,7 @@ #include <net/udp.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> +#include <net/inet6_hashtables.h> #include <net/tcp_states.h> #include <net/protocol.h> #include <net/xfrm.h> @@ -718,7 +719,7 @@ static struct proto l2tp_ip6_prot = { .sendmsg = l2tp_ip6_sendmsg, .recvmsg = l2tp_ip6_recvmsg, .backlog_rcv = l2tp_ip6_backlog_recv, - .hash = inet_hash, + .hash = inet6_hash, .unhash = inet_unhash, .obj_size = sizeof(struct l2tp_ip6_sock), #ifdef CONFIG_COMPAT diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8dab4e569571..b3c52e3f689a 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -38,7 +38,7 @@ static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; static const struct proto_ops llc_ui_ops; -static int llc_ui_wait_for_conn(struct sock *sk, long timeout); +static long llc_ui_wait_for_conn(struct sock *sk, long timeout); static int llc_ui_wait_for_disc(struct sock *sk, long timeout); static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); @@ -551,7 +551,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) return rc; } -static int llc_ui_wait_for_conn(struct sock *sk, long timeout) +static long llc_ui_wait_for_conn(struct sock *sk, long timeout) { DEFINE_WAIT(wait); diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index fb31aa87de81..644a8da6d4bd 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -227,5 +227,6 @@ static void __exit mpls_iptunnel_exit(void) } module_exit(mpls_iptunnel_exit); +MODULE_ALIAS_RTNL_LWT(MPLS); MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels"); MODULE_LICENSE("GPL v2"); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3264cb49b333..a3f5cd9b3c4c 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -1019,8 +1019,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - skb = iptunnel_handle_offloads( - skb, false, __tun_gso_type_mask(AF_INET, cp->af)); + skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)); if (IS_ERR(skb)) goto tx_error; @@ -1112,8 +1111,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - skb = iptunnel_handle_offloads( - skb, false, __tun_gso_type_mask(AF_INET6, cp->af)); + skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)); if (IS_ERR(skb)) goto tx_error; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 857ae89633af..2278d9ab723b 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -127,13 +127,6 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, - u32 dst_portid, gfp_t gfp_mask) -{ - return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask); -} -EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb); - int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 8ca932057c13..11f81c8385fc 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -330,14 +330,13 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC); + skb = alloc_skb(n, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = nfnetlink_alloc_skb(net, pkt_size, - peer_portid, GFP_ATOMIC); + skb = alloc_skb(pkt_size, GFP_ATOMIC); } } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 1d3936587ace..75429997ed41 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, __be32 **packet_id_ptr) { size_t size; - size_t data_len = 0, cap_len = 0, rem_len = 0; + size_t data_len = 0, cap_len = 0; unsigned int hlen = 0; struct sk_buff *skb; struct nlattr *nla; @@ -361,7 +361,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, hlen = min_t(unsigned int, hlen, data_len); size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; - rem_len = data_len - hlen; break; } @@ -386,8 +385,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, size += nla_total_size(seclen); } - skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid, - GFP_ATOMIC); + skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); return NULL; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 3ab591e73ec0..7f4414d26a66 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -105,19 +105,24 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) * belonging to established connections going through that one. */ static inline struct sock * -nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, +nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, + const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, const struct net_device *in, const enum nf_tproxy_lookup_t lookup_type) { struct sock *sk; + struct tcphdr *tcph; switch (protocol) { case IPPROTO_TCP: switch (lookup_type) { case NFT_LOOKUP_LISTENER: - sk = inet_lookup_listener(net, &tcp_hashinfo, + tcph = hp; + sk = inet_lookup_listener(net, &tcp_hashinfo, skb, + ip_hdrlen(skb) + + __tcp_hdrlen(tcph), saddr, sport, daddr, dport, in->ifindex); @@ -169,19 +174,23 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, #ifdef XT_TPROXY_HAVE_IPV6 static inline struct sock * -nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, +nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, + const u8 protocol, const struct in6_addr *saddr, const struct in6_addr *daddr, const __be16 sport, const __be16 dport, const struct net_device *in, const enum nf_tproxy_lookup_t lookup_type) { struct sock *sk; + struct tcphdr *tcph; switch (protocol) { case IPPROTO_TCP: switch (lookup_type) { case NFT_LOOKUP_LISTENER: - sk = inet6_lookup_listener(net, &tcp_hashinfo, + tcph = hp; + sk = inet6_lookup_listener(net, &tcp_hashinfo, skb, + thoff + __tcp_hdrlen(tcph), saddr, sport, daddr, ntohs(dport), in->ifindex); @@ -267,7 +276,7 @@ tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v4(net, iph->protocol, + sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, iph->saddr, laddr ? laddr : iph->daddr, hp->source, lport ? lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); @@ -305,7 +314,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v4(net, iph->protocol, + sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, iph->saddr, iph->daddr, hp->source, hp->dest, skb->dev, NFT_LOOKUP_ESTABLISHED); @@ -321,7 +330,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, else if (!sk) /* no, there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v4(net, iph->protocol, + sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, iph->saddr, laddr, hp->source, lport, skb->dev, NFT_LOOKUP_LISTENER); @@ -429,7 +438,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v6(par->net, tproto, + sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, &iph->saddr, tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), hp->source, @@ -472,7 +481,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v6(par->net, tproto, + sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, par->in, NFT_LOOKUP_ESTABLISHED); @@ -487,8 +496,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v6(par->net, tproto, - &iph->saddr, laddr, + sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, + tproto, &iph->saddr, laddr, hp->source, lport, par->in, NFT_LOOKUP_LISTENER); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 2ec08f04b816..49d14ecad444 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -112,14 +112,15 @@ extract_icmp4_fields(const struct sk_buff *skb, * box. */ static struct sock * -xt_socket_get_sock_v4(struct net *net, const u8 protocol, +xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, + const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, const struct net_device *in) { switch (protocol) { case IPPROTO_TCP: - return __inet_lookup(net, &tcp_hashinfo, + return __inet_lookup(net, &tcp_hashinfo, skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: @@ -148,6 +149,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net, const struct net_device *indev) { const struct iphdr *iph = ip_hdr(skb); + struct sk_buff *data_skb = NULL; + int doff = 0; __be32 uninitialized_var(daddr), uninitialized_var(saddr); __be16 uninitialized_var(dport), uninitialized_var(sport); u8 uninitialized_var(protocol); @@ -169,6 +172,10 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net, sport = hp->source; daddr = iph->daddr; dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = iph->protocol == IPPROTO_TCP ? + ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : + ip_hdrlen(skb) + sizeof(*hp); } else if (iph->protocol == IPPROTO_ICMP) { if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, @@ -198,8 +205,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net, } #endif - return xt_socket_get_sock_v4(net, protocol, saddr, daddr, - sport, dport, indev); + return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, + daddr, sport, dport, indev); } static bool @@ -318,14 +325,15 @@ extract_icmp6_fields(const struct sk_buff *skb, } static struct sock * -xt_socket_get_sock_v6(struct net *net, const u8 protocol, +xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, + const u8 protocol, const struct in6_addr *saddr, const struct in6_addr *daddr, const __be16 sport, const __be16 dport, const struct net_device *in) { switch (protocol) { case IPPROTO_TCP: - return inet6_lookup(net, &tcp_hashinfo, + return inet6_lookup(net, &tcp_hashinfo, skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: @@ -343,6 +351,8 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net, __be16 uninitialized_var(dport), uninitialized_var(sport); const struct in6_addr *daddr = NULL, *saddr = NULL; struct ipv6hdr *iph = ipv6_hdr(skb); + struct sk_buff *data_skb = NULL; + int doff = 0; int thoff = 0, tproto; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); @@ -362,6 +372,10 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net, sport = hp->source; daddr = &iph->daddr; dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = tproto == IPPROTO_TCP ? + thoff + __tcp_hdrlen((struct tcphdr *)hp) : + thoff + sizeof(*hp); } else if (tproto == IPPROTO_ICMPV6) { struct ipv6hdr ipv6_var; @@ -373,7 +387,7 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net, return NULL; } - return xt_socket_get_sock_v6(net, tproto, saddr, daddr, + return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, sport, dport, indev); } diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig index 2c5e95e9bfbd..5d6e8c05b3d4 100644 --- a/net/netlink/Kconfig +++ b/net/netlink/Kconfig @@ -2,15 +2,6 @@ # Netlink Sockets # -config NETLINK_MMAP - bool "NETLINK: mmaped IO" - ---help--- - This option enables support for memory mapped netlink IO. This - reduces overhead by avoiding copying data between kernel- and - userspace. - - If unsure, say N. - config NETLINK_DIAG tristate "NETLINK: socket monitoring interface" default n diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f1ffb34e253f..c8416792cce0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, dev_hold(dev); - if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) + if (is_vmalloc_addr(skb->head)) nskb = netlink_to_full_skb(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk) wake_up_interruptible(&nlk->wait); } -#ifdef CONFIG_NETLINK_MMAP -static bool netlink_rx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->rx_ring.pg_vec != NULL; -} - -static bool netlink_tx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->tx_ring.pg_vec != NULL; -} - -static __pure struct page *pgvec_to_page(const void *addr) -{ - if (is_vmalloc_addr(addr)) - return vmalloc_to_page(addr); - else - return virt_to_page(addr); -} - -static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) -{ - unsigned int i; - - for (i = 0; i < len; i++) { - if (pg_vec[i] != NULL) { - if (is_vmalloc_addr(pg_vec[i])) - vfree(pg_vec[i]); - else - free_pages((unsigned long)pg_vec[i], order); - } - } - kfree(pg_vec); -} - -static void *alloc_one_pg_vec_page(unsigned long order) -{ - void *buffer; - gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | - __GFP_NOWARN | __GFP_NORETRY; - - buffer = (void *)__get_free_pages(gfp_flags, order); - if (buffer != NULL) - return buffer; - - buffer = vzalloc((1 << order) * PAGE_SIZE); - if (buffer != NULL) - return buffer; - - gfp_flags &= ~__GFP_NORETRY; - return (void *)__get_free_pages(gfp_flags, order); -} - -static void **alloc_pg_vec(struct netlink_sock *nlk, - struct nl_mmap_req *req, unsigned int order) -{ - unsigned int block_nr = req->nm_block_nr; - unsigned int i; - void **pg_vec; - - pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); - if (pg_vec == NULL) - return NULL; - - for (i = 0; i < block_nr; i++) { - pg_vec[i] = alloc_one_pg_vec_page(order); - if (pg_vec[i] == NULL) - goto err1; - } - - return pg_vec; -err1: - free_pg_vec(pg_vec, order, block_nr); - return NULL; -} - - -static void -__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, - unsigned int order) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct sk_buff_head *queue; - struct netlink_ring *ring; - - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); -} - -static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool tx_ring) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - void **pg_vec = NULL; - unsigned int order = 0; - - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - - if (req->nm_block_nr) { - if (ring->pg_vec != NULL) - return -EBUSY; - - if ((int)req->nm_block_size <= 0) - return -EINVAL; - if (!PAGE_ALIGNED(req->nm_block_size)) - return -EINVAL; - if (req->nm_frame_size < NL_MMAP_HDRLEN) - return -EINVAL; - if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) - return -EINVAL; - - ring->frames_per_block = req->nm_block_size / - req->nm_frame_size; - if (ring->frames_per_block == 0) - return -EINVAL; - if (ring->frames_per_block * req->nm_block_nr != - req->nm_frame_nr) - return -EINVAL; - - order = get_order(req->nm_block_size); - pg_vec = alloc_pg_vec(nlk, req, order); - if (pg_vec == NULL) - return -ENOMEM; - } else { - if (req->nm_frame_nr) - return -EINVAL; - } - - mutex_lock(&nlk->pg_vec_lock); - if (atomic_read(&nlk->mapped) == 0) { - __netlink_set_ring(sk, req, tx_ring, pg_vec, order); - mutex_unlock(&nlk->pg_vec_lock); - return 0; - } - - mutex_unlock(&nlk->pg_vec_lock); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); - - return -EBUSY; -} - -static void netlink_mm_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_inc(&nlk_sk(sk)->mapped); -} - -static void netlink_mm_close(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_dec(&nlk_sk(sk)->mapped); -} - -static const struct vm_operations_struct netlink_mmap_ops = { - .open = netlink_mm_open, - .close = netlink_mm_close, -}; - -static int netlink_mmap(struct file *file, struct socket *sock, - struct vm_area_struct *vma) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - unsigned long start, size, expected; - unsigned int i; - int err = -EINVAL; - - if (vma->vm_pgoff) - return -EINVAL; - - mutex_lock(&nlk->pg_vec_lock); - - expected = 0; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; - } - - if (expected == 0) - goto out; - - size = vma->vm_end - vma->vm_start; - if (size != expected) - goto out; - - start = vma->vm_start; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - - for (i = 0; i < ring->pg_vec_len; i++) { - struct page *page; - void *kaddr = ring->pg_vec[i]; - unsigned int pg_num; - - for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { - page = pgvec_to_page(kaddr); - err = vm_insert_page(vma, start, page); - if (err < 0) - goto out; - start += PAGE_SIZE; - kaddr += PAGE_SIZE; - } - } - } - - atomic_inc(&nlk->mapped); - vma->vm_ops = &netlink_mmap_ops; - err = 0; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) -{ -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 - struct page *p_start, *p_end; - - /* First page is flushed through netlink_{get,set}_status */ - p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); - while (p_start <= p_end) { - flush_dcache_page(p_start); - p_start++; - } -#endif -} - -static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) -{ - smp_rmb(); - flush_dcache_page(pgvec_to_page(hdr)); - return hdr->nm_status; -} - -static void netlink_set_status(struct nl_mmap_hdr *hdr, - enum nl_mmap_status status) -{ - smp_mb(); - hdr->nm_status = status; - flush_dcache_page(pgvec_to_page(hdr)); -} - -static struct nl_mmap_hdr * -__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) -{ - unsigned int pg_vec_pos, frame_off; - - pg_vec_pos = pos / ring->frames_per_block; - frame_off = pos % ring->frames_per_block; - - return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); -} - -static struct nl_mmap_hdr * -netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, - enum nl_mmap_status status) -{ - struct nl_mmap_hdr *hdr; - - hdr = __netlink_lookup_frame(ring, pos); - if (netlink_get_status(hdr) != status) - return NULL; - - return hdr; -} - -static struct nl_mmap_hdr * -netlink_current_frame(const struct netlink_ring *ring, - enum nl_mmap_status status) -{ - return netlink_lookup_frame(ring, ring->head, status); -} - -static void netlink_increment_head(struct netlink_ring *ring) -{ - ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; -} - -static void netlink_forward_ring(struct netlink_ring *ring) -{ - unsigned int head = ring->head; - const struct nl_mmap_hdr *hdr; - - do { - hdr = __netlink_lookup_frame(ring, ring->head); - if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) - break; - if (hdr->nm_status != NL_MMAP_STATUS_SKIP) - break; - netlink_increment_head(ring); - } while (ring->head != head); -} - -static bool netlink_has_valid_frame(struct netlink_ring *ring) -{ - unsigned int head = ring->head, pos = head; - const struct nl_mmap_hdr *hdr; - - do { - hdr = __netlink_lookup_frame(ring, pos); - if (hdr->nm_status == NL_MMAP_STATUS_VALID) - return true; - pos = pos != 0 ? pos - 1 : ring->frame_max; - } while (pos != head); - - return false; -} - -static bool netlink_dump_space(struct netlink_sock *nlk) -{ - struct netlink_ring *ring = &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - unsigned int n; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - return false; - - n = ring->head + ring->frame_max / 2; - if (n > ring->frame_max) - n -= ring->frame_max; - - hdr = __netlink_lookup_frame(ring, n); - - return hdr->nm_status == NL_MMAP_STATUS_UNUSED; -} - -static unsigned int netlink_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - unsigned int mask; - int err; - - if (nlk->rx_ring.pg_vec != NULL) { - /* Memory mapped sockets don't call recvmsg(), so flow control - * for dumps is performed here. A dump is allowed to continue - * if at least half the ring is unused. - */ - while (nlk->cb_running && netlink_dump_space(nlk)) { - err = netlink_dump(sk); - if (err < 0) { - sk->sk_err = -err; - sk->sk_error_report(sk); - break; - } - } - netlink_rcv_wake(sk); - } - - mask = datagram_poll(file, sock, wait); - - /* We could already have received frames in the normal receive - * queue, that will show up as NL_MMAP_STATUS_COPY in the ring, - * so if mask contains pollin/etc already, there's no point - * walking the ring. - */ - if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) { - spin_lock_bh(&sk->sk_receive_queue.lock); - if (nlk->rx_ring.pg_vec) { - if (netlink_has_valid_frame(&nlk->rx_ring)) - mask |= POLLIN | POLLRDNORM; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - } - - spin_lock_bh(&sk->sk_write_queue.lock); - if (nlk->tx_ring.pg_vec) { - if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) - mask |= POLLOUT | POLLWRNORM; - } - spin_unlock_bh(&sk->sk_write_queue.lock); - - return mask; -} - -static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) -{ - return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); -} - -static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, - struct netlink_ring *ring, - struct nl_mmap_hdr *hdr) -{ - unsigned int size; - void *data; - - size = ring->frame_size - NL_MMAP_HDRLEN; - data = (void *)hdr + NL_MMAP_HDRLEN; - - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->len = 0; - - skb->destructor = netlink_skb_destructor; - NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; - NETLINK_CB(skb).sk = sk; -} - -static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, - u32 dst_portid, u32 dst_group, - struct scm_cookie *scm) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - struct sk_buff *skb; - unsigned int maxlen; - int err = 0, len = 0; - - mutex_lock(&nlk->pg_vec_lock); - - ring = &nlk->tx_ring; - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - - do { - unsigned int nm_len; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); - if (hdr == NULL) { - if (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending)) - schedule(); - continue; - } - - nm_len = ACCESS_ONCE(hdr->nm_len); - if (nm_len > maxlen) { - err = -EINVAL; - goto out; - } - - netlink_frame_flush_dcache(hdr, nm_len); - - skb = alloc_skb(nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - - netlink_increment_head(ring); - - NETLINK_CB(skb).portid = nlk->portid; - NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = scm->creds; - - err = security_netlink_send(sk, skb); - if (err) { - kfree_skb(skb); - goto out; - } - - if (unlikely(dst_group)) { - atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_portid, dst_group, - GFP_KERNEL); - } - err = netlink_unicast(sk, skb, dst_portid, - msg->msg_flags & MSG_DONTWAIT); - if (err < 0) - goto out; - len += err; - - } while (hdr != NULL || - (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending))); - - if (len > 0) - err = len; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) -{ - struct nl_mmap_hdr *hdr; - - hdr = netlink_mmap_hdr(skb); - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - - NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; - kfree_skb(skb); -} - -static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring = &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - - spin_lock_bh(&sk->sk_receive_queue.lock); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - kfree_skb(skb); - netlink_overrun(sk); - return; - } - netlink_increment_head(ring); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_set_status(hdr, NL_MMAP_STATUS_COPY); -} - -#else /* CONFIG_NETLINK_MMAP */ -#define netlink_rx_is_mmaped(sk) false -#define netlink_tx_is_mmaped(sk) false -#define netlink_mmap sock_no_mmap -#define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 -#endif /* CONFIG_NETLINK_MMAP */ - static void netlink_skb_destructor(struct sk_buff *skb) { -#ifdef CONFIG_NETLINK_MMAP - struct nl_mmap_hdr *hdr; - struct netlink_ring *ring; - struct sock *sk; - - /* If a packet from the kernel to userspace was freed because of an - * error without being delivered to userspace, the kernel must reset - * the status. In the direction userspace to kernel, the status is - * always reset here after the packet was processed and freed. - */ - if (netlink_skb_is_mmaped(skb)) { - hdr = netlink_mmap_hdr(skb); - sk = NETLINK_CB(skb).sk; - - if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - ring = &nlk_sk(sk)->tx_ring; - } else { - if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { - hdr->nm_len = 0; - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - } - ring = &nlk_sk(sk)->rx_ring; - } - - WARN_ON(atomic_read(&ring->pending) == 0); - atomic_dec(&ring->pending); - sock_put(sk); - - skb->head = NULL; - } -#endif if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) @@ -937,18 +335,6 @@ static void netlink_sock_destruct(struct sock *sk) } skb_queue_purge(&sk->sk_receive_queue); -#ifdef CONFIG_NETLINK_MMAP - if (1) { - struct nl_mmap_req req; - - memset(&req, 0, sizeof(req)); - if (nlk->rx_ring.pg_vec) - __netlink_set_ring(sk, &req, false, NULL, 0); - memset(&req, 0, sizeof(req)); - if (nlk->tx_ring.pg_vec) - __netlink_set_ring(sk, &req, true, NULL, 0); - } -#endif /* CONFIG_NETLINK_MMAP */ if (!sock_flag(sk, SOCK_DEAD)) { printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); @@ -1194,9 +580,6 @@ static int __netlink_create(struct net *net, struct socket *sock, mutex_init(nlk->cb_mutex); } init_waitqueue_head(&nlk->wait); -#ifdef CONFIG_NETLINK_MMAP - mutex_init(&nlk->pg_vec_lock); -#endif sk->sk_destruct = netlink_sock_destruct; sk->sk_protocol = protocol; @@ -1728,8 +1111,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && - !netlink_skb_is_mmaped(skb)) { + test_bit(NETLINK_S_CONGESTED, &nlk->state))) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) @@ -1767,14 +1149,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) netlink_deliver_tap(skb); -#ifdef CONFIG_NETLINK_MMAP - if (netlink_skb_is_mmaped(skb)) - netlink_queue_mmaped_skb(sk, skb); - else if (netlink_rx_is_mmaped(sk)) - netlink_ring_set_copied(sk, skb); - else -#endif /* CONFIG_NETLINK_MMAP */ - skb_queue_tail(&sk->sk_receive_queue, skb); + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); return len; } @@ -1798,9 +1173,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) int delta; WARN_ON(skb->sk != NULL); - if (netlink_skb_is_mmaped(skb)) - return skb; - delta = skb->end - skb->tail; if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; @@ -1876,79 +1248,6 @@ retry: } EXPORT_SYMBOL(netlink_unicast); -struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, - unsigned int ldiff, u32 dst_portid, - gfp_t gfp_mask) -{ -#ifdef CONFIG_NETLINK_MMAP - unsigned int maxlen, linear_size; - struct sock *sk = NULL; - struct sk_buff *skb; - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - - sk = netlink_getsockbyportid(ssk, dst_portid); - if (IS_ERR(sk)) - goto out; - - ring = &nlk_sk(sk)->rx_ring; - /* fast-path without atomic ops for common case: non-mmaped receiver */ - if (ring->pg_vec == NULL) - goto out_put; - - /* We need to account the full linear size needed as a ring - * slot cannot have non-linear parts. - */ - linear_size = size + ldiff; - if (ring->frame_size - NL_MMAP_HDRLEN < linear_size) - goto out_put; - - skb = alloc_skb_head(gfp_mask); - if (skb == NULL) - goto err1; - - spin_lock_bh(&sk->sk_receive_queue.lock); - /* check again under lock */ - if (ring->pg_vec == NULL) - goto out_free; - - /* check again under lock */ - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - if (maxlen < linear_size) - goto out_free; - - netlink_forward_ring(ring); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - goto err2; - - netlink_ring_setup_skb(skb, sk, ring, hdr); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - netlink_increment_head(ring); - - spin_unlock_bh(&sk->sk_receive_queue.lock); - return skb; - -err2: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - netlink_overrun(sk); -err1: - sock_put(sk); - return NULL; - -out_free: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); -out_put: - sock_put(sk); -out: -#endif - return alloc_skb(size, gfp_mask); -} -EXPORT_SYMBOL_GPL(__netlink_alloc_skb); - int netlink_has_listeners(struct sock *sk, unsigned int group) { int res = 0; @@ -2225,8 +1524,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_NETLINK) return -ENOPROTOOPT; - if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && - optlen >= sizeof(int) && + if (optlen >= sizeof(int) && get_user(val, (unsigned int __user *)optval)) return -EFAULT; @@ -2279,25 +1577,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, } err = 0; break; -#ifdef CONFIG_NETLINK_MMAP - case NETLINK_RX_RING: - case NETLINK_TX_RING: { - struct nl_mmap_req req; - - /* Rings might consume more memory than queue limits, require - * CAP_NET_ADMIN. - */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (optlen < sizeof(req)) - return -EINVAL; - if (copy_from_user(&req, optval, sizeof(req))) - return -EFAULT; - err = netlink_set_ring(sk, &req, - optname == NETLINK_TX_RING); - break; - } -#endif /* CONFIG_NETLINK_MMAP */ case NETLINK_LISTEN_ALL_NSID: if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) return -EPERM; @@ -2467,18 +1746,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) smp_rmb(); } - /* It's a really convoluted way for userland to ask for mmaped - * sendmsg(), but that's what we've got... - */ - if (netlink_tx_is_mmaped(sk) && - iter_is_iovec(&msg->msg_iter) && - msg->msg_iter.nr_segs == 1 && - msg->msg_iter.iov->iov_base == NULL) { - err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - &scm); - goto out; - } - err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) goto out; @@ -2794,8 +2061,7 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } - if (!netlink_rx_is_mmaped(sk) && - atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; /* NLMSG_GOODSIZE is small to avoid high order allocations being @@ -2808,15 +2074,12 @@ static int netlink_dump(struct sock *sk) if (alloc_min_size < nlk->max_recvmsg_len) { alloc_size = nlk->max_recvmsg_len; - skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, - GFP_KERNEL | - __GFP_NOWARN | - __GFP_NORETRY); + skb = alloc_skb(alloc_size, GFP_KERNEL | + __GFP_NOWARN | __GFP_NORETRY); } if (!skb) { alloc_size = alloc_min_size; - skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, - GFP_KERNEL); + skb = alloc_skb(alloc_size, GFP_KERNEL); } if (!skb) goto errout_skb; @@ -2831,8 +2094,7 @@ static int netlink_dump(struct sock *sk) * reasonable static buffer based on the expected largest dump of a * single netdev. The outcome is MSG_TRUNC error. */ - if (!netlink_rx_is_mmaped(sk)) - skb_reserve(skb, skb_tailroom(skb) - alloc_size); + skb_reserve(skb, skb_tailroom(skb) - alloc_size); netlink_skb_set_owner_r(skb, sk); len = cb->dump(skb, cb); @@ -2884,16 +2146,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_sock *nlk; int ret; - /* Memory mapped dump requests need to be copied to avoid looping - * on the pending state in netlink_mmap_sendmsg() while the CB hold - * a reference to the skb. - */ - if (netlink_skb_is_mmaped(skb)) { - skb = skb_copy(skb, GFP_KERNEL); - if (skb == NULL) - return -ENOBUFS; - } else - atomic_inc(&skb->users); + atomic_inc(&skb->users); sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { @@ -2966,8 +2219,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (!(nlk->flags & NETLINK_F_CAP_ACK) && err) payload += nlmsg_len(nlh); - skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload), - NETLINK_CB(in_skb).portid, GFP_KERNEL); + skb = nlmsg_new(payload, GFP_KERNEL); if (!skb) { struct sock *sk; @@ -3241,7 +2493,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll = netlink_poll, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -3249,7 +2501,7 @@ static const struct proto_ops netlink_ops = { .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, - .mmap = netlink_mmap, + .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 14437d9b1965..e68ef9ccd703 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -44,12 +44,6 @@ struct netlink_sock { int (*netlink_bind)(struct net *net, int group); void (*netlink_unbind)(struct net *net, int group); struct module *module; -#ifdef CONFIG_NETLINK_MMAP - struct mutex pg_vec_lock; - struct netlink_ring rx_ring; - struct netlink_ring tx_ring; - atomic_t mapped; -#endif /* CONFIG_NETLINK_MMAP */ struct rhash_head node; struct rcu_head rcu; @@ -60,15 +54,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) return container_of(sk, struct netlink_sock, sk); } -static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ -#ifdef CONFIG_NETLINK_MMAP - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -#else - return false; -#endif /* CONFIG_NETLINK_MMAP */ -} - struct netlink_table { struct rhashtable hash; struct hlist_head mc_list; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 3ee63a3cff30..8dd836a8dd60 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -8,41 +8,6 @@ #include "af_netlink.h" -#ifdef CONFIG_NETLINK_MMAP -static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, - struct sk_buff *nlskb) -{ - struct netlink_diag_ring ndr; - - ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; - ndr.ndr_block_nr = ring->pg_vec_len; - ndr.ndr_frame_size = ring->frame_size; - ndr.ndr_frame_nr = ring->frame_max + 1; - - return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); -} - -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - int ret; - - mutex_lock(&nlk->pg_vec_lock); - ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); - if (!ret) - ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, - nlskb); - mutex_unlock(&nlk->pg_vec_lock); - - return ret; -} -#else -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - return 0; -} -#endif - static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) { struct netlink_sock *nlk = nlk_sk(sk); @@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; - if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && - sk_diag_put_rings_cfg(sk, skb)) - goto out_nlmsg_trim; - nlmsg_end(skb, nlh); return 0; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f830326b3b1d..a09132a69869 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -463,26 +463,6 @@ int genl_unregister_family(struct genl_family *family) EXPORT_SYMBOL(genl_unregister_family); /** - * genlmsg_new_unicast - Allocate generic netlink message for unicast - * @payload: size of the message payload - * @info: information on destination - * @flags: the type of memory to allocate - * - * Allocates a new sk_buff large enough to cover the specified payload - * plus required Netlink headers. Will check receiving socket for - * memory mapped i/o capability and use it if enabled. Will fall back - * to non-mapped skb if message size exceeds the frame size of the ring. - */ -struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, - gfp_t flags) -{ - size_t len = nlmsg_total_size(genlmsg_total_size(payload)); - - return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags); -} -EXPORT_SYMBOL_GPL(genlmsg_new_unicast); - -/** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message * @portid: netlink portid the message is addressed to @@ -580,6 +560,10 @@ static int genl_family_rcv_msg(struct genl_family *family, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; + if ((ops->flags & GENL_UNS_ADMIN_PERM) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { int rc; @@ -638,7 +622,6 @@ static int genl_family_rcv_msg(struct genl_family *family, info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; info.attrs = attrbuf; - info.dst_sk = skb->sk; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index d143aa9f6654..cd5fd9d728a7 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -10,6 +10,7 @@ config OPENVSWITCH select LIBCRC32C select MPLS select NET_MPLS_GSO + select DST_CACHE ---help--- Open vSwitch is a multilayer Ethernet switch targeted at virtualized environments. In addition to supporting a variety of features diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 2d59df521915..e9dd47b2a85b 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, new_mpls_lse = (__be32 *)skb_mpls_header(skb); *new_mpls_lse = mpls->mpls_lse; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, - MPLS_HLEN, 0)); + skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); hdr = eth_hdr(skb); hdr->h_proto = mpls->mpls_ethertype; @@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, mask->eth_dst); - ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); @@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk /* Reconstruct the MAC header. */ skb_push(skb, data->l2_len); memcpy(skb->data, &data->l2_data, data->l2_len); - ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); + skb_postpush_rcsum(skb, skb->data, data->l2_len); skb_reset_mac_header(skb); ovs_vport_send(vport, skb); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index deadfdab1bc3..c4e8455d5d56 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -422,10 +422,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, struct sk_buff *nskb = NULL; struct sk_buff *user_skb = NULL; /* to be queued to userspace */ struct nlattr *nla; - struct genl_info info = { - .dst_sk = ovs_dp_get_net(dp)->genl_sock, - .snd_portid = upcall_info->portid, - }; size_t len; unsigned int hlen; int err, dp_ifindex; @@ -466,7 +462,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, hlen = skb->len; len = upcall_msg_size(upcall_info, hlen); - user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); + user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; goto out; @@ -654,7 +650,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { static const struct genl_ops dp_packet_genl_ops[] = { { .cmd = OVS_PACKET_CMD_EXECUTE, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = packet_policy, .doit = ovs_packet_cmd_execute } @@ -876,7 +872,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act return NULL; len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); - skb = genlmsg_new_unicast(len, info, GFP_KERNEL); + skb = genlmsg_new(len, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -1391,12 +1387,12 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, .doit = ovs_flow_cmd_del }, @@ -1407,7 +1403,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { .dumpit = ovs_flow_cmd_dump }, { .cmd = OVS_FLOW_CMD_SET, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, .doit = ovs_flow_cmd_set, }, @@ -1481,9 +1477,9 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) +static struct sk_buff *ovs_dp_cmd_alloc_info(void) { - return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); + return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); } /* Called with rcu_read_lock or ovs_mutex. */ @@ -1536,7 +1532,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) goto err; - reply = ovs_dp_cmd_alloc_info(info); + reply = ovs_dp_cmd_alloc_info(); if (!reply) return -ENOMEM; @@ -1657,7 +1653,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; - reply = ovs_dp_cmd_alloc_info(info); + reply = ovs_dp_cmd_alloc_info(); if (!reply) return -ENOMEM; @@ -1690,7 +1686,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; - reply = ovs_dp_cmd_alloc_info(info); + reply = ovs_dp_cmd_alloc_info(); if (!reply) return -ENOMEM; @@ -1723,7 +1719,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; - reply = ovs_dp_cmd_alloc_info(info); + reply = ovs_dp_cmd_alloc_info(); if (!reply) return -ENOMEM; @@ -1777,12 +1773,12 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { static const struct genl_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = datapath_policy, .doit = ovs_dp_cmd_new }, { .cmd = OVS_DP_CMD_DEL, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = datapath_policy, .doit = ovs_dp_cmd_del }, @@ -1793,7 +1789,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = { .dumpit = ovs_dp_cmd_dump }, { .cmd = OVS_DP_CMD_SET, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = datapath_policy, .doit = ovs_dp_cmd_set, }, @@ -2158,12 +2154,12 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { static const struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = vport_policy, .doit = ovs_vport_cmd_new }, { .cmd = OVS_VPORT_CMD_DEL, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = vport_policy, .doit = ovs_vport_cmd_del }, @@ -2174,7 +2170,7 @@ static const struct genl_ops dp_vport_genl_ops[] = { .dumpit = ovs_vport_cmd_dump }, { .cmd = OVS_VPORT_CMD_SET, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = vport_policy, .doit = ovs_vport_cmd_set, }, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d1bd4a45ca2d..58b8efc23668 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1959,6 +1959,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, if (!tun_dst) return -ENOMEM; + err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL); + if (err) { + dst_release((struct dst_entry *)tun_dst); + return err; + } + a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, sizeof(*ovs_tun), log); if (IS_ERR(a)) { diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6a6adf314363..4e3972344aa6 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -58,7 +58,7 @@ static void netdev_port_receive(struct sk_buff *skb) return; skb_push(skb, ETH_HLEN); - ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + skb_postpush_rcsum(skb, skb->data, ETH_HLEN); ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index c10899cb9040..f01f28a567ad 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -185,13 +185,6 @@ static inline struct vport *vport_from_priv(void *priv) int ovs_vport_receive(struct vport *, struct sk_buff *, const struct ip_tunnel_info *); -static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, - const void *start, unsigned int len) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); -} - static inline const char *ovs_vport_name(struct vport *vport) { return vport->dev->name; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 992396aa635c..b7e7851ddc5d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1960,6 +1960,64 @@ static unsigned int run_filter(struct sk_buff *skb, return res; } +static int __packet_rcv_vnet(const struct sk_buff *skb, + struct virtio_net_hdr *vnet_hdr) +{ + *vnet_hdr = (const struct virtio_net_hdr) { 0 }; + + if (skb_is_gso(skb)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + /* This is a hint as to how much should be linear. */ + vnet_hdr->hdr_len = + __cpu_to_virtio16(vio_le(), skb_headlen(skb)); + vnet_hdr->gso_size = + __cpu_to_virtio16(vio_le(), sinfo->gso_size); + + if (sinfo->gso_type & SKB_GSO_TCPV4) + vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else if (sinfo->gso_type & SKB_GSO_TCPV6) + vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; + else if (sinfo->gso_type & SKB_GSO_FCOE) + return -EINVAL; + else + BUG(); + + if (sinfo->gso_type & SKB_GSO_TCP_ECN) + vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; + } else + vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vnet_hdr->csum_start = __cpu_to_virtio16(vio_le(), + skb_checksum_start_offset(skb)); + vnet_hdr->csum_offset = __cpu_to_virtio16(vio_le(), + skb->csum_offset); + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; + } /* else everything is zero */ + + return 0; +} + +static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, + size_t *len) +{ + struct virtio_net_hdr vnet_hdr; + + if (*len < sizeof(vnet_hdr)) + return -EINVAL; + *len -= sizeof(vnet_hdr); + + if (__packet_rcv_vnet(skb, &vnet_hdr)) + return -EINVAL; + + return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr)); +} + /* * This function makes lazy skb cloning in hope that most of packets * are discarded by BPF. @@ -2148,7 +2206,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, unsigned int maclen = skb_network_offset(skb); netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + - po->tp_reserve; + po->tp_reserve; + if (po->has_vnet_hdr) + netoff += sizeof(struct virtio_net_hdr); macoff = netoff - maclen; } if (po->tp_version <= TPACKET_V2) { @@ -2185,7 +2245,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.raw = packet_current_rx_frame(po, skb, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) - goto ring_is_full; + goto drop_n_account; if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* @@ -2204,6 +2264,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } spin_unlock(&sk->sk_receive_queue.lock); + if (po->has_vnet_hdr) { + if (__packet_rcv_vnet(skb, h.raw + macoff - + sizeof(struct virtio_net_hdr))) { + spin_lock(&sk->sk_receive_queue.lock); + goto drop_n_account; + } + } + skb_copy_bits(skb, 0, h.raw + macoff, snaplen); if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) @@ -2299,7 +2367,7 @@ drop: kfree_skb(skb); return 0; -ring_is_full: +drop_n_account: po->stats.stats1.tp_drops++; spin_unlock(&sk->sk_receive_queue.lock); @@ -2347,15 +2415,92 @@ static void tpacket_set_protocol(const struct net_device *dev, } } +static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) +{ + unsigned short gso_type = 0; + + if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 > + __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len))) + vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(), + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2); + + if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len) + return -EINVAL; + + if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + gso_type = SKB_GSO_TCPV4; + break; + case VIRTIO_NET_HDR_GSO_TCPV6: + gso_type = SKB_GSO_TCPV6; + break; + case VIRTIO_NET_HDR_GSO_UDP: + gso_type = SKB_GSO_UDP; + break; + default: + return -EINVAL; + } + + if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) + gso_type |= SKB_GSO_TCP_ECN; + + if (vnet_hdr->gso_size == 0) + return -EINVAL; + } + + vnet_hdr->gso_type = gso_type; /* changes type, temporary storage */ + return 0; +} + +static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len, + struct virtio_net_hdr *vnet_hdr) +{ + int n; + + if (*len < sizeof(*vnet_hdr)) + return -EINVAL; + *len -= sizeof(*vnet_hdr); + + n = copy_from_iter(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter); + if (n != sizeof(*vnet_hdr)) + return -EFAULT; + + return __packet_snd_vnet_parse(vnet_hdr, *len); +} + +static int packet_snd_vnet_gso(struct sk_buff *skb, + struct virtio_net_hdr *vnet_hdr) +{ + if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start); + u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset); + + if (!skb_partial_csum_set(skb, s, o)) + return -EINVAL; + } + + skb_shinfo(skb)->gso_size = + __virtio16_to_cpu(vio_le(), vnet_hdr->gso_size); + skb_shinfo(skb)->gso_type = vnet_hdr->gso_type; + + /* Header must be checked, and gso_segs computed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + return 0; +} + static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, - void *frame, struct net_device *dev, int size_max, - __be16 proto, unsigned char *addr, int hlen) + void *frame, struct net_device *dev, void *data, int tp_len, + __be16 proto, unsigned char *addr, int hlen, int copylen) { union tpacket_uhdr ph; - int to_write, offset, len, tp_len, nr_frags, len_max; + int to_write, offset, len, nr_frags, len_max; struct socket *sock = po->sk.sk_socket; struct page *page; - void *data; int err; ph.raw = frame; @@ -2367,51 +2512,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); skb_shinfo(skb)->destructor_arg = ph.raw; - switch (po->tp_version) { - case TPACKET_V2: - tp_len = ph.h2->tp_len; - break; - default: - tp_len = ph.h1->tp_len; - break; - } - if (unlikely(tp_len > size_max)) { - pr_err("packet size is too long (%d > %d)\n", tp_len, size_max); - return -EMSGSIZE; - } - skb_reserve(skb, hlen); skb_reset_network_header(skb); - if (unlikely(po->tp_tx_has_off)) { - int off_min, off_max, off; - off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); - off_max = po->tx_ring.frame_size - tp_len; - if (sock->type == SOCK_DGRAM) { - switch (po->tp_version) { - case TPACKET_V2: - off = ph.h2->tp_net; - break; - default: - off = ph.h1->tp_net; - break; - } - } else { - switch (po->tp_version) { - case TPACKET_V2: - off = ph.h2->tp_mac; - break; - default: - off = ph.h1->tp_mac; - break; - } - } - if (unlikely((off < off_min) || (off_max < off))) - return -EINVAL; - data = ph.raw + off; - } else { - data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); - } to_write = tp_len; if (sock->type == SOCK_DGRAM) { @@ -2419,20 +2522,17 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, NULL, tp_len); if (unlikely(err < 0)) return -EINVAL; - } else if (dev->hard_header_len) { - if (ll_header_truncated(dev, tp_len)) - return -EINVAL; - + } else if (copylen) { skb_push(skb, dev->hard_header_len); - err = skb_store_bits(skb, 0, data, - dev->hard_header_len); + skb_put(skb, copylen - dev->hard_header_len); + err = skb_store_bits(skb, 0, data, copylen); if (unlikely(err)) return err; if (!skb->protocol) tpacket_set_protocol(dev, skb); - data += dev->hard_header_len; - to_write -= dev->hard_header_len; + data += copylen; + to_write -= copylen; } offset = offset_in_page(data); @@ -2469,10 +2569,66 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static int tpacket_parse_header(struct packet_sock *po, void *frame, + int size_max, void **data) +{ + union tpacket_uhdr ph; + int tp_len, off; + + ph.raw = frame; + + switch (po->tp_version) { + case TPACKET_V2: + tp_len = ph.h2->tp_len; + break; + default: + tp_len = ph.h1->tp_len; + break; + } + if (unlikely(tp_len > size_max)) { + pr_err("packet size is too long (%d > %d)\n", tp_len, size_max); + return -EMSGSIZE; + } + + if (unlikely(po->tp_tx_has_off)) { + int off_min, off_max; + + off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); + off_max = po->tx_ring.frame_size - tp_len; + if (po->sk.sk_type == SOCK_DGRAM) { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_net; + break; + default: + off = ph.h1->tp_net; + break; + } + } else { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_mac; + break; + default: + off = ph.h1->tp_mac; + break; + } + } + if (unlikely((off < off_min) || (off_max < off))) + return -EINVAL; + } else { + off = po->tp_hdrlen - sizeof(struct sockaddr_ll); + } + + *data = frame + off; + return tp_len; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; + struct virtio_net_hdr *vnet_hdr = NULL; __be16 proto; int err, reserve = 0; void *ph; @@ -2480,9 +2636,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); int tp_len, size_max; unsigned char *addr; + void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; - int hlen, tlen; + int hlen, tlen, copylen = 0; mutex_lock(&po->pg_vec_lock); @@ -2515,7 +2672,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); - if (size_max > dev->mtu + reserve + VLAN_HLEN) + if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) size_max = dev->mtu + reserve + VLAN_HLEN; do { @@ -2527,11 +2684,36 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) continue; } + skb = NULL; + tp_len = tpacket_parse_header(po, ph, size_max, &data); + if (tp_len < 0) + goto tpacket_error; + status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; + if (po->has_vnet_hdr) { + vnet_hdr = data; + data += sizeof(*vnet_hdr); + tp_len -= sizeof(*vnet_hdr); + if (tp_len < 0 || + __packet_snd_vnet_parse(vnet_hdr, tp_len)) { + tp_len = -EINVAL; + goto tpacket_error; + } + copylen = __virtio16_to_cpu(vio_le(), + vnet_hdr->hdr_len); + } + if (dev->hard_header_len) { + if (ll_header_truncated(dev, tp_len)) { + tp_len = -EINVAL; + goto tpacket_error; + } + copylen = max_t(int, copylen, dev->hard_header_len); + } skb = sock_alloc_send_skb(&po->sk, - hlen + tlen + sizeof(struct sockaddr_ll), + hlen + tlen + sizeof(struct sockaddr_ll) + + (copylen - dev->hard_header_len), !need_wait, &err); if (unlikely(skb == NULL)) { @@ -2540,14 +2722,16 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) err = len_sum; goto out_status; } - tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, - addr, hlen); + tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, + addr, hlen, copylen); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && + !po->has_vnet_hdr && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; if (unlikely(tp_len < 0)) { +tpacket_error: if (po->tp_loss) { __packet_set_status(po, ph, TP_STATUS_AVAILABLE); @@ -2561,6 +2745,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } + if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) { + tp_len = -EINVAL; + goto tpacket_error; + } + packet_pick_tx_queue(dev, skb); skb->destructor = tpacket_destruct_skb; @@ -2643,12 +2832,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; - int vnet_hdr_len; struct packet_sock *po = pkt_sk(sk); - unsigned short gso_type = 0; int hlen, tlen; int extra_len = 0; - ssize_t n; /* * Get and verify the address. @@ -2686,53 +2872,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; if (po->has_vnet_hdr) { - vnet_hdr_len = sizeof(vnet_hdr); - - err = -EINVAL; - if (len < vnet_hdr_len) - goto out_unlock; - - len -= vnet_hdr_len; - - err = -EFAULT; - n = copy_from_iter(&vnet_hdr, vnet_hdr_len, &msg->msg_iter); - if (n != vnet_hdr_len) - goto out_unlock; - - if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - (__virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) + - __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2 > - __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len))) - vnet_hdr.hdr_len = __cpu_to_virtio16(vio_le(), - __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) + - __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2); - - err = -EINVAL; - if (__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len) > len) + err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); + if (err) goto out_unlock; - - if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { - switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { - case VIRTIO_NET_HDR_GSO_TCPV4: - gso_type = SKB_GSO_TCPV4; - break; - case VIRTIO_NET_HDR_GSO_TCPV6: - gso_type = SKB_GSO_TCPV6; - break; - case VIRTIO_NET_HDR_GSO_UDP: - gso_type = SKB_GSO_UDP; - break; - default: - goto out_unlock; - } - - if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) - gso_type |= SKB_GSO_TCP_ECN; - - if (vnet_hdr.gso_size == 0) - goto out_unlock; - - } } if (unlikely(sock_flag(sk, SOCK_NOFCS))) { @@ -2744,7 +2886,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) } err = -EMSGSIZE; - if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) + if (!vnet_hdr.gso_type && + (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) goto out_unlock; err = -ENOBUFS; @@ -2775,7 +2918,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (!gso_type && (len > dev->mtu + reserve + extra_len) && + if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; goto out_free; @@ -2789,24 +2932,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) packet_pick_tx_queue(dev, skb); if (po->has_vnet_hdr) { - if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start); - u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset); - if (!skb_partial_csum_set(skb, s, o)) { - err = -EINVAL; - goto out_free; - } - } - - skb_shinfo(skb)->gso_size = - __virtio16_to_cpu(vio_le(), vnet_hdr.gso_size); - skb_shinfo(skb)->gso_type = gso_type; - - /* Header must be checked, and gso_segs computed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; - - len += vnet_hdr_len; + err = packet_snd_vnet_gso(skb, &vnet_hdr); + if (err) + goto out_free; + len += sizeof(vnet_hdr); } skb_probe_transport_header(skb, reserve); @@ -3177,51 +3306,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, packet_rcv_has_room(pkt_sk(sk), NULL); if (pkt_sk(sk)->has_vnet_hdr) { - struct virtio_net_hdr vnet_hdr = { 0 }; - - err = -EINVAL; - vnet_hdr_len = sizeof(vnet_hdr); - if (len < vnet_hdr_len) - goto out_free; - - len -= vnet_hdr_len; - - if (skb_is_gso(skb)) { - struct skb_shared_info *sinfo = skb_shinfo(skb); - - /* This is a hint as to how much should be linear. */ - vnet_hdr.hdr_len = - __cpu_to_virtio16(vio_le(), skb_headlen(skb)); - vnet_hdr.gso_size = - __cpu_to_virtio16(vio_le(), sinfo->gso_size); - if (sinfo->gso_type & SKB_GSO_TCPV4) - vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; - else if (sinfo->gso_type & SKB_GSO_TCPV6) - vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - else if (sinfo->gso_type & SKB_GSO_UDP) - vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; - else if (sinfo->gso_type & SKB_GSO_FCOE) - goto out_free; - else - BUG(); - if (sinfo->gso_type & SKB_GSO_TCP_ECN) - vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; - } else - vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - vnet_hdr.csum_start = __cpu_to_virtio16(vio_le(), - skb_checksum_start_offset(skb)); - vnet_hdr.csum_offset = __cpu_to_virtio16(vio_le(), - skb->csum_offset); - } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID; - } /* else everything is zero */ - - err = memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_len); - if (err < 0) + err = packet_rcv_vnet(msg, skb, &len); + if (err) goto out_free; + vnet_hdr_len = sizeof(struct virtio_net_hdr); } /* You lose any data beyond the buffer you gave. If it worries @@ -3552,8 +3640,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv } if (optlen < len) return -EINVAL; - if (pkt_sk(sk)->has_vnet_hdr) - return -EINVAL; if (copy_from_user(&req_u.req, optval, len)) return -EFAULT; return packet_set_ring(sk, &req_u, 0, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index d575ef4e9aa6..ffd5f2297584 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -140,13 +140,15 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb) rcu_read_unlock(); } -void pn_sock_hash(struct sock *sk) +int pn_sock_hash(struct sock *sk) { struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject); mutex_lock(&pnsocks.lock); sk_add_node_rcu(sk, hlist); mutex_unlock(&pnsocks.lock); + + return 0; } EXPORT_SYMBOL(pn_sock_hash); @@ -200,7 +202,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) pn->resource = spn->spn_resource; /* Enable RX on the socket */ - sk->sk_prot->hash(sk); + err = sk->sk_prot->hash(sk); out_port: mutex_unlock(&port_mutex); out: diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 9d6ddbacd875..ad60299b088b 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -37,7 +37,6 @@ #include <net/tcp.h> #include <net/net_namespace.h> #include <net/netns/generic.h> -#include <net/tcp.h> #include "rds.h" #include "tcp.h" diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b07c535ba8e7..eeb3eb3ea9eb 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -105,9 +105,7 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, int hl = ihl + jhl; if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || - (skb_cloned(skb) && - !skb_clone_writable(skb, hl + ntkoff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, hl + ntkoff)) return NULL; else return (void *)(skb_network_header(skb) + ihl); @@ -365,9 +363,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) } if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { - if (skb_cloned(skb) && - !skb_clone_writable(skb, sizeof(*iph) + ntkoff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff)) goto fail; ip_send_check(ip_hdr(skb)); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index b7c4ead8b5a8..27607b863aba 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -126,9 +126,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, addr = iph->daddr; if (!((old_addr ^ addr) & mask)) { - if (skb_cloned(skb) && - !skb_clone_writable(skb, sizeof(*iph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, sizeof(*iph) + noff)) goto drop; new_addr &= mask; @@ -156,9 +154,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, struct tcphdr *tcph; if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || - (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff)) goto drop; tcph = (void *)(skb_network_header(skb) + ihl); @@ -171,9 +167,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, struct udphdr *udph; if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || - (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, ihl + sizeof(*udph) + noff)) goto drop; udph = (void *)(skb_network_header(skb) + ihl); @@ -213,10 +207,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, if ((old_addr ^ addr) & mask) break; - if (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*icmph) + - sizeof(*iph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, ihl + sizeof(*icmph) + + sizeof(*iph) + noff)) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4fbb67430ce4..d54bc942ea87 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -43,6 +43,7 @@ #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> +#include <linux/netdevice.h> struct tc_u_knode { struct tc_u_knode __rcu *next; @@ -424,6 +425,93 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } +static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; + offload.cls_u32->knode.handle = handle; + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + +static void u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_NEW_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + +static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + +static void u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; + offload.cls_u32->knode.handle = n->handle; + offload.cls_u32->knode.fshift = n->fshift; +#ifdef CONFIG_CLS_U32_MARK + offload.cls_u32->knode.val = n->val; + offload.cls_u32->knode.mask = n->mask; +#else + offload.cls_u32->knode.val = 0; + offload.cls_u32->knode.mask = 0; +#endif + offload.cls_u32->knode.sel = &n->sel; + offload.cls_u32->knode.exts = &n->exts; + if (n->ht_down) + offload.cls_u32->knode.link_handle = n->ht_down->handle; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) { struct tc_u_knode *n; @@ -434,6 +522,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); + u32_remove_hw_knode(tp, n->handle); call_rcu(&n->rcu, u32_delete_key_freepf_rcu); } } @@ -454,6 +543,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) phn; hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { + u32_clear_hw_hnode(tp, ht); RCU_INIT_POINTER(*hn, ht->next); kfree_rcu(ht, rcu); return 0; @@ -540,8 +630,10 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) if (ht == NULL) return 0; - if (TC_U32_KEY(ht->handle)) + if (TC_U32_KEY(ht->handle)) { + u32_remove_hw_knode(tp, ht->handle); return u32_delete_key(tp, (struct tc_u_knode *)ht); + } if (root_ht == ht) return -EINVAL; @@ -769,6 +861,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); call_rcu(&n->rcu, u32_delete_key_rcu); + u32_replace_hw_knode(tp, new); return 0; } @@ -795,6 +888,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); *arg = (unsigned long)ht; + + u32_replace_hw_hnode(tp, ht); return 0; } @@ -877,7 +972,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->next, pins); rcu_assign_pointer(*ins, n); - + u32_replace_hw_knode(tp, n); *arg = (unsigned long)n; return 0; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index af1acf009866..de1e176e35cc 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1841,7 +1841,7 @@ reclassify: return err; } - return -1; + return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT reset: if (unlikely(limit++ >= MAX_REC_LOOP)) { diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ad70ecf57ce7..f9947d1f4952 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -28,6 +28,7 @@ static void mqprio_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); + struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO}; unsigned int ntx; if (priv->qdiscs) { @@ -39,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch) } if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) - dev->netdev_ops->ndo_setup_tc(dev, 0); + dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); else netdev_set_num_tc(dev, 0); } @@ -140,8 +141,11 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) * supplied and verified mapping */ if (qopt->hw) { + struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO, + .tc = qopt->num_tc}; + priv->hw_owned = 1; - err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); + err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); if (err) goto err; } else { diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index a3380917f197..3aa43073e0b9 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -70,19 +70,6 @@ static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) return msg; } -void sctp_datamsg_free(struct sctp_datamsg *msg) -{ - struct sctp_chunk *chunk; - - /* This doesn't have to be a _safe vairant because - * sctp_chunk_free() only drops the refs. - */ - list_for_each_entry(chunk, &msg->chunks, frag_list) - sctp_chunk_free(chunk); - - sctp_datamsg_put(msg); -} - /* Final destructruction of datamsg memory. */ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) { diff --git a/net/sctp/input.c b/net/sctp/input.c index 49d2cc751386..21a2d6b7abaf 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -937,7 +937,6 @@ static struct sctp_association *__sctp_lookup_association( struct sctp_transport *t; struct sctp_association *asoc = NULL; - rcu_read_lock(); t = sctp_addrs_lookup_transport(net, local, peer); if (!t || !sctp_transport_hold(t)) goto out; @@ -949,7 +948,6 @@ static struct sctp_association *__sctp_lookup_association( sctp_transport_put(t); out: - rcu_read_unlock(); return asoc; } @@ -962,7 +960,9 @@ struct sctp_association *sctp_lookup_association(struct net *net, { struct sctp_association *asoc; + rcu_read_lock(); asoc = __sctp_lookup_association(net, laddr, paddr, transportp); + rcu_read_unlock(); return asoc; } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ded7d931a6a5..cfc3c7101a38 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -161,7 +161,6 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa struct sctp_af *af; primary = &assoc->peer.primary_addr; - rcu_read_lock(); list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list, transports) { addr = &transport->ipaddr; @@ -172,7 +171,6 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa } af->seq_dump_addr(seq, addr); } - rcu_read_unlock(); } static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e878da0949db..b89501e5c1a1 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6106,9 +6106,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, return retval; } -static void sctp_hash(struct sock *sk) +static int sctp_hash(struct sock *sk) { /* STUB */ + return 0; } static void sctp_unhash(struct sock *sk) diff --git a/net/tipc/link.c b/net/tipc/link.c index 347cdc99ed09..e31d92f80572 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -123,7 +123,6 @@ struct tipc_stats { struct tipc_link { u32 addr; char name[TIPC_MAX_LINK_NAME]; - struct tipc_media_addr *media_addr; struct net *net; /* Management and link supervision data */ @@ -904,8 +903,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) return link_schedule_user(l, list); } - if (unlikely(msg_size(hdr) > mtu)) + if (unlikely(msg_size(hdr) > mtu)) { + skb_queue_purge(list); return -EMSGSIZE; + } /* Prepare each packet for sending, and add to relevant queue: */ while (skb_queue_len(list)) { @@ -917,8 +918,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, if (likely(skb_queue_len(transmq) < maxwin)) { _skb = skb_clone(skb, GFP_ATOMIC); - if (!_skb) + if (!_skb) { + skb_queue_purge(list); return -ENOBUFS; + } __skb_dequeue(list); __skb_queue_tail(transmq, skb); __skb_queue_tail(xmitq, _skb); @@ -1261,26 +1264,6 @@ drop: return rc; } -/* - * Send protocol message to the other endpoint. - */ -static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, - int probe_msg, u32 gap, u32 tolerance, - u32 priority) -{ - struct sk_buff *skb = NULL; - struct sk_buff_head xmitq; - - __skb_queue_head_init(&xmitq); - tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap, - tolerance, priority, &xmitq); - skb = __skb_dequeue(&xmitq); - if (!skb) - return; - tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr); - l->rcv_unacked = 0; -} - static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) @@ -1479,6 +1462,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) l->tolerance = peers_tol; + if (peers_prio && in_range(peers_prio, TIPC_MIN_LINK_PRI, + TIPC_MAX_LINK_PRI)) { + l->priority = peers_prio; + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + l->silent_intv_cnt = 0; l->stats.recv_states++; if (msg_probe(hdr)) @@ -2023,16 +2012,18 @@ msg_full: return -EMSGSIZE; } -void tipc_link_set_tolerance(struct tipc_link *l, u32 tol) +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, + struct sk_buff_head *xmitq) { l->tolerance = tol; - tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq); } -void tipc_link_set_prio(struct tipc_link *l, u32 prio) +void tipc_link_set_prio(struct tipc_link *l, u32 prio, + struct sk_buff_head *xmitq) { l->priority = prio; - tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq); } void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) diff --git a/net/tipc/link.h b/net/tipc/link.h index b2ae0f4276af..b4ee9d6e181d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -112,8 +112,10 @@ char tipc_link_plane(struct tipc_link *l); int tipc_link_prio(struct tipc_link *l); int tipc_link_window(struct tipc_link *l); unsigned long tipc_link_tolerance(struct tipc_link *l); -void tipc_link_set_tolerance(struct tipc_link *l, u32 tol); -void tipc_link_set_prio(struct tipc_link *l, u32 prio); +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, + struct sk_buff_head *xmitq); +void tipc_link_set_prio(struct tipc_link *l, u32 prio, + struct sk_buff_head *xmitq); void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 91fce70291a8..777b979b8463 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -418,6 +418,9 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, struct tipc_subscription *s) { struct sub_seq *sseq = nseq->sseqs; + struct tipc_name_seq ns; + + tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); list_add(&s->nameseq_list, &nseq->subscriptions); @@ -425,7 +428,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, return; while (sseq != &nseq->sseqs[nseq->first_free]) { - if (tipc_subscrp_check_overlap(s, sseq->lower, sseq->upper)) { + if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) { struct publication *crs; struct name_info *info = sseq->info; int must_report = 1; @@ -722,9 +725,10 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, void tipc_nametbl_subscribe(struct tipc_subscription *s) { struct tipc_net *tn = net_generic(s->net, tipc_net_id); - u32 type = s->seq.type; + u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); int index = hash(type); struct name_seq *seq; + struct tipc_name_seq ns; spin_lock_bh(&tn->nametbl_lock); seq = nametbl_find_seq(s->net, type); @@ -735,8 +739,9 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) tipc_nameseq_subscribe(seq, s); spin_unlock_bh(&seq->lock); } else { + tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); pr_warn("Failed to create subscription for {%u,%u,%u}\n", - s->seq.type, s->seq.lower, s->seq.upper); + ns.type, ns.lower, ns.upper); } spin_unlock_bh(&tn->nametbl_lock); } @@ -748,9 +753,10 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) { struct tipc_net *tn = net_generic(s->net, tipc_net_id); struct name_seq *seq; + u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); spin_lock_bh(&tn->nametbl_lock); - seq = nametbl_find_seq(s->net, s->seq.type); + seq = nametbl_find_seq(s->net, type); if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 2c016fdefe97..de66d8f945ed 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1104,7 +1104,6 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) req_nlh = (struct nlmsghdr *)skb->data; msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; msg.cmd = req_userhdr->cmd; - msg.dst_sk = info->dst_sk; msg.net = genl_info_net(info); if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { diff --git a/net/tipc/node.c b/net/tipc/node.c index 9d7a16fc5ca4..9fcc2fb0ee00 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1166,7 +1166,7 @@ msg_full: * @dnode: address of destination node * @selector: a number used for deterministic link selection * Consumes the buffer chain, except when returning -ELINKCONG - * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE + * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF */ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) @@ -1174,33 +1174,43 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, struct tipc_link_entry *le = NULL; struct tipc_node *n; struct sk_buff_head xmitq; - int bearer_id = -1; - int rc = -EHOSTUNREACH; + int bearer_id; + int rc; + + if (in_own_node(net, dnode)) { + tipc_sk_rcv(net, list); + return 0; + } - __skb_queue_head_init(&xmitq); n = tipc_node_find(net, dnode); - if (likely(n)) { - tipc_node_read_lock(n); - bearer_id = n->active_links[selector & 1]; - if (bearer_id >= 0) { - le = &n->links[bearer_id]; - spin_lock_bh(&le->lock); - rc = tipc_link_xmit(le->link, list, &xmitq); - spin_unlock_bh(&le->lock); - } + if (unlikely(!n)) { + skb_queue_purge(list); + return -EHOSTUNREACH; + } + + tipc_node_read_lock(n); + bearer_id = n->active_links[selector & 1]; + if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); - if (likely(!rc)) - tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); - else if (rc == -ENOBUFS) - tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); - return rc; + skb_queue_purge(list); + return -EHOSTUNREACH; } - if (likely(in_own_node(net, dnode))) { - tipc_sk_rcv(net, list); - return 0; - } + __skb_queue_head_init(&xmitq); + le = &n->links[bearer_id]; + spin_lock_bh(&le->lock); + rc = tipc_link_xmit(le->link, list, &xmitq); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(n); + + if (likely(rc == 0)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + else if (rc == -ENOBUFS) + tipc_node_link_down(n, bearer_id, false); + + tipc_node_put(n); + return rc; } @@ -1637,9 +1647,12 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_link *link; struct tipc_node *node; + struct sk_buff_head xmitq; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct net *net = sock_net(skb->sk); + __skb_queue_head_init(&xmitq); + if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -1683,13 +1696,13 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) u32 tol; tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - tipc_link_set_tolerance(link, tol); + tipc_link_set_tolerance(link, tol, &xmitq); } if (props[TIPC_NLA_PROP_PRIO]) { u32 prio; prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); - tipc_link_set_prio(link, prio); + tipc_link_set_prio(link, prio, &xmitq); } if (props[TIPC_NLA_PROP_WIN]) { u32 win; @@ -1701,7 +1714,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) out: tipc_node_read_unlock(node); - + tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr); return res; } diff --git a/net/tipc/server.c b/net/tipc/server.c index 922e04a43396..2446bfbaa309 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -571,13 +571,13 @@ static void tipc_work_stop(struct tipc_server *s) static int tipc_work_start(struct tipc_server *s) { - s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1); + s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0); if (!s->rcv_wq) { pr_err("can't start tipc receive workqueue\n"); return -ENOMEM; } - s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1); + s->send_wq = alloc_ordered_workqueue("tipc_send", 0); if (!s->send_wq) { pr_err("can't start tipc send workqueue\n"); destroy_workqueue(s->rcv_wq); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 69ee2eeef968..22963cafd5ed 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -92,25 +92,42 @@ static void tipc_subscrp_send_event(struct tipc_subscription *sub, * * Returns 1 if there is overlap, otherwise 0. */ -int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower, +int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, u32 found_upper) { - if (found_lower < sub->seq.lower) - found_lower = sub->seq.lower; - if (found_upper > sub->seq.upper) - found_upper = sub->seq.upper; + if (found_lower < seq->lower) + found_lower = seq->lower; + if (found_upper > seq->upper) + found_upper = seq->upper; if (found_lower > found_upper) return 0; return 1; } +u32 tipc_subscrp_convert_seq_type(u32 type, int swap) +{ + return htohl(type, swap); +} + +void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, + struct tipc_name_seq *out) +{ + out->type = htohl(in->type, swap); + out->lower = htohl(in->lower, swap); + out->upper = htohl(in->upper, swap); +} + void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, u32 node, int must) { - if (!tipc_subscrp_check_overlap(sub, found_lower, found_upper)) + struct tipc_name_seq seq; + + tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); + if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; - if (!must && !(sub->filter & TIPC_SUB_PORTS)) + if (!must && + !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS)) return; tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, @@ -171,12 +188,14 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid) static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) { struct tipc_subscription *sub, *temp; + u32 timeout; spin_lock_bh(&subscriber->lock); /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, subscrp_list) { - if (del_timer(&sub->timer)) { + timeout = htohl(sub->evt.s.timeout, sub->swap); + if ((timeout == TIPC_WAIT_FOREVER) || del_timer(&sub->timer)) { tipc_subscrp_delete(sub); tipc_subscrb_put(subscriber); } @@ -200,13 +219,16 @@ static void tipc_subscrp_cancel(struct tipc_subscr *s, struct tipc_subscriber *subscriber) { struct tipc_subscription *sub, *temp; + u32 timeout; spin_lock_bh(&subscriber->lock); /* Find first matching subscription, exit if not found */ list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, subscrp_list) { if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { - if (del_timer(&sub->timer)) { + timeout = htohl(sub->evt.s.timeout, sub->swap); + if ((timeout == TIPC_WAIT_FOREVER) || + del_timer(&sub->timer)) { tipc_subscrp_delete(sub); tipc_subscrb_put(subscriber); } @@ -216,66 +238,67 @@ static void tipc_subscrp_cancel(struct tipc_subscr *s, spin_unlock_bh(&subscriber->lock); } -static int tipc_subscrp_create(struct net *net, struct tipc_subscr *s, - struct tipc_subscriber *subscriber, - struct tipc_subscription **sub_p) +static struct tipc_subscription *tipc_subscrp_create(struct net *net, + struct tipc_subscr *s, + int swap) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub; - int swap; - - /* Determine subscriber's endianness */ - swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)); - - /* Detect & process a subscription cancellation request */ - if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { - s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); - tipc_subscrp_cancel(s, subscriber); - return 0; - } + u32 filter = htohl(s->filter, swap); /* Refuse subscription if global limit exceeded */ if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); - return -EINVAL; + return NULL; } /* Allocate subscription object */ sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (!sub) { pr_warn("Subscription rejected, no memory\n"); - return -ENOMEM; + return NULL; } /* Initialize subscription object */ sub->net = net; - sub->seq.type = htohl(s->seq.type, swap); - sub->seq.lower = htohl(s->seq.lower, swap); - sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = msecs_to_jiffies(htohl(s->timeout, swap)); - sub->filter = htohl(s->filter, swap); - if ((!(sub->filter & TIPC_SUB_PORTS) == - !(sub->filter & TIPC_SUB_SERVICE)) || - (sub->seq.lower > sub->seq.upper)) { + if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) || + (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) { pr_warn("Subscription rejected, illegal request\n"); kfree(sub); - return -EINVAL; + return NULL; } - spin_lock_bh(&subscriber->lock); - list_add(&sub->subscrp_list, &subscriber->subscrp_list); - spin_unlock_bh(&subscriber->lock); - sub->subscriber = subscriber; + sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(*s)); atomic_inc(&tn->subscription_count); + return sub; +} + +static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, + struct tipc_subscriber *subscriber, int swap) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_subscription *sub = NULL; + u32 timeout; + + sub = tipc_subscrp_create(net, s, swap); + if (!sub) + return tipc_conn_terminate(tn->topsrv, subscriber->conid); + + spin_lock_bh(&subscriber->lock); + list_add(&sub->subscrp_list, &subscriber->subscrp_list); + tipc_subscrb_get(subscriber); + sub->subscriber = subscriber; + tipc_nametbl_subscribe(sub); + spin_unlock_bh(&subscriber->lock); + + timeout = htohl(sub->evt.s.timeout, swap); + if (timeout == TIPC_WAIT_FOREVER) + return; + setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); - if (sub->timeout != TIPC_WAIT_FOREVER) - sub->timeout += jiffies; - if (!mod_timer(&sub->timer, sub->timeout)) - tipc_subscrb_get(subscriber); - *sub_p = sub; - return 0; + mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); } /* Handle one termination request for the subscriber */ @@ -289,14 +312,21 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len) { - struct tipc_subscriber *subscrb = usr_data; - struct tipc_subscription *sub = NULL; - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_subscriber *subscriber = usr_data; + struct tipc_subscr *s = (struct tipc_subscr *)buf; + int swap; + + /* Determine subscriber's endianness */ + swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | + TIPC_SUB_CANCEL)); - if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub)) - return tipc_conn_terminate(tn->topsrv, subscrb->conid); + /* Detect & process a subscription cancellation request */ + if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { + s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); + return tipc_subscrp_cancel(s, subscriber); + } - tipc_nametbl_subscribe(sub); + tipc_subscrp_subscribe(net, s, subscriber, swap); } /* Handle one request to establish a new subscriber */ diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 92ee18cc5fe6..be60103082c9 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -50,21 +50,15 @@ struct tipc_subscriber; * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription * @net: point to network namespace - * @timeout: duration of subscription (in ms) - * @filter: event filtering to be done for subscription * @timer: timer governing subscription duration (optional) * @nameseq_list: adjacent subscriptions in name sequence's subscription list * @subscrp_list: adjacent subscriptions in subscriber's subscription list - * @server_ref: object reference of server port associated with subscription * @swap: indicates if subscriber uses opposite endianness in its messages * @evt: template for events generated by subscription */ struct tipc_subscription { struct tipc_subscriber *subscriber; - struct tipc_name_seq seq; struct net *net; - unsigned long timeout; - u32 filter; struct timer_list timer; struct list_head nameseq_list; struct list_head subscrp_list; @@ -72,11 +66,14 @@ struct tipc_subscription { struct tipc_event evt; }; -int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower, +int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, u32 found_upper); void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, u32 node, int must); +void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, + struct tipc_name_seq *out); +u32 tipc_subscrp_convert_seq_type(u32 type, int swap); int tipc_topsrv_start(struct net *net); void tipc_topsrv_stop(struct net *net); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f75f847e688d..8269da73e9e5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1534,7 +1534,6 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; unsigned char max_level = 0; - int unix_sock_count = 0; if (too_many_unix_fds(current)) return -ETOOMANYREFS; @@ -1542,11 +1541,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); - if (sk) { - unix_sock_count++; + if (sk) max_level = max(max_level, unix_sk(sk)->recursion_level); - } } if (unlikely(max_level > MAX_RECURSION_LEVEL)) return -ETOOMANYREFS; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index edd638b5825f..c4f8ae0c8afe 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -16,6 +16,7 @@ hostprogs-y += tracex5 hostprogs-y += tracex6 hostprogs-y += trace_output hostprogs-y += lathist +hostprogs-y += offwaketime test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -32,6 +33,7 @@ tracex5-objs := bpf_load.o libbpf.o tracex5_user.o tracex6-objs := bpf_load.o libbpf.o tracex6_user.o trace_output-objs := bpf_load.o libbpf.o trace_output_user.o lathist-objs := bpf_load.o libbpf.o lathist_user.o +offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -47,6 +49,7 @@ always += tracex6_kern.o always += trace_output_kern.o always += tcbpf1_kern.o always += lathist_kern.o +always += offwaketime_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -63,6 +66,7 @@ HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf HOSTLOADLIBES_trace_output += -lelf -lrt HOSTLOADLIBES_lathist += -lelf +HOSTLOADLIBES_offwaketime += -lelf # point this to your LLVM backend with bpf support LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 7ad19e1dbaf4..811bcca0f29d 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -39,6 +39,8 @@ static int (*bpf_redirect)(int ifindex, int flags) = (void *) BPF_FUNC_redirect; static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) = (void *) BPF_FUNC_perf_event_output; +static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = + (void *) BPF_FUNC_get_stackid; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c new file mode 100644 index 000000000000..c0aa5a9b9c48 --- /dev/null +++ b/samples/bpf/offwaketime_kern.c @@ -0,0 +1,131 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" +#include <uapi/linux/ptrace.h> +#include <uapi/linux/perf_event.h> +#include <linux/version.h> +#include <linux/sched.h> + +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) + +#define MINBLOCK_US 1 + +struct key_t { + char waker[TASK_COMM_LEN]; + char target[TASK_COMM_LEN]; + u32 wret; + u32 tret; +}; + +struct bpf_map_def SEC("maps") counts = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct key_t), + .value_size = sizeof(u64), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") start = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = 10000, +}; + +struct wokeby_t { + char name[TASK_COMM_LEN]; + u32 ret; +}; + +struct bpf_map_def SEC("maps") wokeby = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(struct wokeby_t), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(u32), + .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), + .max_entries = 10000, +}; + +#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) + +SEC("kprobe/try_to_wake_up") +int waker(struct pt_regs *ctx) +{ + struct task_struct *p = (void *) PT_REGS_PARM1(ctx); + struct wokeby_t woke = {}; + u32 pid; + + pid = _(p->pid); + + bpf_get_current_comm(&woke.name, sizeof(woke.name)); + woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS); + + bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY); + return 0; +} + +static inline int update_counts(struct pt_regs *ctx, u32 pid, u64 delta) +{ + struct key_t key = {}; + struct wokeby_t *woke; + u64 zero = 0, *val; + + bpf_get_current_comm(&key.target, sizeof(key.target)); + key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS); + + woke = bpf_map_lookup_elem(&wokeby, &pid); + if (woke) { + key.wret = woke->ret; + __builtin_memcpy(&key.waker, woke->name, TASK_COMM_LEN); + bpf_map_delete_elem(&wokeby, &pid); + } + + val = bpf_map_lookup_elem(&counts, &key); + if (!val) { + bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST); + val = bpf_map_lookup_elem(&counts, &key); + if (!val) + return 0; + } + (*val) += delta; + return 0; +} + +SEC("kprobe/finish_task_switch") +int oncpu(struct pt_regs *ctx) +{ + struct task_struct *p = (void *) PT_REGS_PARM1(ctx); + u64 delta, ts, *tsp; + u32 pid; + + /* record previous thread sleep time */ + pid = _(p->pid); + ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&start, &pid, &ts, BPF_ANY); + + /* calculate current thread's delta time */ + pid = bpf_get_current_pid_tgid(); + tsp = bpf_map_lookup_elem(&start, &pid); + if (!tsp) + /* missed start or filtered */ + return 0; + + delta = bpf_ktime_get_ns() - *tsp; + bpf_map_delete_elem(&start, &pid); + delta = delta / 1000; + if (delta < MINBLOCK_US) + return 0; + + return update_counts(ctx, pid, delta); +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c new file mode 100644 index 000000000000..17cf3024e22c --- /dev/null +++ b/samples/bpf/offwaketime_user.c @@ -0,0 +1,185 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <signal.h> +#include <linux/bpf.h> +#include <string.h> +#include <linux/perf_event.h> +#include <errno.h> +#include <assert.h> +#include <stdbool.h> +#include <sys/resource.h> +#include "libbpf.h" +#include "bpf_load.h" + +#define MAX_SYMS 300000 +#define PRINT_RAW_ADDR 0 + +static struct ksym { + long addr; + char *name; +} syms[MAX_SYMS]; +static int sym_cnt; + +static int ksym_cmp(const void *p1, const void *p2) +{ + return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; +} + +static int load_kallsyms(void) +{ + FILE *f = fopen("/proc/kallsyms", "r"); + char func[256], buf[256]; + char symbol; + void *addr; + int i = 0; + + if (!f) + return -ENOENT; + + while (!feof(f)) { + if (!fgets(buf, sizeof(buf), f)) + break; + if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) + break; + if (!addr) + continue; + syms[i].addr = (long) addr; + syms[i].name = strdup(func); + i++; + } + sym_cnt = i; + qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); + return 0; +} + +static void *search(long key) +{ + int start = 0, end = sym_cnt; + int result; + + while (start < end) { + size_t mid = start + (end - start) / 2; + + result = key - syms[mid].addr; + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return &syms[mid]; + } + + if (start >= 1 && syms[start - 1].addr < key && + key < syms[start].addr) + /* valid ksym */ + return &syms[start - 1]; + + /* out of range. return _stext */ + return &syms[0]; +} + +static void print_ksym(__u64 addr) +{ + struct ksym *sym; + + if (!addr) + return; + sym = search(addr); + if (PRINT_RAW_ADDR) + printf("%s/%llx;", sym->name, addr); + else + printf("%s;", sym->name); +} + +#define TASK_COMM_LEN 16 + +struct key_t { + char waker[TASK_COMM_LEN]; + char target[TASK_COMM_LEN]; + __u32 wret; + __u32 tret; +}; + +static void print_stack(struct key_t *key, __u64 count) +{ + __u64 ip[PERF_MAX_STACK_DEPTH] = {}; + static bool warned; + int i; + + printf("%s;", key->target); + if (bpf_lookup_elem(map_fd[3], &key->tret, ip) != 0) { + printf("---;"); + } else { + for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) + print_ksym(ip[i]); + } + printf("-;"); + if (bpf_lookup_elem(map_fd[3], &key->wret, ip) != 0) { + printf("---;"); + } else { + for (i = 0; i < PERF_MAX_STACK_DEPTH; i++) + print_ksym(ip[i]); + } + printf(";%s %lld\n", key->waker, count); + + if ((key->tret == -EEXIST || key->wret == -EEXIST) && !warned) { + printf("stackmap collisions seen. Consider increasing size\n"); + warned = true; + } else if (((int)(key->tret) < 0 || (int)(key->wret) < 0)) { + printf("err stackid %d %d\n", key->tret, key->wret); + } +} + +static void print_stacks(int fd) +{ + struct key_t key = {}, next_key; + __u64 value; + + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + bpf_lookup_elem(fd, &next_key, &value); + print_stack(&next_key, value); + key = next_key; + } +} + +static void int_exit(int sig) +{ + print_stacks(map_fd[0]); + exit(0); +} + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char filename[256]; + int delay = 1; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); + + signal(SIGINT, int_exit); + + if (load_kallsyms()) { + printf("failed to process /proc/kallsyms\n"); + return 2; + } + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (argc > 1) + delay = atoi(argv[1]); + sleep(delay); + print_stacks(map_fd[0]); + + return 0; +} diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c index 6299ee95cd11..ad466ed33093 100644 --- a/samples/bpf/test_maps.c +++ b/samples/bpf/test_maps.c @@ -89,6 +89,100 @@ static void test_hashmap_sanity(int i, void *data) close(map_fd); } +/* sanity tests for percpu map API */ +static void test_percpu_hashmap_sanity(int task, void *data) +{ + long long key, next_key; + int expected_key_mask = 0; + unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + long long value[nr_cpus]; + int map_fd, i; + + map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key), + sizeof(value[0]), 2); + if (map_fd < 0) { + printf("failed to create hashmap '%s'\n", strerror(errno)); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) + value[i] = i + 100; + key = 1; + /* insert key=1 element */ + assert(!(expected_key_mask & key)); + assert(bpf_update_elem(map_fd, &key, value, BPF_ANY) == 0); + expected_key_mask |= key; + + /* BPF_NOEXIST means: add new element if it doesn't exist */ + assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == -1 && + /* key=1 already exists */ + errno == EEXIST); + + /* -1 is an invalid flag */ + assert(bpf_update_elem(map_fd, &key, value, -1) == -1 && + errno == EINVAL); + + /* check that key=1 can be found. value could be 0 if the lookup + * was run from a different cpu. + */ + value[0] = 1; + assert(bpf_lookup_elem(map_fd, &key, value) == 0 && value[0] == 100); + + key = 2; + /* check that key=2 is not found */ + assert(bpf_lookup_elem(map_fd, &key, value) == -1 && errno == ENOENT); + + /* BPF_EXIST means: update existing element */ + assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == -1 && + /* key=2 is not there */ + errno == ENOENT); + + /* insert key=2 element */ + assert(!(expected_key_mask & key)); + assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); + expected_key_mask |= key; + + /* key=1 and key=2 were inserted, check that key=0 cannot be inserted + * due to max_entries limit + */ + key = 0; + assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == -1 && + errno == E2BIG); + + /* check that key = 0 doesn't exist */ + assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); + + /* iterate over two elements */ + while (!bpf_get_next_key(map_fd, &key, &next_key)) { + assert((expected_key_mask & next_key) == next_key); + expected_key_mask &= ~next_key; + + assert(bpf_lookup_elem(map_fd, &next_key, value) == 0); + for (i = 0; i < nr_cpus; i++) + assert(value[i] == i + 100); + + key = next_key; + } + assert(errno == ENOENT); + + /* Update with BPF_EXIST */ + key = 1; + assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == 0); + + /* delete both elements */ + key = 1; + assert(bpf_delete_elem(map_fd, &key) == 0); + key = 2; + assert(bpf_delete_elem(map_fd, &key) == 0); + assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); + + key = 0; + /* check that map is empty */ + assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 && + errno == ENOENT); + close(map_fd); +} + static void test_arraymap_sanity(int i, void *data) { int key, next_key, map_fd; @@ -142,6 +236,94 @@ static void test_arraymap_sanity(int i, void *data) close(map_fd); } +static void test_percpu_arraymap_many_keys(void) +{ + unsigned nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + unsigned nr_keys = 20000; + long values[nr_cpus]; + int key, map_fd, i; + + map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), + sizeof(values[0]), nr_keys); + if (map_fd < 0) { + printf("failed to create per-cpu arraymap '%s'\n", + strerror(errno)); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) + values[i] = i + 10; + + for (key = 0; key < nr_keys; key++) + assert(bpf_update_elem(map_fd, &key, values, BPF_ANY) == 0); + + for (key = 0; key < nr_keys; key++) { + for (i = 0; i < nr_cpus; i++) + values[i] = 0; + assert(bpf_lookup_elem(map_fd, &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + assert(values[i] == i + 10); + } + + close(map_fd); +} + +static void test_percpu_arraymap_sanity(int i, void *data) +{ + unsigned nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + long values[nr_cpus]; + int key, next_key, map_fd; + + map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), + sizeof(values[0]), 2); + if (map_fd < 0) { + printf("failed to create arraymap '%s'\n", strerror(errno)); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) + values[i] = i + 100; + + key = 1; + /* insert key=1 element */ + assert(bpf_update_elem(map_fd, &key, values, BPF_ANY) == 0); + + values[0] = 0; + assert(bpf_update_elem(map_fd, &key, values, BPF_NOEXIST) == -1 && + errno == EEXIST); + + /* check that key=1 can be found */ + assert(bpf_lookup_elem(map_fd, &key, values) == 0 && values[0] == 100); + + key = 0; + /* check that key=0 is also found and zero initialized */ + assert(bpf_lookup_elem(map_fd, &key, values) == 0 && + values[0] == 0 && values[nr_cpus - 1] == 0); + + + /* check that key=2 cannot be inserted due to max_entries limit */ + key = 2; + assert(bpf_update_elem(map_fd, &key, values, BPF_EXIST) == -1 && + errno == E2BIG); + + /* check that key = 2 doesn't exist */ + assert(bpf_lookup_elem(map_fd, &key, values) == -1 && errno == ENOENT); + + /* iterate over two elements */ + assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 && + next_key == 0); + assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 && + next_key == 1); + assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 && + errno == ENOENT); + + /* delete shouldn't succeed */ + key = 1; + assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL); + + close(map_fd); +} + #define MAP_SIZE (32 * 1024) static void test_map_large(void) { @@ -209,7 +391,9 @@ static void run_parallel(int tasks, void (*fn)(int i, void *data), void *data) static void test_map_stress(void) { run_parallel(100, test_hashmap_sanity, NULL); + run_parallel(100, test_percpu_hashmap_sanity, NULL); run_parallel(100, test_arraymap_sanity, NULL); + run_parallel(100, test_percpu_arraymap_sanity, NULL); } #define TASKS 1024 @@ -282,7 +466,11 @@ static void test_map_parallel(void) int main(void) { test_hashmap_sanity(0, NULL); + test_percpu_hashmap_sanity(0, NULL); test_arraymap_sanity(0, NULL); + test_percpu_arraymap_sanity(0, NULL); + test_percpu_arraymap_many_keys(); + test_map_large(); test_map_parallel(); test_map_stress(); diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index b32367cfbff4..09c1adc27d42 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -70,7 +70,7 @@ struct hist_key { }; struct bpf_map_def SEC("maps") my_hist_map = { - .type = BPF_MAP_TYPE_HASH, + .type = BPF_MAP_TYPE_PERCPU_HASH, .key_size = sizeof(struct hist_key), .value_size = sizeof(long), .max_entries = 1024, diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index cd0241c1447a..ab5b19e68acf 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -37,6 +37,8 @@ struct hist_key { static void print_hist_for_pid(int fd, void *task) { struct hist_key key = {}, next_key; + unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + long values[nr_cpus]; char starstr[MAX_STARS]; long value; long data[MAX_INDEX] = {}; @@ -49,7 +51,10 @@ static void print_hist_for_pid(int fd, void *task) key = next_key; continue; } - bpf_lookup_elem(fd, &next_key, &value); + bpf_lookup_elem(fd, &next_key, values); + value = 0; + for (i = 0; i < nr_cpus; i++) + value += values[i]; ind = next_key.index; data[ind] = value; if (value && ind > max_ind) diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c index bf337fbb0947..9974c3d7c18b 100644 --- a/samples/bpf/tracex3_kern.c +++ b/samples/bpf/tracex3_kern.c @@ -20,7 +20,7 @@ struct bpf_map_def SEC("maps") my_map = { /* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe * example will no longer be meaningful */ -SEC("kprobe/blk_mq_start_request") +SEC("kprobe/blk_start_request") int bpf_prog1(struct pt_regs *ctx) { long rq = PT_REGS_PARM1(ctx); @@ -42,13 +42,13 @@ static unsigned int log2l(unsigned long long n) #define SLOTS 100 struct bpf_map_def SEC("maps") lat_map = { - .type = BPF_MAP_TYPE_ARRAY, + .type = BPF_MAP_TYPE_PERCPU_ARRAY, .key_size = sizeof(u32), .value_size = sizeof(u64), .max_entries = SLOTS, }; -SEC("kprobe/blk_update_request") +SEC("kprobe/blk_account_io_completion") int bpf_prog2(struct pt_regs *ctx) { long rq = PT_REGS_PARM1(ctx); @@ -81,7 +81,7 @@ int bpf_prog2(struct pt_regs *ctx) value = bpf_map_lookup_elem(&lat_map, &index); if (value) - __sync_fetch_and_add((long *)value, 1); + *value += 1; return 0; } diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index 0aaa933ab938..48716f7f0d8b 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -20,11 +20,13 @@ static void clear_stats(int fd) { + unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + __u64 values[nr_cpus]; __u32 key; - __u64 value = 0; + memset(values, 0, sizeof(values)); for (key = 0; key < SLOTS; key++) - bpf_update_elem(fd, &key, &value, BPF_ANY); + bpf_update_elem(fd, &key, values, BPF_ANY); } const char *color[] = { @@ -75,15 +77,20 @@ static void print_banner(void) static void print_hist(int fd) { - __u32 key; - __u64 value; - __u64 cnt[SLOTS]; - __u64 max_cnt = 0; + unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); __u64 total_events = 0; + long values[nr_cpus]; + __u64 max_cnt = 0; + __u64 cnt[SLOTS]; + __u64 value; + __u32 key; + int i; for (key = 0; key < SLOTS; key++) { + bpf_lookup_elem(fd, &key, values); value = 0; - bpf_lookup_elem(fd, &key, &value); + for (i = 0; i < nr_cpus; i++) + value += values[i]; cnt[key] = value; total_events += value; if (value > max_cnt) diff --git a/tools/net/bpf_dbg.c b/tools/net/bpf_dbg.c index 9a287bec695a..4f254bcc4423 100644 --- a/tools/net/bpf_dbg.c +++ b/tools/net/bpf_dbg.c @@ -129,16 +129,16 @@ struct bpf_regs { }; static struct sock_filter bpf_image[BPF_MAXINSNS + 1]; -static unsigned int bpf_prog_len = 0; +static unsigned int bpf_prog_len; static int bpf_breakpoints[64]; static struct bpf_regs bpf_regs[BPF_MAXINSNS + 1]; static struct bpf_regs bpf_curr; -static unsigned int bpf_regs_len = 0; +static unsigned int bpf_regs_len; static int pcap_fd = -1; -static unsigned int pcap_packet = 0; -static size_t pcap_map_size = 0; +static unsigned int pcap_packet; +static size_t pcap_map_size; static char *pcap_ptr_va_start, *pcap_ptr_va_curr; static const char * const op_table[] = { @@ -1172,7 +1172,7 @@ static int cmd_breakpoint(char *subcmd) static int cmd_run(char *num) { - static uint32_t pass = 0, fail = 0; + static uint32_t pass, fail; bool has_limit = true; int pkts = 0, i = 0; diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index 7cc72a336645..bd83149e7be0 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -23,6 +23,9 @@ #include <stdio.h> #include <stdint.h> #include <stdlib.h> +#include <string.h> + +#include <linux/filter.h> #include "bpf_exp.yacc.h" @@ -79,22 +82,71 @@ extern void yyerror(const char *str); "txa" { return OP_TXA; } "#"?("len") { return K_PKT_LEN; } -"#"?("proto") { return K_PROTO; } -"#"?("type") { return K_TYPE; } -"#"?("poff") { return K_POFF; } -"#"?("ifidx") { return K_IFIDX; } -"#"?("nla") { return K_NLATTR; } -"#"?("nlan") { return K_NLATTR_NEST; } -"#"?("mark") { return K_MARK; } -"#"?("queue") { return K_QUEUE; } -"#"?("hatype") { return K_HATYPE; } -"#"?("rxhash") { return K_RXHASH; } -"#"?("cpu") { return K_CPU; } -"#"?("vlan_tci") { return K_VLAN_TCI; } -"#"?("vlan_pr") { return K_VLAN_AVAIL; } -"#"?("vlan_avail") { return K_VLAN_AVAIL; } -"#"?("vlan_tpid") { return K_VLAN_TPID; } -"#"?("rand") { return K_RAND; } + +"#"?("proto") { + yylval.number = SKF_AD_PROTOCOL; + return extension; + } +"#"?("type") { + yylval.number = SKF_AD_PKTTYPE; + return extension; + } +"#"?("poff") { + yylval.number = SKF_AD_PAY_OFFSET; + return extension; + } +"#"?("ifidx") { + yylval.number = SKF_AD_IFINDEX; + return extension; + } +"#"?("nla") { + yylval.number = SKF_AD_NLATTR; + return extension; + } +"#"?("nlan") { + yylval.number = SKF_AD_NLATTR_NEST; + return extension; + } +"#"?("mark") { + yylval.number = SKF_AD_MARK; + return extension; + } +"#"?("queue") { + yylval.number = SKF_AD_QUEUE; + return extension; + } +"#"?("hatype") { + yylval.number = SKF_AD_HATYPE; + return extension; + } +"#"?("rxhash") { + yylval.number = SKF_AD_RXHASH; + return extension; + } +"#"?("cpu") { + yylval.number = SKF_AD_CPU; + return extension; + } +"#"?("vlan_tci") { + yylval.number = SKF_AD_VLAN_TAG; + return extension; + } +"#"?("vlan_pr") { + yylval.number = SKF_AD_VLAN_TAG_PRESENT; + return extension; + } +"#"?("vlan_avail") { + yylval.number = SKF_AD_VLAN_TAG_PRESENT; + return extension; + } +"#"?("vlan_tpid") { + yylval.number = SKF_AD_VLAN_TPID; + return extension; + } +"#"?("rand") { + yylval.number = SKF_AD_RANDOM; + return extension; + } ":" { return ':'; } "," { return ','; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index e24eea1b0db5..56ba1de50784 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -35,6 +35,7 @@ enum jmp_type { JTL, JFL, JKL }; extern FILE *yyin; +extern int yylineno; extern int yylex(void); extern void yyerror(const char *str); @@ -55,14 +56,14 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_RET OP_TAX OP_TXA OP_LDXB OP_MOD OP_NEG OP_JNEQ OP_JLT OP_JLE OP_LDI %token OP_LDXI -%token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLAN_TCI K_VLAN_AVAIL K_VLAN_TPID K_POFF K_RAND +%token K_PKT_LEN %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' -%token number label +%token extension number label %type <label> label +%type <number> extension %type <number> number %% @@ -125,51 +126,9 @@ ldb bpf_set_curr_instr(BPF_LD | BPF_B | BPF_IND, 0, 0, $6); } | OP_LDB '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, $3); } - | OP_LDB K_PROTO { + | OP_LDB extension { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PROTOCOL); } - | OP_LDB K_TYPE { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PKTTYPE); } - | OP_LDB K_IFIDX { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_IFINDEX); } - | OP_LDB K_NLATTR { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR); } - | OP_LDB K_NLATTR_NEST { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR_NEST); } - | OP_LDB K_MARK { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_MARK); } - | OP_LDB K_QUEUE { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_QUEUE); } - | OP_LDB K_HATYPE { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_HATYPE); } - | OP_LDB K_RXHASH { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RXHASH); } - | OP_LDB K_CPU { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_CPU); } - | OP_LDB K_VLAN_TCI { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LDB K_VLAN_AVAIL { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } - | OP_LDB K_POFF { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PAY_OFFSET); } - | OP_LDB K_RAND { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RANDOM); } - | OP_LDB K_VLAN_TPID { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TPID); } + SKF_AD_OFF + $2); } ; ldh @@ -179,51 +138,9 @@ ldh bpf_set_curr_instr(BPF_LD | BPF_H | BPF_IND, 0, 0, $6); } | OP_LDH '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, $3); } - | OP_LDH K_PROTO { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PROTOCOL); } - | OP_LDH K_TYPE { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PKTTYPE); } - | OP_LDH K_IFIDX { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_IFINDEX); } - | OP_LDH K_NLATTR { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR); } - | OP_LDH K_NLATTR_NEST { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR_NEST); } - | OP_LDH K_MARK { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_MARK); } - | OP_LDH K_QUEUE { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_QUEUE); } - | OP_LDH K_HATYPE { + | OP_LDH extension { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_HATYPE); } - | OP_LDH K_RXHASH { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RXHASH); } - | OP_LDH K_CPU { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_CPU); } - | OP_LDH K_VLAN_TCI { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LDH K_VLAN_AVAIL { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } - | OP_LDH K_POFF { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PAY_OFFSET); } - | OP_LDH K_RAND { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RANDOM); } - | OP_LDH K_VLAN_TPID { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TPID); } + SKF_AD_OFF + $2); } ; ldi @@ -238,51 +155,9 @@ ld bpf_set_curr_instr(BPF_LD | BPF_IMM, 0, 0, $3); } | OP_LD K_PKT_LEN { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_LEN, 0, 0, 0); } - | OP_LD K_PROTO { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PROTOCOL); } - | OP_LD K_TYPE { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PKTTYPE); } - | OP_LD K_IFIDX { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_IFINDEX); } - | OP_LD K_NLATTR { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR); } - | OP_LD K_NLATTR_NEST { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR_NEST); } - | OP_LD K_MARK { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_MARK); } - | OP_LD K_QUEUE { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_QUEUE); } - | OP_LD K_HATYPE { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_HATYPE); } - | OP_LD K_RXHASH { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RXHASH); } - | OP_LD K_CPU { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_CPU); } - | OP_LD K_VLAN_TCI { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LD K_VLAN_AVAIL { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } - | OP_LD K_POFF { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PAY_OFFSET); } - | OP_LD K_RAND { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RANDOM); } - | OP_LD K_VLAN_TPID { + | OP_LD extension { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TPID); } + SKF_AD_OFF + $2); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { @@ -776,5 +651,6 @@ void bpf_asm_compile(FILE *fp, bool cstyle) void yyerror(const char *str) { + fprintf(stderr, "error: %s at line %d\n", str, yylineno); exit(1); } diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile index 47147b968514..08360060ab14 100644 --- a/tools/testing/selftests/lib/Makefile +++ b/tools/testing/selftests/lib/Makefile @@ -3,6 +3,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := printf.sh +TEST_PROGS := printf.sh bitmap.sh include ../lib.mk diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh new file mode 100755 index 000000000000..2da187b6ddad --- /dev/null +++ b/tools/testing/selftests/lib/bitmap.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Runs bitmap infrastructure tests using test_bitmap kernel module + +if /sbin/modprobe -q test_bitmap; then + /sbin/modprobe -q -r test_bitmap + echo "bitmap: ok" +else + echo "bitmap: [FAIL]" + exit 1 +fi diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 6fb23366b258..69bb3fc38fb2 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -2,3 +2,4 @@ socket psock_fanout psock_tpacket reuseport_bpf +reuseport_bpf_cpu diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 41449b5ad0a9..c658792d47b4 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -4,7 +4,7 @@ CFLAGS = -Wall -O2 -g CFLAGS += -I../../../../usr/include/ -NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf +NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu all: $(NET_PROGS) %: %.c diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index bec1b5dd2530..96ba386b1b7b 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -9,10 +9,12 @@ #include <errno.h> #include <error.h> +#include <fcntl.h> #include <linux/bpf.h> #include <linux/filter.h> #include <linux/unistd.h> #include <netinet/in.h> +#include <netinet/tcp.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -169,9 +171,15 @@ static void build_recv_group(const struct test_params p, int fd[], uint16_t mod, if (bind(fd[i], addr, sockaddr_size())) error(1, errno, "failed to bind recv socket %d", i); - if (p.protocol == SOCK_STREAM) + if (p.protocol == SOCK_STREAM) { + opt = 4; + if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt, + sizeof(opt))) + error(1, errno, + "failed to set TCP_FASTOPEN on %d", i); if (listen(fd[i], p.recv_socks * 10)) error(1, errno, "failed to listen on socket"); + } } free(addr); } @@ -189,10 +197,8 @@ static void send_from(struct test_params p, uint16_t sport, char *buf, if (bind(fd, saddr, sockaddr_size())) error(1, errno, "failed to bind send socket"); - if (connect(fd, daddr, sockaddr_size())) - error(1, errno, "failed to connect"); - if (send(fd, buf, len, 0) < 0) + if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0) error(1, errno, "failed to send message"); close(fd); @@ -260,7 +266,7 @@ static void test_recv_order(const struct test_params p, int fd[], int mod) } } -static void test_reuseport_ebpf(const struct test_params p) +static void test_reuseport_ebpf(struct test_params p) { int i, fd[p.recv_socks]; @@ -268,6 +274,7 @@ static void test_reuseport_ebpf(const struct test_params p) build_recv_group(p, fd, p.recv_socks, attach_ebpf); test_recv_order(p, fd, p.recv_socks); + p.send_port_min += p.recv_socks * 2; fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2); attach_ebpf(fd[0], p.recv_socks / 2); test_recv_order(p, fd, p.recv_socks / 2); @@ -276,7 +283,7 @@ static void test_reuseport_ebpf(const struct test_params p) close(fd[i]); } -static void test_reuseport_cbpf(const struct test_params p) +static void test_reuseport_cbpf(struct test_params p) { int i, fd[p.recv_socks]; @@ -284,6 +291,7 @@ static void test_reuseport_cbpf(const struct test_params p) build_recv_group(p, fd, p.recv_socks, attach_cbpf); test_recv_order(p, fd, p.recv_socks); + p.send_port_min += p.recv_socks * 2; fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2); attach_cbpf(fd[0], p.recv_socks / 2); test_recv_order(p, fd, p.recv_socks / 2); @@ -377,7 +385,7 @@ static void test_filter_no_reuseport(const struct test_params p) static void test_filter_without_bind(void) { - int fd1, fd2; + int fd1, fd2, opt = 1; fprintf(stderr, "Testing filter add without bind...\n"); fd1 = socket(AF_INET, SOCK_DGRAM, 0); @@ -386,6 +394,10 @@ static void test_filter_without_bind(void) fd2 = socket(AF_INET, SOCK_DGRAM, 0); if (fd2 < 0) error(1, errno, "failed to create socket 2"); + if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT on socket 1"); + if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT on socket 2"); attach_ebpf(fd1, 10); attach_cbpf(fd2, 10); @@ -394,6 +406,32 @@ static void test_filter_without_bind(void) close(fd2); } +void enable_fastopen(void) +{ + int fd = open("/proc/sys/net/ipv4/tcp_fastopen", 0); + int rw_mask = 3; /* bit 1: client side; bit-2 server side */ + int val, size; + char buf[16]; + + if (fd < 0) + error(1, errno, "Unable to open tcp_fastopen sysctl"); + if (read(fd, buf, sizeof(buf)) <= 0) + error(1, errno, "Unable to read tcp_fastopen sysctl"); + val = atoi(buf); + close(fd); + + if ((val & rw_mask) != rw_mask) { + fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR); + if (fd < 0) + error(1, errno, + "Unable to open tcp_fastopen sysctl for writing"); + val |= rw_mask; + size = snprintf(buf, 16, "%d", val); + if (write(fd, buf, size) <= 0) + error(1, errno, "Unable to write tcp_fastopen sysctl"); + close(fd); + } +} int main(void) { @@ -506,6 +544,71 @@ int main(void) .recv_port = 8007, .send_port_min = 9100}); + /* TCP fastopen is required for the TCP tests */ + enable_fastopen(); + fprintf(stderr, "---- IPv4 TCP ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8008, + .send_port_min = 9120}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8009, + .send_port_min = 9160}); + test_extra_filter((struct test_params) { + .recv_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_port = 8010}); + test_filter_no_reuseport((struct test_params) { + .recv_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_port = 8011}); + + fprintf(stderr, "---- IPv6 TCP ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8012, + .send_port_min = 9200}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8013, + .send_port_min = 9240}); + test_extra_filter((struct test_params) { + .recv_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_port = 8014}); + test_filter_no_reuseport((struct test_params) { + .recv_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_port = 8015}); + + fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8016, + .send_port_min = 9320}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8017, + .send_port_min = 9360}); test_filter_without_bind(); diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c new file mode 100644 index 000000000000..b23d6f54de7b --- /dev/null +++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c @@ -0,0 +1,258 @@ +/* + * Test functionality of BPF filters with SO_REUSEPORT. This program creates + * an SO_REUSEPORT receiver group containing one socket per CPU core. It then + * creates a BPF program that will select a socket from this group based + * on the core id that receives the packet. The sending code artificially + * moves itself to run on different core ids and sends one message from + * each core. Since these packets are delivered over loopback, they should + * arrive on the same core that sent them. The receiving code then ensures + * that the packet was received on the socket for the corresponding core id. + * This entire process is done for several different core id permutations + * and for each IPv4/IPv6 and TCP/UDP combination. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> + +static const int PORT = 8888; + +static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) +{ + struct sockaddr_storage addr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + size_t i; + int opt; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)&addr; + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + addr4->sin_port = htons(PORT); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)&addr; + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_any; + addr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < len; ++i) { + rcv_fd[i] = socket(family, proto, 0); + if (rcv_fd[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr))) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static void attach_bpf(int fd) +{ + struct sock_filter code[] = { + /* A = raw_smp_processor_id() */ + { BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU }, + /* return A */ + { BPF_RET | BPF_A, 0, 0, 0 }, + }; + struct sock_fprog p = { + .len = 2, + .filter = code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p))) + error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF"); +} + +static void send_from_cpu(int cpu_id, int family, int proto) +{ + struct sockaddr_storage saddr, daddr; + struct sockaddr_in *saddr4, *daddr4; + struct sockaddr_in6 *saddr6, *daddr6; + cpu_set_t cpu_set; + int fd; + + switch (family) { + case AF_INET: + saddr4 = (struct sockaddr_in *)&saddr; + saddr4->sin_family = AF_INET; + saddr4->sin_addr.s_addr = htonl(INADDR_ANY); + saddr4->sin_port = 0; + + daddr4 = (struct sockaddr_in *)&daddr; + daddr4->sin_family = AF_INET; + daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + daddr4->sin_port = htons(PORT); + break; + case AF_INET6: + saddr6 = (struct sockaddr_in6 *)&saddr; + saddr6->sin6_family = AF_INET6; + saddr6->sin6_addr = in6addr_any; + saddr6->sin6_port = 0; + + daddr6 = (struct sockaddr_in6 *)&daddr; + daddr6->sin6_family = AF_INET6; + daddr6->sin6_addr = in6addr_loopback; + daddr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + memset(&cpu_set, 0, sizeof(cpu_set)); + CPU_SET(cpu_id, &cpu_set); + if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) + error(1, errno, "failed to pin to cpu"); + + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr))) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr))) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + close(fd); +} + +static +void receive_on_cpu(int *rcv_fd, int len, int epfd, int cpu_id, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + for (i = 0; i < len; ++i) + if (ev.data.fd == rcv_fd[i]) + break; + if (i == len) + error(1, 0, "failed to find socket"); + fprintf(stderr, "send cpu %d, receive socket %d\n", cpu_id, i); + if (cpu_id != i) + error(1, 0, "cpu id/receive socket mismatch"); +} + +static void test(int *rcv_fd, int len, int family, int proto) +{ + struct epoll_event ev; + int epfd, cpu; + + build_rcv_group(rcv_fd, len, family, proto); + attach_bpf(rcv_fd[0]); + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + for (cpu = 0; cpu < len; ++cpu) { + ev.events = EPOLLIN; + ev.data.fd = rcv_fd[cpu]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[cpu], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + /* Forward iterate */ + for (cpu = 0; cpu < len; ++cpu) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + /* Reverse iterate */ + for (cpu = len - 1; cpu >= 0; --cpu) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + /* Even cores */ + for (cpu = 0; cpu < len; cpu += 2) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + /* Odd cores */ + for (cpu = 1; cpu < len; cpu += 2) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + close(epfd); + for (cpu = 0; cpu < len; ++cpu) + close(rcv_fd[cpu]); +} + +int main(void) +{ + int *rcv_fd, cpus; + + cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (cpus <= 0) + error(1, errno, "failed counting cpus"); + + rcv_fd = calloc(cpus, sizeof(int)); + if (!rcv_fd) + error(1, 0, "failed to allocate array"); + + fprintf(stderr, "---- IPv4 UDP ----\n"); + test(rcv_fd, cpus, AF_INET, SOCK_DGRAM); + + fprintf(stderr, "---- IPv6 UDP ----\n"); + test(rcv_fd, cpus, AF_INET6, SOCK_DGRAM); + + fprintf(stderr, "---- IPv4 TCP ----\n"); + test(rcv_fd, cpus, AF_INET, SOCK_STREAM); + + fprintf(stderr, "---- IPv6 TCP ----\n"); + test(rcv_fd, cpus, AF_INET6, SOCK_STREAM); + + free(rcv_fd); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} |