diff options
331 files changed, 6500 insertions, 2230 deletions
@@ -15,6 +15,7 @@ Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com> Alan Cox <alan@lxorguk.ukuu.org.uk> Alan Cox <root@hraefn.swansea.linux.org.uk> Aleksey Gorelov <aleksey_gorelov@phoenix.com> +Aleksandar Markovic <aleksandar.markovic@mips.com> <aleksandar.markovic@imgtec.com> Al Viro <viro@ftp.linux.org.uk> Al Viro <viro@zenIV.linux.org.uk> Andreas Herrmann <aherrman@de.ibm.com> @@ -101,6 +102,7 @@ Leonid I Ananiev <leonid.i.ananiev@intel.com> Linas Vepstas <linas@austin.ibm.com> Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de> Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch> +Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com> Mark Brown <broonie@sirena.org.uk> Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com> Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com> @@ -119,6 +121,7 @@ Matt Redfearn <matt.redfearn@mips.com> <matt.redfearn@imgtec.com> Mayuresh Janorkar <mayur@ti.com> Michael Buesch <m@bues.ch> Michel Dänzer <michel@tungstengraphics.com> +Miodrag Dinic <miodrag.dinic@mips.com> <miodrag.dinic@imgtec.com> Mitesh shah <mshah@teja.com> Mohit Kumar <mohit.kumar@st.com> <mohit.kumar.dhaka@gmail.com> Morten Welinder <terra@gnome.org> diff --git a/Documentation/devicetree/bindings/net/dsa/lan9303.txt b/Documentation/devicetree/bindings/net/dsa/lan9303.txt index 4448d063ddf6..464d6bf87605 100644 --- a/Documentation/devicetree/bindings/net/dsa/lan9303.txt +++ b/Documentation/devicetree/bindings/net/dsa/lan9303.txt @@ -52,7 +52,7 @@ I2C managed mode: port@1 { /* external port 1 */ reg = <1>; - label = "lan1; + label = "lan1"; }; port@2 { /* external port 2 */ @@ -89,7 +89,7 @@ MDIO managed mode: port@1 { /* external port 1 */ reg = <1>; - label = "lan1; + label = "lan1"; }; port@2 { /* external port 2 */ diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt index 6f55bdd52f8a..f0dc94409107 100644 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ b/Documentation/devicetree/bindings/net/fsl-fec.txt @@ -34,6 +34,19 @@ Optional properties: - fsl,err006687-workaround-present: If present indicates that the system has the hardware workaround for ERR006687 applied and does not need a software workaround. + -interrupt-names: names of the interrupts listed in interrupts property in + the same order. The defaults if not specified are + __Number of interrupts__ __Default__ + 1 "int0" + 2 "int0", "pps" + 3 "int0", "int1", "int2" + 4 "int0", "int1", "int2", "pps" + The order may be changed as long as they correspond to the interrupts + property. Currently, only i.mx7 uses "int1" and "int2". They correspond to + tx/rx queues 1 and 2. "int0" will be used for queue 0 and ENET_MII interrupts. + For imx6sx, "int0" handles all 3 queues and ENET_MII. "pps" is for the pulse + per second interrupt associated with 1588 precision time protocol(PTP). + Optional subnodes: - mdio : specifies the mdio bus in the FEC, used as a container for phy nodes diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index c3a7be6615c5..3a28a5d8857d 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -12,7 +12,7 @@ Required properties: Valid interrupt names are: - "macirq" (combined signal for various interrupt events) - "eth_wake_irq" (the interrupt to manage the remote wake-up packet detection) - - "eth_lpi" (the interrupt that occurs when Tx or Rx enters/exits LPI state) + - "eth_lpi" (the interrupt that occurs when Rx exits the LPI state) - phy-mode: See ethernet.txt file in the same directory. - snps,reset-gpio gpio number for phy reset. - snps,reset-active-low boolean flag to indicate if phy reset is active low. diff --git a/Documentation/networking/ila.txt b/Documentation/networking/ila.txt new file mode 100644 index 000000000000..78df879abd26 --- /dev/null +++ b/Documentation/networking/ila.txt @@ -0,0 +1,285 @@ +Identifier Locator Addressing (ILA) + + +Introduction +============ + +Identifier-locator addressing (ILA) is a technique used with IPv6 that +differentiates between location and identity of a network node. Part of an +address expresses the immutable identity of the node, and another part +indicates the location of the node which can be dynamic. Identifier-locator +addressing can be used to efficiently implement overlay networks for +network virtualization as well as solutions for use cases in mobility. + +ILA can be thought of as means to implement an overlay network without +encapsulation. This is accomplished by performing network address +translation on destination addresses as a packet traverses a network. To +the network, an ILA translated packet appears to be no different than any +other IPv6 packet. For instance, if the transport protocol is TCP then an +ILA translated packet looks like just another TCP/IPv6 packet. The +advantage of this is that ILA is transparent to the network so that +optimizations in the network, such as ECMP, RSS, GRO, GSO, etc., just work. + +The ILA protocol is described in Internet-Draft draft-herbert-intarea-ila. + + +ILA terminology +=============== + + - Identifier A number that identifies an addressable node in the network + independent of its location. ILA identifiers are sixty-four + bit values. + + - Locator A network prefix that routes to a physical host. Locators + provide the topological location of an addressed node. ILA + locators are sixty-four bit prefixes. + + - ILA mapping + A mapping of an ILA identifier to a locator (or to a + locator and meta data). An ILA domain maintains a database + that contains mappings for all destinations in the domain. + + - SIR address + An IPv6 address composed of a SIR prefix (upper sixty- + four bits) and an identifier (lower sixty-four bits). + SIR addresses are visible to applications and provide a + means for them to address nodes independent of their + location. + + - ILA address + An IPv6 address composed of a locator (upper sixty-four + bits) and an identifier (low order sixty-four bits). ILA + addresses are never visible to an application. + + - ILA host An end host that is capable of performing ILA translations + on transmit or receive. + + - ILA router A network node that performs ILA translation and forwarding + of translated packets. + + - ILA forwarding cache + A type of ILA router that only maintains a working set + cache of mappings. + + - ILA node A network node capable of performing ILA translations. This + can be an ILA router, ILA forwarding cache, or ILA host. + + +Operation +========= + +There are two fundamental operations with ILA: + + - Translate a SIR address to an ILA address. This is performed on ingress + to an ILA overlay. + + - Translate an ILA address to a SIR address. This is performed on egress + from the ILA overlay. + +ILA can be deployed either on end hosts or intermediate devices in the +network; these are provided by "ILA hosts" and "ILA routers" respectively. +Configuration and datapath for these two points of deployment is somewhat +different. + +The diagram below illustrates the flow of packets through ILA as well +as showing ILA hosts and routers. + + +--------+ +--------+ + | Host A +-+ +--->| Host B | + | | | (2) ILA (') | | + +--------+ | ...addressed.... ( ) +--------+ + V +---+--+ . packet . +---+--+ (_) + (1) SIR | | ILA |----->-------->---->| ILA | | (3) SIR + addressed +->|router| . . |router|->-+ addressed + packet +---+--+ . IPv6 . +---+--+ packet + / . Network . + / . . +--+-++--------+ + +--------+ / . . |ILA || Host | + | Host +--+ . .- -|host|| | + | | . . +--+-++--------+ + +--------+ ................ + + +Transport checksum handling +=========================== + +When an address is translated by ILA, an encapsulated transport checksum +that includes the translated address in a pseudo header may be rendered +incorrect on the wire. This is a problem for intermediate devices, +including checksum offload in NICs, that process the checksum. There are +three options to deal with this: + +- no action Allow the checksum to be incorrect on the wire. Before + a receiver verifies a checksum the ILA to SIR address + translation must be done. + +- adjust transport checksum + When ILA translation is performed the packet is parsed + and if a transport layer checksum is found then it is + adjusted to reflect the correct checksum per the + translated address. + +- checksum neutral mapping + When an address is translated the difference can be offset + elsewhere in a part of the packet that is covered by the + the checksum. The low order sixteen bits of the identifier + are used. This method is preferred since it doesn't require + parsing a packet beyond the IP header and in most cases the + adjustment can be precomputed and saved with the mapping. + +Note that the checksum neutral adjustment affects the low order sixteen +bits of the identifier. When ILA to SIR address translation is done on +egress the low order bits are restored to the original value which +restores the identifier as it was originally sent. + + +Identifier types +================ + +ILA defines different types of identifiers for different use cases. + +The defined types are: + + 0: interface identifier + + 1: locally unique identifier + + 2: virtual networking identifier for IPv4 address + + 3: virtual networking identifier for IPv6 unicast address + + 4: virtual networking identifier for IPv6 multicast address + + 5: non-local address identifier + +In the current implementation of kernel ILA only locally unique identifiers +(LUID) are supported. LUID allows for a generic, unformatted 64 bit +identifier. + + +Identifier formats +================== + +Kernel ILA supports two optional fields in an identifier for formatting: +"C-bit" and "identifier type". The presence of these fields is determined +by configuration as demonstrated below. + +If the identifier type is present it occupies the three highest order +bits of an identifier. The possible values are given in the above list. + +If the C-bit is present, this is used as an indication that checksum +neutral mapping has been done. The C-bit can only be set in an +ILA address, never a SIR address. + +In the simplest format the identifier types, C-bit, and checksum +adjustment value are not present so an identifier is considered an +unstructured sixty-four bit value. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Identifier | + + + + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +The checksum neutral adjustment may be configured to always be +present using neutral-map-auto. In this case there is no C-bit, but the +checksum adjustment is in the low order 16 bits. The identifier is +still sixty-four bits. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Identifier | + | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | Checksum-neutral adjustment | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +The C-bit may used to explicitly indicate that checksum neutral +mapping has been applied to an ILA address. The format is: + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | |C| Identifier | + | +-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | Checksum-neutral adjustment | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +The identifier type field may be present to indicate the identifier +type. If it is not present then the type is inferred based on mapping +configuration. The checksum neutral adjustment may automatically +used with the identifier type as illustrated below. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type| Identifier | + +-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | Checksum-neutral adjustment | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +If the identifier type and the C-bit can be present simultaneously so +the identifier format would be: + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type|C| Identifier | + +-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | Checksum-neutral adjustment | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + +Configuration +============= + +There are two methods to configure ILA mappings. One is by using LWT routes +and the other is ila_xlat (called from NFHOOK PREROUTING hook). ila_xlat +is intended to be used in the receive path for ILA hosts . + +An ILA router has also been implemented in XDP. Description of that is +outside the scope of this document. + +The usage of for ILA LWT routes is: + +ip route add DEST/128 encap ila LOC csum-mode MODE ident-type TYPE via ADDR + +Destination (DEST) can either be a SIR address (for an ILA host or ingress +ILA router) or an ILA address (egress ILA router). LOC is the sixty-four +bit locator (with format W:X:Y:Z) that overwrites the upper sixty-four +bits of the destination address. Checksum MODE is one of "no-action", +"adj-transport", "neutral-map", and "neutral-map-auto". If neutral-map is +set then the C-bit will be present. Identifier TYPE one of "luid" or +"use-format." In the case of use-format, the identifier type field is +present and the effective type is taken from that. + +The usage of ila_xlat is: + +ip ila add loc_match MATCH loc LOC csum-mode MODE ident-type TYPE + +MATCH indicates the incoming locator that must be matched to apply +a the translaiton. LOC is the locator that overwrites the upper +sixty-four bits of the destination address. MODE and TYPE have the +same meanings as described above. + + +Some examples +============= + +# Configure an ILA route that uses checksum neutral mapping as well +# as type field. Note that the type field is set in the SIR address +# (the 2000 implies type is 1 which is LUID). +ip route add 3333:0:0:1:2000:0:1:87/128 encap ila 2001:0:87:0 \ + csum-mode neutral-map ident-type use-format + +# Configure an ILA LWT route that uses auto checksum neutral mapping +# (no C-bit) and configure identifier type to be LUID so that the +# identifier type field will not be present. +ip route add 3333:0:0:1:2000:0:2:87/128 encap ila 2001:0:87:1 \ + csum-mode neutral-map-auto ident-type luid + +ila_xlat configuration + +# Configure an ILA to SIR mapping that matches a locator and overwrites +# it with a SIR address (3333:0:0:1 in this example). The C-bit and +# identifier field are used. +ip ila add loc_match 2001:0:119:0 loc 3333:0:0:1 \ + csum-mode neutral-map-auto ident-type use-format + +# Configure an ILA to SIR mapping where checksum neutral is automatically +# set without the C-bit and the identifier type is configured to be LUID +# so that the identifier type field is not present. +ip ila add loc_match 2001:0:119:0 loc 3333:0:0:1 \ + csum-mode neutral-map-auto ident-type use-format diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 54410a1d4065..46c7e1085efc 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -289,8 +289,7 @@ tcp_ecn_fallback - BOOLEAN Default: 1 (fallback enabled) tcp_fack - BOOLEAN - Enable FACK congestion avoidance and fast retransmission. - The value is not used, if tcp_sack is not enabled. + This is a legacy option, it has no effect anymore. tcp_fin_timeout - INTEGER The length of time an orphaned (no longer referenced by any @@ -1732,6 +1731,15 @@ ndisc_notify - BOOLEAN 1 - Generate unsolicited neighbour advertisements when device is brought up or hardware address changes. +ndisc_tclass - INTEGER + The IPv6 Traffic Class to use by default when sending IPv6 Neighbor + Discovery (Router Solicitation, Router Advertisement, Neighbor + Solicitation, Neighbor Advertisement, Redirect) messages. + These 8 bits can be interpreted as 6 high order bits holding the DSCP + value and 2 low order bits representing ECN (which you probably want + to leave cleared). + 0 - (default) + mldv1_unsolicited_report_interval - INTEGER The interval in milliseconds in which the next unsolicited MLDv1 report retransmit will take place. diff --git a/Documentation/process/kernel-enforcement-statement.rst b/Documentation/process/kernel-enforcement-statement.rst index 1e23d4227337..b3170671a1df 100644 --- a/Documentation/process/kernel-enforcement-statement.rst +++ b/Documentation/process/kernel-enforcement-statement.rst @@ -50,8 +50,9 @@ be stronger. Except where noted below, we speak only for ourselves, and not for any company we might work for today, have in the past, or will in the future. + - Laura Abbott - Bjorn Andersson (Linaro) - - Andrea Arcangeli (Red Hat) + - Andrea Arcangeli - Neil Armstrong - Jens Axboe - Pablo Neira Ayuso @@ -60,15 +61,17 @@ we might work for today, have in the past, or will in the future. - Felipe Balbi - Arnd Bergmann - Ard Biesheuvel - - Paolo Bonzini (Red Hat) + - Tim Bird + - Paolo Bonzini - Christian Borntraeger - Mark Brown (Linaro) - Paul Burton - Javier Martinez Canillas - Rob Clark - Jonathan Corbet + - Dennis Dalessandro - Vivien Didelot (Savoir-faire Linux) - - Hans de Goede (Red Hat) + - Hans de Goede - Mel Gorman (SUSE) - Sven Eckelmann - Alex Elder (Linaro) @@ -79,6 +82,7 @@ we might work for today, have in the past, or will in the future. - Juergen Gross - Shawn Guo - Ulf Hansson + - Stephen Hemminger (Microsoft) - Tejun Heo - Rob Herring - Masami Hiramatsu @@ -104,18 +108,21 @@ we might work for today, have in the past, or will in the future. - Viresh Kumar - Aneesh Kumar K.V - Julia Lawall - - Doug Ledford (Red Hat) + - Doug Ledford - Chuck Lever (Oracle) - Daniel Lezcano - Shaohua Li - - Xin Long (Red Hat) + - Xin Long - Tony Luck + - Catalin Marinas (Arm Ltd) - Mike Marshall - Chris Mason - Paul E. McKenney - David S. Miller - Ingo Molnar - Kuninori Morimoto + - Trond Myklebust + - Martin K. Petersen (Oracle) - Borislav Petkov - Jiri Pirko - Josh Poimboeuf @@ -124,18 +131,20 @@ we might work for today, have in the past, or will in the future. - Joerg Roedel - Leon Romanovsky - Steven Rostedt (VMware) - - Ivan Safonov + - Frank Rowand - Ivan Safonov - Anna Schumaker - Jes Sorensen - K.Y. Srinivasan - Heiko Stuebner - Jiri Kosina (SUSE) + - Willy Tarreau - Dmitry Torokhov - Linus Torvalds - Thierry Reding - Rik van Riel - Geert Uytterhoeven (Glider bvba) + - Eduardo Valentin (Amazon.com) - Daniel Vetter - Linus Walleij - Richard Weinberger @@ -145,3 +154,4 @@ we might work for today, have in the past, or will in the future. - Masahiro Yamada - Wei Yongjun - Lv Zheng + - Marc Zyngier (Arm Ltd) diff --git a/MAINTAINERS b/MAINTAINERS index f048f85d741b..968e37507bf2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -873,7 +873,7 @@ F: drivers/android/ F: drivers/staging/android/ ANDROID GOLDFISH RTC DRIVER -M: Miodrag Dinic <miodrag.dinic@imgtec.com> +M: Miodrag Dinic <miodrag.dinic@mips.com> S: Supported F: Documentation/devicetree/bindings/rtc/google,goldfish-rtc.txt F: drivers/rtc/rtc-goldfish.c @@ -7751,6 +7751,11 @@ S: Maintained F: Documentation/scsi/53c700.txt F: drivers/scsi/53c700* +LEAKING_ADDRESSES +M: Tobin C. Harding <me@tobin.cc> +S: Maintained +F: scripts/leaking_addresses.pl + LED SUBSYSTEM M: Richard Purdie <rpurdie@rpsys.net> M: Jacek Anaszewski <jacek.anaszewski@gmail.com> @@ -9026,7 +9031,7 @@ F: drivers/*/*loongson1* F: drivers/*/*/*loongson1* MIPS RINT INSTRUCTION EMULATION -M: Aleksandar Markovic <aleksandar.markovic@imgtec.com> +M: Aleksandar Markovic <aleksandar.markovic@mips.com> L: linux-mips@linux-mips.org S: Supported F: arch/mips/math-emu/sp_rint.c @@ -10691,10 +10696,9 @@ S: Maintained F: drivers/pinctrl/spear/ PISTACHIO SOC SUPPORT -M: James Hartley <james.hartley@imgtec.com> -M: Ionela Voinescu <ionela.voinescu@imgtec.com> +M: James Hartley <james.hartley@sondrel.com> L: linux-mips@linux-mips.org -S: Maintained +S: Odd Fixes F: arch/mips/pistachio/ F: arch/mips/include/asm/mach-pistachio/ F: arch/mips/boot/dts/img/pistachio* @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Fearless Coyote # *DOCUMENTATION* diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 817e5cfef83a..36ae4454554c 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -44,10 +44,12 @@ endif ifeq ($(CONFIG_CPU_BIG_ENDIAN),y) KBUILD_CPPFLAGS += -mbig-endian +CHECKFLAGS += -D__ARMEB__ AS += -EB LD += -EB else KBUILD_CPPFLAGS += -mlittle-endian +CHECKFLAGS += -D__ARMEL__ AS += -EL LD += -EL endif diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index 7a4c59154361..7d06aa19c3e6 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -85,6 +85,15 @@ SECTIONS _edata = .; + /* + * The image_end section appears after any additional loadable sections + * that the linker may decide to insert in the binary image. Having + * this symbol allows further debug in the near future. + */ + .image_end (NOLOAD) : { + _edata_real = .; + } + _magic_sig = ZIMAGE_MAGIC(0x016f2818); _magic_start = ZIMAGE_MAGIC(_start); _magic_end = ZIMAGE_MAGIC(_edata); diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi index 7225c7ce9a8d..2cb1bcd30976 100644 --- a/arch/arm/boot/dts/armada-375.dtsi +++ b/arch/arm/boot/dts/armada-375.dtsi @@ -178,9 +178,9 @@ reg = <0x8000 0x1000>; cache-unified; cache-level = <2>; - arm,double-linefill-incr = <1>; + arm,double-linefill-incr = <0>; arm,double-linefill-wrap = <0>; - arm,double-linefill = <1>; + arm,double-linefill = <0>; prefetch-data = <1>; }; diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 4960722aab32..00ff549d4e39 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -143,9 +143,9 @@ reg = <0x8000 0x1000>; cache-unified; cache-level = <2>; - arm,double-linefill-incr = <1>; + arm,double-linefill-incr = <0>; arm,double-linefill-wrap = <0>; - arm,double-linefill = <1>; + arm,double-linefill = <0>; prefetch-data = <1>; }; diff --git a/arch/arm/boot/dts/armada-39x.dtsi b/arch/arm/boot/dts/armada-39x.dtsi index ea657071e278..5218bd2a248d 100644 --- a/arch/arm/boot/dts/armada-39x.dtsi +++ b/arch/arm/boot/dts/armada-39x.dtsi @@ -111,9 +111,9 @@ reg = <0x8000 0x1000>; cache-unified; cache-level = <2>; - arm,double-linefill-incr = <1>; + arm,double-linefill-incr = <0>; arm,double-linefill-wrap = <0>; - arm,double-linefill = <1>; + arm,double-linefill = <0>; prefetch-data = <1>; }; diff --git a/arch/arm/boot/dts/uniphier-ld4.dtsi b/arch/arm/boot/dts/uniphier-ld4.dtsi index 79183db5b386..93586faf950f 100644 --- a/arch/arm/boot/dts/uniphier-ld4.dtsi +++ b/arch/arm/boot/dts/uniphier-ld4.dtsi @@ -209,7 +209,8 @@ interrupts = <0 80 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>; - clocks = <&mio_clk 7>, <&mio_clk 8>, <&mio_clk 12>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 8>, + <&mio_clk 12>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 8>, <&mio_rst 12>; }; @@ -221,7 +222,8 @@ interrupts = <0 81 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>; - clocks = <&mio_clk 7>, <&mio_clk 9>, <&mio_clk 13>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 9>, + <&mio_clk 13>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 9>, <&mio_rst 13>; }; @@ -233,7 +235,8 @@ interrupts = <0 82 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb2>; - clocks = <&mio_clk 7>, <&mio_clk 10>, <&mio_clk 14>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 10>, + <&mio_clk 14>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 10>, <&mio_rst 14>; }; diff --git a/arch/arm/boot/dts/uniphier-pro4.dtsi b/arch/arm/boot/dts/uniphier-pro4.dtsi index b3dbbd9b6e39..2a9bd7f9f5db 100644 --- a/arch/arm/boot/dts/uniphier-pro4.dtsi +++ b/arch/arm/boot/dts/uniphier-pro4.dtsi @@ -241,7 +241,8 @@ interrupts = <0 80 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb2>; - clocks = <&mio_clk 7>, <&mio_clk 8>, <&mio_clk 12>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 8>, + <&mio_clk 12>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 8>, <&mio_rst 12>; }; @@ -253,7 +254,8 @@ interrupts = <0 81 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb3>; - clocks = <&mio_clk 7>, <&mio_clk 9>, <&mio_clk 13>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 9>, + <&mio_clk 13>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 9>, <&mio_rst 13>; }; diff --git a/arch/arm/boot/dts/uniphier-sld8.dtsi b/arch/arm/boot/dts/uniphier-sld8.dtsi index b08390332971..ebd0c3f63e7f 100644 --- a/arch/arm/boot/dts/uniphier-sld8.dtsi +++ b/arch/arm/boot/dts/uniphier-sld8.dtsi @@ -209,7 +209,8 @@ interrupts = <0 80 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>; - clocks = <&mio_clk 7>, <&mio_clk 8>, <&mio_clk 12>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 8>, + <&mio_clk 12>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 8>, <&mio_rst 12>; }; @@ -221,7 +222,8 @@ interrupts = <0 81 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>; - clocks = <&mio_clk 7>, <&mio_clk 9>, <&mio_clk 13>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 9>, + <&mio_clk 13>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 9>, <&mio_rst 13>; }; @@ -233,7 +235,8 @@ interrupts = <0 82 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb2>; - clocks = <&mio_clk 7>, <&mio_clk 10>, <&mio_clk 14>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 10>, + <&mio_clk 14>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 10>, <&mio_rst 14>; }; diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 721ab5ecfb9b..0f2c8a2a8131 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += simd.h generic-y += sizes.h generic-y += timex.h generic-y += trace_clock.h -generic-y += unaligned.h generated-y += mach-types.h generated-y += unistd-nr.h diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h new file mode 100644 index 000000000000..ab905ffcf193 --- /dev/null +++ b/arch/arm/include/asm/unaligned.h @@ -0,0 +1,27 @@ +#ifndef __ASM_ARM_UNALIGNED_H +#define __ASM_ARM_UNALIGNED_H + +/* + * We generally want to set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS on ARMv6+, + * but we don't want to use linux/unaligned/access_ok.h since that can lead + * to traps on unaligned stm/ldm or strd/ldrd. + */ +#include <asm/byteorder.h> + +#if defined(__LITTLE_ENDIAN) +# include <linux/unaligned/le_struct.h> +# include <linux/unaligned/be_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_le +# define put_unaligned __put_unaligned_le +#elif defined(__BIG_ENDIAN) +# include <linux/unaligned/be_struct.h> +# include <linux/unaligned/le_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_be +# define put_unaligned __put_unaligned_be +#else +# error need to define endianess +#endif + +#endif /* __ASM_ARM_UNALIGNED_H */ diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index 0064b86a2c87..30a13647c54c 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -227,7 +227,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) u32 return_offset = (is_thumb) ? 2 : 4; kvm_update_psr(vcpu, UND_MODE); - *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) - return_offset; + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; @@ -239,10 +239,8 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) */ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) { - unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset; - u32 return_offset = (is_thumb) ? 4 : 0; + u32 return_offset = (is_pabt) ? 4 : 8; bool is_lpae; kvm_update_psr(vcpu, ABT_MODE); diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 5fca24d52fe6..5638ce0c9524 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -3,7 +3,7 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # -ccflags-y += -fno-stack-protector +ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING KVM=../../../../virt/kvm diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi index ee4aff53a5f5..09c429cb6d61 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi @@ -299,7 +299,8 @@ interrupts = <0 243 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>; - clocks = <&mio_clk 7>, <&mio_clk 8>, <&mio_clk 12>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 8>, + <&mio_clk 12>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 8>, <&mio_rst 12>; }; @@ -311,7 +312,8 @@ interrupts = <0 244 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>; - clocks = <&mio_clk 7>, <&mio_clk 9>, <&mio_clk 13>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 9>, + <&mio_clk 13>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 9>, <&mio_rst 13>; }; @@ -323,7 +325,8 @@ interrupts = <0 245 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb2>; - clocks = <&mio_clk 7>, <&mio_clk 10>, <&mio_clk 14>; + clocks = <&sys_clk 8>, <&mio_clk 7>, <&mio_clk 10>, + <&mio_clk 14>; resets = <&sys_rst 8>, <&mio_rst 7>, <&mio_rst 10>, <&mio_rst 14>; }; diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 7c54d8fde855..f04400d494b7 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -3,7 +3,7 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # -ccflags-y += -fno-stack-protector +ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING KVM=../../../../virt/kvm diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index da6a8cfa54a0..3556715a774e 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -33,12 +33,26 @@ #define LOWER_EL_AArch64_VECTOR 0x400 #define LOWER_EL_AArch32_VECTOR 0x600 +/* + * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. + */ +static const u8 return_offsets[8][2] = { + [0] = { 0, 0 }, /* Reset, unused */ + [1] = { 4, 2 }, /* Undefined */ + [2] = { 0, 0 }, /* SVC, unused */ + [3] = { 4, 4 }, /* Prefetch abort */ + [4] = { 8, 8 }, /* Data abort */ + [5] = { 0, 0 }, /* HVC, unused */ + [6] = { 4, 4 }, /* IRQ, unused */ + [7] = { 4, 4 }, /* FIQ, unused */ +}; + static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { unsigned long cpsr; unsigned long new_spsr_value = *vcpu_cpsr(vcpu); bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT); - u32 return_offset = (is_thumb) ? 4 : 0; + u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); cpsr = mode | COMPAT_PSR_I_BIT; diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index c86a947f5368..a3d0211970e9 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -112,6 +112,8 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf) buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; } +#define acpi_unlazy_tlb(x) + #ifdef CONFIG_ACPI_NUMA extern cpumask_t early_cpu_possible_map; #define for_each_possible_early_cpu(cpu) \ diff --git a/arch/mips/generic/board-ni169445.its.S b/arch/mips/generic/board-ni169445.its.S index d12e12fe90be..e4cb4f95a8cc 100644 --- a/arch/mips/generic/board-ni169445.its.S +++ b/arch/mips/generic/board-ni169445.its.S @@ -1,4 +1,4 @@ -{ +/ { images { fdt@ni169445 { description = "NI 169445 device tree"; diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c index cf409ba358a1..5ba6fcc26fa7 100644 --- a/arch/mips/generic/init.c +++ b/arch/mips/generic/init.c @@ -20,7 +20,7 @@ #include <asm/fw/fw.h> #include <asm/irq_cpu.h> #include <asm/machine.h> -#include <asm/mips-cpc.h> +#include <asm/mips-cps.h> #include <asm/prom.h> #include <asm/smp-ops.h> #include <asm/time.h> diff --git a/arch/mips/generic/kexec.c b/arch/mips/generic/kexec.c index e9fb735299e3..1ca409f58929 100644 --- a/arch/mips/generic/kexec.c +++ b/arch/mips/generic/kexec.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2016 Imagination Technologies - * Author: Marcin Nowakowski <marcin.nowakowski@imgtec.com> + * Author: Marcin Nowakowski <marcin.nowakowski@mips.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index 3708b8ccc0b4..8bc5df49b0e1 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -142,8 +142,8 @@ GCR_ACCESSOR_RO(64, 0x000, config) GCR_ACCESSOR_RW(64, 0x008, base) #define CM_GCR_BASE_GCRBASE GENMASK_ULL(47, 15) #define CM_GCR_BASE_CMDEFTGT GENMASK(1, 0) -#define CM_GCR_BASE_CMDEFTGT_DISABLED 0 -#define CM_GCR_BASE_CMDEFTGT_MEM 1 +#define CM_GCR_BASE_CMDEFTGT_MEM 0 +#define CM_GCR_BASE_CMDEFTGT_RESERVED 1 #define CM_GCR_BASE_CMDEFTGT_IOCU0 2 #define CM_GCR_BASE_CMDEFTGT_IOCU1 3 diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index 5d3563c55e0c..2161357cc68f 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -199,6 +199,10 @@ sll k0, 3 /* extract cu0 bit */ .set noreorder bltz k0, 8f + move k0, sp + .if \docfi + .cfi_register sp, k0 + .endif #ifdef CONFIG_EVA /* * Flush interAptiv's Return Prediction Stack (RPS) by writing @@ -225,10 +229,6 @@ MTC0 k0, CP0_ENTRYHI #endif .set reorder - move k0, sp - .if \docfi - .cfi_register sp, k0 - .endif /* Called from user mode, new stack. */ get_saved_sp docfi=\docfi tosp=1 8: diff --git a/arch/mips/kernel/probes-common.h b/arch/mips/kernel/probes-common.h index dd08e41134b6..d2bf77b18822 100644 --- a/arch/mips/kernel/probes-common.h +++ b/arch/mips/kernel/probes-common.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2016 Imagination Technologies - * Author: Marcin Nowakowski <marcin.nowakowski@imgtec.com> + * Author: Marcin Nowakowski <marcin.nowakowski@mips.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index 05295a4909f1..a2322009cac3 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -19,7 +19,7 @@ #undef DEBUG #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <linux/smp.h> #include <linux/cpumask.h> #include <linux/interrupt.h> @@ -50,8 +50,8 @@ static void cmp_init_secondary(void) #ifdef CONFIG_MIPS_MT_SMP if (cpu_has_mipsmt) - c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & - TCBIND_CURVPE; + cpu_set_vpe_id(c, (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & + TCBIND_CURVPE); #endif } diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 7d6af41888e8..ecc1a853f48d 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -306,7 +306,7 @@ static int cps_boot_secondary(int cpu, struct task_struct *idle) int err; /* We don't yet support booting CPUs in other clusters */ - if (cpu_cluster(&cpu_data[cpu]) != cpu_cluster(¤t_cpu_data)) + if (cpu_cluster(&cpu_data[cpu]) != cpu_cluster(&raw_current_cpu_data)) return -ENOSYS; vpe_cfg->pc = (unsigned long)&smp_bootstrap; diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index bbe19b64def5..88be966d3e61 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -42,7 +42,7 @@ #include <asm/processor.h> #include <asm/idle.h> #include <asm/r4k-timer.h> -#include <asm/mips-cpc.h> +#include <asm/mips-cps.h> #include <asm/mmu_context.h> #include <asm/time.h> #include <asm/setup.h> @@ -66,6 +66,7 @@ EXPORT_SYMBOL(cpu_sibling_map); cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); +static DECLARE_COMPLETION(cpu_starting); static DECLARE_COMPLETION(cpu_running); /* @@ -374,6 +375,12 @@ asmlinkage void start_secondary(void) cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); + /* Notify boot CPU that we're starting & ready to sync counters */ + complete(&cpu_starting); + + synchronise_count_slave(cpu); + + /* The CPU is running and counters synchronised, now mark it online */ set_cpu_online(cpu, true); set_cpu_sibling_map(cpu); @@ -381,8 +388,11 @@ asmlinkage void start_secondary(void) calculate_cpu_foreign_map(); + /* + * Notify boot CPU that we're up & online and it can safely return + * from __cpu_up + */ complete(&cpu_running); - synchronise_count_slave(cpu); /* * irq will be enabled in ->smp_finish(), enabling it too early @@ -445,17 +455,17 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) if (err) return err; - /* - * We must check for timeout here, as the CPU will not be marked - * online until the counters are synchronised. - */ - if (!wait_for_completion_timeout(&cpu_running, + /* Wait for CPU to start and be ready to sync counters */ + if (!wait_for_completion_timeout(&cpu_starting, msecs_to_jiffies(1000))) { pr_crit("CPU%u: failed to start\n", cpu); return -EIO; } synchronise_count_master(cpu); + + /* Wait for CPU to finish startup & mark itself online before return */ + wait_for_completion(&cpu_running); return 0; } diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c index c28ff53c8da0..cdb5a191b9d5 100644 --- a/arch/mips/mm/uasm-micromips.c +++ b/arch/mips/mm/uasm-micromips.c @@ -80,7 +80,7 @@ static const struct insn const insn_table_MM[insn_invalid] = { [insn_jr] = {M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RS}, [insn_lb] = {M(mm_lb32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, [insn_ld] = {0, 0}, - [insn_lh] = {M(mm_lh32_op, 0, 0, 0, 0, 0), RS | RS | SIMM}, + [insn_lh] = {M(mm_lh32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, [insn_ll] = {M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS | RT | SIMM}, [insn_lld] = {0, 0}, [insn_lui] = {M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM}, diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 01b7a87ea678..962b0259b4b6 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1513,7 +1513,7 @@ ld_skb_common: } src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); if (src < 0) - return dst; + return src; if (BPF_MODE(insn->code) == BPF_XADD) { switch (BPF_SIZE(insn->code)) { case BPF_W: diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S index 93b945597ecf..7cfba738f104 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S @@ -157,8 +157,8 @@ LABEL skip_ %I .endr # Find min length - vmovdqa _lens+0*16(state), %xmm0 - vmovdqa _lens+1*16(state), %xmm1 + vmovdqu _lens+0*16(state), %xmm0 + vmovdqu _lens+1*16(state), %xmm1 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} @@ -178,8 +178,8 @@ LABEL skip_ %I vpsubd %xmm2, %xmm0, %xmm0 vpsubd %xmm2, %xmm1, %xmm1 - vmovdqa %xmm0, _lens+0*16(state) - vmovdqa %xmm1, _lens+1*16(state) + vmovdqu %xmm0, _lens+0*16(state) + vmovdqu %xmm1, _lens+1*16(state) # "state" and "args" are the same address, arg1 # len is arg2 @@ -235,8 +235,8 @@ ENTRY(sha1_mb_mgr_get_comp_job_avx2) jc .return_null # Find min length - vmovdqa _lens(state), %xmm0 - vmovdqa _lens+1*16(state), %xmm1 + vmovdqu _lens(state), %xmm0 + vmovdqu _lens+1*16(state), %xmm1 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S index 8fe6338bcc84..16c4ccb1f154 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -155,8 +155,8 @@ LABEL skip_ %I .endr # Find min length - vmovdqa _lens+0*16(state), %xmm0 - vmovdqa _lens+1*16(state), %xmm1 + vmovdqu _lens+0*16(state), %xmm0 + vmovdqu _lens+1*16(state), %xmm1 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} @@ -176,8 +176,8 @@ LABEL skip_ %I vpsubd %xmm2, %xmm0, %xmm0 vpsubd %xmm2, %xmm1, %xmm1 - vmovdqa %xmm0, _lens+0*16(state) - vmovdqa %xmm1, _lens+1*16(state) + vmovdqu %xmm0, _lens+0*16(state) + vmovdqu %xmm1, _lens+1*16(state) # "state" and "args" are the same address, arg1 # len is arg2 @@ -234,8 +234,8 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2) jc .return_null # Find min length - vmovdqa _lens(state), %xmm0 - vmovdqa _lens+1*16(state), %xmm1 + vmovdqu _lens(state), %xmm0 + vmovdqu _lens+1*16(state), %xmm1 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 72d867f6b518..8d0ec9df1cbe 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -150,6 +150,8 @@ static inline void disable_acpi(void) { } extern int x86_acpi_numa_init(void); #endif /* CONFIG_ACPI_NUMA */ +#define acpi_unlazy_tlb(x) leave_mm(x) + #ifdef CONFIG_ACPI_APEI static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) { diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c index 10cec43aac38..7f85b76f43bc 100644 --- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c @@ -24,14 +24,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); static char mce_helper[128]; static char *mce_helper_argv[2] = { mce_helper, NULL }; -#define mce_log_get_idx_check(p) \ -({ \ - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ - !lockdep_is_held(&mce_chrdev_read_mutex), \ - "suspicious mce_log_get_idx_check() usage"); \ - smp_load_acquire(&(p)); \ -}) - /* * Lockless MCE logging infrastructure. * This avoids deadlocks on printk locks without having to break locks. Also @@ -53,43 +45,32 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val, void *data) { struct mce *mce = (struct mce *)data; - unsigned int next, entry; - - wmb(); - for (;;) { - entry = mce_log_get_idx_check(mcelog.next); - for (;;) { - - /* - * When the buffer fills up discard new entries. - * Assume that the earlier errors are the more - * interesting ones: - */ - if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, - (unsigned long *)&mcelog.flags); - return NOTIFY_OK; - } - /* Old left over entry. Skip: */ - if (mcelog.entry[entry].finished) { - entry++; - continue; - } - break; - } - smp_rmb(); - next = entry + 1; - if (cmpxchg(&mcelog.next, entry, next) == entry) - break; + unsigned int entry; + + mutex_lock(&mce_chrdev_read_mutex); + + entry = mcelog.next; + + /* + * When the buffer fills up discard new entries. Assume that the + * earlier errors are the more interesting ones: + */ + if (entry >= MCE_LOG_LEN) { + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); + goto unlock; } + + mcelog.next = entry + 1; + memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); - wmb(); mcelog.entry[entry].finished = 1; - wmb(); /* wake processes polling /dev/mcelog */ wake_up_interruptible(&mce_chrdev_wait); +unlock: + mutex_unlock(&mce_chrdev_read_mutex); + return NOTIFY_OK; } @@ -177,13 +158,6 @@ static int mce_chrdev_release(struct inode *inode, struct file *file) return 0; } -static void collect_tscs(void *data) -{ - unsigned long *cpu_tsc = (unsigned long *)data; - - cpu_tsc[smp_processor_id()] = rdtsc(); -} - static int mce_apei_read_done; /* Collect MCE record of previous boot in persistent storage via APEI ERST. */ @@ -231,14 +205,9 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off) { char __user *buf = ubuf; - unsigned long *cpu_tsc; - unsigned prev, next; + unsigned next; int i, err; - cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); - if (!cpu_tsc) - return -ENOMEM; - mutex_lock(&mce_chrdev_read_mutex); if (!mce_apei_read_done) { @@ -247,65 +216,29 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, goto out; } - next = mce_log_get_idx_check(mcelog.next); - /* Only supports full reads right now */ err = -EINVAL; if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) goto out; + next = mcelog.next; err = 0; - prev = 0; - do { - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - struct mce *m = &mcelog.entry[i]; - - while (!m->finished) { - if (time_after_eq(jiffies, start + 2)) { - memset(m, 0, sizeof(*m)); - goto timeout; - } - cpu_relax(); - } - smp_rmb(); - err |= copy_to_user(buf, m, sizeof(*m)); - buf += sizeof(*m); -timeout: - ; - } - - memset(mcelog.entry + prev, 0, - (next - prev) * sizeof(struct mce)); - prev = next; - next = cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); - - synchronize_sched(); - /* - * Collect entries that were still getting written before the - * synchronize. - */ - on_each_cpu(collect_tscs, cpu_tsc, 1); - - for (i = next; i < MCE_LOG_LEN; i++) { + for (i = 0; i < next; i++) { struct mce *m = &mcelog.entry[i]; - if (m->finished && m->tsc < cpu_tsc[m->cpu]) { - err |= copy_to_user(buf, m, sizeof(*m)); - smp_rmb(); - buf += sizeof(*m); - memset(m, 0, sizeof(*m)); - } + err |= copy_to_user(buf, m, sizeof(*m)); + buf += sizeof(*m); } + memset(mcelog.entry, 0, next * sizeof(struct mce)); + mcelog.next = 0; + if (err) err = -EFAULT; out: mutex_unlock(&mce_chrdev_read_mutex); - kfree(cpu_tsc); return err ? err : buf - ubuf; } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d88967659098..5b609e28ce3f 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -79,7 +79,7 @@ static void kvm_get_wallclock(struct timespec *now) static int kvm_set_wallclock(const struct timespec *now) { - return -1; + return -ENODEV; } static u64 kvm_clock_read(void) diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 62e7d70aadd5..da0c160e5589 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -172,19 +172,27 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_X86_64_NONE: break; case R_X86_64_64: + if (*(u64 *)loc != 0) + goto invalid_relocation; *(u64 *)loc = val; break; case R_X86_64_32: + if (*(u32 *)loc != 0) + goto invalid_relocation; *(u32 *)loc = val; if (val != *(u32 *)loc) goto overflow; break; case R_X86_64_32S: + if (*(s32 *)loc != 0) + goto invalid_relocation; *(s32 *)loc = val; if ((s64)val != *(s32 *)loc) goto overflow; break; case R_X86_64_PC32: + if (*(u32 *)loc != 0) + goto invalid_relocation; val -= (u64)loc; *(u32 *)loc = val; #if 0 @@ -200,6 +208,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } return 0; +invalid_relocation: + pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), loc, val); + return -ENOEXEC; + overflow: pr_err("overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 69c5612be786..36c90d631096 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1992,6 +1992,11 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); vcpu->arch.pv_eoi.msr_val = 0; apic_update_ppr(apic); + if (vcpu->arch.apicv_active) { + kvm_x86_ops->apicv_post_state_restore(vcpu); + kvm_x86_ops->hwapic_irr_update(vcpu, -1); + kvm_x86_ops->hwapic_isr_update(vcpu, -1); + } vcpu->arch.apic_arb_prio = 0; vcpu->arch.apic_attention = 0; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 95a01609d7ee..a6f4f095f8f4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5619,9 +5619,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (kvm_vcpu_apicv_active(vcpu)) - memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); - if (vmx->vpid != 0) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 16c5f37933a2..0286327e65fa 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -40,7 +40,7 @@ static char sme_cmdline_off[] __initdata = "off"; * section is later cleared. */ u64 sme_me_mask __section(.data) = 0; -EXPORT_SYMBOL_GPL(sme_me_mask); +EXPORT_SYMBOL(sme_me_mask); /* Buffer used for early in-place encryption by BSP, no locking needed */ static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0f3d0cea4d00..3118392cdf75 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -85,6 +85,7 @@ void leave_mm(int cpu) switch_mm(NULL, &init_mm, NULL); } +EXPORT_SYMBOL_GPL(leave_mm); void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) @@ -195,12 +196,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); write_cr3(build_cr3(next, new_asid)); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, - TLB_FLUSH_ALL); + + /* + * NB: This gets called via leave_mm() in the idle path + * where RCU functions differently. Tracing normally + * uses RCU, so we need to use the _rcuidle variant. + * + * (There is no good reason for this. The idle code should + * be rearranged to call this before rcu_idle_enter().) + */ + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ write_cr3(build_cr3_noflush(next, new_asid)); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); + + /* See above wrt _rcuidle. */ + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); } this_cpu_write(cpu_tlbstate.loaded_mm, next); diff --git a/crypto/ccm.c b/crypto/ccm.c index 1ce37ae0ce56..0a083342ec8c 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -363,7 +363,7 @@ static int crypto_ccm_decrypt(struct aead_request *req) unsigned int cryptlen = req->cryptlen; u8 *authtag = pctx->auth_tag; u8 *odata = pctx->odata; - u8 *iv = req->iv; + u8 *iv = pctx->idata; int err; cryptlen -= authsize; @@ -379,6 +379,8 @@ static int crypto_ccm_decrypt(struct aead_request *req) if (req->src != req->dst) dst = pctx->dst; + memcpy(iv, req->iv, 16); + skcipher_request_set_tfm(skreq, ctx->ctr); skcipher_request_set_callback(skreq, pctx->flags, crypto_ccm_decrypt_done, req); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 2736e25e9dc6..d50a7b6ccddd 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -710,6 +710,8 @@ static DEFINE_RAW_SPINLOCK(c3_lock); static void acpi_idle_enter_bm(struct acpi_processor *pr, struct acpi_processor_cx *cx, bool timer_bc) { + acpi_unlazy_tlb(smp_processor_id()); + /* * Must be done before busmaster disable as we might need to * access HPET ! diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 6804ddab3052..8082871b409a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -160,6 +160,14 @@ static int __init init_nvs_nosave(const struct dmi_system_id *d) return 0; } +static bool acpi_sleep_no_lps0; + +static int __init init_no_lps0(const struct dmi_system_id *d) +{ + acpi_sleep_no_lps0 = true; + return 0; +} + static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { { .callback = init_old_suspend_ordering, @@ -343,6 +351,19 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "80E3"), }, }, + /* + * https://bugzilla.kernel.org/show_bug.cgi?id=196907 + * Some Dell XPS13 9360 cannot do suspend-to-idle using the Low Power + * S0 Idle firmware interface. + */ + { + .callback = init_no_lps0, + .ident = "Dell XPS13 9360", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"), + }, + }, {}, }; @@ -485,6 +506,7 @@ static void acpi_pm_end(void) } #else /* !CONFIG_ACPI_SLEEP */ #define acpi_target_sleep_state ACPI_STATE_S0 +#define acpi_sleep_no_lps0 (false) static inline void acpi_sleep_dmi_check(void) {} #endif /* CONFIG_ACPI_SLEEP */ @@ -863,6 +885,12 @@ static int lps0_device_attach(struct acpi_device *adev, if (lps0_device_handle) return 0; + if (acpi_sleep_no_lps0) { + acpi_handle_info(adev->handle, + "Low Power S0 Idle interface disabled\n"); + return 0; + } + if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) return 0; diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 2371a92808be..adaa4a964f0c 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -34,13 +34,14 @@ lib-y := efi-stub-helper.o gop.o secureboot.o lib-$(CONFIG_RESET_ATTACK_MITIGATION) += tpm.o # include the stub's generic dependencies from lib/ when building for ARM/arm64 -arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c +arm-deps-y := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c +arm-deps-$(CONFIG_ARM64) += sort.c $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE $(call if_changed_rule,cc_o_c) lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o random.o \ - $(patsubst %.c,lib-%.o,$(arm-deps)) + $(patsubst %.c,lib-%.o,$(arm-deps-y)) lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o @@ -91,5 +92,4 @@ quiet_cmd_stubcopy = STUBCPY $@ # explicitly by the decompressor linker script. # STUBCOPY_FLAGS-$(CONFIG_ARM) += --rename-section .data=.data.efistub -STUBCOPY_RM-$(CONFIG_ARM) += -R ___ksymtab+sort -R ___kcrctab+sort STUBCOPY_RELOC-$(CONFIG_ARM) := R_ARM_ABS diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index a94601d5939e..01a9d78ee415 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -350,7 +350,9 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, * The easiest way to find adjacent regions is to sort the memory map * before traversing it. */ - sort(memory_map, map_size / desc_size, desc_size, cmp_mem_desc, NULL); + if (IS_ENABLED(CONFIG_ARM64)) + sort(memory_map, map_size / desc_size, desc_size, cmp_mem_desc, + NULL); for (l = 0; l < map_size; l += desc_size, prev = in) { u64 paddr, size; @@ -367,7 +369,8 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, * a 4k page size kernel to kexec a 64k page size kernel and * vice versa. */ - if (!regions_are_adjacent(prev, in) || + if ((IS_ENABLED(CONFIG_ARM64) && + !regions_are_adjacent(prev, in)) || !regions_have_compatible_memory_type_attrs(prev, in)) { paddr = round_down(in->phys_addr, SZ_64K); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index a7355ab3bb22..6ff0be8cbdc9 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -867,11 +867,16 @@ static void msf_from_bcd(struct atapi_msf *msf) int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense) { struct cdrom_info *info = drive->driver_data; - struct cdrom_device_info *cdi = &info->devinfo; + struct cdrom_device_info *cdi; unsigned char cmd[BLK_MAX_CDB]; ide_debug_log(IDE_DBG_FUNC, "enter"); + if (!info) + return -EIO; + + cdi = &info->devinfo; + memset(cmd, 0, BLK_MAX_CDB); cmd[0] = GPCMD_TEST_UNIT_READY; diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 5dc7ea4b6bc4..f0b06b14e782 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -913,15 +913,16 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state = &drv->states[index]; unsigned long eax = flg2MWAIT(state->flags); unsigned int cstate; + int cpu = smp_processor_id(); cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; /* - * NB: if CPUIDLE_FLAG_TLB_FLUSHED is set, this idle transition - * will probably flush the TLB. It's not guaranteed to flush - * the TLB, though, so it's not clear that we can do anything - * useful with this knowledge. + * leave_mm() to avoid costly and often unnecessary wakeups + * for flushing the user TLB's associated with the active mm. */ + if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) + leave_mm(cpu); if (!(lapic_timer_reliable_states & (1 << (cstate)))) tick_broadcast_enter(); diff --git a/drivers/input/sparse-keymap.c b/drivers/input/sparse-keymap.c index bb0349fa64bc..fd03e55768c9 100644 --- a/drivers/input/sparse-keymap.c +++ b/drivers/input/sparse-keymap.c @@ -255,6 +255,7 @@ void sparse_keymap_report_entry(struct input_dev *dev, const struct key_entry *k case KE_VSW: input_report_switch(dev, ke->sw.code, value); + input_sync(dev); break; } } diff --git a/drivers/input/touchscreen/ar1021_i2c.c b/drivers/input/touchscreen/ar1021_i2c.c index f9dcbd63e598..b35b640fdadf 100644 --- a/drivers/input/touchscreen/ar1021_i2c.c +++ b/drivers/input/touchscreen/ar1021_i2c.c @@ -117,6 +117,7 @@ static int ar1021_i2c_probe(struct i2c_client *client, input->open = ar1021_i2c_open; input->close = ar1021_i2c_close; + __set_bit(INPUT_PROP_DIRECT, input->propbit); input_set_capability(input, EV_KEY, BTN_TOUCH); input_set_abs_params(input, ABS_X, 0, AR1021_MAX_X, 0, 0); input_set_abs_params(input, ABS_Y, 0, AR1021_MAX_Y, 0, 0); diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index b283fc90be1e..17a4a7b6cdbb 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -194,6 +194,7 @@ static int mvebu_gicp_probe(struct platform_device *pdev) return -ENOMEM; gicp->dev = &pdev->dev; + spin_lock_init(&gicp->spi_lock); gicp->res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!gicp->res) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 18b58e1376f1..99a3b0cd5bd6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2046,6 +2046,7 @@ static int bond_miimon_inspect(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; + slave->link_new_state = slave->link; link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -3267,7 +3268,7 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) hash ^= (hash >> 16); hash ^= (hash >> 8); - return hash; + return hash >> 1; } /*-------------------------- Device entry points ----------------------------*/ diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index c4afc8f1a66d..320651a57c6f 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -284,7 +284,7 @@ static int lan9303_indirect_phy_wait_for_completion(struct lan9303 *chip) } if (!(reg & LAN9303_PMI_ACCESS_MII_BUSY)) return 0; - msleep(1); + usleep_range(1000, 2000); } return -EIO; @@ -376,7 +376,7 @@ static int lan9303_switch_wait_for_completion(struct lan9303 *chip) } if (!(reg & LAN9303_SWITCH_CSR_CMD_BUSY)) return 0; - msleep(1); + usleep_range(1000, 2000); } return -EIO; @@ -1057,17 +1057,7 @@ static int lan9303_port_enable(struct dsa_switch *ds, int port, { struct lan9303 *chip = ds->priv; - /* enable internal packet processing */ - switch (port) { - case 1: - case 2: - return lan9303_enable_processing_port(chip, port); - default: - dev_dbg(chip->dev, - "Error: request to power up invalid port %d\n", port); - } - - return -ENODEV; + return lan9303_enable_processing_port(chip, port); } static void lan9303_port_disable(struct dsa_switch *ds, int port, @@ -1075,18 +1065,9 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port, { struct lan9303 *chip = ds->priv; - /* disable internal packet processing */ - switch (port) { - case 1: - case 2: - lan9303_disable_processing_port(chip, port); - lan9303_phy_write(ds, chip->phy_addr_sel_strap + port, - MII_BMCR, BMCR_PDOWN); - break; - default: - dev_dbg(chip->dev, - "Error: request to power down invalid port %d\n", port); - } + lan9303_disable_processing_port(chip, port); + lan9303_phy_write(ds, chip->phy_addr_sel_strap + port, + MII_BMCR, BMCR_PDOWN); } static int lan9303_port_bridge_join(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c index 24ec20f7f444..909a7e864246 100644 --- a/drivers/net/dsa/lan9303_i2c.c +++ b/drivers/net/dsa/lan9303_i2c.c @@ -50,7 +50,7 @@ static int lan9303_i2c_probe(struct i2c_client *client, return -ENOMEM; sw_dev->chip.regmap = devm_regmap_init_i2c(client, - &lan9303_i2c_regmap_config); + &lan9303_i2c_regmap_config); if (IS_ERR(sw_dev->chip.regmap)) { ret = PTR_ERR(sw_dev->chip.regmap); dev_err(&client->dev, "Failed to allocate register map: %d\n", diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index 0bc56b9900f9..cc9c2ea1c4fe 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -116,7 +116,7 @@ static int lan9303_mdio_probe(struct mdio_device *mdiodev) return -ENOMEM; sw_dev->chip.regmap = devm_regmap_init(&mdiodev->dev, NULL, sw_dev, - &lan9303_mdio_regmap_config); + &lan9303_mdio_regmap_config); if (IS_ERR(sw_dev->chip.regmap)) { ret = PTR_ERR(sw_dev->chip.regmap); dev_err(&mdiodev->dev, "regmap init failed: %d\n", ret); diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index 48bc7fa0258c..3044a6f35f04 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -1046,6 +1046,7 @@ static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) switch(cmd) { case SIOCGMIIPHY: /* Get the address of the PHY in use. */ data->phy_id = phy; + /* fall through */ case SIOCGMIIREG: /* Read the specified MII register. */ { int saved_window; diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index eae9827035dc..bcad4a7fac9f 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -1107,6 +1107,7 @@ static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) switch (cmd) { case SIOCGMIIPHY: data->phy_id = info->phy_id; + /* fall through */ case SIOCGMIIREG: /* Read MII PHY register. */ data->val_out = mdio_read(mii_addr, data->phy_id, data->reg_num & 0x1f); return 0; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 3d53153ce751..a74a8fbad53a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2206,7 +2206,7 @@ static int xgbe_setup_tc(struct net_device *netdev, enum tc_setup_type type, struct tc_mqprio_qopt *mqprio = type_data; u8 tc; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index d937083db9a4..894eda5b13cf 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -131,6 +131,7 @@ static void bgmac_nicpm_speed_set(struct net_device *net_dev) switch (bgmac->net_dev->phydev->speed) { default: netdev_err(net_dev, "Unsupported speed. Defaulting to 1000Mb\n"); + /* fall through */ case SPEED_1000: val |= NICPM_IOMUX_CTRL_SPD_1000M << NICPM_IOMUX_CTRL_SPD_SHIFT; break; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 1216c1f1e052..4c739d5355d2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4289,7 +4289,7 @@ int __bnx2x_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 96416f5d97f3..33c49ad697e4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4915,16 +4915,14 @@ hwrm_ver_get_exit: int bnxt_hwrm_fw_set_time(struct bnxt *bp) { -#if IS_ENABLED(CONFIG_RTC_LIB) struct hwrm_fw_set_time_input req = {0}; - struct rtc_time tm; - struct timeval tv; + struct tm tm; + time64_t now = ktime_get_real_seconds(); if (bp->hwrm_spec_code < 0x10400) return -EOPNOTSUPP; - do_gettimeofday(&tv); - rtc_time_to_tm(tv.tv_sec, &tm); + time64_to_tm(now, 0, &tm); bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1); req.year = cpu_to_le16(1900 + tm.tm_year); req.month = 1 + tm.tm_mon; @@ -4933,9 +4931,6 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp) req.minute = tm.tm_min; req.second = tm.tm_sec; return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); -#else - return -EOPNOTSUPP; -#endif } static int bnxt_hwrm_port_qstats(struct bnxt *bp) @@ -7388,7 +7383,7 @@ static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: return bnxt_setup_tc_block(dev, type_data); - case TC_SETUP_MQPRIO: { + case TC_SETUP_QDISC_MQPRIO: { struct tc_mqprio_qopt *mqprio = type_data; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index b6aa7db99705..69186d188c43 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -148,7 +148,6 @@ static int bnxt_vf_rep_setup_tc_block(struct net_device *dev, return tcf_block_cb_register(f->block, bnxt_vf_rep_setup_tc_block_cb, vf_rep, vf_rep); - return 0; case TC_BLOCK_UNBIND: tcf_block_cb_unregister(f->block, bnxt_vf_rep_setup_tc_block_cb, vf_rep); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 5dafcde67e45..72a67f74b97b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -611,6 +611,9 @@ static int macb_mii_init(struct macb *bp) err_out_unregister_bus: mdiobus_unregister(bp->mii_bus); err_out_free_mdiobus: + of_node_put(bp->phy_node); + if (np && of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); mdiobus_free(bp->mii_bus); err_out: return err; @@ -3550,6 +3553,9 @@ static int macb_probe(struct platform_device *pdev) err_out_unregister_mdio: phy_disconnect(dev->phydev); mdiobus_unregister(bp->mii_bus); + of_node_put(bp->phy_node); + if (np && of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); mdiobus_free(bp->mii_bus); /* Shutdown the PHY if there is a GPIO reset */ @@ -3572,6 +3578,7 @@ static int macb_remove(struct platform_device *pdev) { struct net_device *dev; struct macb *bp; + struct device_node *np = pdev->dev.of_node; dev = platform_get_drvdata(pdev); @@ -3580,6 +3587,8 @@ static int macb_remove(struct platform_device *pdev) if (dev->phydev) phy_disconnect(dev->phydev); mdiobus_unregister(bp->mii_bus); + if (np && of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); dev->phydev = NULL; mdiobus_free(bp->mii_bus); diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 988c06a28e5e..8f1dd55b3e08 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -361,17 +361,8 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) } } -static void nic_free_lmacmem(struct nicpf *nic) +static void nic_get_hw_info(struct nicpf *nic) { - kfree(nic->vf_lmac_map); - kfree(nic->link); - kfree(nic->duplex); - kfree(nic->speed); -} - -static int nic_get_hw_info(struct nicpf *nic) -{ - u8 max_lmac; u16 sdevid; struct hw_info *hw = nic->hw; @@ -419,41 +410,16 @@ static int nic_get_hw_info(struct nicpf *nic) break; } hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev); - - /* Allocate memory for LMAC tracking elements */ - max_lmac = hw->bgx_cnt * MAX_LMAC_PER_BGX; - nic->vf_lmac_map = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->vf_lmac_map) - goto error; - nic->link = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->link) - goto error; - nic->duplex = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->duplex) - goto error; - nic->speed = kmalloc_array(max_lmac, sizeof(u32), GFP_KERNEL); - if (!nic->speed) - goto error; - return 0; - -error: - nic_free_lmacmem(nic); - return -ENOMEM; } #define BGX0_BLOCK 8 #define BGX1_BLOCK 9 -static int nic_init_hw(struct nicpf *nic) +static void nic_init_hw(struct nicpf *nic) { - int i, err; + int i; u64 cqm_cfg; - /* Get HW capability info */ - err = nic_get_hw_info(nic); - if (err) - return err; - /* Enable NIC HW block */ nic_reg_write(nic, NIC_PF_CFG, 0x3); @@ -498,8 +464,6 @@ static int nic_init_hw(struct nicpf *nic) cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG); if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL) nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL); - - return 0; } /* Channel parse index configuration */ @@ -1269,6 +1233,7 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct device *dev = &pdev->dev; struct nicpf *nic; + u8 max_lmac; int err; BUILD_BUG_ON(sizeof(union nic_mbx) > 16); @@ -1278,10 +1243,8 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL); - if (!nic->hw) { - devm_kfree(dev, nic); + if (!nic->hw) return -ENOMEM; - } pci_set_drvdata(pdev, nic); @@ -1322,11 +1285,33 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) nic->node = nic_get_node_id(pdev); - /* Initialize hardware */ - err = nic_init_hw(nic); - if (err) + /* Get HW capability info */ + nic_get_hw_info(nic); + + /* Allocate memory for LMAC tracking elements */ + err = -ENOMEM; + max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX; + + nic->vf_lmac_map = devm_kmalloc_array(dev, max_lmac, sizeof(u8), + GFP_KERNEL); + if (!nic->vf_lmac_map) + goto err_release_regions; + + nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->link) goto err_release_regions; + nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->duplex) + goto err_release_regions; + + nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL); + if (!nic->speed) + goto err_release_regions; + + /* Initialize hardware */ + nic_init_hw(nic); + nic_set_lmac_vf_mapping(nic); /* Register interrupts */ @@ -1360,9 +1345,6 @@ err_unregister_interrupts: err_release_regions: pci_release_regions(pdev); err_disable_device: - nic_free_lmacmem(nic); - devm_kfree(dev, nic->hw); - devm_kfree(dev, nic); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); return err; @@ -1384,10 +1366,6 @@ static void nic_remove(struct pci_dev *pdev) nic_unregister_interrupts(nic); pci_release_regions(pdev); - nic_free_lmacmem(nic); - devm_kfree(&pdev->dev, nic->hw); - devm_kfree(&pdev->dev, nic); - pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index f2d623a7aee0..123e2c1b65f5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -37,7 +37,7 @@ #define T4FW_VERSION_MAJOR 0x01 #define T4FW_VERSION_MINOR 0x10 -#define T4FW_VERSION_MICRO 0x2D +#define T4FW_VERSION_MICRO 0x3F #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -46,7 +46,7 @@ #define T5FW_VERSION_MAJOR 0x01 #define T5FW_VERSION_MINOR 0x10 -#define T5FW_VERSION_MICRO 0x2D +#define T5FW_VERSION_MICRO 0x3F #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -55,7 +55,7 @@ #define T6FW_VERSION_MAJOR 0x01 #define T6FW_VERSION_MINOR 0x10 -#define T6FW_VERSION_MICRO 0x2D +#define T6FW_VERSION_MICRO 0x3F #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index ebc55b6a6349..7caa8da48421 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -351,7 +351,7 @@ static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type, u8 num_tc; int i; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; @@ -2728,11 +2728,11 @@ static int dpaa_eth_probe(struct platform_device *pdev) /* bp init */ for (i = 0; i < DPAA_BPS_NUM; i++) { - int err; - dpaa_bps[i] = dpaa_bp_alloc(dev); - if (IS_ERR(dpaa_bps[i])) + if (IS_ERR(dpaa_bps[i])) { + err = PTR_ERR(dpaa_bps[i]); goto free_dpaa_bps; + } /* the raw size of the buffers used for reception */ dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM); /* avoid runtime computations by keeping the usable size here */ diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 44720f83af27..5385074b3b7d 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -583,12 +583,11 @@ struct fec_enet_private { u64 ethtool_stats[0]; }; -void fec_ptp_init(struct platform_device *pdev); +void fec_ptp_init(struct platform_device *pdev, int irq_idx); void fec_ptp_stop(struct platform_device *pdev); void fec_ptp_start_cyclecounter(struct net_device *ndev); int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr); int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr); -uint fec_ptp_check_pps_event(struct fec_enet_private *fep); /****************************************************************************/ #endif /* FEC_H */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3dc2d771a222..610573855213 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1602,10 +1602,6 @@ fec_enet_interrupt(int irq, void *dev_id) ret = IRQ_HANDLED; complete(&fep->mdio_done); } - - if (fep->ptp_clock) - if (fec_ptp_check_pps_event(fep)) - ret = IRQ_HANDLED; return ret; } @@ -3312,6 +3308,19 @@ fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx) } +static int fec_enet_get_irq_cnt(struct platform_device *pdev) +{ + int irq_cnt = platform_irq_count(pdev); + + if (irq_cnt > FEC_IRQ_NUM) + irq_cnt = FEC_IRQ_NUM; /* last for pps */ + else if (irq_cnt == 2) + irq_cnt = 1; /* last for pps */ + else if (irq_cnt <= 0) + irq_cnt = 1; /* At least 1 irq is needed */ + return irq_cnt; +} + static int fec_probe(struct platform_device *pdev) { @@ -3325,6 +3334,8 @@ fec_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node, *phy_node; int num_tx_qs; int num_rx_qs; + char irq_name[8]; + int irq_cnt; fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs); @@ -3465,18 +3476,20 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_reset; + irq_cnt = fec_enet_get_irq_cnt(pdev); if (fep->bufdesc_ex) - fec_ptp_init(pdev); + fec_ptp_init(pdev, irq_cnt); ret = fec_enet_init(ndev); if (ret) goto failed_init; - for (i = 0; i < FEC_IRQ_NUM; i++) { - irq = platform_get_irq(pdev, i); + for (i = 0; i < irq_cnt; i++) { + sprintf(irq_name, "int%d", i); + irq = platform_get_irq_byname(pdev, irq_name); + if (irq < 0) + irq = platform_get_irq(pdev, i); if (irq < 0) { - if (i) - break; ret = irq; goto failed_irq; } diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 6ebad3fac81d..f81439796ac7 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -549,6 +549,37 @@ static void fec_time_keep(struct work_struct *work) schedule_delayed_work(&fep->time_keep, HZ); } +/* This function checks the pps event and reloads the timer compare counter. */ +static irqreturn_t fec_pps_interrupt(int irq, void *dev_id) +{ + struct net_device *ndev = dev_id; + struct fec_enet_private *fep = netdev_priv(ndev); + u32 val; + u8 channel = fep->pps_channel; + struct ptp_clock_event event; + + val = readl(fep->hwp + FEC_TCSR(channel)); + if (val & FEC_T_TF_MASK) { + /* Write the next next compare(not the next according the spec) + * value to the register + */ + writel(fep->next_counter, fep->hwp + FEC_TCCR(channel)); + do { + writel(val, fep->hwp + FEC_TCSR(channel)); + } while (readl(fep->hwp + FEC_TCSR(channel)) & FEC_T_TF_MASK); + + /* Update the counter; */ + fep->next_counter = (fep->next_counter + fep->reload_period) & + fep->cc.mask; + + event.type = PTP_CLOCK_PPS; + ptp_clock_event(fep->ptp_clock, &event); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + /** * fec_ptp_init * @ndev: The FEC network adapter @@ -558,10 +589,12 @@ static void fec_time_keep(struct work_struct *work) * cyclecounter init routine and exits. */ -void fec_ptp_init(struct platform_device *pdev) +void fec_ptp_init(struct platform_device *pdev, int irq_idx) { struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); + int irq; + int ret; fep->ptp_caps.owner = THIS_MODULE; snprintf(fep->ptp_caps.name, 16, "fec ptp"); @@ -587,6 +620,20 @@ void fec_ptp_init(struct platform_device *pdev) INIT_DELAYED_WORK(&fep->time_keep, fec_time_keep); + irq = platform_get_irq_byname(pdev, "pps"); + if (irq < 0) + irq = platform_get_irq(pdev, irq_idx); + /* Failure to get an irq is not fatal, + * only the PTP_CLOCK_PPS clock events should stop + */ + if (irq >= 0) { + ret = devm_request_irq(&pdev->dev, irq, fec_pps_interrupt, + 0, pdev->name, ndev); + if (ret < 0) + dev_warn(&pdev->dev, "request for pps irq failed(%d)\n", + ret); + } + fep->ptp_clock = ptp_clock_register(&fep->ptp_caps, &pdev->dev); if (IS_ERR(fep->ptp_clock)) { fep->ptp_clock = NULL; @@ -605,36 +652,3 @@ void fec_ptp_stop(struct platform_device *pdev) if (fep->ptp_clock) ptp_clock_unregister(fep->ptp_clock); } - -/** - * fec_ptp_check_pps_event - * @fep: the fec_enet_private structure handle - * - * This function check the pps event and reload the timer compare counter. - */ -uint fec_ptp_check_pps_event(struct fec_enet_private *fep) -{ - u32 val; - u8 channel = fep->pps_channel; - struct ptp_clock_event event; - - val = readl(fep->hwp + FEC_TCSR(channel)); - if (val & FEC_T_TF_MASK) { - /* Write the next next compare(not the next according the spec) - * value to the register - */ - writel(fep->next_counter, fep->hwp + FEC_TCCR(channel)); - do { - writel(val, fep->hwp + FEC_TCSR(channel)); - } while (readl(fep->hwp + FEC_TCSR(channel)) & FEC_T_TF_MASK); - - /* Update the counter; */ - fep->next_counter = (fep->next_counter + fep->reload_period) & fep->cc.mask; - - event.type = PTP_CLOCK_PPS; - ptp_clock_event(fep->ptp_clock, &event); - return 1; - } - - return 0; -} diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 1789b206be58..6552d68ea6e1 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1339,8 +1339,10 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params) switch (port->port_type) { case FMAN_PORT_TYPE_RX: set_rx_dflt_cfg(port, params); + /* fall through */ case FMAN_PORT_TYPE_TX: set_tx_dflt_cfg(port, params, &port->dts_params); + /* fall through */ default: set_dflt_cfg(port, params); } diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 1d6da1ea7bfb..88c0a0636b44 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -615,7 +615,6 @@ static int mac_probe(struct platform_device *_of_dev) mac_dev = devm_kzalloc(dev, sizeof(*mac_dev), GFP_KERNEL); if (!mac_dev) { err = -ENOMEM; - dev_err(dev, "devm_kzalloc() = %d\n", err); goto _return; } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -709,12 +708,8 @@ static int mac_probe(struct platform_device *_of_dev) } if (!of_device_is_available(mac_node)) { - devm_iounmap(dev, priv->vaddr); - __devm_release_region(dev, fman_get_mem_region(priv->fman), - res.start, res.end + 1 - res.start); - devm_kfree(dev, mac_dev); - dev_set_drvdata(dev, NULL); - return -ENODEV; + err = -ENODEV; + goto _return_of_get_parent; } /* Get the cell-index */ @@ -825,6 +820,7 @@ static int mac_probe(struct platform_device *_of_dev) phy = of_phy_find_device(mac_dev->phy_node); if (!phy) { err = -EINVAL; + of_node_put(mac_dev->phy_node); goto _return_of_get_parent; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 844c83ea549e..ce5ed8845042 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -390,7 +390,7 @@ struct hclge_pf_res_cmd { #define HCLGE_CFG_TQP_DESC_N_S 16 #define HCLGE_CFG_TQP_DESC_N_M GENMASK(31, 16) #define HCLGE_CFG_PHY_ADDR_S 0 -#define HCLGE_CFG_PHY_ADDR_M GENMASK(4, 0) +#define HCLGE_CFG_PHY_ADDR_M GENMASK(7, 0) #define HCLGE_CFG_MEDIA_TP_S 8 #define HCLGE_CFG_MEDIA_TP_M GENMASK(15, 8) #define HCLGE_CFG_RX_BUF_LEN_S 16 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c6ba89089ef3..781d5a8cbb6a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2325,18 +2325,7 @@ static int hclge_update_speed_duplex(struct hclge_dev *hdev) /* get the speed and duplex as autoneg'result from mac cmd when phy * doesn't exit. */ - if (mac.phydev) - return 0; - - /* update mac->antoneg. */ - ret = hclge_query_autoneg_result(hdev); - if (ret) { - dev_err(&hdev->pdev->dev, - "autoneg result query failed %d\n", ret); - return ret; - } - - if (!mac.autoneg) + if (mac.phydev || !mac.autoneg) return 0; ret = hclge_query_mac_an_speed_dup(hdev, &speed, &duplex); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 2a0af11c9b59..59415090ff0f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -1252,7 +1252,7 @@ out: static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; return hns3_setup_tc(dev, type_data); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index d0cff2807d0b..b918bc2f2e4f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -75,6 +75,7 @@ #include <asm/firmware.h> #include <linux/workqueue.h> #include <linux/if_vlan.h> +#include <linux/utsname.h> #include "ibmvnic.h" @@ -2813,6 +2814,55 @@ static int send_version_xchg(struct ibmvnic_adapter *adapter) return ibmvnic_send_crq(adapter, &crq); } +struct vnic_login_client_data { + u8 type; + __be16 len; + char name; +} __packed; + +static int vnic_client_data_len(struct ibmvnic_adapter *adapter) +{ + int len; + + /* Calculate the amount of buffer space needed for the + * vnic client data in the login buffer. There are four entries, + * OS name, LPAR name, device name, and a null last entry. + */ + len = 4 * sizeof(struct vnic_login_client_data); + len += 6; /* "Linux" plus NULL */ + len += strlen(utsname()->nodename) + 1; + len += strlen(adapter->netdev->name) + 1; + + return len; +} + +static void vnic_add_client_data(struct ibmvnic_adapter *adapter, + struct vnic_login_client_data *vlcd) +{ + const char *os_name = "Linux"; + int len; + + /* Type 1 - LPAR OS */ + vlcd->type = 1; + len = strlen(os_name) + 1; + vlcd->len = cpu_to_be16(len); + strncpy(&vlcd->name, os_name, len); + vlcd = (struct vnic_login_client_data *)((char *)&vlcd->name + len); + + /* Type 2 - LPAR name */ + vlcd->type = 2; + len = strlen(utsname()->nodename) + 1; + vlcd->len = cpu_to_be16(len); + strncpy(&vlcd->name, utsname()->nodename, len); + vlcd = (struct vnic_login_client_data *)((char *)&vlcd->name + len); + + /* Type 3 - device name */ + vlcd->type = 3; + len = strlen(adapter->netdev->name) + 1; + vlcd->len = cpu_to_be16(len); + strncpy(&vlcd->name, adapter->netdev->name, len); +} + static void send_login(struct ibmvnic_adapter *adapter) { struct ibmvnic_login_rsp_buffer *login_rsp_buffer; @@ -2825,13 +2875,18 @@ static void send_login(struct ibmvnic_adapter *adapter) size_t buffer_size; __be64 *tx_list_p; __be64 *rx_list_p; + int client_data_len; + struct vnic_login_client_data *vlcd; int i; + client_data_len = vnic_client_data_len(adapter); + buffer_size = sizeof(struct ibmvnic_login_buffer) + - sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues); + sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues) + + client_data_len; - login_buffer = kmalloc(buffer_size, GFP_ATOMIC); + login_buffer = kzalloc(buffer_size, GFP_ATOMIC); if (!login_buffer) goto buf_alloc_failed; @@ -2898,6 +2953,15 @@ static void send_login(struct ibmvnic_adapter *adapter) } } + /* Insert vNIC login client data */ + vlcd = (struct vnic_login_client_data *) + ((char *)rx_list_p + (sizeof(u64) * adapter->req_rx_queues)); + login_buffer->client_data_offset = + cpu_to_be32((char *)vlcd - (char *)login_buffer); + login_buffer->client_data_len = cpu_to_be32(client_data_len); + + vnic_add_client_data(adapter, vlcd); + netdev_dbg(adapter->netdev, "Login Buffer:\n"); for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 4670af80d612..8ed829c5b026 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -57,6 +57,8 @@ struct ibmvnic_login_buffer { __be32 off_rxcomp_subcrqs; __be32 login_rsp_ioba; __be32 login_rsp_len; + __be32 client_data_offset; + __be32 client_data_len; } __packed __aligned(8); struct ibmvnic_login_rsp_buffer { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 81e4425f0529..adc62fb38c49 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1389,7 +1389,7 @@ static int __fm10k_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 05b94d87a6c3..17e6f64299cf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7550,7 +7550,7 @@ static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_MQPRIO: + case TC_SETUP_QDISC_MQPRIO: return i40e_setup_tc(netdev, type_data); case TC_SETUP_BLOCK: return i40e_setup_tc_block(netdev, type_data); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index e22bce7cdacd..43cf39527660 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2488,7 +2488,7 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, struct igb_adapter *adapter = netdev_priv(dev); switch (type) { - case TC_SETUP_CBS: + case TC_SETUP_QDISC_CBS: return igb_offload_cbs(adapter, type_data); default: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e5dcb25be398..6eaca8366ac8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9431,7 +9431,7 @@ static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: return ixgbe_setup_tc_block(dev, type_data); - case TC_SETUP_MQPRIO: + case TC_SETUP_QDISC_MQPRIO: return ixgbe_setup_tc_mqprio(dev, type_data); default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 965b6a829a5d..6c20e811f973 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -799,6 +799,42 @@ enum mvpp2_bm_type { MVPP2_BM_SWF_SHORT }; +/* GMAC MIB Counters register definitions */ +#define MVPP21_MIB_COUNTERS_OFFSET 0x1000 +#define MVPP21_MIB_COUNTERS_PORT_SZ 0x400 +#define MVPP22_MIB_COUNTERS_OFFSET 0x0 +#define MVPP22_MIB_COUNTERS_PORT_SZ 0x100 + +#define MVPP2_MIB_GOOD_OCTETS_RCVD 0x0 +#define MVPP2_MIB_BAD_OCTETS_RCVD 0x8 +#define MVPP2_MIB_CRC_ERRORS_SENT 0xc +#define MVPP2_MIB_UNICAST_FRAMES_RCVD 0x10 +#define MVPP2_MIB_BROADCAST_FRAMES_RCVD 0x18 +#define MVPP2_MIB_MULTICAST_FRAMES_RCVD 0x1c +#define MVPP2_MIB_FRAMES_64_OCTETS 0x20 +#define MVPP2_MIB_FRAMES_65_TO_127_OCTETS 0x24 +#define MVPP2_MIB_FRAMES_128_TO_255_OCTETS 0x28 +#define MVPP2_MIB_FRAMES_256_TO_511_OCTETS 0x2c +#define MVPP2_MIB_FRAMES_512_TO_1023_OCTETS 0x30 +#define MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS 0x34 +#define MVPP2_MIB_GOOD_OCTETS_SENT 0x38 +#define MVPP2_MIB_UNICAST_FRAMES_SENT 0x40 +#define MVPP2_MIB_MULTICAST_FRAMES_SENT 0x48 +#define MVPP2_MIB_BROADCAST_FRAMES_SENT 0x4c +#define MVPP2_MIB_FC_SENT 0x54 +#define MVPP2_MIB_FC_RCVD 0x58 +#define MVPP2_MIB_RX_FIFO_OVERRUN 0x5c +#define MVPP2_MIB_UNDERSIZE_RCVD 0x60 +#define MVPP2_MIB_FRAGMENTS_RCVD 0x64 +#define MVPP2_MIB_OVERSIZE_RCVD 0x68 +#define MVPP2_MIB_JABBER_RCVD 0x6c +#define MVPP2_MIB_MAC_RCV_ERROR 0x70 +#define MVPP2_MIB_BAD_CRC_EVENT 0x74 +#define MVPP2_MIB_COLLISION 0x78 +#define MVPP2_MIB_LATE_COLLISION 0x7c + +#define MVPP2_MIB_COUNTERS_STATS_DELAY (1 * HZ) + /* Definitions */ /* Shared Packet Processor resources */ @@ -826,6 +862,7 @@ struct mvpp2 { struct clk *axi_clk; /* List of pointers to port structures */ + int port_count; struct mvpp2_port **port_list; /* Aggregated TXQs */ @@ -847,6 +884,10 @@ struct mvpp2 { /* Maximum number of RXQs per port */ unsigned int max_port_rxqs; + + /* Workqueue to gather hardware statistics */ + char queue_name[30]; + struct workqueue_struct *stats_queue; }; struct mvpp2_pcpu_stats { @@ -891,6 +932,7 @@ struct mvpp2_port { /* Per-port registers' base address */ void __iomem *base; + void __iomem *stats_base; struct mvpp2_rx_queue **rxqs; unsigned int nrxqs; @@ -909,6 +951,11 @@ struct mvpp2_port { u16 tx_ring_size; u16 rx_ring_size; struct mvpp2_pcpu_stats __percpu *stats; + u64 *ethtool_stats; + + /* Per-port work and its lock to gather hardware statistics */ + struct mutex gather_stats_lock; + struct delayed_work stats_work; phy_interface_t phy_interface; struct device_node *phy_node; @@ -4778,9 +4825,131 @@ static void mvpp2_port_loopback_set(struct mvpp2_port *port) writel(val, port->base + MVPP2_GMAC_CTRL_1_REG); } +struct mvpp2_ethtool_counter { + unsigned int offset; + const char string[ETH_GSTRING_LEN]; + bool reg_is_64b; +}; + +static u64 mvpp2_read_count(struct mvpp2_port *port, + const struct mvpp2_ethtool_counter *counter) +{ + u64 val; + + val = readl(port->stats_base + counter->offset); + if (counter->reg_is_64b) + val += (u64)readl(port->stats_base + counter->offset + 4) << 32; + + return val; +} + +/* Due to the fact that software statistics and hardware statistics are, by + * design, incremented at different moments in the chain of packet processing, + * it is very likely that incoming packets could have been dropped after being + * counted by hardware but before reaching software statistics (most probably + * multicast packets), and in the oppposite way, during transmission, FCS bytes + * are added in between as well as TSO skb will be split and header bytes added. + * Hence, statistics gathered from userspace with ifconfig (software) and + * ethtool (hardware) cannot be compared. + */ +static const struct mvpp2_ethtool_counter mvpp2_ethtool_regs[] = { + { MVPP2_MIB_GOOD_OCTETS_RCVD, "good_octets_received", true }, + { MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" }, + { MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" }, + { MVPP2_MIB_UNICAST_FRAMES_RCVD, "unicast_frames_received" }, + { MVPP2_MIB_BROADCAST_FRAMES_RCVD, "broadcast_frames_received" }, + { MVPP2_MIB_MULTICAST_FRAMES_RCVD, "multicast_frames_received" }, + { MVPP2_MIB_FRAMES_64_OCTETS, "frames_64_octets" }, + { MVPP2_MIB_FRAMES_65_TO_127_OCTETS, "frames_65_to_127_octet" }, + { MVPP2_MIB_FRAMES_128_TO_255_OCTETS, "frames_128_to_255_octet" }, + { MVPP2_MIB_FRAMES_256_TO_511_OCTETS, "frames_256_to_511_octet" }, + { MVPP2_MIB_FRAMES_512_TO_1023_OCTETS, "frames_512_to_1023_octet" }, + { MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS, "frames_1024_to_max_octet" }, + { MVPP2_MIB_GOOD_OCTETS_SENT, "good_octets_sent", true }, + { MVPP2_MIB_UNICAST_FRAMES_SENT, "unicast_frames_sent" }, + { MVPP2_MIB_MULTICAST_FRAMES_SENT, "multicast_frames_sent" }, + { MVPP2_MIB_BROADCAST_FRAMES_SENT, "broadcast_frames_sent" }, + { MVPP2_MIB_FC_SENT, "fc_sent" }, + { MVPP2_MIB_FC_RCVD, "fc_received" }, + { MVPP2_MIB_RX_FIFO_OVERRUN, "rx_fifo_overrun" }, + { MVPP2_MIB_UNDERSIZE_RCVD, "undersize_received" }, + { MVPP2_MIB_FRAGMENTS_RCVD, "fragments_received" }, + { MVPP2_MIB_OVERSIZE_RCVD, "oversize_received" }, + { MVPP2_MIB_JABBER_RCVD, "jabber_received" }, + { MVPP2_MIB_MAC_RCV_ERROR, "mac_receive_error" }, + { MVPP2_MIB_BAD_CRC_EVENT, "bad_crc_event" }, + { MVPP2_MIB_COLLISION, "collision" }, + { MVPP2_MIB_LATE_COLLISION, "late_collision" }, +}; + +static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset, + u8 *data) +{ + if (sset == ETH_SS_STATS) { + int i; + + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) + memcpy(data + i * ETH_GSTRING_LEN, + &mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN); + } +} + +static void mvpp2_gather_hw_statistics(struct work_struct *work) +{ + struct delayed_work *del_work = to_delayed_work(work); + struct mvpp2_port *port = container_of(del_work, struct mvpp2_port, + stats_work); + u64 *pstats; + int i; + + mutex_lock(&port->gather_stats_lock); + + pstats = port->ethtool_stats; + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) + *pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_regs[i]); + + /* No need to read again the counters right after this function if it + * was called asynchronously by the user (ie. use of ethtool). + */ + cancel_delayed_work(&port->stats_work); + queue_delayed_work(port->priv->stats_queue, &port->stats_work, + MVPP2_MIB_COUNTERS_STATS_DELAY); + + mutex_unlock(&port->gather_stats_lock); +} + +static void mvpp2_ethtool_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mvpp2_port *port = netdev_priv(dev); + + /* Update statistics for the given port, then take the lock to avoid + * concurrent accesses on the ethtool_stats structure during its copy. + */ + mvpp2_gather_hw_statistics(&port->stats_work.work); + + mutex_lock(&port->gather_stats_lock); + memcpy(data, port->ethtool_stats, + sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs)); + mutex_unlock(&port->gather_stats_lock); +} + +static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset) +{ + if (sset == ETH_SS_STATS) + return ARRAY_SIZE(mvpp2_ethtool_regs); + + return -EOPNOTSUPP; +} + static void mvpp2_port_reset(struct mvpp2_port *port) { u32 val; + unsigned int i; + + /* Read the GOP statistics to reset the hardware counters */ + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) + mvpp2_read_count(port, &mvpp2_ethtool_regs[i]); val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) & ~MVPP2_GMAC_PORT_RESET_MASK; @@ -6769,6 +6938,9 @@ static int mvpp2_irqs_init(struct mvpp2_port *port) for (i = 0; i < port->nqvecs; i++) { struct mvpp2_queue_vector *qv = port->qvecs + i; + if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) + irq_set_status_flags(qv->irq, IRQ_NO_BALANCING); + err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv); if (err) goto err; @@ -6798,6 +6970,7 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port) struct mvpp2_queue_vector *qv = port->qvecs + i; irq_set_affinity_hint(qv->irq, NULL); + irq_clear_status_flags(qv->irq, IRQ_NO_BALANCING); free_irq(qv->irq, qv); } } @@ -6912,6 +7085,10 @@ static int mvpp2_open(struct net_device *dev) if (priv->hw_version == MVPP22) mvpp22_init_rss(port); + /* Start hardware statistics gathering */ + queue_delayed_work(priv->stats_queue, &port->stats_work, + MVPP2_MIB_COUNTERS_STATS_DELAY); + return 0; err_free_link_irq: @@ -6956,6 +7133,8 @@ static int mvpp2_stop(struct net_device *dev) mvpp2_cleanup_rxqs(port); mvpp2_cleanup_txqs(port); + cancel_delayed_work_sync(&port->stats_work); + return 0; } @@ -7267,6 +7446,9 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = { .get_drvinfo = mvpp2_ethtool_get_drvinfo, .get_ringparam = mvpp2_ethtool_get_ringparam, .set_ringparam = mvpp2_ethtool_set_ringparam, + .get_strings = mvpp2_ethtool_get_strings, + .get_ethtool_stats = mvpp2_ethtool_get_stats, + .get_sset_count = mvpp2_ethtool_get_sset_count, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, }; @@ -7670,6 +7852,10 @@ static int mvpp2_port_probe(struct platform_device *pdev, err = PTR_ERR(port->base); goto err_free_irq; } + + port->stats_base = port->priv->lms_base + + MVPP21_MIB_COUNTERS_OFFSET + + port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ; } else { if (of_property_read_u32(port_node, "gop-port-id", &port->gop_id)) { @@ -7679,15 +7865,29 @@ static int mvpp2_port_probe(struct platform_device *pdev, } port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id); + port->stats_base = port->priv->iface_base + + MVPP22_MIB_COUNTERS_OFFSET + + port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ; } - /* Alloc per-cpu stats */ + /* Alloc per-cpu and ethtool stats */ port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats); if (!port->stats) { err = -ENOMEM; goto err_free_irq; } + port->ethtool_stats = devm_kcalloc(&pdev->dev, + ARRAY_SIZE(mvpp2_ethtool_regs), + sizeof(u64), GFP_KERNEL); + if (!port->ethtool_stats) { + err = -ENOMEM; + goto err_free_stats; + } + + mutex_init(&port->gather_stats_lock); + INIT_DELAYED_WORK(&port->stats_work, mvpp2_gather_hw_statistics); + mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from); port->tx_ring_size = MVPP2_MAX_TXD; @@ -8010,7 +8210,7 @@ static int mvpp2_probe(struct platform_device *pdev) struct mvpp2 *priv; struct resource *res; void __iomem *base; - int port_count, i; + int i; int err; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); @@ -8125,14 +8325,14 @@ static int mvpp2_probe(struct platform_device *pdev) goto err_mg_clk; } - port_count = of_get_available_child_count(dn); - if (port_count == 0) { + priv->port_count = of_get_available_child_count(dn); + if (priv->port_count == 0) { dev_err(&pdev->dev, "no ports enabled\n"); err = -ENODEV; goto err_mg_clk; } - priv->port_list = devm_kcalloc(&pdev->dev, port_count, + priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count, sizeof(*priv->port_list), GFP_KERNEL); if (!priv->port_list) { @@ -8149,6 +8349,21 @@ static int mvpp2_probe(struct platform_device *pdev) i++; } + /* Statistics must be gathered regularly because some of them (like + * packets counters) are 32-bit registers and could overflow quite + * quickly. For instance, a 10Gb link used at full bandwidth with the + * smallest packets (64B) will overflow a 32-bit counter in less than + * 30 seconds. Then, use a workqueue to fill 64-bit counters. + */ + snprintf(priv->queue_name, sizeof(priv->queue_name), + "stats-wq-%s%s", netdev_name(priv->port_list[0]->dev), + priv->port_count > 1 ? "+" : ""); + priv->stats_queue = create_singlethread_workqueue(priv->queue_name); + if (!priv->stats_queue) { + err = -ENOMEM; + goto err_mg_clk; + } + platform_set_drvdata(pdev, priv); return 0; @@ -8170,9 +8385,14 @@ static int mvpp2_remove(struct platform_device *pdev) struct device_node *port_node; int i = 0; + flush_workqueue(priv->stats_queue); + destroy_workqueue(priv->stats_queue); + for_each_available_child_of_node(dn, port_node) { - if (priv->port_list[i]) + if (priv->port_list[i]) { + mutex_destroy(&priv->port_list[i]->gather_stats_lock); mvpp2_port_remove(priv->port_list[i]); + } i++; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 736a6ccaf05e..99051a294fa6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -135,7 +135,7 @@ static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 751f62cae969..2a0739d07a08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -655,12 +655,14 @@ struct mlx5e_tc_table { struct mlx5e_vlan_table { struct mlx5e_flow_table ft; - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - struct mlx5_flow_handle *active_vlans_rule[VLAN_N_VID]; + DECLARE_BITMAP(active_cvlans, VLAN_N_VID); + DECLARE_BITMAP(active_svlans, VLAN_N_VID); + struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID]; struct mlx5_flow_handle *untagged_rule; struct mlx5_flow_handle *any_cvlan_rule; struct mlx5_flow_handle *any_svlan_rule; - bool filter_disabled; + bool cvlan_filter_disabled; }; struct mlx5e_l2_table { @@ -887,8 +889,8 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); -void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); -void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); +void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv); +void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); void mlx5e_timestamp_set(struct mlx5e_priv *priv); struct mlx5e_redirect_rqt_param { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index f0d11ad05ed2..def513484845 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -118,7 +118,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) int i; list_size = 0; - for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) + for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) list_size++; max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); @@ -135,7 +135,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) return -ENOMEM; i = 0; - for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) { + for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) { if (i >= list_size) break; vlans[i++] = vlan; @@ -154,7 +154,8 @@ enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, - MLX5E_VLAN_RULE_TYPE_MATCH_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, }; static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, @@ -174,6 +175,10 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ rule_p = &priv->fs.vlan.untagged_rule; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); @@ -190,8 +195,18 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, outer_headers.svlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); break; - default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ - rule_p = &priv->fs.vlan.active_vlans_rule[vid]; + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + rule_p = &priv->fs.vlan.active_svlans_rule[vid]; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */ + rule_p = &priv->fs.vlan.active_cvlans_rule[vid]; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); @@ -223,7 +238,7 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, if (!spec) return -ENOMEM; - if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID) + if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID) mlx5e_vport_context_update_vlans(priv); err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec); @@ -255,11 +270,17 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, priv->fs.vlan.any_svlan_rule = NULL; } break; - case MLX5E_VLAN_RULE_TYPE_MATCH_VID: + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + if (priv->fs.vlan.active_svlans_rule[vid]) { + mlx5_del_flow_rules(priv->fs.vlan.active_svlans_rule[vid]); + priv->fs.vlan.active_svlans_rule[vid] = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID: mlx5e_vport_context_update_vlans(priv); - if (priv->fs.vlan.active_vlans_rule[vid]) { - mlx5_del_flow_rules(priv->fs.vlan.active_vlans_rule[vid]); - priv->fs.vlan.active_vlans_rule[vid] = NULL; + if (priv->fs.vlan.active_cvlans_rule[vid]) { + mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]); + priv->fs.vlan.active_cvlans_rule[vid] = NULL; } mlx5e_vport_context_update_vlans(priv); break; @@ -283,46 +304,83 @@ static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv) return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); } -void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) +void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv) { - if (!priv->fs.vlan.filter_disabled) + if (!priv->fs.vlan.cvlan_filter_disabled) return; - priv->fs.vlan.filter_disabled = false; + priv->fs.vlan.cvlan_filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } -void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) +void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv) { - if (priv->fs.vlan.filter_disabled) + if (priv->fs.vlan.cvlan_filter_disabled) return; - priv->fs.vlan.filter_disabled = true; + priv->fs.vlan.cvlan_filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } -int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, - u16 vid) +static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid) { - struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + set_bit(vid, priv->fs.vlan.active_cvlans); - set_bit(vid, priv->fs.vlan.active_vlans); + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + if (err) + clear_bit(vid, priv->fs.vlan.active_cvlans); - return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + return err; } -int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, - u16 vid) +static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid) +{ + struct net_device *netdev = priv->netdev; + int err; + + set_bit(vid, priv->fs.vlan.active_svlans); + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + if (err) { + clear_bit(vid, priv->fs.vlan.active_svlans); + return err; + } + + /* Need to fix some features.. */ + netdev_update_features(netdev); + return err; +} + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct mlx5e_priv *priv = netdev_priv(dev); - clear_bit(vid, priv->fs.vlan.active_vlans); + if (be16_to_cpu(proto) == ETH_P_8021Q) + return mlx5e_vlan_rx_add_cvid(priv, vid); + else if (be16_to_cpu(proto) == ETH_P_8021AD) + return mlx5e_vlan_rx_add_svid(priv, vid); + + return -EOPNOTSUPP; +} - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (be16_to_cpu(proto) == ETH_P_8021Q) { + clear_bit(vid, priv->fs.vlan.active_cvlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + } else if (be16_to_cpu(proto) == ETH_P_8021AD) { + clear_bit(vid, priv->fs.vlan.active_svlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + netdev_update_features(dev); + } return 0; } @@ -333,11 +391,14 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } - if (priv->fs.vlan.filter_disabled && + for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + + if (priv->fs.vlan.cvlan_filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) mlx5e_add_any_vid_rules(priv); } @@ -348,11 +409,14 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } - if (priv->fs.vlan.filter_disabled && + for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + + if (priv->fs.vlan.cvlan_filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) mlx5e_del_any_vid_rules(priv); } @@ -365,21 +429,24 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, struct mlx5e_l2_hash_node *hn) { u8 action = hn->action; + u8 mac_addr[ETH_ALEN]; int l2_err = 0; + ether_addr_copy(mac_addr, hn->ai.addr); + switch (action) { case MLX5E_ACTION_ADD: mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); - if (!is_multicast_ether_addr(hn->ai.addr)) { - l2_err = mlx5_mpfs_add_mac(priv->mdev, hn->ai.addr); + if (!is_multicast_ether_addr(mac_addr)) { + l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr); hn->mpfs = !l2_err; } hn->action = MLX5E_ACTION_NONE; break; case MLX5E_ACTION_DEL: - if (!is_multicast_ether_addr(hn->ai.addr) && hn->mpfs) - l2_err = mlx5_mpfs_del_mac(priv->mdev, hn->ai.addr); + if (!is_multicast_ether_addr(mac_addr) && hn->mpfs) + l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr); mlx5e_del_l2_flow_rule(priv, &hn->ai); mlx5e_del_l2_from_hash(hn); break; @@ -387,7 +454,7 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, if (l2_err) netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n", - action == MLX5E_ACTION_ADD ? "add" : "del", hn->ai.addr, l2_err); + action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err); } static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) @@ -545,8 +612,11 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; if (enable_promisc) { + if (!priv->channels.params.vlan_strip_disable) + netdev_warn_once(ndev, + "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n"); mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); - if (!priv->fs.vlan.filter_disabled) + if (!priv->fs.vlan.cvlan_filter_disabled) mlx5e_add_any_vid_rules(priv); } if (enable_allmulti) @@ -561,7 +631,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) if (disable_allmulti) mlx5e_del_l2_flow_rule(priv, &ea->allmulti); if (disable_promisc) { - if (!priv->fs.vlan.filter_disabled) + if (!priv->fs.vlan.cvlan_filter_disabled) mlx5e_del_any_vid_rules(priv); mlx5e_del_l2_flow_rule(priv, &ea->promisc); } @@ -1265,13 +1335,15 @@ err_destroy_flow_table: return err; } -#define MLX5E_NUM_VLAN_GROUPS 3 +#define MLX5E_NUM_VLAN_GROUPS 4 #define MLX5E_VLAN_GROUP0_SIZE BIT(12) -#define MLX5E_VLAN_GROUP1_SIZE BIT(1) -#define MLX5E_VLAN_GROUP2_SIZE BIT(0) +#define MLX5E_VLAN_GROUP1_SIZE BIT(12) +#define MLX5E_VLAN_GROUP2_SIZE BIT(1) +#define MLX5E_VLAN_GROUP3_SIZE BIT(0) #define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ MLX5E_VLAN_GROUP1_SIZE +\ - MLX5E_VLAN_GROUP2_SIZE) + MLX5E_VLAN_GROUP2_SIZE +\ + MLX5E_VLAN_GROUP3_SIZE) static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, int inlen) @@ -1294,7 +1366,8 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); @@ -1305,7 +1378,7 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); @@ -1314,6 +1387,17 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in goto err_destroy_groups; ft->num_groups++; + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + return 0; err_destroy_groups: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f877f2f5f2a5..d2b057a3e512 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -196,6 +196,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_bytes += rq_stats->bytes; s->rx_lro_packets += rq_stats->lro_packets; s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary += rq_stats->csum_unnecessary; @@ -224,6 +225,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_tso_bytes += sq_stats->tso_bytes; s->tx_tso_inner_packets += sq_stats->tso_inner_packets; s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_added_vlan_packets += sq_stats->added_vlan_packets; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; @@ -3146,7 +3148,7 @@ int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, case TC_SETUP_BLOCK: return mlx5e_setup_tc_block(dev, type_data); #endif - case TC_SETUP_MQPRIO: + case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(dev, type_data); default: return -EOPNOTSUPP; @@ -3260,14 +3262,14 @@ out: return err; } -static int set_feature_vlan_filter(struct net_device *netdev, bool enable) +static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); if (enable) - mlx5e_enable_vlan_filter(priv); + mlx5e_enable_cvlan_filter(priv); else - mlx5e_disable_vlan_filter(priv); + mlx5e_disable_cvlan_filter(priv); return 0; } @@ -3378,7 +3380,7 @@ static int mlx5e_set_features(struct net_device *netdev, set_feature_lro); err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_FILTER, - set_feature_vlan_filter); + set_feature_cvlan_filter); err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_TC, set_feature_tc_num_filters); err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, @@ -3395,6 +3397,25 @@ static int mlx5e_set_features(struct net_device *netdev, return err ? -EINVAL : 0; } +static netdev_features_t mlx5e_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mutex_lock(&priv->state_lock); + if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) { + /* HW strips the outer C-tag header, this is a problem + * for S-tag traffic. + */ + features &= ~NETIF_F_HW_VLAN_CTAG_RX; + if (!priv->channels.params.vlan_strip_disable) + netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); + } + mutex_unlock(&priv->state_lock); + + return features; +} + static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3872,6 +3893,7 @@ static const struct net_device_ops mlx5e_netdev_ops = { .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, .ndo_set_features = mlx5e_set_features, + .ndo_fix_features = mlx5e_fix_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, @@ -4174,6 +4196,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { netdev->hw_features |= NETIF_F_GSO_PARTIAL; @@ -4231,6 +4254,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) } netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; netdev->priv_flags |= IFF_UNICAST_FLT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 6d7df4750e0f..a9d08f292fbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -563,7 +563,6 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); - skb->mac_len = ETH_HLEN; proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); tot_len = cqe_bcnt - network_depth; @@ -610,10 +609,11 @@ static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht); } -static inline bool is_first_ethertype_ip(struct sk_buff *skb) +static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth) { __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto; + ethertype = __vlan_get_protocol(skb, ethertype, network_depth); return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); } @@ -623,6 +623,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, struct sk_buff *skb, bool lro) { + int network_depth = 0; + if (unlikely(!(netdev->features & NETIF_F_RXCSUM))) goto csum_none; @@ -632,9 +634,17 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (is_first_ethertype_ip(skb)) { + if (is_last_ethertype_ip(skb, &network_depth)) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + if (network_depth > ETH_HLEN) + /* CQE csum is calculated from the IP header and does + * not cover VLAN headers (if present). This will add + * the checksum manually. + */ + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); rq->stats.csum_complete++; return; } @@ -664,6 +674,7 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, struct net_device *netdev = rq->netdev; int lro_num_seg; + skb->mac_len = ETH_HLEN; lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; if (lro_num_seg > 1) { mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); @@ -685,9 +696,11 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (likely(netdev->features & NETIF_F_RXHASH)) mlx5e_skb_set_hash(cqe, skb); - if (cqe_has_vlan(cqe)) + if (cqe_has_vlan(cqe)) { __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->vlan_info)); + rq->stats.removed_vlan_packets++; + } skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 8bc30484ecc1..b74ddc7984bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -42,8 +42,10 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, @@ -733,6 +735,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, @@ -755,6 +758,7 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index d094663edd9b..d679e21f686e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -59,8 +59,10 @@ struct mlx5e_sw_stats { u64 tx_tso_bytes; u64 tx_tso_inner_packets; u64 tx_tso_inner_bytes; + u64 tx_added_vlan_packets; u64 rx_lro_packets; u64 rx_lro_bytes; + u64 rx_removed_vlan_packets; u64 rx_csum_unnecessary; u64 rx_csum_none; u64 rx_csum_complete; @@ -153,6 +155,7 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 removed_vlan_packets; u64 xdp_drop; u64 xdp_tx; u64 xdp_tx_full; @@ -180,6 +183,7 @@ struct mlx5e_sq_stats { u64 tso_inner_bytes; u64 csum_partial; u64 csum_partial_inner; + u64 added_vlan_packets; u64 nop; /* less likely accessed in data path */ u64 csum_none; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index de651de35c9b..569b42a01026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -361,6 +361,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, if (skb_vlan_tag_present(skb)) { mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); ihs += VLAN_HLEN; + sq->stats.added_vlan_packets++; } else { memcpy(eseg->inline_hdr.start, skb_data, ihs); mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); @@ -369,7 +370,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); } else if (skb_vlan_tag_present(skb)) { eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) + eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); + sq->stats.added_vlan_packets++; } headlen = skb_len - skb->data_len; diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 80f4efd3e82f..9463c3fa254f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -19,7 +19,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_acl.o spectrum_flower.o \ spectrum_cnt.o spectrum_fid.o \ spectrum_ipip.o spectrum_acl_flex_actions.o \ - spectrum_mr.o spectrum_mr_tcam.o + spectrum_mr.o spectrum_mr_tcam.o \ + spectrum_qdisc.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5066553dd0b6..6c4e08b8058a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1758,6 +1758,191 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port, } } +/* CWTP - Congetion WRED ECN TClass Profile + * ---------------------------------------- + * Configures the profiles for queues of egress port and traffic class + */ +#define MLXSW_REG_CWTP_ID 0x2802 +#define MLXSW_REG_CWTP_BASE_LEN 0x28 +#define MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN 0x08 +#define MLXSW_REG_CWTP_LEN 0x40 + +MLXSW_REG_DEFINE(cwtp, MLXSW_REG_CWTP_ID, MLXSW_REG_CWTP_LEN); + +/* reg_cwtp_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, cwtp, local_port, 0, 16, 8); + +/* reg_cwtp_traffic_class + * Traffic Class to configure + * Access: Index + */ +MLXSW_ITEM32(reg, cwtp, traffic_class, 32, 0, 8); + +/* reg_cwtp_profile_min + * Minimum Average Queue Size of the profile in cells. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_min, MLXSW_REG_CWTP_BASE_LEN, + 0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 0, false); + +/* reg_cwtp_profile_percent + * Percentage of WRED and ECN marking for maximum Average Queue size + * Range is 0 to 100, units of integer percentage + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_percent, MLXSW_REG_CWTP_BASE_LEN, + 24, 7, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false); + +/* reg_cwtp_profile_max + * Maximum Average Queue size of the profile in cells + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_max, MLXSW_REG_CWTP_BASE_LEN, + 0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false); + +#define MLXSW_REG_CWTP_MIN_VALUE 64 +#define MLXSW_REG_CWTP_MAX_PROFILE 2 +#define MLXSW_REG_CWTP_DEFAULT_PROFILE 1 + +static inline void mlxsw_reg_cwtp_pack(char *payload, u8 local_port, + u8 traffic_class) +{ + int i; + + MLXSW_REG_ZERO(cwtp, payload); + mlxsw_reg_cwtp_local_port_set(payload, local_port); + mlxsw_reg_cwtp_traffic_class_set(payload, traffic_class); + + for (i = 0; i <= MLXSW_REG_CWTP_MAX_PROFILE; i++) { + mlxsw_reg_cwtp_profile_min_set(payload, i, + MLXSW_REG_CWTP_MIN_VALUE); + mlxsw_reg_cwtp_profile_max_set(payload, i, + MLXSW_REG_CWTP_MIN_VALUE); + } +} + +#define MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile) (profile - 1) + +static inline void +mlxsw_reg_cwtp_profile_pack(char *payload, u8 profile, u32 min, u32 max, + u32 probability) +{ + u8 index = MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile); + + mlxsw_reg_cwtp_profile_min_set(payload, index, min); + mlxsw_reg_cwtp_profile_max_set(payload, index, max); + mlxsw_reg_cwtp_profile_percent_set(payload, index, probability); +} + +/* CWTPM - Congestion WRED ECN TClass and Pool Mapping + * --------------------------------------------------- + * The CWTPM register maps each egress port and traffic class to profile num. + */ +#define MLXSW_REG_CWTPM_ID 0x2803 +#define MLXSW_REG_CWTPM_LEN 0x44 + +MLXSW_REG_DEFINE(cwtpm, MLXSW_REG_CWTPM_ID, MLXSW_REG_CWTPM_LEN); + +/* reg_cwtpm_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, cwtpm, local_port, 0, 16, 8); + +/* reg_cwtpm_traffic_class + * Traffic Class to configure + * Access: Index + */ +MLXSW_ITEM32(reg, cwtpm, traffic_class, 32, 0, 8); + +/* reg_cwtpm_ew + * Control enablement of WRED for traffic class: + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ew, 36, 1, 1); + +/* reg_cwtpm_ee + * Control enablement of ECN for traffic class: + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ee, 36, 0, 1); + +/* reg_cwtpm_tcp_g + * TCP Green Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_g, 52, 0, 2); + +/* reg_cwtpm_tcp_y + * TCP Yellow Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_y, 56, 16, 2); + +/* reg_cwtpm_tcp_r + * TCP Red Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_r, 56, 0, 2); + +/* reg_cwtpm_ntcp_g + * Non-TCP Green Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_g, 60, 0, 2); + +/* reg_cwtpm_ntcp_y + * Non-TCP Yellow Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_y, 64, 16, 2); + +/* reg_cwtpm_ntcp_r + * Non-TCP Red Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_r, 64, 0, 2); + +#define MLXSW_REG_CWTPM_RESET_PROFILE 0 + +static inline void mlxsw_reg_cwtpm_pack(char *payload, u8 local_port, + u8 traffic_class, u8 profile, + bool wred, bool ecn) +{ + MLXSW_REG_ZERO(cwtpm, payload); + mlxsw_reg_cwtpm_local_port_set(payload, local_port); + mlxsw_reg_cwtpm_traffic_class_set(payload, traffic_class); + mlxsw_reg_cwtpm_ew_set(payload, wred); + mlxsw_reg_cwtpm_ee_set(payload, ecn); + mlxsw_reg_cwtpm_tcp_g_set(payload, profile); + mlxsw_reg_cwtpm_tcp_y_set(payload, profile); + mlxsw_reg_cwtpm_tcp_r_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_g_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_y_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_r_set(payload, profile); +} + /* PPBT - Policy-Engine Port Binding Table * --------------------------------------- * This register is used for configuration of the Port Binding Table. @@ -3156,8 +3341,10 @@ MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2); enum mlxsw_reg_ppcnt_grp { MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0, + MLXSW_REG_PPCNT_EXT_CNT = 0x5, MLXSW_REG_PPCNT_PRIO_CNT = 0x10, MLXSW_REG_PPCNT_TC_CNT = 0x11, + MLXSW_REG_PPCNT_TC_CONG_TC = 0x13, }; /* reg_ppcnt_grp @@ -3173,6 +3360,7 @@ enum mlxsw_reg_ppcnt_grp { * 0x10: Per Priority Counters * 0x11: Per Traffic Class Counters * 0x12: Physical Layer Counters + * 0x13: Per Traffic Class Congestion Counters * Access: Index */ MLXSW_ITEM32(reg, ppcnt, grp, 0x00, 0, 6); @@ -3311,6 +3499,14 @@ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received, MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted, MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64); +/* Ethernet Extended Counter Group Counters */ + +/* reg_ppcnt_ecn_marked + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ecn_marked, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + /* Ethernet Per Priority Group Counters */ /* reg_ppcnt_rx_octets @@ -3386,6 +3582,14 @@ MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue, MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); +/* Ethernet Per Traffic Class Congestion Group Counters */ + +/* reg_ppcnt_wred_discard + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, wred_discard, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); + static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, enum mlxsw_reg_ppcnt_grp grp, u8 prio_tc) @@ -7405,6 +7609,8 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(svpe), MLXSW_REG(sfmr), MLXSW_REG(spvmlr), + MLXSW_REG(cwtp), + MLXSW_REG(cwtpm), MLXSW_REG(ppbt), MLXSW_REG(pacl), MLXSW_REG(pagt), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 63e50877796b..b2cd1ebf4e36 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1324,6 +1324,38 @@ out: return err; } +static void +mlxsw_sp_port_get_hw_xstats(struct net_device *dev, + struct mlxsw_sp_port_xstats *xstats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err, i; + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_EXT_CNT, 0, + ppcnt_pl); + if (!err) + xstats->ecn = mlxsw_reg_ppcnt_ecn_marked_get(ppcnt_pl); + + for (i = 0; i < TC_MAX_QUEUE; i++) { + err = mlxsw_sp_port_get_stats_raw(dev, + MLXSW_REG_PPCNT_TC_CONG_TC, + i, ppcnt_pl); + if (!err) + xstats->wred_drop[i] = + mlxsw_reg_ppcnt_wred_discard_get(ppcnt_pl); + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_TC_CNT, + i, ppcnt_pl); + if (err) + continue; + + xstats->backlog[i] = + mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl); + xstats->tail_drop[i] = + mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl); + } +} + static void update_stats_cache(struct work_struct *work) { struct mlxsw_sp_port *mlxsw_sp_port = @@ -1335,6 +1367,8 @@ static void update_stats_cache(struct work_struct *work) mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, &mlxsw_sp_port->periodic_hw_stats.stats); + mlxsw_sp_port_get_hw_xstats(mlxsw_sp_port->dev, + &mlxsw_sp_port->periodic_hw_stats.xstats); out: mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, @@ -1797,6 +1831,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_RED: + return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } @@ -3007,6 +3043,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, if (IS_ERR(mlxsw_sp_port_vlan)) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n", mlxsw_sp_port->local_port); + err = PTR_ERR(mlxsw_sp_port_vlan); goto err_port_vlan_get; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 47dd7e06fd29..58cf222fb985 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -48,6 +48,7 @@ #include <linux/notifier.h> #include <net/psample.h> #include <net/pkt_cls.h> +#include <net/red.h> #include "port.h" #include "core.h" @@ -203,6 +204,37 @@ struct mlxsw_sp_port_vlan { struct list_head bridge_vlan_node; }; +enum mlxsw_sp_qdisc_type { + MLXSW_SP_QDISC_NO_QDISC, + MLXSW_SP_QDISC_RED, +}; + +struct mlxsw_sp_qdisc { + u32 handle; + enum mlxsw_sp_qdisc_type type; + struct red_stats xstats_base; + union { + struct { + u64 tail_drop_base; + u64 ecn_base; + u64 wred_drop_base; + } red; + } xstats; + + u64 tx_bytes; + u64 tx_packets; + u64 drops; + u64 overlimits; +}; + +/* No need an internal lock; At worse - miss a single periodic iteration */ +struct mlxsw_sp_port_xstats { + u64 ecn; + u64 wred_drop[TC_MAX_QUEUE]; + u64 tail_drop[TC_MAX_QUEUE]; + u64 backlog[TC_MAX_QUEUE]; +}; + struct mlxsw_sp_port { struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; @@ -232,10 +264,12 @@ struct mlxsw_sp_port { struct { #define MLXSW_HW_STATS_UPDATE_TIME HZ struct rtnl_link_stats64 stats; + struct mlxsw_sp_port_xstats xstats; struct delayed_work update_dw; } periodic_hw_stats; struct mlxsw_sp_port_sample *sample; struct list_head vlans_list; + struct mlxsw_sp_qdisc root_qdisc; }; static inline bool @@ -546,6 +580,10 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct tc_cls_flower_offload *f); +/* spectrum_qdisc.c */ +int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p); + /* spectrum_fid.c */ int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type, u8 local_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c new file mode 100644 index 000000000000..c33beac5def0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -0,0 +1,276 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Nogah Frankel <nogahf@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <net/pkt_cls.h> +#include <net/red.h> + +#include "spectrum.h" +#include "reg.h" + +static int +mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, + int tclass_num, u32 min, u32 max, + u32 probability, bool is_ecn) +{ + char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)]; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int err; + + mlxsw_reg_cwtp_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num); + mlxsw_reg_cwtp_profile_pack(cwtp_cmd, MLXSW_REG_CWTP_DEFAULT_PROFILE, + roundup(min, MLXSW_REG_CWTP_MIN_VALUE), + roundup(max, MLXSW_REG_CWTP_MIN_VALUE), + probability); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtp), cwtp_cmd); + if (err) + return err; + + mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num, + MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd); +} + +static int +mlxsw_sp_tclass_congestion_disable(struct mlxsw_sp_port *mlxsw_sp_port, + int tclass_num) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char cwtpm_cmd[MLXSW_REG_CWTPM_LEN]; + + mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num, + MLXSW_REG_CWTPM_RESET_PROFILE, false, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); +} + +static void +mlxsw_sp_setup_tc_qdisc_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num) +{ + struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base; + struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + + mlxsw_sp_qdisc->tx_packets = stats->tx_packets; + mlxsw_sp_qdisc->tx_bytes = stats->tx_bytes; + + switch (mlxsw_sp_qdisc->type) { + case MLXSW_SP_QDISC_RED: + xstats_base->prob_mark = xstats->ecn; + xstats_base->prob_drop = xstats->wred_drop[tclass_num]; + xstats_base->pdrop = xstats->tail_drop[tclass_num]; + + mlxsw_sp_qdisc->overlimits = xstats_base->prob_drop + + xstats_base->prob_mark; + mlxsw_sp_qdisc->drops = xstats_base->prob_drop + + xstats_base->pdrop; + break; + default: + break; + } +} + +static int +mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num) +{ + int err; + + if (mlxsw_sp_qdisc->handle != handle) + return 0; + + err = mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, tclass_num); + mlxsw_sp_qdisc->handle = TC_H_UNSPEC; + mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_NO_QDISC; + + return err; +} + +static int +mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num, + struct tc_red_qopt_offload_params *p) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u32 min, max; + u64 prob; + int err = 0; + + if (p->min > p->max) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: min %u is bigger then max %u\n", p->min, + p->max); + goto err_bad_param; + } + if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: max value %u is too big\n", p->max); + goto err_bad_param; + } + if (p->min == 0 || p->max == 0) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: 0 value is illegal for min and max\n"); + goto err_bad_param; + } + + /* calculate probability in percentage */ + prob = p->probability; + prob *= 100; + prob = DIV_ROUND_UP(prob, 1 << 16); + prob = DIV_ROUND_UP(prob, 1 << 16); + min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min); + max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max); + err = mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, min, + max, prob, p->is_ecn); + if (err) + goto err_config; + + mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_RED; + if (mlxsw_sp_qdisc->handle != handle) + mlxsw_sp_setup_tc_qdisc_clean_stats(mlxsw_sp_port, + mlxsw_sp_qdisc, + tclass_num); + + mlxsw_sp_qdisc->handle = handle; + return 0; + +err_bad_param: + err = -EINVAL; +err_config: + mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, mlxsw_sp_qdisc->handle, + mlxsw_sp_qdisc, tclass_num); + return err; +} + +static int +mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num, struct red_stats *res) +{ + struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base; + struct mlxsw_sp_port_xstats *xstats; + + if (mlxsw_sp_qdisc->handle != handle || + mlxsw_sp_qdisc->type != MLXSW_SP_QDISC_RED) + return -EOPNOTSUPP; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + + res->prob_drop = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; + res->prob_mark = xstats->ecn - xstats_base->prob_mark; + res->pdrop = xstats->tail_drop[tclass_num] - xstats_base->pdrop; + return 0; +} + +static int +mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num, + struct tc_red_qopt_offload_stats *res) +{ + u64 tx_bytes, tx_packets, overlimits, drops; + struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; + + if (mlxsw_sp_qdisc->handle != handle || + mlxsw_sp_qdisc->type != MLXSW_SP_QDISC_RED) + return -EOPNOTSUPP; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + + tx_bytes = stats->tx_bytes - mlxsw_sp_qdisc->tx_bytes; + tx_packets = stats->tx_packets - mlxsw_sp_qdisc->tx_packets; + overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - + mlxsw_sp_qdisc->overlimits; + drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] - + mlxsw_sp_qdisc->drops; + + _bstats_update(res->bstats, tx_bytes, tx_packets); + res->qstats->overlimits += overlimits; + res->qstats->drops += drops; + res->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + xstats->backlog[tclass_num]); + + mlxsw_sp_qdisc->drops += drops; + mlxsw_sp_qdisc->overlimits += overlimits; + mlxsw_sp_qdisc->tx_bytes += tx_bytes; + mlxsw_sp_qdisc->tx_packets += tx_packets; + return 0; +} + +#define MLXSW_SP_PORT_DEFAULT_TCLASS 0 + +int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + int tclass_num; + + if (p->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + mlxsw_sp_qdisc = &mlxsw_sp_port->root_qdisc; + tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; + + switch (p->command) { + case TC_RED_REPLACE: + return mlxsw_sp_qdisc_red_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num, + &p->set); + case TC_RED_DESTROY: + return mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num); + case TC_RED_XSTATS: + return mlxsw_sp_qdisc_get_red_xstats(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num, + p->xstats); + case TC_RED_STATS: + return mlxsw_sp_qdisc_get_red_stats(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num, + &p->stats); + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 5d5b9855e24e..426c9a946eb4 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -1122,7 +1122,6 @@ static void vxge_set_multicast(struct net_device *dev) struct netdev_hw_addr *ha; struct vxgedev *vdev; int i, mcast_cnt = 0; - struct __vxge_hw_device *hldev; struct vxge_vpath *vpath; enum vxge_hw_status status = VXGE_HW_OK; struct macInfo mac_info; @@ -1136,7 +1135,6 @@ static void vxge_set_multicast(struct net_device *dev) "%s:%d", __func__, __LINE__); vdev = netdev_priv(dev); - hldev = vdev->devh; if (unlikely(!is_vxge_card_up(vdev))) return; @@ -1283,7 +1281,6 @@ static int vxge_set_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; struct vxgedev *vdev; - struct __vxge_hw_device *hldev; enum vxge_hw_status status = VXGE_HW_OK; struct macInfo mac_info_new, mac_info_old; int vpath_idx = 0; @@ -1291,7 +1288,6 @@ static int vxge_set_mac_addr(struct net_device *dev, void *p) vxge_debug_entryexit(VXGE_TRACE, "%s:%d", __func__, __LINE__); vdev = netdev_priv(dev); - hldev = vdev->devh; if (!is_valid_ether_addr(addr->sa_data)) return -EINVAL; @@ -2177,7 +2173,6 @@ static void adaptive_coalesce_rx_interrupts(struct vxge_ring *ring) */ static irqreturn_t vxge_isr_napi(int irq, void *dev_id) { - struct net_device *dev; struct __vxge_hw_device *hldev; u64 reason; enum vxge_hw_status status; @@ -2185,7 +2180,6 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id) vxge_debug_intr(VXGE_TRACE, "%s:%d", __func__, __LINE__); - dev = vdev->ndev; hldev = pci_get_drvdata(vdev->pdev); if (pci_channel_offline(vdev->pdev)) @@ -2713,14 +2707,13 @@ static int vxge_open(struct net_device *dev) struct vxge_vpath *vpath; int ret = 0; int i; - u64 val64, function_mode; + u64 val64; vxge_debug_entryexit(VXGE_TRACE, "%s: %s:%d", dev->name, __func__, __LINE__); vdev = netdev_priv(dev); hldev = pci_get_drvdata(vdev->pdev); - function_mode = vdev->config.device_hw_info.function_mode; /* make sure you have link off by default every time Nic is * initialized */ diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index de64cedf8b26..c1c595f8bb87 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -58,7 +58,6 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, const struct tc_action *action) { size_t act_size = sizeof(struct nfp_fl_push_vlan); - struct tcf_vlan *vlan = to_vlan(action); u16 tmp_push_vlan_tci; push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN; @@ -67,8 +66,8 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, push_vlan->vlan_tpid = tcf_vlan_push_proto(action); tmp_push_vlan_tci = - FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, vlan->tcfv_push_prio) | - FIELD_PREP(NFP_FL_PUSH_VLAN_VID, vlan->tcfv_push_vid) | + FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, tcf_vlan_push_prio(action)) | + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)) | NFP_FL_PUSH_VLAN_CFI; push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c index 384c8bc874f3..4be65d6761b3 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c @@ -213,7 +213,6 @@ static int ql_idc_req_aen(struct ql_adapter *qdev) /* Get the status data and start up a thread to * handle the request. */ - mbcp = &qdev->idc_mbc; mbcp->out_count = 4; status = ql_get_mb_sts(qdev, mbcp); if (status) { diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 19a91881fbf9..46d60013564c 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -5734,7 +5734,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) * MCFW do not support VFs. */ rc = efx_ef10_vport_set_mac_address(efx); - } else { + } else if (rc) { efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC, sizeof(inbuf), NULL, 0, rc); } diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c index 6a75f4140a4b..1b978d69e702 100644 --- a/drivers/net/ethernet/sfc/falcon/tx.c +++ b/drivers/net/ethernet/sfc/falcon/tx.c @@ -435,7 +435,7 @@ int ef4_setup_tc(struct net_device *net_dev, enum tc_setup_type type, unsigned tc, num_tc; int rc; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; num_tc = mqprio->num_tc; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 60cdb97f58e2..4f54245df0ec 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -648,11 +648,9 @@ static void efx_ptp_send_times(struct efx_nic *efx, struct pps_event_time now; struct timespec64 limit; struct efx_ptp_data *ptp = efx->ptp_data; - struct timespec64 start; int *mc_running = ptp->start.addr; pps_get_ts(&now); - start = now.ts_real; limit = now.ts_real; timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS); diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 32bf1fecf864..ea27b8a7f465 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -663,7 +663,7 @@ int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, unsigned tc, num_tc; int rc; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; num_tc = mqprio->num_tc; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 437d36289786..15e2e3031d36 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1887,7 +1887,7 @@ static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type, /* setup tc must be called under rtnl lock */ ASSERT_RTNL(); - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 5bb6a20072dd..bfc79698b8f4 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -100,12 +100,11 @@ static void free_netvsc_device_rcu(struct netvsc_device *nvdev) call_rcu(&nvdev->rcu, free_netvsc_device); } -static void netvsc_destroy_buf(struct hv_device *device) +static void netvsc_revoke_buf(struct hv_device *device, + struct netvsc_device *net_device) { struct nvsp_message *revoke_packet; struct net_device *ndev = hv_get_drvdata(device); - struct net_device_context *ndc = netdev_priv(ndev); - struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev); int ret; /* @@ -148,28 +147,6 @@ static void netvsc_destroy_buf(struct hv_device *device) net_device->recv_section_cnt = 0; } - /* Teardown the gpadl on the vsp end */ - if (net_device->recv_buf_gpadl_handle) { - ret = vmbus_teardown_gpadl(device->channel, - net_device->recv_buf_gpadl_handle); - - /* If we failed here, we might as well return and have a leak - * rather than continue and a bugchk - */ - if (ret != 0) { - netdev_err(ndev, - "unable to teardown receive buffer's gpadl\n"); - return; - } - net_device->recv_buf_gpadl_handle = 0; - } - - if (net_device->recv_buf) { - /* Free up the receive buffer */ - vfree(net_device->recv_buf); - net_device->recv_buf = NULL; - } - /* Deal with the send buffer we may have setup. * If we got a send section size, it means we received a * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent @@ -210,7 +187,35 @@ static void netvsc_destroy_buf(struct hv_device *device) } net_device->send_section_cnt = 0; } - /* Teardown the gpadl on the vsp end */ +} + +static void netvsc_teardown_gpadl(struct hv_device *device, + struct netvsc_device *net_device) +{ + struct net_device *ndev = hv_get_drvdata(device); + int ret; + + if (net_device->recv_buf_gpadl_handle) { + ret = vmbus_teardown_gpadl(device->channel, + net_device->recv_buf_gpadl_handle); + + /* If we failed here, we might as well return and have a leak + * rather than continue and a bugchk + */ + if (ret != 0) { + netdev_err(ndev, + "unable to teardown receive buffer's gpadl\n"); + return; + } + net_device->recv_buf_gpadl_handle = 0; + } + + if (net_device->recv_buf) { + /* Free up the receive buffer */ + vfree(net_device->recv_buf); + net_device->recv_buf = NULL; + } + if (net_device->send_buf_gpadl_handle) { ret = vmbus_teardown_gpadl(device->channel, net_device->send_buf_gpadl_handle); @@ -420,7 +425,8 @@ static int netvsc_init_buf(struct hv_device *device, goto exit; cleanup: - netvsc_destroy_buf(device); + netvsc_revoke_buf(device, net_device); + netvsc_teardown_gpadl(device, net_device); exit: return ret; @@ -539,11 +545,6 @@ cleanup: return ret; } -static void netvsc_disconnect_vsp(struct hv_device *device) -{ - netvsc_destroy_buf(device); -} - /* * netvsc_device_remove - Callback when the root bus device is removed */ @@ -557,7 +558,7 @@ void netvsc_device_remove(struct hv_device *device) cancel_work_sync(&net_device->subchan_work); - netvsc_disconnect_vsp(device); + netvsc_revoke_buf(device, net_device); RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); @@ -570,6 +571,8 @@ void netvsc_device_remove(struct hv_device *device) /* Now, we can close the channel safely */ vmbus_close(device->channel); + netvsc_teardown_gpadl(device, net_device); + /* And dissassociate NAPI context from device */ for (i = 0; i < net_device->num_chn; i++) netif_napi_del(&net_device->chan_table[i].napi); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 0648eebda829..8b1242b8d8ef 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -407,13 +407,13 @@ int rndis_filter_receive(struct net_device *ndev, /* Make sure the rndis device state is initialized */ if (unlikely(!rndis_dev)) { - netif_err(net_device_ctx, rx_err, ndev, + netif_dbg(net_device_ctx, rx_err, ndev, "got rndis message but no rndis device!\n"); return NVSP_STAT_FAIL; } if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) { - netif_err(net_device_ctx, rx_err, ndev, + netif_dbg(net_device_ctx, rx_err, ndev, "got rndis message uninitialized\n"); return NVSP_STAT_FAIL; } diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 034ae4c57196..f2a7e929316e 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -409,7 +409,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) struct dst_entry *dst; int err, ret = NET_XMIT_DROP; struct flowi6 fl6 = { - .flowi6_iif = dev->ifindex, + .flowi6_oif = dev->ifindex, .daddr = ip6h->daddr, .saddr = ip6h->saddr, .flowi6_flags = FLOWI_FLAG_ANYSRC, diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 8125412c8814..bdfbabb86ee0 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -191,11 +191,14 @@ config LED_TRIGGER_PHY Adds support for a set of LED trigger events per-PHY. Link state change will trigger the events, for consumption by an LED class driver. There are triggers for each link speed currently - supported by the phy, and are of the form: + supported by the PHY and also a one common "link" trigger as a + logical-or of all the link speed ones. + All these triggers are named according to the following pattern: <mii bus id>:<phy>:<speed> Where speed is in the form: - <Speed in megabits>Mbps or <Speed in gigabits>Gbps + <Speed in megabits>Mbps OR <Speed in gigabits>Gbps OR link + for any speed known to the PHY. comment "MII PHY device drivers" diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index 94ca42e630bb..39ecad25b201 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -27,12 +27,21 @@ static struct phy_led_trigger *phy_speed_to_led_trigger(struct phy_device *phy, return NULL; } +static void phy_led_trigger_no_link(struct phy_device *phy) +{ + if (phy->last_triggered) { + led_trigger_event(&phy->last_triggered->trigger, LED_OFF); + led_trigger_event(&phy->led_link_trigger->trigger, LED_OFF); + phy->last_triggered = NULL; + } +} + void phy_led_trigger_change_speed(struct phy_device *phy) { struct phy_led_trigger *plt; if (!phy->link) - goto out_change_speed; + return phy_led_trigger_no_link(phy); if (phy->speed == 0) return; @@ -42,25 +51,28 @@ void phy_led_trigger_change_speed(struct phy_device *phy) netdev_alert(phy->attached_dev, "No phy led trigger registered for speed(%d)\n", phy->speed); - goto out_change_speed; + return phy_led_trigger_no_link(phy); } if (plt != phy->last_triggered) { + if (!phy->last_triggered) + led_trigger_event(&phy->led_link_trigger->trigger, + LED_FULL); + led_trigger_event(&phy->last_triggered->trigger, LED_OFF); led_trigger_event(&plt->trigger, LED_FULL); phy->last_triggered = plt; } - return; - -out_change_speed: - if (phy->last_triggered) { - led_trigger_event(&phy->last_triggered->trigger, - LED_OFF); - phy->last_triggered = NULL; - } } EXPORT_SYMBOL_GPL(phy_led_trigger_change_speed); +static void phy_led_trigger_format_name(struct phy_device *phy, char *buf, + size_t size, char *suffix) +{ + snprintf(buf, size, PHY_ID_FMT ":%s", + phy->mdio.bus->id, phy->mdio.addr, suffix); +} + static int phy_led_trigger_register(struct phy_device *phy, struct phy_led_trigger *plt, unsigned int speed) @@ -77,8 +89,8 @@ static int phy_led_trigger_register(struct phy_device *phy, snprintf(name_suffix, sizeof(name_suffix), "%dGbps", DIV_ROUND_CLOSEST(speed, 1000)); - snprintf(plt->name, sizeof(plt->name), PHY_ID_FMT ":%s", - phy->mdio.bus->id, phy->mdio.addr, name_suffix); + phy_led_trigger_format_name(phy, plt->name, sizeof(plt->name), + name_suffix); plt->trigger.name = plt->name; return led_trigger_register(&plt->trigger); @@ -99,13 +111,30 @@ int phy_led_triggers_register(struct phy_device *phy) if (!phy->phy_num_led_triggers) return 0; + phy->led_link_trigger = devm_kzalloc(&phy->mdio.dev, + sizeof(*phy->led_link_trigger), + GFP_KERNEL); + if (!phy->led_link_trigger) { + err = -ENOMEM; + goto out_clear; + } + + phy_led_trigger_format_name(phy, phy->led_link_trigger->name, + sizeof(phy->led_link_trigger->name), + "link"); + phy->led_link_trigger->trigger.name = phy->led_link_trigger->name; + + err = led_trigger_register(&phy->led_link_trigger->trigger); + if (err) + goto out_free_link; + phy->phy_led_triggers = devm_kzalloc(&phy->mdio.dev, sizeof(struct phy_led_trigger) * phy->phy_num_led_triggers, GFP_KERNEL); if (!phy->phy_led_triggers) { err = -ENOMEM; - goto out_clear; + goto out_unreg_link; } for (i = 0; i < phy->phy_num_led_triggers; i++) { @@ -123,6 +152,11 @@ out_unreg: while (i--) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); +out_unreg_link: + phy_led_trigger_unregister(phy->led_link_trigger); +out_free_link: + devm_kfree(&phy->mdio.dev, phy->led_link_trigger); + phy->led_link_trigger = NULL; out_clear: phy->phy_num_led_triggers = 0; return err; @@ -135,5 +169,8 @@ void phy_led_triggers_unregister(struct phy_device *phy) for (i = 0; i < phy->phy_num_led_triggers; i++) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); + + if (phy->led_link_trigger) + phy_led_trigger_unregister(phy->led_link_trigger); } EXPORT_SYMBOL_GPL(phy_led_triggers_unregister); diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 435854688a7a..228d4aa6d9ae 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -536,6 +536,7 @@ static void tbnet_tx_callback(struct tb_ring *ring, struct ring_frame *frame, dma_unmap_page(dma_dev, tf->frame.buffer_phy, tbnet_frame_size(tf), DMA_TO_DEVICE); + tf->frame.buffer_phy = 0; /* Return buffer to the ring */ net->tx_ring.prod++; diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index b2ff88e69a81..3d4f7959dabb 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -626,7 +626,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv->suspend) + if (priv && priv->suspend) priv->suspend(dev); return usbnet_suspend(intf, message); @@ -678,7 +678,7 @@ static int asix_resume(struct usb_interface *intf) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv->resume) + if (priv && priv->resume) priv->resume(dev); return usbnet_resume(intf); diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 3e7a3ac3a362..05dca3e5c93d 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -230,7 +230,7 @@ skip: goto bad_desc; } - if (header.usb_cdc_ether_desc) { + if (header.usb_cdc_ether_desc && info->ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(info->ether->wMaxSegmentSize); /* because of Zaurus, we may be ignoring the host * side link address we were given. diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index db7279d5b250..720a3a248070 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -499,6 +499,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) return 1; } if (rawip) { + skb_reset_mac_header(skb); skb->dev = dev->net; /* normally set by eth_type_trans */ skb->protocol = proto; return 1; @@ -681,7 +682,7 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) } /* errors aren't fatal - we can live with the dynamic address */ - if (cdc_ether) { + if (cdc_ether && cdc_ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(cdc_ether->wMaxSegmentSize); usbnet_get_ethernet_addr(dev, cdc_ether->iMACAddress); } diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 40ee80c03c94..74c06a5f586f 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -324,6 +324,7 @@ static netdev_tx_t x25_asy_xmit(struct sk_buff *skb, if (err != LAPB_OK) netdev_err(dev, "lapb_disconnect_request error: %d\n", err); + /* fall through */ default: kfree_skb(skb); return NETDEV_TX_OK; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ad3ea24f0885..bcc1694cebcd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2685,7 +2685,6 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state) } sdev->sdev_state = state; - sysfs_notify(&sdev->sdev_gendev.kobj, NULL, "state"); return 0; illegal: @@ -3109,7 +3108,6 @@ int scsi_internal_device_unblock_nowait(struct scsi_device *sdev, case SDEV_BLOCK: case SDEV_TRANSPORT_OFFLINE: sdev->sdev_state = new_state; - sysfs_notify(&sdev->sdev_gendev.kobj, NULL, "state"); break; case SDEV_CREATED_BLOCK: if (new_state == SDEV_TRANSPORT_OFFLINE || @@ -3117,7 +3115,6 @@ int scsi_internal_device_unblock_nowait(struct scsi_device *sdev, sdev->sdev_state = new_state; else sdev->sdev_state = SDEV_CREATED; - sysfs_notify(&sdev->sdev_gendev.kobj, NULL, "state"); break; case SDEV_CANCEL: case SDEV_OFFLINE: diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 4f6f01cf9968..36f6190931bc 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -556,11 +556,8 @@ int srp_reconnect_rport(struct srp_rport *rport) */ shost_for_each_device(sdev, shost) { mutex_lock(&sdev->state_mutex); - if (sdev->sdev_state == SDEV_OFFLINE) { + if (sdev->sdev_state == SDEV_OFFLINE) sdev->sdev_state = SDEV_RUNNING; - sysfs_notify(&sdev->sdev_gendev.kobj, - NULL, "state"); - } mutex_unlock(&sdev->state_mutex); } } else if (rport->state == SRP_RPORT_RUNNING) { diff --git a/include/linux/compiler.h b/include/linux/compiler.h index fd8697aa4f73..202710420d6d 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -191,13 +191,13 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm("%c0:\n\t" \ ".pushsection .discard.reachable\n\t" \ ".long %c0b - .\n\t" \ - ".popsection\n\t" : : "i" (__LINE__)); \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ }) #define annotate_unreachable() ({ \ asm("%c0:\n\t" \ ".pushsection .discard.unreachable\n\t" \ ".long %c0b - .\n\t" \ - ".popsection\n\t" : : "i" (__LINE__)); \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ }) #define ASM_UNREACHABLE \ "999:\n\t" \ diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h index 05d8d136baab..f48a85c377de 100644 --- a/include/linux/dsa/lan9303.h +++ b/include/linux/dsa/lan9303.h @@ -13,8 +13,8 @@ struct lan9303_phy_ops { #define LAN9303_NUM_ALR_RECORDS 512 struct lan9303_alr_cache_entry { u8 mac_addr[ETH_ALEN]; - u8 port_map; /* Bitmap of ports. Zero if unused entry */ - u8 stp_override; /* non zero if set ALR_DAT1_AGE_OVERRID */ + u8 port_map; /* Bitmap of ports. Zero if unused entry */ + u8 stp_override; /* non zero if set LAN9303_ALR_DAT1_AGE_OVERRID */ }; struct lan9303 { @@ -28,7 +28,9 @@ struct lan9303 { struct mutex indirect_mutex; /* protect indexed register access */ const struct lan9303_phy_ops *ops; bool is_bridged; /* true if port 1 and 2 are bridged */ - u32 swe_port_state; /* remember SWE_PORT_STATE while not bridged */ + + /* remember LAN9303_SWE_PORT_STATE while not bridged */ + u32 swe_port_state; /* LAN9303 do not offer reading specific ALR entry. Cache all * static entries in a flat table **/ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ea04ca024f0d..cb18c6290ca8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -73,6 +73,7 @@ struct ipv6_devconf { __u32 enhanced_dad; __u32 addr_gen_mode; __s32 disable_policy; + __s32 ndisc_tclass; struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 66d19b611fe4..62af7512dabb 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -221,6 +221,7 @@ enum { }; enum { + MLX5_ETH_WQE_SVLAN = 1 << 0, MLX5_ETH_WQE_INSERT_VLAN = 1 << 15, }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fda527ccb263..6b274bfe489f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -770,13 +770,14 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); enum tc_setup_type { - TC_SETUP_MQPRIO, + TC_SETUP_QDISC_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, TC_SETUP_BLOCK, - TC_SETUP_CBS, + TC_SETUP_QDISC_CBS, + TC_SETUP_QDISC_RED, }; /* These structures hold the attributes of bpf state that are being passed @@ -4335,6 +4336,31 @@ void netdev_notice(const struct net_device *dev, const char *format, ...); __printf(2, 3) void netdev_info(const struct net_device *dev, const char *format, ...); +#define netdev_level_once(level, dev, fmt, ...) \ +do { \ + static bool __print_once __read_mostly; \ + \ + if (!__print_once) { \ + __print_once = true; \ + netdev_printk(level, dev, fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define netdev_emerg_once(dev, fmt, ...) \ + netdev_level_once(KERN_EMERG, dev, fmt, ##__VA_ARGS__) +#define netdev_alert_once(dev, fmt, ...) \ + netdev_level_once(KERN_ALERT, dev, fmt, ##__VA_ARGS__) +#define netdev_crit_once(dev, fmt, ...) \ + netdev_level_once(KERN_CRIT, dev, fmt, ##__VA_ARGS__) +#define netdev_err_once(dev, fmt, ...) \ + netdev_level_once(KERN_ERR, dev, fmt, ##__VA_ARGS__) +#define netdev_warn_once(dev, fmt, ...) \ + netdev_level_once(KERN_WARNING, dev, fmt, ##__VA_ARGS__) +#define netdev_notice_once(dev, fmt, ...) \ + netdev_level_once(KERN_NOTICE, dev, fmt, ##__VA_ARGS__) +#define netdev_info_once(dev, fmt, ...) \ + netdev_level_once(KERN_INFO, dev, fmt, ##__VA_ARGS__) + #define MODULE_ALIAS_NETDEV(device) \ MODULE_ALIAS("netdev-" device) @@ -4375,6 +4401,10 @@ do { \ WARN(1, "netdevice: %s%s\n" format, netdev_name(dev), \ netdev_reg_state(dev), ##args) +#define netdev_WARN_ONCE(dev, condition, format, arg...) \ + WARN_ONCE(1, "netdevice: %s%s\n" format, netdev_name(dev) \ + netdev_reg_state(dev), ##args) + /* netif printk helpers, similar to netdev_printk */ #define netif_printk(priv, type, level, dev, fmt, args...) \ @@ -4449,15 +4479,7 @@ do { \ * Why 16. Because with 16 the only overlap we get on a hash of the * low nibble of the protocol value is RARP/SNAP/X.25. * - * NOTE: That is no longer true with the addition of VLAN tags. Not - * sure which should go first, but I bet it won't make much - * difference if we are running VLANs. The good news is that - * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversely affected. - * --BLG - * * 0800 IP - * 8100 802.1Q VLAN * 0001 802.3 * 0002 AX.25 * 0004 802.2 diff --git a/include/linux/of.h b/include/linux/of.h index b240ed69dc96..b32d418d011a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -675,12 +675,6 @@ static inline int of_property_count_elems_of_size(const struct device_node *np, return -ENOSYS; } -static inline int of_property_read_u32_index(const struct device_node *np, - const char *propname, u32 index, u32 *out_value) -{ - return -ENOSYS; -} - static inline int of_property_read_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz) { @@ -707,16 +701,14 @@ static inline int of_property_read_u64_array(const struct device_node *np, return -ENOSYS; } -static inline int of_property_read_string(const struct device_node *np, - const char *propname, - const char **out_string) +static inline int of_property_read_u32_index(const struct device_node *np, + const char *propname, u32 index, u32 *out_value) { return -ENOSYS; } -static inline int of_property_read_string_helper(const struct device_node *np, - const char *propname, - const char **out_strs, size_t sz, int index) +static inline int of_property_read_u64_index(const struct device_node *np, + const char *propname, u32 index, u64 *out_value) { return -ENOSYS; } @@ -744,12 +736,51 @@ static inline int of_n_size_cells(struct device_node *np) return 0; } +static inline int of_property_read_variable_u8_array(const struct device_node *np, + const char *propname, u8 *out_values, + size_t sz_min, size_t sz_max) +{ + return -ENOSYS; +} + +static inline int of_property_read_variable_u16_array(const struct device_node *np, + const char *propname, u16 *out_values, + size_t sz_min, size_t sz_max) +{ + return -ENOSYS; +} + +static inline int of_property_read_variable_u32_array(const struct device_node *np, + const char *propname, + u32 *out_values, + size_t sz_min, + size_t sz_max) +{ + return -ENOSYS; +} + static inline int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { return -ENOSYS; } +static inline int of_property_read_variable_u64_array(const struct device_node *np, + const char *propname, + u64 *out_values, + size_t sz_min, + size_t sz_max) +{ + return -ENOSYS; +} + +static inline int of_property_read_string(const struct device_node *np, + const char *propname, + const char **out_string) +{ + return -ENOSYS; +} + static inline int of_property_match_string(const struct device_node *np, const char *propname, const char *string) @@ -757,6 +788,13 @@ static inline int of_property_match_string(const struct device_node *np, return -ENOSYS; } +static inline int of_property_read_string_helper(const struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index) +{ + return -ENOSYS; +} + static inline struct device_node *of_parse_phandle(const struct device_node *np, const char *phandle_name, int index) diff --git a/include/linux/phy.h b/include/linux/phy.h index d78cd01ea513..dc82a07cb4fd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -451,6 +451,8 @@ struct phy_device { struct phy_led_trigger *phy_led_triggers; unsigned int phy_num_led_triggers; struct phy_led_trigger *last_triggered; + + struct phy_led_trigger *led_link_trigger; #endif /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 03634ec2f918..57d712671081 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3841,6 +3841,13 @@ static inline void nf_reset_trace(struct sk_buff *skb) #endif } +static inline void ipvs_reset(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IP_VS) + skb->ipvs_property = 0; +#endif +} + /* Note: This doesn't put any conntrack and bridge info in dst. */ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 22f40c96a15b..df5d97a85e1a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -85,7 +85,6 @@ struct tcp_sack_block { /*These are used to set the sack_ok field in struct tcp_options_received */ #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ -#define TCP_FACK_ENABLED (1 << 1) /*1 = FACK is enabled locally*/ #define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/ struct tcp_options_received { @@ -294,7 +293,6 @@ struct tcp_sock { u32 pushed_seq; /* Last pushed seq, required to talk to windows */ u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ - u32 fackets_out; /* FACK'd packets */ struct hrtimer pacing_timer; diff --git a/include/net/act_api.h b/include/net/act_api.h index 9b8950042c96..fd08df74c466 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -14,7 +14,6 @@ struct tcf_idrinfo { spinlock_t lock; struct idr action_idr; - struct net *net; }; struct tc_action_ops; @@ -105,7 +104,7 @@ struct tc_action_net { static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops, struct net *net) + const struct tc_action_ops *ops) { int err = 0; @@ -113,7 +112,6 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - tn->idrinfo->net = net; spin_lock_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; diff --git a/include/net/dsa.h b/include/net/dsa.h index e54332968417..6c239257309b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -122,7 +122,7 @@ struct dsa_switch_tree { struct kref refcount; /* Has this tree been applied to the hardware? */ - bool applied; + bool setup; /* * Configuration data for the platform device that owns @@ -190,7 +190,7 @@ struct dsa_port { struct dsa_switch *ds; unsigned int index; const char *name; - struct dsa_port *cpu_dp; + const struct dsa_port *cpu_dp; struct device_node *dn; unsigned int ageing_time; u8 stp_state; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 22aba321282d..9a074776f70b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -84,11 +84,11 @@ struct flow_dissector_key_ipv6_addrs { }; /** - * struct flow_dissector_key_tipc_addrs: - * @srcnode: source node address + * struct flow_dissector_key_tipc: + * @key: source node address combined with selector */ -struct flow_dissector_key_tipc_addrs { - __be32 srcnode; +struct flow_dissector_key_tipc { + __be32 key; }; /** @@ -100,7 +100,7 @@ struct flow_dissector_key_addrs { union { struct flow_dissector_key_ipv4_addrs v4addrs; struct flow_dissector_key_ipv6_addrs v6addrs; - struct flow_dissector_key_tipc_addrs tipcaddrs; + struct flow_dissector_key_tipc tipckey; }; }; @@ -192,7 +192,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ - FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ + FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 2cc728ef8cd0..4ed1040bbe4a 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -11,7 +11,7 @@ #define _NF_CONNTRACK_IPV4_H -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; +const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index 79a335c0d8b8..9cd55be95853 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -2,7 +2,7 @@ #ifndef _NF_CONNTRACK_IPV6_H #define _NF_CONNTRACK_IPV6_H -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; +extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 510192eb7e9d..7ef56c13698a 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -43,7 +43,6 @@ struct nf_conntrack_l4proto { const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts); /* Called when a new connection for this protocol found; @@ -76,7 +75,7 @@ struct nf_conntrack_l4proto { int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); /* Calculate tuple nlattr size */ - int (*nlattr_tuple_size)(void); + unsigned int (*nlattr_tuple_size)(void); int (*nlattr_to_tuple)(struct nlattr *tb[], struct nf_conntrack_tuple *t); const struct nla_policy *nla_policy; @@ -146,15 +145,27 @@ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t); -int nf_ct_port_nlattr_tuple_size(void); +unsigned int nf_ct_port_nlattr_tuple_size(void); extern const struct nla_policy nf_ct_port_nla_policy[]; #ifdef CONFIG_SYSCTL -#define LOG_INVALID(net, proto) \ - ((net)->ct.sysctl_log_invalid == (proto) || \ - (net)->ct.sysctl_log_invalid == IPPROTO_RAW) +__printf(3, 4) __cold +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...); +__printf(5, 6) __cold +void nf_l4proto_log_invalid(const struct sk_buff *skb, + struct net *net, + u16 pf, u8 protonum, + const char *fmt, ...); #else -static inline int LOG_INVALID(struct net *net, int proto) { return 0; } +static inline __printf(5, 6) __cold +void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, + u16 pf, u8 protonum, const char *fmt, ...) {} +static inline __printf(3, 4) __cold +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...) { } #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 079c69cae2f6..01570a8f2982 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -312,6 +312,7 @@ struct nft_expr; * @flush: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts + * @get: get set elements * @privsize: function to return size of set private data * @init: initialize private data of new set instance * @destroy: destroy private data of set instance @@ -351,6 +352,10 @@ struct nft_set_ops { void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); + void * (*get)(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem, + unsigned int flags); unsigned int (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 379550f8124a..5e12975fc658 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -155,6 +155,8 @@ struct netns_ipv4 { int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; + int sysctl_tcp_wmem[3]; + int sysctl_tcp_rmem[3]; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/nsh.h b/include/net/nsh.h index a1eaea20be96..350b1ad11c7f 100644 --- a/include/net/nsh.h +++ b/include/net/nsh.h @@ -304,4 +304,7 @@ static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags, NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK); } +int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh); +int nsh_pop(struct sk_buff *skb); + #endif /* __NET_NSH_H */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 98fef3221227..0105445cab83 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -231,6 +231,7 @@ struct tcf_exts { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ int nr_actions; struct tc_action **actions; + struct net *net; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -244,6 +245,7 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; + exts->net = NULL; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) @@ -254,6 +256,28 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) return 0; } +/* Return false if the netns is being destroyed in cleanup_net(). Callers + * need to do cleanup synchronously in this case, otherwise may race with + * tc_action_net_exit(). Return true for other cases. + */ +static inline bool tcf_exts_get_net(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + exts->net = maybe_get_net(exts->net); + return exts->net != NULL; +#else + return true; +#endif +} + +static inline void tcf_exts_put_net(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + if (exts->net) + put_net(exts->net); +#endif +} + static inline void tcf_exts_to_list(const struct tcf_exts *exts, struct list_head *actions) { @@ -703,4 +727,34 @@ struct tc_cookie { u8 *data; u32 len; }; + +enum tc_red_command { + TC_RED_REPLACE, + TC_RED_DESTROY, + TC_RED_STATS, + TC_RED_XSTATS, +}; + +struct tc_red_qopt_offload_params { + u32 min; + u32 max; + u32 probability; + bool is_ecn; +}; +struct tc_red_qopt_offload_stats { + struct gnet_stats_basic_packed *bstats; + struct gnet_stats_queue *qstats; +}; + +struct tc_red_qopt_offload { + enum tc_red_command command; + u32 handle; + u32 parent; + union { + struct tc_red_qopt_offload_params set; + struct tc_red_qopt_offload_stats stats; + struct red_stats *xstats; + }; +}; + #endif diff --git a/include/net/sock.h b/include/net/sock.h index 6f1be9726e02..688a823dccc3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1101,8 +1101,12 @@ struct proto { */ unsigned long *memory_pressure; long *sysctl_mem; + int *sysctl_wmem; int *sysctl_rmem; + u32 sysctl_wmem_offset; + u32 sysctl_rmem_offset; + int max_header; bool no_autobind; @@ -2390,4 +2394,22 @@ extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; +static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) +{ + /* Does this proto have per netns sysctl_wmem ? */ + if (proto->sysctl_wmem_offset) + return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); + + return *proto->sysctl_wmem; +} + +static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) +{ + /* Does this proto have per netns sysctl_rmem ? */ + if (proto->sysctl_rmem_offset) + return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); + + return *proto->sysctl_rmem; +} + #endif /* _SOCK_H */ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d756fbe46625..39bc855d7fee 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -76,6 +76,7 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, SWITCHDEV_OBJ_ID_PORT_MDB, + SWITCHDEV_OBJ_ID_HOST_MDB, }; struct switchdev_obj { diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index c2090df944ff..22ae260d6869 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -13,12 +13,17 @@ #include <net/act_api.h> #include <linux/tc_act/tc_vlan.h> +struct tcf_vlan_params { + int tcfv_action; + u16 tcfv_push_vid; + __be16 tcfv_push_proto; + u8 tcfv_push_prio; + struct rcu_head rcu; +}; + struct tcf_vlan { struct tc_action common; - int tcfv_action; - u16 tcfv_push_vid; - __be16 tcfv_push_proto; - u8 tcfv_push_prio; + struct tcf_vlan_params __rcu *vlan_p; }; #define to_vlan(a) ((struct tcf_vlan *)a) @@ -33,22 +38,45 @@ static inline bool is_tcf_vlan(const struct tc_action *a) static inline u32 tcf_vlan_action(const struct tc_action *a) { - return to_vlan(a)->tcfv_action; + u32 tcfv_action; + + rcu_read_lock(); + tcfv_action = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_action; + rcu_read_unlock(); + + return tcfv_action; } static inline u16 tcf_vlan_push_vid(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_vid; + u16 tcfv_push_vid; + + rcu_read_lock(); + tcfv_push_vid = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_vid; + rcu_read_unlock(); + + return tcfv_push_vid; } static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_proto; + __be16 tcfv_push_proto; + + rcu_read_lock(); + tcfv_push_proto = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_proto; + rcu_read_unlock(); + + return tcfv_push_proto; } static inline u8 tcf_vlan_push_prio(const struct tc_action *a) { - return to_vlan(a)->tcfv_push_prio; -} + u8 tcfv_push_prio; + rcu_read_lock(); + tcfv_push_prio = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_prio; + rcu_read_unlock(); + + return tcfv_push_prio; +} #endif /* __NET_TC_VLAN_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index babfd4da1515..ed71511e67a6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -242,8 +242,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; -extern int sysctl_tcp_wmem[3]; -extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ #define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ @@ -386,7 +384,6 @@ void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); -void tcp_disable_fack(struct tcp_sock *tp); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op); @@ -778,7 +775,7 @@ struct tcp_skb_cb { }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ - __u8 sacked; /* State flags for SACK/FACK. */ + __u8 sacked; /* State flags for SACK. */ #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ #define TCPCB_LOST 0x04 /* SKB is lost */ @@ -1068,7 +1065,6 @@ void tcp_rate_check_app_limited(struct sock *sk); * * tcp_is_sack - SACK enabled * tcp_is_reno - No SACK - * tcp_is_fack - FACK enabled, implies SACK enabled */ static inline int tcp_is_sack(const struct tcp_sock *tp) { @@ -1080,16 +1076,6 @@ static inline bool tcp_is_reno(const struct tcp_sock *tp) return !tcp_is_sack(tp); } -static inline bool tcp_is_fack(const struct tcp_sock *tp) -{ - return tp->rx_opt.sack_ok & TCP_FACK_ENABLED; -} - -static inline void tcp_enable_fack(struct tcp_sock *tp) -{ - tp->rx_opt.sack_ok |= TCP_FACK_ENABLED; -} - static inline unsigned int tcp_left_out(const struct tcp_sock *tp) { return tp->sacked_out + tp->lost_out; diff --git a/include/net/tipc.h b/include/net/tipc.h new file mode 100644 index 000000000000..07670ec022a7 --- /dev/null +++ b/include/net/tipc.h @@ -0,0 +1,62 @@ +/* + * include/net/tipc.h: Include file for TIPC message header routines + * + * Copyright (c) 2017 Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_HDR_H +#define _TIPC_HDR_H + +#include <linux/random.h> + +#define KEEPALIVE_MSG_MASK 0x0e080000 /* LINK_PROTOCOL + MSG_IS_KEEPALIVE */ + +struct tipc_basic_hdr { + __be32 w[4]; +}; + +static inline u32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr) +{ + u32 w0 = ntohl(hdr->w[0]); + bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK; + int key; + + /* Return source node identity as key */ + if (likely(!keepalive_msg)) + return hdr->w[3]; + + /* Spread PROBE/PROBE_REPLY messages across the cores */ + get_random_bytes(&key, sizeof(key)); + return key; +} + +#endif diff --git a/include/sound/seq_kernel.h b/include/sound/seq_kernel.h index feb58d455560..4b9ee3009aa0 100644 --- a/include/sound/seq_kernel.h +++ b/include/sound/seq_kernel.h @@ -49,7 +49,8 @@ typedef union snd_seq_timestamp snd_seq_timestamp_t; #define SNDRV_SEQ_DEFAULT_CLIENT_EVENTS 200 /* max delivery path length */ -#define SNDRV_SEQ_MAX_HOPS 10 +/* NOTE: this shouldn't be greater than MAX_LOCKDEP_SUBCLASSES */ +#define SNDRV_SEQ_MAX_HOPS 8 /* max size of event size */ #define SNDRV_SEQ_MAX_EVENT_LEN 0x3fffffff diff --git a/include/sound/timer.h b/include/sound/timer.h index c4d76ff056c6..7ae226ab6990 100644 --- a/include/sound/timer.h +++ b/include/sound/timer.h @@ -90,6 +90,8 @@ struct snd_timer { struct list_head ack_list_head; struct list_head sack_list_head; /* slow ack list head */ struct tasklet_struct task_queue; + int max_instances; /* upper limit of timer instances */ + int num_instances; /* current number of timer instances */ }; struct snd_timer_instance { diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 6d31c0520ef3..ec4dade24466 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -48,7 +48,7 @@ TRACE_EVENT(sock_exceed_buf_limit, strncpy(__entry->name, prot->name, 32); __entry->sysctl_mem = prot->sysctl_mem; __entry->allocated = allocated; - __entry->sysctl_rmem = prot->sysctl_rmem[0]; + __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); ), diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h index f54853288f99..483b77af4eb8 100644 --- a/include/uapi/linux/ila.h +++ b/include/uapi/linux/ila.h @@ -17,6 +17,8 @@ enum { ILA_ATTR_DIR, /* u32 */ ILA_ATTR_PAD, ILA_ATTR_CSUM_MODE, /* u8 */ + ILA_ATTR_IDENT_TYPE, /* u8 */ + ILA_ATTR_HOOK_TYPE, /* u8 */ __ILA_ATTR_MAX, }; @@ -41,6 +43,25 @@ enum { ILA_CSUM_ADJUST_TRANSPORT, ILA_CSUM_NEUTRAL_MAP, ILA_CSUM_NO_ACTION, + ILA_CSUM_NEUTRAL_MAP_AUTO, +}; + +enum { + ILA_ATYPE_IID = 0, + ILA_ATYPE_LUID, + ILA_ATYPE_VIRT_V4, + ILA_ATYPE_VIRT_UNI_V6, + ILA_ATYPE_VIRT_MULTI_V6, + ILA_ATYPE_NONLOCAL_ADDR, + ILA_ATYPE_RSVD_1, + ILA_ATYPE_RSVD_2, + + ILA_ATYPE_USE_FORMAT = 32, /* Get type from type field in identifier */ +}; + +enum { + ILA_HOOK_ROUTE_OUTPUT, + ILA_HOOK_ROUTE_INPUT, }; #endif /* _UAPI_LINUX_ILA_H */ diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index b22a9c4e1b12..9c0f4a92bcff 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -186,6 +186,7 @@ enum { DEVCONF_ADDR_GEN_MODE, DEVCONF_DISABLE_POLICY, DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN, + DEVCONF_NDISC_TCLASS, DEVCONF_MAX }; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 501e4c4e2a03..ec75a685f1dd 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -336,6 +336,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ + OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -495,6 +496,30 @@ struct ovs_key_ct_tuple_ipv6 { __u8 ipv6_proto; }; +enum ovs_nsh_key_attr { + OVS_NSH_KEY_ATTR_UNSPEC, + OVS_NSH_KEY_ATTR_BASE, /* struct ovs_nsh_key_base. */ + OVS_NSH_KEY_ATTR_MD1, /* struct ovs_nsh_key_md1. */ + OVS_NSH_KEY_ATTR_MD2, /* variable-length octets for MD type 2. */ + __OVS_NSH_KEY_ATTR_MAX +}; + +#define OVS_NSH_KEY_ATTR_MAX (__OVS_NSH_KEY_ATTR_MAX - 1) + +struct ovs_nsh_key_base { + __u8 flags; + __u8 ttl; + __u8 mdtype; + __u8 np; + __be32 path_hdr; +}; + +#define NSH_MD1_CONTEXT_SIZE 4 + +struct ovs_nsh_key_md1 { + __be32 context[NSH_MD1_CONTEXT_SIZE]; +}; + /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow @@ -811,6 +836,8 @@ struct ovs_action_push_eth { * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the * packet. * @OVS_ACTION_ATTR_CT_CLEAR: Clear conntrack state from the packet. + * @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet. + * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -841,6 +868,8 @@ enum ovs_action_attr { OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ OVS_ACTION_ATTR_POP_ETH, /* No argument. */ OVS_ACTION_ATTR_CT_CLEAR, /* No argument. */ + OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. */ + OVS_ACTION_ATTR_POP_NSH, /* No argument. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 5002562868cc..6a2c5ea7e9c4 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -256,6 +256,7 @@ struct tc_red_qopt { #define TC_RED_ECN 1 #define TC_RED_HARDDROP 2 #define TC_RED_ADAPTATIVE 4 +#define TC_RED_OFFLOADED 8 }; struct tc_red_xstats { diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 0d941cdd8e8c..33a70ece462f 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -191,7 +191,6 @@ enum LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */ LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */ LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */ - LINUX_MIB_TCPFACKREORDER, /* TCPFACKReorder */ LINUX_MIB_TCPSACKREORDER, /* TCPSACKReorder */ LINUX_MIB_TCPRENOREORDER, /* TCPRenoReorder */ LINUX_MIB_TCPTSREORDER, /* TCPTSReorder */ diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 299a822d2c4e..c227ccba60ae 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -94,7 +94,7 @@ enum { SNDRV_HWDEP_IFACE_VX, /* Digigram VX cards */ SNDRV_HWDEP_IFACE_MIXART, /* Digigram miXart cards */ SNDRV_HWDEP_IFACE_USX2Y, /* Tascam US122, US224 & US428 usb */ - SNDRV_HWDEP_IFACE_EMUX_WAVETABLE, /* EmuX wavetable */ + SNDRV_HWDEP_IFACE_EMUX_WAVETABLE, /* EmuX wavetable */ SNDRV_HWDEP_IFACE_BLUETOOTH, /* Bluetooth audio */ SNDRV_HWDEP_IFACE_USX2Y_PCM, /* Tascam US122, US224 & US428 rawusb pcm */ SNDRV_HWDEP_IFACE_PCXHR, /* Digigram PCXHR */ @@ -384,7 +384,7 @@ struct snd_mask { struct snd_pcm_hw_params { unsigned int flags; - struct snd_mask masks[SNDRV_PCM_HW_PARAM_LAST_MASK - + struct snd_mask masks[SNDRV_PCM_HW_PARAM_LAST_MASK - SNDRV_PCM_HW_PARAM_FIRST_MASK + 1]; struct snd_mask mres[5]; /* reserved masks */ struct snd_interval intervals[SNDRV_PCM_HW_PARAM_LAST_INTERVAL - @@ -857,7 +857,7 @@ typedef int __bitwise snd_ctl_elem_iface_t; #define SNDRV_CTL_ELEM_ACCESS_INACTIVE (1<<8) /* control does actually nothing, but may be updated */ #define SNDRV_CTL_ELEM_ACCESS_LOCK (1<<9) /* write lock */ #define SNDRV_CTL_ELEM_ACCESS_OWNER (1<<10) /* write lock owner */ -#define SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK (1<<28) /* kernel use a TLV callback */ +#define SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK (1<<28) /* kernel use a TLV callback */ #define SNDRV_CTL_ELEM_ACCESS_USER (1<<29) /* user space element */ /* bits 30 and 31 are obsoleted (for indirect access) */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 9660ee65fbef..42d24bd64ea4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -901,9 +901,11 @@ list_update_cgroup_event(struct perf_event *event, cpuctx_entry = &cpuctx->cgrp_cpuctx_entry; /* cpuctx->cgrp is NULL unless a cgroup event is active in this CPU .*/ if (add) { + struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx); + list_add(cpuctx_entry, this_cpu_ptr(&cgrp_cpuctx_list)); - if (perf_cgroup_from_task(current, ctx) == event->cgrp) - cpuctx->cgrp = event->cgrp; + if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) + cpuctx->cgrp = cgrp; } else { list_del(cpuctx_entry); cpuctx->cgrp = NULL; diff --git a/kernel/futex.c b/kernel/futex.c index 0d638f008bb1..76ed5921117a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -903,11 +903,27 @@ void exit_pi_state_list(struct task_struct *curr) */ raw_spin_lock_irq(&curr->pi_lock); while (!list_empty(head)) { - next = head->next; pi_state = list_entry(next, struct futex_pi_state, list); key = pi_state->key; hb = hash_futex(&key); + + /* + * We can race against put_pi_state() removing itself from the + * list (a waiter going away). put_pi_state() will first + * decrement the reference count and then modify the list, so + * its possible to see the list entry but fail this reference + * acquire. + * + * In that case; drop the locks to let put_pi_state() make + * progress and retry the loop. + */ + if (!atomic_inc_not_zero(&pi_state->refcount)) { + raw_spin_unlock_irq(&curr->pi_lock); + cpu_relax(); + raw_spin_lock_irq(&curr->pi_lock); + continue; + } raw_spin_unlock_irq(&curr->pi_lock); spin_lock(&hb->lock); @@ -918,8 +934,10 @@ void exit_pi_state_list(struct task_struct *curr) * task still owns the PI-state: */ if (head->next != next) { + /* retain curr->pi_lock for the loop invariant */ raw_spin_unlock(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); + put_pi_state(pi_state); continue; } @@ -927,9 +945,8 @@ void exit_pi_state_list(struct task_struct *curr) WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); pi_state->owner = NULL; - raw_spin_unlock(&curr->pi_lock); - get_pi_state(pi_state); + raw_spin_unlock(&curr->pi_lock); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 9209d83ecdcf..ba0da243fdd8 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -649,6 +649,7 @@ static int sugov_start(struct cpufreq_policy *policy) struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); memset(sg_cpu, 0, sizeof(*sg_cpu)); + sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; @@ -714,11 +715,6 @@ struct cpufreq_governor *cpufreq_default_governor(void) static int __init sugov_register(void) { - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(sugov_cpu, cpu).cpu = cpu; - return cpufreq_register_governor(&schedutil_gov); } fs_initcall(sugov_register); diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 4583feb66393..e449a23e9d59 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) "NMI watchdog: " fmt #include <linux/nmi.h> +#include <linux/atomic.h> #include <linux/module.h> #include <linux/sched/debug.h> @@ -22,10 +23,11 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); +static DEFINE_PER_CPU(struct perf_event *, dead_event); static struct cpumask dead_events_mask; static unsigned long hardlockup_allcpu_dumped; -static unsigned int watchdog_cpus; +static atomic_t watchdog_cpus = ATOMIC_INIT(0); void arch_touch_nmi_watchdog(void) { @@ -189,7 +191,8 @@ void hardlockup_detector_perf_enable(void) if (hardlockup_detector_event_create()) return; - if (!watchdog_cpus++) + /* use original value for check */ + if (!atomic_fetch_inc(&watchdog_cpus)) pr_info("Enabled. Permanently consumes one hw-PMU counter.\n"); perf_event_enable(this_cpu_read(watchdog_ev)); @@ -204,8 +207,10 @@ void hardlockup_detector_perf_disable(void) if (event) { perf_event_disable(event); + this_cpu_write(watchdog_ev, NULL); + this_cpu_write(dead_event, event); cpumask_set_cpu(smp_processor_id(), &dead_events_mask); - watchdog_cpus--; + atomic_dec(&watchdog_cpus); } } @@ -219,7 +224,7 @@ void hardlockup_detector_perf_cleanup(void) int cpu; for_each_cpu(cpu, &dead_events_mask) { - struct perf_event *event = per_cpu(watchdog_ev, cpu); + struct perf_event *event = per_cpu(dead_event, cpu); /* * Required because for_each_cpu() reports unconditionally @@ -227,7 +232,7 @@ void hardlockup_detector_perf_cleanup(void) */ if (event) perf_event_release_kernel(event); - per_cpu(watchdog_ev, cpu) = NULL; + per_cpu(dead_event, cpu) = NULL; } cpumask_clear(&dead_events_mask); } diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index efdd72e15794..d390d1be3748 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -10,6 +10,7 @@ #include <linux/workqueue.h> #include <linux/kthread.h> +#include <linux/preempt.h> struct worker_pool; @@ -60,7 +61,7 @@ struct worker { */ static inline struct worker *current_wq_worker(void) { - if (current->flags & PF_WQ_WORKER) + if (in_task() && (current->flags & PF_WQ_WORKER)) return kthread_data(current); return NULL; } diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index fef5d2e114be..1ef0cec38d78 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -228,7 +228,7 @@ next_op: hdr = 2; /* Extract a tag from the data */ - if (unlikely(dp >= datalen - 1)) + if (unlikely(datalen - dp < 2)) goto data_overrun_error; tag = data[dp++]; if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) @@ -274,7 +274,7 @@ next_op: int n = len - 0x80; if (unlikely(n > 2)) goto length_too_long; - if (unlikely(dp >= datalen - n)) + if (unlikely(n > datalen - dp)) goto data_overrun_error; hdr += n; for (len = 0; n > 0; n--) { diff --git a/lib/nlattr.c b/lib/nlattr.c index 3d8295c85505..8bf78b4b78f0 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -15,19 +15,23 @@ #include <linux/types.h> #include <net/netlink.h> -static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { +/* for these data types attribute length must be exactly given size */ +static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), - [NLA_MSECS] = sizeof(u64), - [NLA_NESTED] = NLA_HDRLEN, [NLA_S8] = sizeof(s8), [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), }; +static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { + [NLA_MSECS] = sizeof(u64), + [NLA_NESTED] = NLA_HDRLEN, +}; + static int validate_nla_bitfield32(const struct nlattr *nla, u32 *valid_flags_allowed) { @@ -65,6 +69,13 @@ static int validate_nla(const struct nlattr *nla, int maxtype, BUG_ON(pt->type > NLA_TYPE_MAX); + /* for data types NLA_U* and NLA_S* require exact length */ + if (nla_attr_len[pt->type]) { + if (attrlen != nla_attr_len[pt->type]) + return -ERANGE; + return 0; + } + switch (pt->type) { case NLA_FLAG: if (attrlen > 0) @@ -191,6 +202,8 @@ nla_policy_len(const struct nla_policy *p, int n) for (i = 0; i < n; i++, p++) { if (p->len) len += nla_total_size(p->len); + else if (nla_attr_len[p->type]) + len += nla_total_size(nla_attr_len[p->type]); else if (nla_attr_minlen[p->type]) len += nla_total_size(nla_attr_minlen[p->type]); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index a096d3e189da..7f98a7d25866 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -137,7 +137,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && br_multicast_querier_exists(br, eth_hdr(skb))) { - if ((mdst && mdst->mglist) || + if ((mdst && mdst->host_joined) || br_multicast_is_router(br)) { local_rcv = true; br->dev->stats.multicast++; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 31ddff22563e..b0f4c734900b 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -292,6 +292,46 @@ err: kfree(priv); } +static void br_mdb_switchdev_host_port(struct net_device *dev, + struct net_device *lower_dev, + struct br_mdb_entry *entry, int type) +{ + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = SWITCHDEV_OBJ_ID_HOST_MDB, + .flags = SWITCHDEV_F_DEFER, + }, + .vid = entry->vid, + }; + + if (entry->addr.proto == htons(ETH_P_IP)) + ip_eth_mc_map(entry->addr.u.ip4, mdb.addr); +#if IS_ENABLED(CONFIG_IPV6) + else + ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr); +#endif + + mdb.obj.orig_dev = dev; + switch (type) { + case RTM_NEWMDB: + switchdev_port_obj_add(lower_dev, &mdb.obj); + break; + case RTM_DELMDB: + switchdev_port_obj_del(lower_dev, &mdb.obj); + break; + } +} + +static void br_mdb_switchdev_host(struct net_device *dev, + struct br_mdb_entry *entry, int type) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower_dev, iter) + br_mdb_switchdev_host_port(dev, lower_dev, entry, type); +} + static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, struct br_mdb_entry *entry, int type) { @@ -317,7 +357,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, #endif mdb.obj.orig_dev = port_dev; - if (port_dev && type == RTM_NEWMDB) { + if (p && port_dev && type == RTM_NEWMDB) { complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); if (complete_info) { complete_info->port = p; @@ -327,10 +367,13 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, if (switchdev_port_obj_add(port_dev, &mdb.obj)) kfree(complete_info); } - } else if (port_dev && type == RTM_DELMDB) { + } else if (p && port_dev && type == RTM_DELMDB) { switchdev_port_obj_del(port_dev, &mdb.obj); } + if (!p) + br_mdb_switchdev_host(dev, entry, type); + skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); if (!skb) goto errout; @@ -353,7 +396,10 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_mdb_entry entry; memset(&entry, 0, sizeof(entry)); - entry.ifindex = port->dev->ifindex; + if (port) + entry.ifindex = port->dev->ifindex; + else + entry.ifindex = dev->ifindex; entry.addr.proto = group->proto; entry.addr.u.ip4 = group->u.ip4; #if IS_ENABLED(CONFIG_IPV6) @@ -655,7 +701,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) call_rcu_bh(&p->rcu, br_multicast_free_pg); err = 0; - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); break; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 5f7f0e9d446c..cb4729539b82 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -249,7 +249,8 @@ static void br_multicast_group_expired(struct timer_list *t) if (!netif_running(br->dev) || timer_pending(&mp->timer)) goto out; - mp->mglist = false; + mp->host_joined = false; + br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0); if (mp->ports) goto out; @@ -292,7 +293,7 @@ static void br_multicast_del_pg(struct net_bridge *br, p->flags); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); @@ -773,7 +774,10 @@ static int br_multicast_add_group(struct net_bridge *br, goto err; if (!port) { - mp->mglist = true; + if (!mp->host_joined) { + mp->host_joined = true; + br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0); + } mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -1477,7 +1481,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, max_delay *= br->multicast_last_member_count; - if (mp->mglist && + if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) @@ -1561,7 +1565,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, goto out; max_delay *= br->multicast_last_member_count; - if (mp->mglist && + if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) @@ -1622,7 +1626,7 @@ br_multicast_leave_group(struct net_bridge *br, br_mdb_notify(br->dev, port, group, RTM_DELMDB, p->flags); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } @@ -1662,7 +1666,7 @@ br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (mp->mglist && + if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 40553d832b6e..1312b8d20ec3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -209,7 +209,7 @@ struct net_bridge_mdb_entry struct rcu_head rcu; struct timer_list timer; struct br_ip addr; - bool mglist; + bool host_joined; }; struct net_bridge_mdb_htable diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3b3dcf719e07..37817d25b63d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2112,9 +2112,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, for (i = 0, j = 1 ; j < 4 ; j++, i++) { struct compat_ebt_entry_mwt *match32; unsigned int size; - char *buf = buf_start; + char *buf = buf_start + offsets[i]; - buf = buf_start + offsets[i]; if (offsets[i] > offsets[j]) return -EINVAL; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1f5caafb4492..15ce30063765 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -10,6 +10,7 @@ #include <net/ipv6.h> #include <net/gre.h> #include <net/pptp.h> +#include <net/tipc.h> #include <linux/igmp.h> #include <linux/icmp.h> #include <linux/sctp.h> @@ -772,23 +773,22 @@ proto_again: break; } case htons(ETH_P_TIPC): { - struct { - __be32 pre[3]; - __be32 srcnode; - } *hdr, _hdr; - hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); + struct tipc_basic_hdr *hdr, _hdr; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), + data, hlen, &_hdr); if (!hdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_TIPC_ADDRS)) { + FLOW_DISSECTOR_KEY_TIPC)) { key_addrs = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_TIPC_ADDRS, + FLOW_DISSECTOR_KEY_TIPC, target_container); - key_addrs->tipcaddrs.srcnode = hdr->srcnode; - key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS; + key_addrs->tipckey.key = tipc_hdr_rps_key(hdr); + key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC; } fdret = FLOW_DISSECT_RET_OUT_GOOD; break; @@ -1024,8 +1024,8 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow) case FLOW_DISSECTOR_KEY_IPV6_ADDRS: diff -= sizeof(flow->addrs.v6addrs); break; - case FLOW_DISSECTOR_KEY_TIPC_ADDRS: - diff -= sizeof(flow->addrs.tipcaddrs); + case FLOW_DISSECTOR_KEY_TIPC: + diff -= sizeof(flow->addrs.tipckey); break; } return (sizeof(*flow) - diff) / sizeof(u32); @@ -1039,8 +1039,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow) case FLOW_DISSECTOR_KEY_IPV6_ADDRS: return (__force __be32)ipv6_addr_hash( &flow->addrs.v6addrs.src); - case FLOW_DISSECTOR_KEY_TIPC_ADDRS: - return flow->addrs.tipcaddrs.srcnode; + case FLOW_DISSECTOR_KEY_TIPC: + return flow->addrs.tipckey.key; default: return 0; } @@ -1321,8 +1321,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .offset = offsetof(struct flow_keys, addrs.v6addrs), }, { - .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS, - .offset = offsetof(struct flow_keys, addrs.tipcaddrs), + .key_id = FLOW_DISSECTOR_KEY_TIPC, + .offset = offsetof(struct flow_keys, addrs.tipckey), }, { .key_id = FLOW_DISSECTOR_KEY_PORTS, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e3fa53a07d34..40db0b7e37ac 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2711,7 +2711,7 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi, static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, int datalen) { - struct timeval timestamp; + struct timespec64 timestamp; struct pktgen_hdr *pgh; pgh = skb_put(skb, sizeof(*pgh)); @@ -2773,9 +2773,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, pgh->tv_sec = 0; pgh->tv_usec = 0; } else { - do_gettimeofday(×tamp); + /* + * pgh->tv_sec wraps in y2106 when interpreted as unsigned + * as done by wireshark, or y2038 when interpreted as signed. + * This is probably harmless, but if anyone wants to improve + * it, we could introduce a variant that puts 64-bit nanoseconds + * into the respective header bytes. + * This would also be slightly faster to read. + */ + ktime_get_real_ts64(×tamp); pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); + pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC); } } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dc5ad84ac096..dabba2a91fc8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -920,7 +920,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_xdp_size() /* IFLA_XDP */ + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(4) /* IFLA_NEW_NETNSID */ - + nla_total_size(1); /* IFLA_PROTO_DOWN */ + + nla_total_size(1) /* IFLA_PROTO_DOWN */ + nla_total_size(4) /* IFLA_IF_NETNSID */ + 0; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 97e604d55d55..8134c00df6c2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4869,6 +4869,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) if (!xnet) return; + ipvs_reset(skb); skb_orphan(skb); skb->mark = 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 759400053110..57bbd6040eb6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2346,16 +2346,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) /* guarantee minimum buffer size under pressure */ if (kind == SK_MEM_RECV) { - if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) + if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot)) return 1; } else { /* SK_MEM_SEND */ + int wmem0 = sk_get_wmem0(sk, prot); + if (sk->sk_type == SOCK_STREAM) { - if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) + if (sk->sk_wmem_queued < wmem0) return 1; - } else if (refcount_read(&sk->sk_wmem_alloc) < - prot->sysctl_wmem[0]) + } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) { return 1; + } } if (sk_has_memory_pressure(sk)) { @@ -3042,7 +3044,6 @@ struct prot_inuse { static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); -#ifdef CONFIG_NET_NS void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); @@ -3086,27 +3087,6 @@ static __init int net_inuse_init(void) } core_initcall(net_inuse_init); -#else -static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); - -void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) -{ - __this_cpu_add(prot_inuse.val[prot->inuse_idx], val); -} -EXPORT_SYMBOL_GPL(sock_prot_inuse_add); - -int sock_prot_inuse_get(struct net *net, struct proto *prot) -{ - int cpu, idx = prot->inuse_idx; - int res = 0; - - for_each_possible_cpu(cpu) - res += per_cpu(prot_inuse, cpu).val[idx]; - - return res >= 0 ? res : 0; -} -EXPORT_SYMBOL_GPL(sock_prot_inuse_get); -#endif static void assign_proto_idx(struct proto *prot) { diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 08667f68e601..f0710b5d037d 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -156,6 +156,7 @@ static void dn_rehash_zone(struct dn_zone *dz) default: printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", old_divisor); + /* fall through */ case 256: new_divisor = 1024; new_hashmask = 0x3FF; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 283104e5ca6a..fd54a8e17986 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -94,14 +94,6 @@ static void dsa_tree_put(struct dsa_switch_tree *dst) kref_put(&dst->refcount, dsa_tree_release); } -/* For platform data configurations, we need to have a valid name argument to - * differentiate a disabled port from an enabled one - */ -static bool dsa_port_is_valid(struct dsa_port *port) -{ - return port->type != DSA_PORT_TYPE_UNUSED; -} - static bool dsa_port_is_dsa(struct dsa_port *port) { return port->type == DSA_PORT_TYPE_DSA; @@ -112,197 +104,214 @@ static bool dsa_port_is_cpu(struct dsa_port *port) return port->type == DSA_PORT_TYPE_CPU; } -static bool dsa_ds_find_port_dn(struct dsa_switch *ds, - struct device_node *port) +static bool dsa_port_is_user(struct dsa_port *dp) { - u32 index; - - for (index = 0; index < ds->num_ports; index++) - if (ds->ports[index].dn == port) - return true; - return false; + return dp->type == DSA_PORT_TYPE_USER; } -static struct dsa_switch *dsa_dst_find_port_dn(struct dsa_switch_tree *dst, - struct device_node *port) +static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst, + struct device_node *dn) { struct dsa_switch *ds; - u32 index; + struct dsa_port *dp; + int device, port; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; if (!ds) continue; - if (dsa_ds_find_port_dn(ds, port)) - return ds; + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + if (dp->dn == dn) + return dp; + } } return NULL; } -static int dsa_port_complete(struct dsa_switch_tree *dst, - struct dsa_switch *src_ds, - struct dsa_port *port, - u32 src_port) +static bool dsa_port_setup_routing_table(struct dsa_port *dp) { - struct device_node *link; - int index; - struct dsa_switch *dst_ds; - - for (index = 0;; index++) { - link = of_parse_phandle(port->dn, "link", index); - if (!link) - break; - - dst_ds = dsa_dst_find_port_dn(dst, link); - of_node_put(link); + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; + struct device_node *dn = dp->dn; + struct of_phandle_iterator it; + struct dsa_port *link_dp; + int err; - if (!dst_ds) - return 1; + of_for_each_phandle(&it, err, dn, "link", NULL, 0) { + link_dp = dsa_tree_find_port_by_node(dst, it.node); + if (!link_dp) { + of_node_put(it.node); + return false; + } - src_ds->rtable[dst_ds->index] = src_port; + ds->rtable[link_dp->ds->index] = dp->index; } - return 0; + return true; } -/* A switch is complete if all the DSA ports phandles point to ports - * known in the tree. A return value of 1 means the tree is not - * complete. This is not an error condition. A value of 0 is - * success. - */ -static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds) +static bool dsa_switch_setup_routing_table(struct dsa_switch *ds) { - struct dsa_port *port; - u32 index; - int err; + bool complete = true; + struct dsa_port *dp; + int i; - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) - continue; + for (i = 0; i < DSA_MAX_SWITCHES; i++) + ds->rtable[i] = DSA_RTABLE_NONE; - if (!dsa_port_is_dsa(port)) - continue; + for (i = 0; i < ds->num_ports; i++) { + dp = &ds->ports[i]; - err = dsa_port_complete(dst, ds, port, index); - if (err != 0) - return err; + if (dsa_port_is_dsa(dp)) { + complete = dsa_port_setup_routing_table(dp); + if (!complete) + break; + } } - return 0; + return complete; } -/* A tree is complete if all the DSA ports phandles point to ports - * known in the tree. A return value of 1 means the tree is not - * complete. This is not an error condition. A value of 0 is - * success. - */ -static int dsa_dst_complete(struct dsa_switch_tree *dst) +static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst) { struct dsa_switch *ds; - u32 index; - int err; + bool complete = true; + int device; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; if (!ds) continue; - err = dsa_ds_complete(dst, ds); - if (err != 0) - return err; + complete = dsa_switch_setup_routing_table(ds); + if (!complete) + break; } - return 0; + return complete; } -static int dsa_dsa_port_apply(struct dsa_port *port) +static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) { - struct dsa_switch *ds = port->ds; - int err; + struct dsa_switch *ds; + struct dsa_port *dp; + int device, port; - err = dsa_port_fixed_link_register_of(port); - if (err) { - dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n", - port->index, err); - return err; - } + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; + if (!ds) + continue; - memset(&port->devlink_port, 0, sizeof(port->devlink_port)); + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; - return devlink_port_register(ds->devlink, &port->devlink_port, - port->index); -} + if (dsa_port_is_cpu(dp)) + return dp; + } + } -static void dsa_dsa_port_unapply(struct dsa_port *port) -{ - devlink_port_unregister(&port->devlink_port); - dsa_port_fixed_link_unregister_of(port); + return NULL; } -static int dsa_cpu_port_apply(struct dsa_port *port) +static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) { - struct dsa_switch *ds = port->ds; - int err; + struct dsa_switch *ds; + struct dsa_port *dp; + int device, port; - err = dsa_port_fixed_link_register_of(port); - if (err) { - dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n", - port->index, err); - return err; + /* DSA currently only supports a single CPU port */ + dst->cpu_dp = dsa_tree_find_first_cpu(dst); + if (!dst->cpu_dp) { + pr_warn("Tree has no master device\n"); + return -EINVAL; } - memset(&port->devlink_port, 0, sizeof(port->devlink_port)); - err = devlink_port_register(ds->devlink, &port->devlink_port, - port->index); - return err; + /* Assign the default CPU port to all ports of the fabric */ + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; + if (!ds) + continue; + + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + if (dsa_port_is_user(dp)) + dp->cpu_dp = dst->cpu_dp; + } + } + + return 0; } -static void dsa_cpu_port_unapply(struct dsa_port *port) +static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) { - devlink_port_unregister(&port->devlink_port); - dsa_port_fixed_link_unregister_of(port); + /* DSA currently only supports a single CPU port */ + dst->cpu_dp = NULL; } -static int dsa_user_port_apply(struct dsa_port *port) +static int dsa_port_setup(struct dsa_port *dp) { - struct dsa_switch *ds = port->ds; + struct dsa_switch *ds = dp->ds; int err; - err = dsa_slave_create(port); - if (err) { - dev_warn(ds->dev, "Failed to create slave %d: %d\n", - port->index, err); - port->slave = NULL; - return err; - } + memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); - memset(&port->devlink_port, 0, sizeof(port->devlink_port)); - err = devlink_port_register(ds->devlink, &port->devlink_port, - port->index); + err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index); if (err) return err; - devlink_port_type_eth_set(&port->devlink_port, port->slave); + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + break; + case DSA_PORT_TYPE_CPU: + case DSA_PORT_TYPE_DSA: + err = dsa_port_fixed_link_register_of(dp); + if (err) { + dev_err(ds->dev, "failed to register fixed link for port %d.%d\n", + ds->index, dp->index); + return err; + } + + break; + case DSA_PORT_TYPE_USER: + err = dsa_slave_create(dp); + if (err) + dev_err(ds->dev, "failed to create slave for port %d.%d\n", + ds->index, dp->index); + else + devlink_port_type_eth_set(&dp->devlink_port, dp->slave); + break; + } return 0; } -static void dsa_user_port_unapply(struct dsa_port *port) +static void dsa_port_teardown(struct dsa_port *dp) { - devlink_port_unregister(&port->devlink_port); - if (port->slave) { - dsa_slave_destroy(port->slave); - port->slave = NULL; + devlink_port_unregister(&dp->devlink_port); + + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + break; + case DSA_PORT_TYPE_CPU: + case DSA_PORT_TYPE_DSA: + dsa_port_fixed_link_unregister_of(dp); + break; + case DSA_PORT_TYPE_USER: + if (dp->slave) { + dsa_slave_destroy(dp->slave); + dp->slave = NULL; + } + break; } } -static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) +static int dsa_switch_setup(struct dsa_switch *ds) { - struct dsa_port *port; - u32 index; int err; /* Initialize ds->phys_mii_mask before registering the slave MDIO bus @@ -343,136 +352,145 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) return err; } - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) - continue; + return 0; +} - if (dsa_port_is_dsa(port)) { - err = dsa_dsa_port_apply(port); - if (err) - return err; +static void dsa_switch_teardown(struct dsa_switch *ds) +{ + if (ds->slave_mii_bus && ds->ops->phy_read) + mdiobus_unregister(ds->slave_mii_bus); + + dsa_switch_unregister_notifier(ds); + + if (ds->devlink) { + devlink_unregister(ds->devlink); + devlink_free(ds->devlink); + ds->devlink = NULL; + } + +} + +static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) +{ + struct dsa_switch *ds; + struct dsa_port *dp; + int device, port; + int err; + + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; + if (!ds) continue; - } - if (dsa_port_is_cpu(port)) { - err = dsa_cpu_port_apply(port); + err = dsa_switch_setup(ds); + if (err) + return err; + + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + err = dsa_port_setup(dp); if (err) return err; - continue; } - - err = dsa_user_port_apply(port); - if (err) - continue; } return 0; } -static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) +static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) { - struct dsa_port *port; - u32 index; + struct dsa_switch *ds; + struct dsa_port *dp; + int device, port; - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; + if (!ds) continue; - if (dsa_port_is_dsa(port)) { - dsa_dsa_port_unapply(port); - continue; - } + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; - if (dsa_port_is_cpu(port)) { - dsa_cpu_port_unapply(port); - continue; + dsa_port_teardown(dp); } - dsa_user_port_unapply(port); + dsa_switch_teardown(ds); } +} - if (ds->slave_mii_bus && ds->ops->phy_read) - mdiobus_unregister(ds->slave_mii_bus); +static int dsa_tree_setup_master(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp = dst->cpu_dp; + struct net_device *master = cpu_dp->master; - dsa_switch_unregister_notifier(ds); + /* DSA currently supports a single pair of CPU port and master device */ + return dsa_master_setup(master, cpu_dp); +} - if (ds->devlink) { - devlink_unregister(ds->devlink); - devlink_free(ds->devlink); - ds->devlink = NULL; - } +static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp = dst->cpu_dp; + struct net_device *master = cpu_dp->master; + return dsa_master_teardown(master); } -static int dsa_dst_apply(struct dsa_switch_tree *dst) +static int dsa_tree_setup(struct dsa_switch_tree *dst) { - struct dsa_switch *ds; - u32 index; + bool complete; int err; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; - if (!ds) - continue; - - err = dsa_ds_apply(dst, ds); - if (err) - return err; + if (dst->setup) { + pr_err("DSA: tree %d already setup! Disjoint trees?\n", + dst->index); + return -EEXIST; } - /* If we use a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point on get - * sent to the tag format's receive function. - */ - wmb(); - dst->cpu_dp->master->dsa_ptr = dst->cpu_dp; + complete = dsa_tree_setup_routing_table(dst); + if (!complete) + return 0; + + err = dsa_tree_setup_default_cpu(dst); + if (err) + return err; - err = dsa_master_ethtool_setup(dst->cpu_dp->master); + err = dsa_tree_setup_switches(dst); if (err) return err; - dst->applied = true; + err = dsa_tree_setup_master(dst); + if (err) + return err; + + dst->setup = true; + + pr_info("DSA: tree %d setup\n", dst->index); return 0; } -static void dsa_dst_unapply(struct dsa_switch_tree *dst) +static void dsa_tree_teardown(struct dsa_switch_tree *dst) { - struct dsa_switch *ds; - u32 index; - - if (!dst->applied) + if (!dst->setup) return; - dsa_master_ethtool_restore(dst->cpu_dp->master); + dsa_tree_teardown_master(dst); - dst->cpu_dp->master->dsa_ptr = NULL; + dsa_tree_teardown_switches(dst); - /* If we used a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point get sent - * without the tag and go through the regular receive path. - */ - wmb(); + dsa_tree_teardown_default_cpu(dst); - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; - if (!ds) - continue; + pr_info("DSA: tree %d torn down\n", dst->index); - dsa_ds_unapply(dst, ds); - } - - dst->cpu_dp = NULL; - - pr_info("DSA: tree %d unapplied\n", dst->index); - dst->applied = false; + dst->setup = false; } static void dsa_tree_remove_switch(struct dsa_switch_tree *dst, unsigned int index) { + dsa_tree_teardown(dst); + dst->ds[index] = NULL; dsa_tree_put(dst); } @@ -481,6 +499,7 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst, struct dsa_switch *ds) { unsigned int index = ds->index; + int err; if (dst->ds[index]) return -EBUSY; @@ -488,7 +507,11 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst, dsa_tree_get(dst); dst->ds[index] = ds; - return 0; + err = dsa_tree_setup(dst); + if (err) + dsa_tree_remove_switch(dst, index); + + return err; } static int dsa_port_parse_user(struct dsa_port *dp, const char *name) @@ -532,86 +555,6 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) return 0; } -static int dsa_cpu_parse(struct dsa_port *port, u32 index, - struct dsa_switch_tree *dst, - struct dsa_switch *ds) -{ - if (!dst->cpu_dp) - dst->cpu_dp = port; - - return 0; -} - -static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) -{ - struct dsa_port *port; - u32 index; - int err; - - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port) || - dsa_port_is_dsa(port)) - continue; - - if (dsa_port_is_cpu(port)) { - err = dsa_cpu_parse(port, index, dst, ds); - if (err) - return err; - } - - } - - pr_info("DSA: switch %d %d parsed\n", dst->index, ds->index); - - return 0; -} - -static int dsa_dst_parse(struct dsa_switch_tree *dst) -{ - struct dsa_switch *ds; - struct dsa_port *dp; - u32 index; - int port; - int err; - - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; - if (!ds) - continue; - - err = dsa_ds_parse(dst, ds); - if (err) - return err; - } - - if (!dst->cpu_dp) { - pr_warn("Tree has no master device\n"); - return -EINVAL; - } - - /* Assign the default CPU port to all ports of the fabric */ - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; - if (!ds) - continue; - - for (port = 0; port < ds->num_ports; port++) { - dp = &ds->ports[port]; - if (!dsa_port_is_valid(dp) || - dsa_port_is_dsa(dp) || - dsa_port_is_cpu(dp)) - continue; - - dp->cpu_dp = dst->cpu_dp; - } - } - - pr_info("DSA: tree %d parsed\n", dst->index); - - return 0; -} - static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) { struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0); @@ -768,13 +711,18 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) return dsa_switch_parse_ports(ds, cd); } -static int _dsa_register_switch(struct dsa_switch *ds) +static int dsa_switch_add(struct dsa_switch *ds) +{ + struct dsa_switch_tree *dst = ds->dst; + + return dsa_tree_add_switch(dst, ds); +} + +static int dsa_switch_probe(struct dsa_switch *ds) { struct dsa_chip_data *pdata = ds->dev->platform_data; struct device_node *np = ds->dev->of_node; - struct dsa_switch_tree *dst; - unsigned int index; - int i, err; + int err; if (np) err = dsa_switch_parse_of(ds, np); @@ -786,46 +734,7 @@ static int _dsa_register_switch(struct dsa_switch *ds) if (err) return err; - index = ds->index; - dst = ds->dst; - - /* Initialize the routing table */ - for (i = 0; i < DSA_MAX_SWITCHES; ++i) - ds->rtable[i] = DSA_RTABLE_NONE; - - err = dsa_tree_add_switch(dst, ds); - if (err) - return err; - - err = dsa_dst_complete(dst); - if (err < 0) - goto out_del_dst; - - /* Not all switches registered yet */ - if (err == 1) - return 0; - - if (dst->applied) { - pr_info("DSA: Disjoint trees?\n"); - return -EINVAL; - } - - err = dsa_dst_parse(dst); - if (err) - goto out_del_dst; - - err = dsa_dst_apply(dst); - if (err) { - dsa_dst_unapply(dst); - goto out_del_dst; - } - - return 0; - -out_del_dst: - dsa_tree_remove_switch(dst, index); - - return err; + return dsa_switch_add(ds); } struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) @@ -855,27 +764,25 @@ int dsa_register_switch(struct dsa_switch *ds) int err; mutex_lock(&dsa2_mutex); - err = _dsa_register_switch(ds); + err = dsa_switch_probe(ds); mutex_unlock(&dsa2_mutex); return err; } EXPORT_SYMBOL_GPL(dsa_register_switch); -static void _dsa_unregister_switch(struct dsa_switch *ds) +static void dsa_switch_remove(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; unsigned int index = ds->index; - dsa_dst_unapply(dst); - dsa_tree_remove_switch(dst, index); } void dsa_unregister_switch(struct dsa_switch *ds) { mutex_lock(&dsa2_mutex); - _dsa_unregister_switch(ds); + dsa_switch_remove(ds); mutex_unlock(&dsa2_mutex); } EXPORT_SYMBOL_GPL(dsa_unregister_switch); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 253a613c40cd..507e1ce4d4d2 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -108,8 +108,8 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], const unsigned char *addr, u16 vid); /* master.c */ -int dsa_master_ethtool_setup(struct net_device *dev); -void dsa_master_ethtool_restore(struct net_device *dev); +int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); +void dsa_master_teardown(struct net_device *dev); static inline struct net_device *dsa_master_find_slave(struct net_device *dev, int device, int port) @@ -147,10 +147,10 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); -int dsa_port_mdb_add(struct dsa_port *dp, +int dsa_port_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans); -int dsa_port_mdb_del(struct dsa_port *dp, +int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 0511fe2feff7..4863e3e398b6 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -593,15 +593,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, if (!configured) return -EPROBE_DEFER; - /* - * If we use a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point on get - * sent to the tag format's receive function. - */ - wmb(); - dev->dsa_ptr = dst->cpu_dp; - - return dsa_master_ethtool_setup(dst->cpu_dp->master); + return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp); } static int dsa_probe(struct platform_device *pdev) @@ -666,15 +658,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - dsa_master_ethtool_restore(dst->cpu_dp->master); - - dst->cpu_dp->master->dsa_ptr = NULL; - - /* If we used a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point get sent - * without the tag and go through the regular receive path. - */ - wmb(); + dsa_master_teardown(dst->cpu_dp->master); for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; diff --git a/net/dsa/master.c b/net/dsa/master.c index 5f3f57e372e0..00589147f042 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -85,7 +85,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, } } -int dsa_master_ethtool_setup(struct net_device *dev) +static int dsa_master_ethtool_setup(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch *ds = cpu_dp->ds; @@ -108,10 +108,36 @@ int dsa_master_ethtool_setup(struct net_device *dev) return 0; } -void dsa_master_ethtool_restore(struct net_device *dev) +static void dsa_master_ethtool_teardown(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; dev->ethtool_ops = cpu_dp->orig_ethtool_ops; cpu_dp->orig_ethtool_ops = NULL; } + +int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) +{ + /* If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. + */ + wmb(); + + dev->dsa_ptr = cpu_dp; + + return dsa_master_ethtool_setup(dev); +} + +void dsa_master_teardown(struct net_device *dev) +{ + dsa_master_ethtool_teardown(dev); + + dev->dsa_ptr = NULL; + + /* If we used a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point get sent + * without the tag and go through the regular receive path. + */ + wmb(); +} diff --git a/net/dsa/port.c b/net/dsa/port.c index bb30b1a7de3a..bb4be2679904 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -17,7 +17,7 @@ #include "dsa_priv.h" -static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v) +static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) { struct raw_notifier_head *nh = &dp->ds->dst->nh; int err; @@ -215,7 +215,7 @@ int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data) return ds->ops->port_fdb_dump(ds, port, cb, data); } -int dsa_port_mdb_add(struct dsa_port *dp, +int dsa_port_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans) { @@ -229,7 +229,7 @@ int dsa_port_mdb_add(struct dsa_port *dp, return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info); } -int dsa_port_mdb_del(struct dsa_port *dp, +int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { struct dsa_notifier_mdb_info info = { @@ -252,7 +252,10 @@ int dsa_port_vlan_add(struct dsa_port *dp, .vlan = vlan, }; - return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); + if (br_vlan_enabled(dp->bridge_dev)) + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); + + return 0; } int dsa_port_vlan_del(struct dsa_port *dp, @@ -264,7 +267,10 @@ int dsa_port_vlan_del(struct dsa_port *dp, .vlan = vlan, }; - return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); + if (br_vlan_enabled(dp->bridge_dev)) + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); + + return 0; } int dsa_port_fixed_link_register_of(struct dsa_port *dp) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 814ced75a0cc..35715fda84b2 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -304,6 +304,13 @@ static int dsa_slave_port_obj_add(struct net_device *dev, case SWITCHDEV_OBJ_ID_PORT_MDB: err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); break; + case SWITCHDEV_OBJ_ID_HOST_MDB: + /* DSA can directly translate this to a normal MDB add, + * but on the CPU port. + */ + err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj), + trans); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); @@ -326,6 +333,12 @@ static int dsa_slave_port_obj_del(struct net_device *dev, case SWITCHDEV_OBJ_ID_PORT_MDB: err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; + case SWITCHDEV_OBJ_ID_HOST_MDB: + /* DSA can directly translate this to a normal MDB add, + * but on the CPU port. + */ + err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; @@ -1147,7 +1160,7 @@ static void dsa_slave_notify(struct net_device *dev, unsigned long val) int dsa_slave_create(struct dsa_port *port) { - struct dsa_port *cpu_dp = port->cpu_dp; + const struct dsa_port *cpu_dp = port->cpu_dp; struct net_device *master = cpu_dp->master; struct dsa_switch *ds = port->ds; const char *name = port->name; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index e6c06aa349a6..1155e43c157f 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -121,7 +121,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, if (ds->index == info->sw_index) set_bit(info->port, group); for (port = 0; port < ds->num_ports; port++) - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + if (dsa_is_dsa_port(ds, port)) set_bit(port, group); if (switchdev_trans_ph_prepare(trans)) { diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index e526c8967b98..5ba01fc3c6ba 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -88,7 +88,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) } static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt) { u16 *lan9303_tag; unsigned int source_port; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c105a315b1a3..bb6239169b1a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -773,20 +773,46 @@ free_skb: return NETDEV_TX_OK; } +static void ipgre_link_update(struct net_device *dev, bool set_mtu) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int len; + + len = tunnel->tun_hlen; + tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); + len = tunnel->tun_hlen - len; + tunnel->hlen = tunnel->hlen + len; + + dev->needed_headroom = dev->needed_headroom + len; + if (set_mtu) + dev->mtu = max_t(int, dev->mtu - len, 68); + + if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { + if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || + tunnel->encap.type == TUNNEL_ENCAP_NONE) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + dev->features |= NETIF_F_LLTX; + } +} + static int ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - int err; struct ip_tunnel_parm p; + int err; if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) || + ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; } + p.i_flags = gre_flags_to_tnl_flags(p.i_flags); p.o_flags = gre_flags_to_tnl_flags(p.o_flags); @@ -794,11 +820,22 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (err) return err; + if (cmd == SIOCCHGTUNNEL) { + struct ip_tunnel *t = netdev_priv(dev); + + t->parms.i_flags = p.i_flags; + t->parms.o_flags = p.o_flags; + + if (strcmp(dev->rtnl_link_ops->kind, "erspan")) + ipgre_link_update(dev, true); + } + p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) return -EFAULT; + return 0; } @@ -1307,9 +1344,9 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; __u32 fwmark = t->fwmark; + struct ip_tunnel_parm p; int err; if (ipgre_netlink_encap_parms(data, &ipencap)) { @@ -1322,7 +1359,18 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; - return ip_tunnel_changelink(dev, tb, &p, fwmark); + + err = ip_tunnel_changelink(dev, tb, &p, fwmark); + if (err < 0) + return err; + + t->parms.i_flags = p.i_flags; + t->parms.o_flags = p.o_flags; + + if (strcmp(dev->rtnl_link_ops->kind, "erspan")) + ipgre_link_update(dev, !tb[IFLA_MTU]); + + return 0; } static size_t ipgre_get_size(const struct net_device *dev) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 9e2770fd00be..f88221aebc9d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -634,6 +634,25 @@ static void get_counters(const struct xt_table_info *t, } } +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct arpt_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; + } + cond_resched(); + } +} + static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; @@ -910,8 +929,7 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters, and synchronize with replace */ - get_counters(oldinfo, counters); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 39286e543ee6..4cbe5e80f3bf 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -781,6 +781,26 @@ get_counters(const struct xt_table_info *t, } } +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct ipt_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + const struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; /* macro does multi eval of i */ + } + + cond_resched(); + } +} + static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; @@ -1070,8 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters, and synchronize with replace */ - get_counters(oldinfo, counters); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index fe374da4bc13..89af9d88ca21 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -344,7 +344,7 @@ static void ipv4_hooks_unregister(struct net *net) mutex_unlock(®ister_ipv4_hooks); } -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { +const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { .l3proto = PF_INET, .pkt_to_tuple = ipv4_pkt_to_tuple, .invert_tuple = ipv4_invert_tuple, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index a046c298413a..1849fedd9b81 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -81,7 +81,6 @@ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeout) { /* Do not immediately delete the connection after the first @@ -165,6 +164,12 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, return NF_ACCEPT; } +static void icmp_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg); +} + /* Small and modified version of icmp_rcv */ static int icmp_error(struct net *net, struct nf_conn *tmpl, @@ -177,18 +182,14 @@ icmp_error(struct net *net, struct nf_conn *tmpl, /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { - if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, - NULL, "nf_ct_icmp: short packet "); + icmp_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } /* See ip_conntrack_proto_tcp.c */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { - if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: bad HW ICMP checksum "); + icmp_error_log(skb, net, pf, "bad hw icmp checksum"); return -NF_ACCEPT; } @@ -199,9 +200,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, * discarded. */ if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: invalid ICMP type "); + icmp_error_log(skb, net, pf, "invalid icmp type"); return -NF_ACCEPT; } @@ -259,9 +258,14 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[], return 0; } -static int icmp_nlattr_tuple_size(void) +static unsigned int icmp_nlattr_tuple_size(void) { - return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); + + return size; } #endif diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 127153f1ed8a..9f37c4727861 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -212,7 +212,6 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY), SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY), SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING), - SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER), SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER), SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER), SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a82b44038308..ef0ff3357a44 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -441,22 +441,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_doulongvec_minmax, }, { - .procname = "tcp_wmem", - .data = &sysctl_tcp_wmem, - .maxlen = sizeof(sysctl_tcp_wmem), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - }, - { - .procname = "tcp_rmem", - .data = &sysctl_tcp_rmem, - .maxlen = sizeof(sysctl_tcp_rmem), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - }, - { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), @@ -1164,6 +1148,22 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &zero, .extra2 = &thousand, }, + { + .procname = "tcp_wmem", + .data = &init_net.ipv4.sysctl_tcp_wmem, + .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, + { + .procname = "tcp_rmem", + .data = &init_net.ipv4.sysctl_tcp_rmem, + .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c4cb19ed4628..bf97317e6c97 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -289,12 +289,7 @@ struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); long sysctl_tcp_mem[3] __read_mostly; -int sysctl_tcp_wmem[3] __read_mostly; -int sysctl_tcp_rmem[3] __read_mostly; - EXPORT_SYMBOL(sysctl_tcp_mem); -EXPORT_SYMBOL(sysctl_tcp_rmem); -EXPORT_SYMBOL(sysctl_tcp_wmem); atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); @@ -456,8 +451,8 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - sk->sk_sndbuf = sysctl_tcp_wmem[1]; - sk->sk_rcvbuf = sysctl_tcp_rmem[1]; + sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; + sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; sk_sockets_allocated_inc(sk); } @@ -2514,8 +2509,6 @@ static int tcp_repair_options_est(struct sock *sk, return -EINVAL; tp->rx_opt.sack_ok |= TCP_SACK_SEEN; - if (sock_net(sk)->ipv4.sysctl_tcp_fack) - tcp_enable_fack(tp); break; case TCPOPT_TIMESTAMP: if (opt.opt_val != 0) @@ -2984,7 +2977,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_lost = tp->lost_out; info->tcpi_retrans = tp->retrans_out; - info->tcpi_fackets = tp->fackets_out; now = tcp_jiffies32; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); @@ -3636,13 +3628,13 @@ void __init tcp_init(void) max_wshare = min(4UL*1024*1024, limit); max_rshare = min(6UL*1024*1024, limit); - sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; - sysctl_tcp_wmem[1] = 16*1024; - sysctl_tcp_wmem[2] = max(64*1024, max_wshare); + init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; + init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare); - sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; - sysctl_tcp_rmem[1] = 87380; - sysctl_tcp_rmem[2] = max(87380, max_rshare); + init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_rmem[1] = 87380; + init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare); pr_info("Hash tables configured (established %u bind %u)\n", tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0ada8bfc2ebd..94d729be42a9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -100,7 +100,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) -#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) +#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) @@ -320,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk) sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) - sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); + sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -354,7 +354,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ int truesize = tcp_win_from_space(sk, skb->truesize) >> 1; - int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1; + int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -409,7 +409,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) rcvmem <<= 2; if (sk->sk_rcvbuf < rcvmem) - sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); + sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); } /* 4. Try to fixup all. It is made immediately after connection enters @@ -457,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); icsk->icsk_ack.quick = 0; - if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && + if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), - sysctl_tcp_rmem[2]); + net->ipv4.sysctl_tcp_rmem[2]); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); @@ -623,7 +624,8 @@ void tcp_rcv_space_adjust(struct sock *sk) while (tcp_win_from_space(sk, rcvmem) < tp->advmss) rcvmem += 128; - rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); + rcvbuf = min(rcvwin / tp->advmss * rcvmem, + sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); if (rcvbuf > sk->sk_rcvbuf) { sk->sk_rcvbuf = rcvbuf; @@ -840,18 +842,6 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -/* - * Packet counting of FACK is based on in-order assumptions, therefore TCP - * disables it when reordering is detected - */ -void tcp_disable_fack(struct tcp_sock *tp) -{ - /* RFC3517 uses different metric in lost marker => reset on change */ - if (tcp_is_fack(tp)) - tp->lost_skb_hint = NULL; - tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED; -} - /* Take a notice that peer is sending D-SACKs */ static void tcp_dsack_seen(struct tcp_sock *tp) { @@ -859,42 +849,39 @@ static void tcp_dsack_seen(struct tcp_sock *tp) tp->rack.dsack_seen = 1; } -static void tcp_update_reordering(struct sock *sk, const int metric, - const int ts) +/* It's reordering when higher sequence was delivered (i.e. sacked) before + * some lower never-retransmitted sequence ("low_seq"). The maximum reordering + * distance is approximated in full-mss packet distance ("reordering"). + */ +static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, + const int ts) { struct tcp_sock *tp = tcp_sk(sk); - int mib_idx; + const u32 mss = tp->mss_cache; + u32 fack, metric; - if (WARN_ON_ONCE(metric < 0)) + fack = tcp_highest_sack_seq(tp); + if (!before(low_seq, fack)) return; - if (metric > tp->reordering) { - tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric); - + metric = fack - low_seq; + if ((metric > tp->reordering * mss) && mss) { #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, tp->reordering, - tp->fackets_out, + 0, tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif - tcp_disable_fack(tp); + tp->reordering = min_t(u32, (metric + mss - 1) / mss, + sock_net(sk)->ipv4.sysctl_tcp_max_reordering); } tp->rack.reord = 1; - /* This exciting event is worth to be remembered. 8) */ - if (ts) - mib_idx = LINUX_MIB_TCPTSREORDER; - else if (tcp_is_reno(tp)) - mib_idx = LINUX_MIB_TCPRENOREORDER; - else if (tcp_is_fack(tp)) - mib_idx = LINUX_MIB_TCPFACKREORDER; - else - mib_idx = LINUX_MIB_TCPSACKREORDER; - - NET_INC_STATS(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), + ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER); } /* This must be called before lost_out is incremented */ @@ -968,7 +955,6 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) * 3. Loss detection event of two flavors: * A. Scoreboard estimator decided the packet is lost. * A'. Reno "three dupacks" marks head of queue lost. - * A''. Its FACK modification, head until snd.fack is lost. * B. SACK arrives sacking SND.NXT at the moment, when the * segment was retransmitted. * 4. D-SACK added new rule: D-SACK changes any tag to S. @@ -1111,8 +1097,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } struct tcp_sacktag_state { - int reord; - int fack_count; + u32 reord; /* Timestamps for earliest and latest never-retransmitted segment * that was SACKed. RTO needs the earliest RTT to stay conservative, * but congestion control should still get an accurate delay signal. @@ -1188,15 +1173,15 @@ static u8 tcp_sacktag_one(struct sock *sk, u64 xmit_time) { struct tcp_sock *tp = tcp_sk(sk); - int fack_count = state->fack_count; /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) tp->undo_retrans--; - if (sacked & TCPCB_SACKED_ACKED) - state->reord = min(fack_count, state->reord); + if ((sacked & TCPCB_SACKED_ACKED) && + before(start_seq, state->reord)) + state->reord = start_seq; } /* Nothing to do; acked frame is about to be dropped (was ACKed). */ @@ -1222,9 +1207,10 @@ static u8 tcp_sacktag_one(struct sock *sk, * which was in hole. It is reordering. */ if (before(start_seq, - tcp_highest_sack_seq(tp))) - state->reord = min(fack_count, - state->reord); + tcp_highest_sack_seq(tp)) && + before(start_seq, state->reord)) + state->reord = start_seq; + if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; if (state->first_sackt == 0) @@ -1243,15 +1229,10 @@ static u8 tcp_sacktag_one(struct sock *sk, tp->sacked_out += pcount; tp->delivered += pcount; /* Out-of-order packets delivered */ - fack_count += pcount; - /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ - if (!tcp_is_fack(tp) && tp->lost_skb_hint && + if (tp->lost_skb_hint && before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) tp->lost_cnt_hint += pcount; - - if (fack_count > tp->fackets_out) - tp->fackets_out = fack_count; } /* D-SACK. We can detect redundant retransmission in S|R and plain R @@ -1498,7 +1479,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, } out: - state->fack_count += pcount; return prev; noop: @@ -1577,8 +1557,6 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, tcp_highest_sack_seq(tp))) tcp_advance_highest_sack(sk, skb); } - - state->fack_count += tcp_skb_pcount(skb); } return skb; } @@ -1589,7 +1567,6 @@ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, { struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node; struct sk_buff *skb; - int unack_bytes; while (*p) { parent = *p; @@ -1602,12 +1579,6 @@ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, p = &parent->rb_right; continue; } - - state->fack_count = 0; - unack_bytes = TCP_SKB_CB(skb)->seq - tcp_sk(sk)->snd_una; - if (state->mss_now && unack_bytes > 0) - state->fack_count = unack_bytes / state->mss_now; - return skb; } return NULL; @@ -1665,13 +1636,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, int first_sack_index; state->flag = 0; - state->reord = tp->packets_out; + state->reord = tp->snd_nxt; - if (!tp->sacked_out) { - if (WARN_ON(tp->fackets_out)) - tp->fackets_out = 0; + if (!tp->sacked_out) tcp_highest_sack_reset(sk); - } found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); @@ -1743,7 +1711,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } state->mss_now = tcp_current_mss(sk); - state->fack_count = 0; skb = NULL; i = 0; @@ -1801,7 +1768,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (!skb) break; - state->fack_count = tp->fackets_out; cache++; goto walk; } @@ -1816,7 +1782,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (!skb) break; - state->fack_count = tp->fackets_out; } skb = tcp_sacktag_skip(skb, sk, state, start_seq); @@ -1836,9 +1801,8 @@ advance_sp: for (j = 0; j < used_sacks; j++) tp->recv_sack_cache[i++] = sp[j]; - if ((state->reord < tp->fackets_out) && - ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) - tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); + if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker) + tcp_check_sack_reordering(sk, state->reord, 0); tcp_verify_left_out(tp); out: @@ -1876,8 +1840,13 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp) static void tcp_check_reno_reordering(struct sock *sk, const int addend) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_limit_reno_sacked(tp)) - tcp_update_reordering(sk, tp->packets_out + addend, 0); + + if (!tcp_limit_reno_sacked(tp)) + return; + + tp->reordering = min_t(u32, tp->packets_out + addend, + sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } /* Emulate SACKs for SACKless connection: account for a new dupack. */ @@ -1923,7 +1892,6 @@ void tcp_clear_retrans(struct tcp_sock *tp) tp->lost_out = 0; tp->undo_marker = 0; tp->undo_retrans = -1; - tp->fackets_out = 0; tp->sacked_out = 0; } @@ -1973,7 +1941,6 @@ void tcp_enter_loss(struct sock *sk) if (is_reneg) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; - tp->fackets_out = 0; } tcp_clear_all_retrans_hints(tp); @@ -2040,19 +2007,10 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag) return false; } -static inline int tcp_fackets_out(const struct tcp_sock *tp) -{ - return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out; -} - /* Heurestics to calculate number of duplicate ACKs. There's no dupACKs * counter when SACK is enabled (without SACK, sacked_out is used for * that purpose). * - * Instead, with FACK TCP uses fackets_out that includes both SACKed - * segments up to the highest received SACK block so far and holes in - * between them. - * * With reordering, holes may still be in flight, so RFC3517 recovery * uses pure sacked_out (total number of SACKed segments) even though * it violates the RFC that uses duplicate ACKs, often these are equal @@ -2062,10 +2020,10 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp) */ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) { - return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; + return tp->sacked_out + 1; } -/* Linux NewReno/SACK/FACK/ECN state machine. +/* Linux NewReno/SACK/ECN state machine. * -------------------------------------- * * "Open" Normal state, no dubious events, fast path. @@ -2130,16 +2088,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) * dynamically measured and adjusted. This is implemented in * tcp_rack_mark_lost. * - * FACK (Disabled by default. Subsumbed by RACK): - * It is the simplest heuristics. As soon as we decided - * that something is lost, we decide that _all_ not SACKed - * packets until the most forward SACK are lost. I.e. - * lost_out = fackets_out - sacked_out and left_out = fackets_out. - * It is absolutely correct estimate, if network does not reorder - * packets. And it loses any connection to reality when reordering - * takes place. We use FACK by default until reordering - * is suspected on the path to this destination. - * * If the receiver does not support SACK: * * NewReno (RFC6582): in Recovery we assume that one segment @@ -2188,7 +2136,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) } /* Detect loss in event "A" above by marking head of queue up as lost. - * For FACK or non-SACK(Reno) senders, the first "packets" number of segments + * For non-SACK(Reno) senders, the first "packets" number of segments * are considered lost. For RFC3517 SACK, a segment is considered lost if it * has at least tp->reordering SACKed seqments above it; "packets" refers to * the maximum SACKed segments to pass before reaching this limit. @@ -2224,12 +2172,12 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) break; oldcnt = cnt; - if (tcp_is_fack(tp) || tcp_is_reno(tp) || + if (tcp_is_reno(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) cnt += tcp_skb_pcount(skb); if (cnt > packets) { - if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + if (tcp_is_sack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || (oldcnt >= packets)) break; @@ -2260,11 +2208,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) if (tcp_is_reno(tp)) { tcp_mark_head_lost(sk, 1, 1); - } else if (tcp_is_fack(tp)) { - int lost = tp->fackets_out - tp->reordering; - if (lost <= 0) - lost = 1; - tcp_mark_head_lost(sk, lost, 0); } else { int sacked_upto = tp->sacked_out - tp->reordering; if (sacked_upto >= 0) @@ -2734,15 +2677,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, } /* Undo during fast recovery after partial ACK. */ -static bool tcp_try_undo_partial(struct sock *sk, const int acked) +static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una) { struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && tcp_packet_delayed(tp)) { /* Plain luck! Hole if filled with delayed - * packet, rather than with a retransmit. + * packet, rather than with a retransmit. Check reordering. */ - tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); + tcp_check_sack_reordering(sk, prior_snd_una, 1); /* We are getting evidence that the reordering degree is higher * than we realized. If there are no retransmits out then we @@ -2778,6 +2721,14 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) } } +static bool tcp_force_fast_retransmit(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + return after(tcp_highest_sack_seq(tp), + tp->snd_una + tp->reordering * tp->mss_cache); +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2790,19 +2741,17 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, const int acked, +static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, bool is_dupack, int *ack_flag, int *rexmit) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int fast_rexmit = 0, flag = *ack_flag; bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && - (tcp_fackets_out(tp) > tp->reordering)); + tcp_force_fast_retransmit(sk)); if (!tp->packets_out && tp->sacked_out) tp->sacked_out = 0; - if (!tp->sacked_out && tp->fackets_out) - tp->fackets_out = 0; /* Now state machine starts. * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ @@ -2849,11 +2798,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); } else { - if (tcp_try_undo_partial(sk, acked)) + if (tcp_try_undo_partial(sk, prior_snd_una)) return; /* Partial ACK arrived. Force fast retransmit. */ do_lost = tcp_is_reno(tp) || - tcp_fackets_out(tp) > tp->reordering; + tcp_force_fast_retransmit(sk); } if (tcp_try_undo_dsack(sk)) { tcp_try_keep_open(sk); @@ -3063,15 +3012,15 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. */ -static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, - u32 prior_snd_una, int *acked, +static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, + u32 prior_snd_una, struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); u64 first_ackt, last_ackt; struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; - u32 reord = tp->packets_out; + u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */ struct sk_buff *skb, *next; bool fully_acked = true; long sack_rtt_us = -1L; @@ -3086,6 +3035,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + const u32 start_seq = scb->seq; u8 sacked = scb->sacked; u32 acked_pcount; @@ -3116,7 +3066,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, first_ackt = last_ackt; last_in_flight = TCP_SKB_CB(skb)->tx.in_flight; - reord = min(pkts_acked, reord); + if (before(start_seq, reord)) + reord = start_seq; if (!after(scb->end_seq, tp->high_seq)) flag |= FLAG_ORIG_SACK_ACKED; } @@ -3194,16 +3145,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, int delta; /* Non-retransmitted hole got filled? That's reordering */ - if (reord < prior_fackets && reord <= tp->fackets_out) - tcp_update_reordering(sk, tp->fackets_out - reord, 0); + if (before(reord, prior_fack)) + tcp_check_sack_reordering(sk, reord, 0); - delta = tcp_is_fack(tp) ? pkts_acked : - prior_sacked - tp->sacked_out; + delta = prior_sacked - tp->sacked_out; tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); } - - tp->fackets_out -= min(pkts_acked, tp->fackets_out); - } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent @@ -3244,7 +3191,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } } #endif - *acked = pkts_acked; return flag; } @@ -3553,12 +3499,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_fackets; int prior_packets = tp->packets_out; u32 delivered = tp->delivered; u32 lost = tp->lost; - int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + u32 prior_fack; sack_state.first_sackt = 0; sack_state.rate = &rs; @@ -3590,7 +3535,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) icsk->icsk_retransmits = 0; } - prior_fackets = tp->fackets_out; + prior_fack = tcp_highest_sack_seq(tp); rs.prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet @@ -3646,8 +3591,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, - &sack_state); + flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state); tcp_rack_update_reo_wnd(sk, &rs); @@ -3659,7 +3603,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + &rexmit); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) @@ -3675,7 +3620,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + &rexmit); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3697,7 +3643,8 @@ old_ack: if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + &rexmit); tcp_xmit_recovery(sk, rexmit); } @@ -5706,9 +5653,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->tcp_header_len = sizeof(struct tcphdr); } - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack) - tcp_enable_fack(tp); - tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0162c577bb9c..1eac84b8044e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2409,8 +2409,8 @@ struct proto tcp_prot = { .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, .sysctl_mem = sysctl_tcp_mem, - .sysctl_wmem = sysctl_tcp_wmem, - .sysctl_rmem = sysctl_tcp_rmem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, @@ -2509,7 +2509,14 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2; net->ipv4.sysctl_tcp_pacing_ss_ratio = 200; net->ipv4.sysctl_tcp_pacing_ca_ratio = 120; - + if (net != &init_net) { + memcpy(net->ipv4.sysctl_tcp_rmem, + init_net.ipv4.sysctl_tcp_rmem, + sizeof(init_net.ipv4.sysctl_tcp_rmem)); + memcpy(net->ipv4.sysctl_tcp_wmem, + init_net.ipv4.sysctl_tcp_wmem, + sizeof(init_net.ipv4.sysctl_tcp_wmem)); + } net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 9d5ddebfd831..7097f92d16e5 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -470,10 +470,8 @@ void tcp_init_metrics(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; } val = tcp_metric_get(tm, TCP_METRIC_REORDERING); - if (val && tp->reordering != val) { - tcp_disable_fack(tp); + if (val && tp->reordering != val) tp->reordering = val; - } crtt = tcp_metric_get(tm, TCP_METRIC_RTT); rcu_read_unlock(); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4bb86580decd..e36eff0403f4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -475,7 +475,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->packets_out = 0; newtp->retrans_out = 0; newtp->sacked_out = 0; - newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; newtp->tlp_high_seq = 0; newtp->lsndtime = tcp_jiffies32; @@ -509,10 +508,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; - if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { - if (sock_net(sk)->ipv4.sysctl_tcp_fack) - tcp_enable_fack(newtp); - } + newtp->rx_opt.sack_ok = ireq->sack_ok; newtp->window_clamp = req->rsk_window_clamp; newtp->rcv_ssthresh = req->rsk_rcv_wnd; newtp->rcv_wnd = req->rsk_rcv_wnd; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a9d917e4dad5..0256f7a41041 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -220,7 +220,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, (*rcv_wscale) = 0; if (wscale_ok) { /* Set window scaling on max possible window */ - space = max_t(u32, space, sysctl_tcp_rmem[2]); + space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) { @@ -1218,21 +1218,6 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) } } -/* When a modification to fackets out becomes necessary, we need to check - * skb is counted to fackets_out or not. - */ -static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb, - int decr) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (!tp->sacked_out || tcp_is_reno(tp)) - return; - - if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq)) - tp->fackets_out -= decr; -} - /* Pcount in the middle of the write queue got changed, we need to do various * tweaks to fix counters */ @@ -1253,11 +1238,9 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de if (tcp_is_reno(tp) && decr > 0) tp->sacked_out -= min_t(u32, tp->sacked_out, decr); - tcp_adjust_fackets_out(sk, skb, decr); - if (tp->lost_skb_hint && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && - (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) tp->lost_cnt_hint -= decr; tcp_verify_left_out(tp); @@ -2961,9 +2944,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) * retransmitted data is acknowledged. It tries to continue * resending the rest of the retransmit queue, until either * we've sent it all or the congestion window limit is reached. - * If doing SACK, the first ACK which comes back for a timeout - * based retransmit packet might feed us FACK information again. - * If so, we use it to avoid unnecessarily retransmissions. */ void tcp_xmit_retransmit_queue(struct sock *sk) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 66d8c3d912fd..a6dffd65eb9d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1267,7 +1267,9 @@ out: in6_ifa_put(ifp); } -static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) +static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, + struct inet6_ifaddr *ift, + bool block) { struct inet6_dev *idev = ifp->idev; struct in6_addr addr, *tmpaddr; @@ -1371,7 +1373,7 @@ retry: ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen, ipv6_addr_scope(&addr), addr_flags, - tmp_valid_lft, tmp_prefered_lft, true, NULL); + tmp_valid_lft, tmp_prefered_lft, block, NULL); if (IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -1956,7 +1958,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (ifpub) { in6_ifa_hold(ifpub); spin_unlock_bh(&ifp->lock); - ipv6_create_tempaddr(ifpub, ifp); + ipv6_create_tempaddr(ifpub, ifp, true); in6_ifa_put(ifpub); } else { spin_unlock_bh(&ifp->lock); @@ -2456,7 +2458,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, * no temporary address currently exists. */ read_unlock_bh(&idev->lock); - ipv6_create_tempaddr(ifp, NULL); + ipv6_create_tempaddr(ifp, NULL, false); } else { read_unlock_bh(&idev->lock); } @@ -4351,7 +4353,7 @@ restart: spin_lock(&ifpub->lock); ifpub->regen_count = 0; spin_unlock(&ifpub->lock); - ipv6_create_tempaddr(ifpub, ifp); + ipv6_create_tempaddr(ifpub, ifp, true); in6_ifa_put(ifpub); in6_ifa_put(ifp); goto restart; @@ -5057,6 +5059,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; + array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; } static inline size_t inet6_ifla6_size(void) @@ -5984,6 +5987,7 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, } static int minus_one = -1; +static const int zero = 0; static const int one = 1; static const int two_five_five = 255; @@ -6355,6 +6359,15 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = addrconf_sysctl_disable_policy, }, { + .procname = "ndisc_tclass", + .data = &ipv6_devconf.ndisc_tclass, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&zero, + .extra2 = (void *)&two_five_five, + }, + { /* sentinel */ } }; diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index e0170f62bc39..3c7a11b62334 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -55,17 +55,6 @@ struct ila_identifier { }; }; -enum { - ILA_ATYPE_IID = 0, - ILA_ATYPE_LUID, - ILA_ATYPE_VIRT_V4, - ILA_ATYPE_VIRT_UNI_V6, - ILA_ATYPE_VIRT_MULTI_V6, - ILA_ATYPE_RSVD_1, - ILA_ATYPE_RSVD_2, - ILA_ATYPE_RSVD_3, -}; - #define CSUM_NEUTRAL_FLAG htonl(0x10000000) struct ila_addr { @@ -93,6 +82,7 @@ struct ila_params { struct ila_locator locator_match; __wsum csum_diff; u8 csum_mode; + u8 ident_type; }; static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index aba0998ddbfb..8c88ecf29b93 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -13,30 +13,37 @@ #include <uapi/linux/ila.h> #include "ila.h" -static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +void ila_init_saved_csum(struct ila_params *p) { - struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); + if (!p->locator_match.v64) + return; + p->csum_diff = compute_csum_diff8( + (__be32 *)&p->locator, + (__be32 *)&p->locator_match); +} + +static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p) +{ if (p->locator_match.v64) return p->csum_diff; else - return compute_csum_diff8((__be32 *)&iaddr->loc, - (__be32 *)&p->locator); + return compute_csum_diff8((__be32 *)&p->locator, + (__be32 *)&iaddr->loc); } -static void ila_csum_do_neutral(struct ila_addr *iaddr, - struct ila_params *p) +static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +{ + return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p); +} + +static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr, + struct ila_params *p) { __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; __wsum diff, fval; - /* Check if checksum adjust value has been cached */ - if (p->locator_match.v64) { - diff = p->csum_diff; - } else { - diff = compute_csum_diff8((__be32 *)&p->locator, - (__be32 *)iaddr); - } + diff = get_csum_diff_iaddr(iaddr, p); fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ? CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG); @@ -53,13 +60,23 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr, iaddr->ident.csum_neutral ^= 1; } -static void ila_csum_adjust_transport(struct sk_buff *skb, +static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr, struct ila_params *p) { + __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; __wsum diff; - struct ipv6hdr *ip6h = ipv6_hdr(skb); - struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); + + diff = get_csum_diff_iaddr(iaddr, p); + + *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust))); +} + +static void ila_csum_adjust_transport(struct sk_buff *skb, + struct ila_params *p) +{ size_t nhoff = sizeof(struct ipv6hdr); + struct ipv6hdr *ip6h = ipv6_hdr(skb); + __wsum diff; switch (ip6h->nexthdr) { case NEXTHDR_TCP: @@ -98,52 +115,45 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, } break; } - - /* Now change destination address */ - iaddr->loc = p->locator; } void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, - bool set_csum_neutral) + bool sir2ila) { struct ipv6hdr *ip6h = ipv6_hdr(skb); struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); - /* First deal with the transport checksum */ - if (ila_csum_neutral_set(iaddr->ident)) { - /* C-bit is set in the locator indicating that this - * is a locator being translated to a SIR address. - * Perform (receiver) checksum-neutral translation. - */ - if (!set_csum_neutral) - ila_csum_do_neutral(iaddr, p); - } else { - switch (p->csum_mode) { - case ILA_CSUM_ADJUST_TRANSPORT: - ila_csum_adjust_transport(skb, p); - break; - case ILA_CSUM_NEUTRAL_MAP: - ila_csum_do_neutral(iaddr, p); - break; - case ILA_CSUM_NO_ACTION: + switch (p->csum_mode) { + case ILA_CSUM_ADJUST_TRANSPORT: + ila_csum_adjust_transport(skb, p); + break; + case ILA_CSUM_NEUTRAL_MAP: + if (sir2ila) { + if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) { + /* Checksum flag should never be + * set in a formatted SIR address. + */ + break; + } + } else if (!ila_csum_neutral_set(iaddr->ident)) { + /* ILA to SIR translation and C-bit isn't + * set so we're good. + */ break; } + ila_csum_do_neutral_fmt(iaddr, p); + break; + case ILA_CSUM_NEUTRAL_MAP_AUTO: + ila_csum_do_neutral_nofmt(iaddr, p); + break; + case ILA_CSUM_NO_ACTION: + break; } /* Now change destination address */ iaddr->loc = p->locator; } -void ila_init_saved_csum(struct ila_params *p) -{ - if (!p->locator_match.v64) - return; - - p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator, - (__be32 *)&p->locator_match); -} - static int __init ila_init(void) { int ret; diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 696281b4bca2..3d56a2fb6f86 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -20,6 +20,7 @@ struct ila_lwt { struct ila_params p; struct dst_cache dst_cache; u32 connected : 1; + u32 lwt_output : 1; }; static inline struct ila_lwt *ila_lwt_lwtunnel( @@ -45,8 +46,10 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate), - true); + if (ilwt->lwt_output) + ila_update_ipv6_locator(skb, + ila_params_lwtunnel(orig_dst->lwtstate), + true); if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) { /* Already have a next hop address in route, no need for @@ -98,11 +101,15 @@ drop: static int ila_input(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); + struct ila_lwt *ilwt = ila_lwt_lwtunnel(dst->lwtstate); if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false); + if (!ilwt->lwt_output) + ila_update_ipv6_locator(skb, + ila_params_lwtunnel(dst->lwtstate), + false); return dst->lwtstate->orig_input(skb); @@ -114,6 +121,8 @@ drop: static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, + [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, }, + [ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, }, }; static int ila_build_state(struct nlattr *nla, @@ -127,33 +136,84 @@ static int ila_build_state(struct nlattr *nla, struct lwtunnel_state *newts; const struct fib6_config *cfg6 = cfg; struct ila_addr *iaddr; + u8 ident_type = ILA_ATYPE_USE_FORMAT; + u8 hook_type = ILA_HOOK_ROUTE_OUTPUT; + u8 csum_mode = ILA_CSUM_NO_ACTION; + bool lwt_output = true; + u8 eff_ident_type; int ret; if (family != AF_INET6) return -EINVAL; - if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { - /* Need to have full locator and at least type field - * included in destination - */ + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); + if (ret < 0) + return ret; + + if (!tb[ILA_ATTR_LOCATOR]) return -EINVAL; - } iaddr = (struct ila_addr *)&cfg6->fc_dst; - if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) { - /* Don't allow translation for a non-ILA address or checksum - * neutral flag to be set. + if (tb[ILA_ATTR_IDENT_TYPE]) + ident_type = nla_get_u8(tb[ILA_ATTR_IDENT_TYPE]); + + if (ident_type == ILA_ATYPE_USE_FORMAT) { + /* Infer identifier type from type field in formatted + * identifier. */ + + if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { + /* Need to have full locator and at least type field + * included in destination + */ + return -EINVAL; + } + + eff_ident_type = iaddr->ident.type; + } else { + eff_ident_type = ident_type; + } + + switch (eff_ident_type) { + case ILA_ATYPE_IID: + /* Don't allow ILA for IID type */ + return -EINVAL; + case ILA_ATYPE_LUID: + break; + case ILA_ATYPE_VIRT_V4: + case ILA_ATYPE_VIRT_UNI_V6: + case ILA_ATYPE_VIRT_MULTI_V6: + case ILA_ATYPE_NONLOCAL_ADDR: + /* These ILA formats are not supported yet. */ + default: return -EINVAL; } - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); - if (ret < 0) - return ret; + if (tb[ILA_ATTR_HOOK_TYPE]) + hook_type = nla_get_u8(tb[ILA_ATTR_HOOK_TYPE]); + + switch (hook_type) { + case ILA_HOOK_ROUTE_OUTPUT: + lwt_output = true; + break; + case ILA_HOOK_ROUTE_INPUT: + lwt_output = false; + break; + default: + return -EINVAL; + } - if (!tb[ILA_ATTR_LOCATOR]) + if (tb[ILA_ATTR_CSUM_MODE]) + csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]); + + if (csum_mode == ILA_CSUM_NEUTRAL_MAP && + ila_csum_neutral_set(iaddr->ident)) { + /* Don't allow translation if checksum neutral bit is + * configured and it's set in the SIR address. + */ return -EINVAL; + } newts = lwtunnel_state_alloc(sizeof(*ilwt)); if (!newts) @@ -166,19 +226,18 @@ static int ila_build_state(struct nlattr *nla, return ret; } + ilwt->lwt_output = !!lwt_output; + p = ila_params_lwtunnel(newts); + p->csum_mode = csum_mode; + p->ident_type = ident_type; p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); /* Precompute checksum difference for translation since we * know both the old locator and the new one. */ p->locator_match = iaddr->loc; - p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator_match, (__be32 *)&p->locator); - - if (tb[ILA_ATTR_CSUM_MODE]) - p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]); ila_init_saved_csum(p); @@ -203,13 +262,23 @@ static int ila_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { struct ila_params *p = ila_params_lwtunnel(lwtstate); + struct ila_lwt *ilwt = ila_lwt_lwtunnel(lwtstate); if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64, ILA_ATTR_PAD)) goto nla_put_failure; + if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode)) goto nla_put_failure; + if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type)) + goto nla_put_failure; + + if (nla_put_u8(skb, ILA_ATTR_HOOK_TYPE, + ilwt->lwt_output ? ILA_HOOK_ROUTE_OUTPUT : + ILA_HOOK_ROUTE_INPUT)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -220,6 +289,8 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) { return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */ nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */ + nla_total_size(sizeof(u8)) + /* ILA_ATTR_IDENT_TYPE */ + nla_total_size(sizeof(u8)) + /* ILA_ATTR_HOOK_TYPE */ 0; } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 3123b9de91b5..6eb5e68f112a 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -121,6 +121,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, + [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, }, }; static int parse_nl_config(struct genl_info *info, @@ -138,6 +139,14 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[ILA_ATTR_CSUM_MODE]) xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]); + else + xp->ip.csum_mode = ILA_CSUM_NO_ACTION; + + if (info->attrs[ILA_ATTR_IDENT_TYPE]) + xp->ip.ident_type = nla_get_u8( + info->attrs[ILA_ATTR_IDENT_TYPE]); + else + xp->ip.ident_type = ILA_ATYPE_USE_FORMAT; if (info->attrs[ILA_ATTR_IFINDEX]) xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); @@ -198,7 +207,7 @@ static void ila_free_cb(void *ptr, void *arg) } } -static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral); +static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila); static unsigned int ila_nf_input(void *priv, @@ -396,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) (__force u64)ila->xp.ip.locator_match.v64, ILA_ATTR_PAD) || nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) || - nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode)) + nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) || + nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type)) return -1; return 0; @@ -607,7 +617,7 @@ static struct pernet_operations ila_net_ops = { .size = sizeof(struct ila_net), }; -static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) +static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) { struct ila_map *ila; struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -617,16 +627,16 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) /* Assumes skb contains a valid IPv6 header that is pulled */ - if (!ila_addr_is_ila(iaddr)) { - /* Type indicates this is not an ILA address */ - return 0; - } + /* No check here that ILA type in the mapping matches what is in the + * address. We assume that whatever sender gaves us can be translated. + * The checksum mode however is relevant. + */ rcu_read_lock(); ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan); if (ila) - ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral); + ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila); rcu_read_unlock(); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f9c3ffe04382..b3cea200c85e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -427,12 +427,19 @@ static void ip6_nd_hdr(struct sk_buff *skb, int hop_limit, int len) { struct ipv6hdr *hdr; + struct inet6_dev *idev; + unsigned tclass; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + tclass = idev ? idev->cnf.ndisc_tclass : 0; + rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, 0, 0); + ip6_flow_hdr(hdr, tclass, 0); hdr->payload_len = htons(len); hdr->nexthdr = IPPROTO_ICMPV6; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 01bd3ee5ebc6..f06e25065a34 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -800,6 +800,25 @@ get_counters(const struct xt_table_info *t, } } +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct ip6t_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + const struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; + } + cond_resched(); + } +} + static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; @@ -1090,8 +1109,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters, and synchronize with replace */ - get_counters(oldinfo, counters); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index fe01dc953c56..3b80a38f62b8 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -339,7 +339,7 @@ static void ipv6_hooks_unregister(struct net *net) mutex_unlock(®ister_ipv6_hooks); } -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { +const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .l3proto = PF_INET6, .pkt_to_tuple = ipv6_pkt_to_tuple, .invert_tuple = ipv6_invert_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index a9e1fd1a8536..3ac0d826afc4 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -94,7 +94,6 @@ static int icmpv6_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeout) { /* Do not immediately delete the connection after the first @@ -176,6 +175,12 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return NF_ACCEPT; } +static void icmpv6_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg); +} + static int icmpv6_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, @@ -187,17 +192,13 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { - if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, - "nf_ct_icmpv6: short packet "); + icmpv6_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { - if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, - "nf_ct_icmpv6: ICMPv6 checksum failed "); + icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed"); return -NF_ACCEPT; } @@ -258,9 +259,14 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], return 0; } -static int icmpv6_nlattr_tuple_size(void) +static unsigned int icmpv6_nlattr_tuple_size(void) { - return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); + + return size; } #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0e2529958b52..6bb98c93edfe 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1940,8 +1940,8 @@ struct proto tcpv6_prot = { .memory_pressure = &tcp_memory_pressure, .orphan_count = &tcp_orphan_count, .sysctl_mem = sysctl_tcp_mem, - .sysctl_wmem = sysctl_tcp_wmem, - .sysctl_rmem = sysctl_tcp_rmem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 7c8d1eb757a5..350fcd39ebd8 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1246,8 +1246,6 @@ static void l2tp_tunnel_destruct(struct sock *sk) list_del_rcu(&tunnel->list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - l2tp_tunnel_closeall(tunnel); - tunnel->sock = NULL; l2tp_tunnel_dec_refcount(tunnel); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 6dbe450400a2..ff61124fdf59 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; + struct iphdr *iph; int length; if (!pskb_may_pull(skb, 4)) @@ -178,24 +179,17 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) { - sk = tunnel->sock; - sock_hold(sk); - } else { - struct iphdr *iph = (struct iphdr *) skb_network_header(skb); - - read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, - inet_iif(skb), tunnel_id); - if (!sk) { - read_unlock_bh(&l2tp_ip_lock); - goto discard; - } + iph = (struct iphdr *)skb_network_header(skb); - sock_hold(sk); + read_lock_bh(&l2tp_ip_lock); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb), + tunnel_id); + if (!sk) { read_unlock_bh(&l2tp_ip_lock); + goto discard; } + sock_hold(sk); + read_unlock_bh(&l2tp_ip_lock); if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 59ebb6e4f735..192344688c06 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; + struct ipv6hdr *iph; int length; if (!pskb_may_pull(skb, 4)) @@ -192,24 +193,17 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) { - sk = tunnel->sock; - sock_hold(sk); - } else { - struct ipv6hdr *iph = ipv6_hdr(skb); - - read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, - inet6_iif(skb), tunnel_id); - if (!sk) { - read_unlock_bh(&l2tp_ip6_lock); - goto discard; - } + iph = ipv6_hdr(skb); - sock_hold(sk); + read_lock_bh(&l2tp_ip6_lock); + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, + inet6_iif(skb), tunnel_id); + if (!sk) { read_unlock_bh(&l2tp_ip6_lock); + goto discard; } + sock_hold(sk); + read_unlock_bh(&l2tp_ip6_lock); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index f135938bf781..67e708e98ccf 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -73,6 +73,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, ncm->data[2] = data; ncm->data[4] = ntohl(lsc->oem_status); + netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n", + nc->id, data & 0x1 ? "up" : "down"); + chained = !list_empty(&nc->link); state = nc->state; spin_unlock_irqrestore(&nc->lock, flags); @@ -145,6 +148,8 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp, ncm = &nc->modes[NCSI_MODE_LINK]; hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; ncm->data[3] = ntohl(hncdsc->status); + netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n", + nc->id, ncm->data[3] & 0x3 ? "up" : "down"); if (!list_empty(&nc->link) || nc->state != NCSI_CHANNEL_ACTIVE) { spin_unlock_irqrestore(&nc->lock, flags); @@ -212,10 +217,18 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb) } ret = ncsi_validate_aen_pkt(h, nah->payload); - if (ret) + if (ret) { + netdev_warn(ndp->ndev.dev, + "NCSI: 'bad' packet ignored for AEN type 0x%x\n", + h->type); goto out; + } ret = nah->handler(ndp, h); + if (ret) + netdev_err(ndp->ndev.dev, + "NCSI: Handler for AEN type 0x%x returned %d\n", + h->type, ret); out: consume_skb(skb); return ret; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 47baf914eec2..a2b904a718c6 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -229,6 +229,8 @@ static void ncsi_channel_monitor(unsigned long data) case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: break; default: + netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n", + nc->id); if (!(ndp->flags & NCSI_DEV_HWA)) { ncsi_report_link(ndp, true); ndp->flags |= NCSI_DEV_RESHUFFLE; @@ -682,7 +684,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index); if (!data) { netdev_err(ndp->ndev.dev, - "ncsi: failed to retrieve filter %d\n", index); + "NCSI: failed to retrieve filter %d\n", index); /* Set the VLAN id to 0 - this will still disable the entry in * the filter table, but we won't know what it was. */ @@ -692,7 +694,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, } netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "ncsi: removed vlan tag %u at index %d\n", + "NCSI: removed vlan tag %u at index %d\n", vid, index + 1); ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index); @@ -718,7 +720,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, if (index < 0) { /* New tag to add */ netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "ncsi: new vlan id to set: %u\n", + "NCSI: new vlan id to set: %u\n", vlan->vid); break; } @@ -745,7 +747,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, } netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "ncsi: set vid %u in packet, index %u\n", + "NCSI: set vid %u in packet, index %u\n", vlan->vid, index + 1); nca->type = NCSI_PKT_CMD_SVF; nca->words[1] = vlan->vid; @@ -784,8 +786,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); - if (ret) + if (ret) { + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit CMD_SP\n"); goto error; + } nd->state = ncsi_dev_state_config_cis; break; @@ -797,8 +802,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; nca.channel = nc->id; ret = ncsi_xmit_cmd(&nca); - if (ret) + if (ret) { + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit CMD_CIS\n"); goto error; + } nd->state = ncsi_dev_state_config_clear_vids; break; @@ -895,10 +903,16 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) } ret = ncsi_xmit_cmd(&nca); - if (ret) + if (ret) { + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit CMD %x\n", + nca.type); goto error; + } break; case ncsi_dev_state_config_done: + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: channel %u config done\n", nc->id); spin_lock_irqsave(&nc->lock, flags); if (nc->reconfigure_needed) { /* This channel's configuration has been updated @@ -925,6 +939,9 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) } else { hot_nc = NULL; nc->state = NCSI_CHANNEL_INACTIVE; + netdev_warn(ndp->ndev.dev, + "NCSI: channel %u link down after config\n", + nc->id); } spin_unlock_irqrestore(&nc->lock, flags); @@ -937,8 +954,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) ncsi_process_next_channel(ndp); break; default: - netdev_warn(dev, "Wrong NCSI state 0x%x in config\n", - nd->state); + netdev_alert(dev, "Wrong NCSI state 0x%x in config\n", + nd->state); } return; @@ -990,10 +1007,17 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) } if (!found) { + netdev_warn(ndp->ndev.dev, + "NCSI: No channel found with link\n"); ncsi_report_link(ndp, true); return -ENODEV; } + ncm = &found->modes[NCSI_MODE_LINK]; + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: Channel %u added to queue (link %s)\n", + found->id, ncm->data[2] & 0x1 ? "up" : "down"); + out: spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&found->link, &ndp->channel_queue); @@ -1055,6 +1079,8 @@ static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp) /* We can have no channels in extremely case */ if (list_empty(&ndp->channel_queue)) { + netdev_err(ndp->ndev.dev, + "NCSI: No available channels for HWA\n"); ncsi_report_link(ndp, false); return -ENOENT; } @@ -1223,6 +1249,9 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) return; error: + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit cmd 0x%x during probe\n", + nca.type); ncsi_report_link(ndp, true); } @@ -1276,10 +1305,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) switch (old_state) { case NCSI_CHANNEL_INACTIVE: ndp->ndev.state = ncsi_dev_state_config; + netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n", + nc->id); ncsi_configure_channel(ndp); break; case NCSI_CHANNEL_ACTIVE: ndp->ndev.state = ncsi_dev_state_suspend; + netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n", + nc->id); ncsi_suspend_channel(ndp); break; default: @@ -1299,6 +1332,8 @@ out: return ncsi_choose_active_channel(ndp); } + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: No more channels to process\n"); ncsi_report_link(ndp, false); return -ENODEV; } @@ -1390,7 +1425,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) ncsi_dev_state_config || !list_empty(&nc->link)) { netdev_printk(KERN_DEBUG, nd->dev, - "ncsi: channel %p marked dirty\n", + "NCSI: channel %p marked dirty\n", nc); nc->reconfigure_needed = true; } @@ -1410,7 +1445,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) spin_unlock_irqrestore(&ndp->lock, flags); netdev_printk(KERN_DEBUG, nd->dev, - "ncsi: kicked channel %p\n", nc); + "NCSI: kicked channel %p\n", nc); n++; } } @@ -1431,7 +1466,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) nd = ncsi_find_dev(dev); if (!nd) { - netdev_warn(dev, "ncsi: No net_device?\n"); + netdev_warn(dev, "NCSI: No net_device?\n"); return 0; } @@ -1442,7 +1477,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) n_vids++; if (vlan->vid == vid) { netdev_printk(KERN_DEBUG, dev, - "vid %u already registered\n", vid); + "NCSI: vid %u already registered\n", vid); return 0; } } @@ -1461,7 +1496,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) vlan->vid = vid; list_add_rcu(&vlan->list, &ndp->vlan_vids); - netdev_printk(KERN_DEBUG, dev, "Added new vid %u\n", vid); + netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid); found = ncsi_kick_channels(ndp) != 0; @@ -1481,7 +1516,7 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) nd = ncsi_find_dev(dev); if (!nd) { - netdev_warn(dev, "ncsi: no net_device?\n"); + netdev_warn(dev, "NCSI: no net_device?\n"); return 0; } @@ -1491,14 +1526,14 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list) if (vlan->vid == vid) { netdev_printk(KERN_DEBUG, dev, - "vid %u found, removing\n", vid); + "NCSI: vid %u found, removing\n", vid); list_del_rcu(&vlan->list); found = true; kfree(vlan); } if (!found) { - netdev_err(dev, "ncsi: vid %u wasn't registered!\n", vid); + netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid); return -EINVAL; } @@ -1581,10 +1616,12 @@ int ncsi_start_dev(struct ncsi_dev *nd) return 0; } - if (ndp->flags & NCSI_DEV_HWA) + if (ndp->flags & NCSI_DEV_HWA) { + netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n"); ret = ncsi_enable_hwa(ndp); - else + } else { ret = ncsi_choose_active_channel(ndp); + } return ret; } @@ -1615,6 +1652,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd) } } + netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n"); ncsi_report_link(ndp, true); } EXPORT_SYMBOL_GPL(ncsi_stop_dev); diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 927dad4759d1..efd933ff5570 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -146,7 +146,7 @@ static int ncsi_rsp_handler_ec(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_ENABLE]; if (ncm->enable) - return -EBUSY; + return 0; ncm->enable = 1; return 0; @@ -173,7 +173,7 @@ static int ncsi_rsp_handler_dc(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_ENABLE]; if (!ncm->enable) - return -EBUSY; + return 0; ncm->enable = 0; return 0; @@ -217,7 +217,7 @@ static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; if (ncm->enable) - return -EBUSY; + return 0; ncm->enable = 1; return 0; @@ -239,7 +239,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; if (!ncm->enable) - return -EBUSY; + return 0; ncm->enable = 1; return 0; @@ -263,7 +263,7 @@ static int ncsi_rsp_handler_ae(struct ncsi_request *nr) /* Check if the AEN has been enabled */ ncm = &nc->modes[NCSI_MODE_AEN]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to AEN configuration */ cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd); @@ -382,7 +382,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr) /* Check if VLAN mode has been enabled */ ncm = &nc->modes[NCSI_MODE_VLAN]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to VLAN mode */ cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd); @@ -409,7 +409,7 @@ static int ncsi_rsp_handler_dv(struct ncsi_request *nr) /* Check if VLAN mode has been enabled */ ncm = &nc->modes[NCSI_MODE_VLAN]; if (!ncm->enable) - return -EBUSY; + return 0; /* Update to VLAN mode */ ncm->enable = 0; @@ -455,13 +455,10 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr) bitmap = &ncf->bitmap; if (cmd->at_e & 0x1) { - if (test_and_set_bit(cmd->index, bitmap)) - return -EBUSY; + set_bit(cmd->index, bitmap); memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6); } else { - if (!test_and_clear_bit(cmd->index, bitmap)) - return -EBUSY; - + clear_bit(cmd->index, bitmap); memset(ncf->data + 6 * cmd->index, 0, 6); } @@ -485,7 +482,7 @@ static int ncsi_rsp_handler_ebf(struct ncsi_request *nr) /* Check if broadcast filter has been enabled */ ncm = &nc->modes[NCSI_MODE_BC]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to broadcast filter mode */ cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd); @@ -511,7 +508,7 @@ static int ncsi_rsp_handler_dbf(struct ncsi_request *nr) /* Check if broadcast filter isn't enabled */ ncm = &nc->modes[NCSI_MODE_BC]; if (!ncm->enable) - return -EBUSY; + return 0; /* Update to broadcast filter mode */ ncm->enable = 0; @@ -538,7 +535,7 @@ static int ncsi_rsp_handler_egmf(struct ncsi_request *nr) /* Check if multicast filter has been enabled */ ncm = &nc->modes[NCSI_MODE_MC]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to multicast filter mode */ cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd); @@ -564,7 +561,7 @@ static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr) /* Check if multicast filter has been enabled */ ncm = &nc->modes[NCSI_MODE_MC]; if (!ncm->enable) - return -EBUSY; + return 0; /* Update to multicast filter mode */ ncm->enable = 0; @@ -591,7 +588,7 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) /* Check if flow control has been enabled */ ncm = &nc->modes[NCSI_MODE_FC]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to flow control mode */ cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd); @@ -1032,11 +1029,19 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, if (payload < 0) payload = ntohs(hdr->length); ret = ncsi_validate_rsp_pkt(nr, payload); - if (ret) + if (ret) { + netdev_warn(ndp->ndev.dev, + "NCSI: 'bad' packet ignored for type 0x%x\n", + hdr->type); goto out; + } /* Process the packet */ ret = nrh->handler(nr); + if (ret) + netdev_err(ndp->ndev.dev, + "NCSI: Handler for packet type 0x%x returned %d\n", + hdr->type, ret); out: ncsi_free_request(nr); return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index a2f19b9906e9..0f164e986bf1 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -434,7 +434,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { - u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index c9b4e05ad940..e864681b8dc5 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -454,7 +454,6 @@ static size_t list_set_memsize(const struct list_set *map, size_t dsize) { struct set_elem *e; - size_t memsize; u32 n = 0; rcu_read_lock(); @@ -462,9 +461,7 @@ list_set_memsize(const struct list_set *map, size_t dsize) n++; rcu_read_unlock(); - memsize = sizeof(*map) + n * dsize; - - return memsize; + return (sizeof(*map) + n * dsize); } static int diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 1c8a42c1056c..d5be9c25fad6 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -3,6 +3,141 @@ /* Prefixlen maps for fast conversions, by Jan Engelhardt. */ +#ifdef E +#undef E +#endif + +#define PREFIXES_MAP \ + E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + #define E(a, b, c, d) \ {.ip6 = { \ htonl(a), htonl(b), \ @@ -13,135 +148,7 @@ * just use prefixlen_netmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_netmask_map[] = { - E(0x00000000, 0x00000000, 0x00000000, 0x00000000), - E(0x80000000, 0x00000000, 0x00000000, 0x00000000), - E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + PREFIXES_MAP }; EXPORT_SYMBOL_GPL(ip_set_netmask_map); @@ -155,135 +162,7 @@ EXPORT_SYMBOL_GPL(ip_set_netmask_map); * just use prefixlen_hostmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_hostmask_map[] = { - E(0x00000000, 0x00000000, 0x00000000, 0x00000000), - E(0x80000000, 0x00000000, 0x00000000, 0x00000000), - E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + PREFIXES_MAP }; EXPORT_SYMBOL_GPL(ip_set_hostmask_map); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 3d2ac71a83ec..f73561ca982d 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -185,7 +185,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); ret = 1; } else { - pr_err("%s(): request for already hashed, called from %pF\n", + pr_err("%s(): request for already hashed, called from %pS\n", __func__, __builtin_return_address(0)); ret = 0; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 4f940d7eb2f7..fac8c802b4ea 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -300,7 +300,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) unsigned int hash; if (svc->flags & IP_VS_SVC_F_HASHED) { - pr_err("%s(): request for already hashed, called from %pF\n", + pr_err("%s(): request for already hashed, called from %pS\n", __func__, __builtin_return_address(0)); return 0; } @@ -334,7 +334,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) static int ip_vs_svc_unhash(struct ip_vs_service *svc) { if (!(svc->flags & IP_VS_SVC_F_HASHED)) { - pr_err("%s(): request for unhash flagged, called from %pF\n", + pr_err("%s(): request for unhash flagged, called from %pS\n", __func__, __builtin_return_address(0)); return 0; } @@ -2034,12 +2034,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) seq_puts(seq, " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); } else { + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); const struct ip_vs_service *svc = v; const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); char *sched_name = sched ? sched->name : "none"; + if (svc->ipvs != ipvs) + return 0; if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 01130392b7c0..5749fcaa2770 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1083,7 +1083,7 @@ static void gc_worker(struct work_struct *work) next_run = gc_work->next_gc_run; gc_work->last_bucket = i; gc_work->early_drop = false; - queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); + queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) @@ -1419,7 +1419,7 @@ repeat: /* Decide what timeout policy we want to apply to this flow. */ timeouts = nf_ct_timeout_lookup(net, ct, l4proto); - ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts); + ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts); if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ @@ -1563,9 +1563,14 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); -int nf_ct_port_nlattr_tuple_size(void) +unsigned int nf_ct_port_nlattr_tuple_size(void) { - return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + + return size; } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); #endif @@ -2084,7 +2089,7 @@ int nf_conntrack_init_start(void) goto err_proto; conntrack_gc_work_init(&conntrack_gc_work); - queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); + queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); return 0; diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 89b2e46925c4..cf1bf2605c10 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -91,41 +91,41 @@ typedef struct field_t { } field_t; /* Bit Stream */ -typedef struct { +struct bitstr { unsigned char *buf; unsigned char *beg; unsigned char *end; unsigned char *cur; unsigned int bit; -} bitstr_t; +}; /* Tool Functions */ #define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} #define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} #define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} #define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND) -static unsigned int get_len(bitstr_t *bs); -static unsigned int get_bit(bitstr_t *bs); -static unsigned int get_bits(bitstr_t *bs, unsigned int b); -static unsigned int get_bitmap(bitstr_t *bs, unsigned int b); -static unsigned int get_uint(bitstr_t *bs, int b); +static unsigned int get_len(struct bitstr *bs); +static unsigned int get_bit(struct bitstr *bs); +static unsigned int get_bits(struct bitstr *bs, unsigned int b); +static unsigned int get_bitmap(struct bitstr *bs, unsigned int b); +static unsigned int get_uint(struct bitstr *bs, int b); /* Decoder Functions */ -static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level); +static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level); /* Decoder Functions Vector */ -typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int); +typedef int (*decoder_t)(struct bitstr *, const struct field_t *, char *, int); static const decoder_t Decoders[] = { decode_nul, decode_bool, @@ -150,7 +150,7 @@ static const decoder_t Decoders[] = { * Functions ****************************************************************************/ /* Assume bs is aligned && v < 16384 */ -static unsigned int get_len(bitstr_t *bs) +static unsigned int get_len(struct bitstr *bs) { unsigned int v; @@ -166,7 +166,7 @@ static unsigned int get_len(bitstr_t *bs) } /****************************************************************************/ -static unsigned int get_bit(bitstr_t *bs) +static unsigned int get_bit(struct bitstr *bs) { unsigned int b = (*bs->cur) & (0x80 >> bs->bit); @@ -177,7 +177,7 @@ static unsigned int get_bit(bitstr_t *bs) /****************************************************************************/ /* Assume b <= 8 */ -static unsigned int get_bits(bitstr_t *bs, unsigned int b) +static unsigned int get_bits(struct bitstr *bs, unsigned int b) { unsigned int v, l; @@ -203,7 +203,7 @@ static unsigned int get_bits(bitstr_t *bs, unsigned int b) /****************************************************************************/ /* Assume b <= 32 */ -static unsigned int get_bitmap(bitstr_t *bs, unsigned int b) +static unsigned int get_bitmap(struct bitstr *bs, unsigned int b) { unsigned int v, l, shift, bytes; @@ -242,7 +242,7 @@ static unsigned int get_bitmap(bitstr_t *bs, unsigned int b) /**************************************************************************** * Assume bs is aligned and sizeof(unsigned int) == 4 ****************************************************************************/ -static unsigned int get_uint(bitstr_t *bs, int b) +static unsigned int get_uint(struct bitstr *bs, int b) { unsigned int v = 0; @@ -264,7 +264,7 @@ static unsigned int get_uint(bitstr_t *bs, int b) } /****************************************************************************/ -static int decode_nul(bitstr_t *bs, const struct field_t *f, +static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level) { PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); @@ -273,7 +273,7 @@ static int decode_nul(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_bool(bitstr_t *bs, const struct field_t *f, +static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level) { PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); @@ -285,7 +285,7 @@ static int decode_bool(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_oid(bitstr_t *bs, const struct field_t *f, +static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level) { int len; @@ -302,7 +302,7 @@ static int decode_oid(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_int(bitstr_t *bs, const struct field_t *f, +static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -346,7 +346,7 @@ static int decode_int(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_enum(bitstr_t *bs, const struct field_t *f, +static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level) { PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); @@ -362,7 +362,7 @@ static int decode_enum(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_bitstr(bitstr_t *bs, const struct field_t *f, +static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -396,7 +396,7 @@ static int decode_bitstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_numstr(bitstr_t *bs, const struct field_t *f, +static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -414,7 +414,7 @@ static int decode_numstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_octstr(bitstr_t *bs, const struct field_t *f, +static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -463,7 +463,7 @@ static int decode_octstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, +static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -489,7 +489,7 @@ static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_seq(bitstr_t *bs, const struct field_t *f, +static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len; @@ -606,7 +606,7 @@ static int decode_seq(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_seqof(bitstr_t *bs, const struct field_t *f, +static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int count, effective_count = 0, i, len = 0; @@ -696,7 +696,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f, /****************************************************************************/ -static int decode_choice(bitstr_t *bs, const struct field_t *f, +static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int type, ext, len = 0; @@ -772,7 +772,7 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras) FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT, 0, _RasMessage }; - bitstr_t bs; + struct bitstr bs; bs.buf = bs.beg = bs.cur = buf; bs.end = buf + sz; @@ -789,7 +789,7 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg, FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT, 0, _H323_UserInformation }; - bitstr_t bs; + struct bitstr bs; bs.buf = buf; bs.beg = bs.cur = beg; @@ -808,7 +808,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz, FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4, DECODE | EXT, 0, _MultimediaSystemControlMessage }; - bitstr_t bs; + struct bitstr bs; bs.buf = bs.beg = bs.cur = buf; bs.end = buf + sz; @@ -877,6 +877,7 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931) if (sz < 1) break; len = *p++; + sz--; if (sz < len) break; p += len; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index de4053d84364..6e0adfefb9ed 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -533,11 +533,11 @@ nla_put_failure: return -1; } -static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) +static size_t ctnetlink_proto_size(const struct nf_conn *ct) { const struct nf_conntrack_l3proto *l3proto; const struct nf_conntrack_l4proto *l4proto; - size_t len; + size_t len, len4 = 0; l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); len = l3proto->nla_size; @@ -545,8 +545,12 @@ static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); len += l4proto->nla_size; + if (l4proto->nlattr_tuple_size) { + len4 = l4proto->nlattr_tuple_size(); + len4 *= 3u; /* ORIG, REPLY, MASTER */ + } - return len; + return len + len4; } static inline size_t ctnetlink_acct_size(const struct nf_conn *ct) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b3e489c859ec..c8e9c9503a08 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -27,6 +27,7 @@ #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_log.h> static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly; struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly; @@ -63,6 +64,52 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header, *header = NULL; *table = NULL; } + +__printf(5, 6) +void nf_l4proto_log_invalid(const struct sk_buff *skb, + struct net *net, + u16 pf, u8 protonum, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if (net->ct.sysctl_log_invalid != protonum || + net->ct.sysctl_log_invalid != IPPROTO_RAW) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_proto_%d: %pV ", protonum, &vaf); + va_end(args); +} +EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid); + +__printf(3, 4) +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...) +{ + struct va_format vaf; + struct net *net; + va_list args; + + net = nf_ct_net(ct); + if (likely(net->ct.sysctl_log_invalid == 0)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct), + nf_ct_protonum(ct), "%pV", &vaf); + va_end(args); +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid); #endif const struct nf_conntrack_l4proto * @@ -125,7 +172,7 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); -int nf_ct_netns_get(struct net *net, u8 nfproto) +static int nf_ct_netns_do_get(struct net *net, u8 nfproto) { const struct nf_conntrack_l3proto *l3proto; int ret; @@ -150,9 +197,33 @@ int nf_ct_netns_get(struct net *net, u8 nfproto) return ret; } + +int nf_ct_netns_get(struct net *net, u8 nfproto) +{ + int err; + + if (nfproto == NFPROTO_INET) { + err = nf_ct_netns_do_get(net, NFPROTO_IPV4); + if (err < 0) + goto err1; + err = nf_ct_netns_do_get(net, NFPROTO_IPV6); + if (err < 0) + goto err2; + } else { + err = nf_ct_netns_do_get(net, nfproto); + if (err < 0) + goto err1; + } + return 0; + +err2: + nf_ct_netns_put(net, NFPROTO_IPV4); +err1: + return err; +} EXPORT_SYMBOL_GPL(nf_ct_netns_get); -void nf_ct_netns_put(struct net *net, u8 nfproto) +static void nf_ct_netns_do_put(struct net *net, u8 nfproto) { const struct nf_conntrack_l3proto *l3proto; @@ -171,6 +242,15 @@ void nf_ct_netns_put(struct net *net, u8 nfproto) nf_ct_l3proto_module_put(nfproto); } + +void nf_ct_netns_put(struct net *net, uint8_t nfproto) +{ + if (nfproto == NFPROTO_INET) { + nf_ct_netns_do_put(net, NFPROTO_IPV4); + nf_ct_netns_do_put(net, NFPROTO_IPV6); + } else + nf_ct_netns_do_put(net, nfproto); +} EXPORT_SYMBOL_GPL(nf_ct_netns_put); const struct nf_conntrack_l4proto * @@ -351,8 +431,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) l4proto->nla_size = 0; if (l4proto->nlattr_size) l4proto->nla_size += l4proto->nlattr_size(); - if (l4proto->nlattr_tuple_size) - l4proto->nla_size += 3 * l4proto->nlattr_tuple_size(); rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 0f5a4d79f6b8..2a446f4a554c 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -428,13 +428,13 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, default: dn = dccp_pernet(net); if (dn->dccp_loose == 0) { - msg = "nf_ct_dccp: not picking up existing connection "; + msg = "not picking up existing connection "; goto out_invalid; } case CT_DCCP_REQUEST: break; case CT_DCCP_INVALID: - msg = "nf_ct_dccp: invalid state transition "; + msg = "invalid state transition "; goto out_invalid; } @@ -447,9 +447,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; out_invalid: - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, - NULL, "%s", msg); + nf_ct_l4proto_log_invalid(skb, ct, "%s", msg); return false; } @@ -469,10 +467,8 @@ static unsigned int *dccp_get_timeouts(struct net *net) static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts) { - struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -534,15 +530,11 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_pkt = type; spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_dccp: invalid packet ignored "); + nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet"); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_dccp: invalid state transition "); + nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition"); return -NF_ACCEPT; } @@ -604,8 +596,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, return NF_ACCEPT; out_invalid: - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg); + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 9cd40700842e..1f86ddf6649a 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -60,7 +60,6 @@ static int generic_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeout) { nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 09a90484c27d..a2503005d80b 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -244,7 +244,6 @@ static int gre_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts) { /* If we've seen traffic both ways, this is a GRE connection. diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 6303a88af12b..80faf04ddf15 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -306,7 +306,6 @@ static int sctp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts) { enum sctp_conntrack new_state, old_state; @@ -522,8 +521,7 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, } return NF_ACCEPT; out_invalid: - if (LOG_INVALID(net, IPPROTO_SCTP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg); + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index cba1c6ffe51a..b12fc07111d0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -493,8 +493,7 @@ static bool tcp_in_window(const struct nf_conn *ct, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, - const struct tcphdr *tcph, - u_int8_t pf) + const struct tcphdr *tcph) { struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = tcp_pernet(net); @@ -702,9 +701,9 @@ static bool tcp_in_window(const struct nf_conn *ct, if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal) res = true; - if (!res && LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: %s ", + if (!res) { + nf_ct_l4proto_log_invalid(skb, ct, + "%s", before(seq, sender->td_maxend + 1) ? in_recv_win ? before(sack, receiver->td_end + 1) ? @@ -713,6 +712,7 @@ static bool tcp_in_window(const struct nf_conn *ct, : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); + } } pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " @@ -738,6 +738,12 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| [TCPHDR_ACK|TCPHDR_URG] = 1, }; +static void tcp_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg); +} + /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, @@ -753,17 +759,13 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* Smaller that minimal TCP header? */ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: short packet "); + tcp_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: truncated/malformed packet "); + tcp_error_log(skb, net, pf, "truncated packet"); return -NF_ACCEPT; } @@ -774,18 +776,14 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: bad TCP checksum "); + tcp_error_log(skb, net, pf, "bad checksum"); return -NF_ACCEPT; } /* Check TCP flags. */ tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid TCP flag combination "); + tcp_error_log(skb, net, pf, "invalid tcp flag combination"); return -NF_ACCEPT; } @@ -802,7 +800,6 @@ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); @@ -939,10 +936,8 @@ static int tcp_packet(struct nf_conn *ct, IP_CT_EXP_CHALLENGE_ACK; } spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid packet ignored in " - "state %s ", tcp_conntrack_names[old_state]); + nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in " + "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Special case for SYN proxy: when the SYN to the server or @@ -964,9 +959,7 @@ static int tcp_packet(struct nf_conn *ct, pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid state "); + nf_ct_l4proto_log_invalid(skb, ct, "invalid state"); return -NF_ACCEPT; case TCP_CONNTRACK_TIME_WAIT: /* RFC5961 compliance cause stack to send "challenge-ACK" @@ -981,9 +974,7 @@ static int tcp_packet(struct nf_conn *ct, /* Detected RFC5961 challenge ACK */ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: challenge-ACK ignored "); + nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored"); return NF_ACCEPT; /* Don't change state */ } break; @@ -993,9 +984,7 @@ static int tcp_packet(struct nf_conn *ct, && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { /* Invalid RST */ spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, - NULL, "nf_ct_tcp: invalid RST "); + nf_ct_l4proto_log_invalid(skb, ct, "invalid rst"); return -NF_ACCEPT; } if (index == TCP_RST_SET @@ -1022,7 +1011,7 @@ static int tcp_packet(struct nf_conn *ct, } if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, - skb, dataoff, th, pf)) { + skb, dataoff, th)) { spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } @@ -1288,9 +1277,14 @@ static int tcp_nlattr_size(void) + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1); } -static int tcp_nlattr_tuple_size(void) +static unsigned int tcp_nlattr_tuple_size(void) { - return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + + return size; } #endif diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8af734cd1a94..3a5f727103af 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -73,7 +73,6 @@ static int udp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int *timeouts) { /* If we've seen traffic both ways, this is some kind of UDP @@ -99,6 +98,12 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, } #ifdef CONFIG_NF_CT_PROTO_UDPLITE +static void udplite_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg); +} + static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, @@ -112,9 +117,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (!hdr) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: short packet "); + udplite_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } @@ -122,17 +125,13 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, if (cscov == 0) { cscov = udplen; } else if (cscov < sizeof(*hdr) || cscov > udplen) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: invalid checksum coverage "); + udplite_error_log(skb, net, pf, "invalid checksum coverage"); return -NF_ACCEPT; } /* UDPLITE mandates checksums */ if (!hdr->check) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: checksum missing "); + udplite_error_log(skb, net, pf, "checksum missing"); return -NF_ACCEPT; } @@ -140,9 +139,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: bad UDPLite checksum "); + udplite_error_log(skb, net, pf, "bad checksum"); return -NF_ACCEPT; } @@ -150,6 +147,12 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, } #endif +static void udp_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg); +} + static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int8_t pf, @@ -162,17 +165,13 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udp: short packet "); + udp_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { - if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udp: truncated/malformed packet "); + udp_error_log(skb, net, pf, "truncated/malformed packet"); return -NF_ACCEPT; } @@ -186,9 +185,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, * FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { - if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udp: bad UDP checksum "); + udp_error_log(skb, net, pf, "bad checksum"); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index af8345fc4fbd..6c38421e31f9 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -542,17 +542,14 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) if (nf_nat_proto_remove(ct, data)) return 1; - if ((ct->status & IPS_SRC_NAT_DONE) == 0) - return 0; - - /* This netns is being destroyed, and conntrack has nat null binding. + /* This module is being removed and conntrack has nat null binding. * Remove it from bysource hash, as the table will be freed soon. * * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() * will delete entry from already-freed table. */ - clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); - __nf_nat_cleanup_conntrack(ct); + if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status)) + __nf_nat_cleanup_conntrack(ct); /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 64e1ee091225..d8327b43e4dc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2549,14 +2549,9 @@ nft_select_set_ops(const struct nft_ctx *ctx, case NFT_SET_POL_PERFORMANCE: if (est.lookup < best.lookup) break; - if (est.lookup == best.lookup) { - if (!desc->size) { - if (est.space < best.space) - break; - } else if (est.size < best.size) { - break; - } - } + if (est.lookup == best.lookup && + est.space < best.space) + break; continue; case NFT_SET_POL_MEMORY: if (!desc->size) { @@ -3593,45 +3588,6 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb) return 0; } -static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, - struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - struct netlink_ext_ack *extack) -{ - u8 genmask = nft_genmask_cur(net); - const struct nft_set *set; - struct nft_ctx ctx; - int err; - - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); - if (err < 0) - return err; - - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], - genmask); - if (IS_ERR(set)) - return PTR_ERR(set); - - if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = nf_tables_dump_set, - .done = nf_tables_dump_set_done, - }; - struct nft_set_dump_ctx *dump_ctx; - - dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); - if (!dump_ctx) - return -ENOMEM; - - dump_ctx->set = set; - dump_ctx->ctx = ctx; - - c.data = dump_ctx; - return netlink_dump_start(nlsk, skb, nlh, &c); - } - return -EOPNOTSUPP; -} - static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_ctx *ctx, u32 seq, u32 portid, int event, u16 flags, @@ -3677,6 +3633,135 @@ nla_put_failure: return -1; } +static int nft_setelem_parse_flags(const struct nft_set *set, + const struct nlattr *attr, u32 *flags) +{ + if (attr == NULL) + return 0; + + *flags = ntohl(nla_get_be32(attr)); + if (*flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + if (!(set->flags & NFT_SET_INTERVAL) && + *flags & NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + + return 0; +} + +static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + const struct nft_set_ext *ext; + struct nft_data_desc desc; + struct nft_set_elem elem; + struct sk_buff *skb; + uint32_t flags = 0; + void *priv; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy, NULL); + if (err < 0) + return err; + + if (!nla[NFTA_SET_ELEM_KEY]) + return -EINVAL; + + err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags); + if (err < 0) + return err; + + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, + nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + return err; + + err = -EINVAL; + if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) + return err; + + priv = set->ops->get(ctx->net, set, &elem, flags); + if (IS_ERR(priv)) + return PTR_ERR(priv); + + elem.priv = priv; + ext = nft_set_elem_ext(set, &elem); + + err = -ENOMEM; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err1; + + err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid, + NFT_MSG_NEWSETELEM, 0, set, &elem); + if (err < 0) + goto err2; + + err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT); + /* This avoids a loop in nfnetlink. */ + if (err < 0) + goto err1; + + return 0; +err2: + kfree_skb(skb); +err1: + /* this avoids a loop in nfnetlink. */ + return err == -EAGAIN ? -ENOBUFS : err; +} + +static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) +{ + u8 genmask = nft_genmask_cur(net); + struct nft_set *set; + struct nlattr *attr; + struct nft_ctx ctx; + int rem, err = 0; + + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); + if (IS_ERR(set)) + return PTR_ERR(set); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_set, + .done = nf_tables_dump_set_done, + }; + struct nft_set_dump_ctx *dump_ctx; + + dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); + if (!dump_ctx) + return -ENOMEM; + + dump_ctx->set = set; + dump_ctx->ctx = ctx; + + c.data = dump_ctx; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) + return -EINVAL; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_get_set_elem(&ctx, set, attr); + if (err < 0) + break; + } + + return err; +} + static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_set_elem *elem, @@ -3777,22 +3862,6 @@ static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) kfree(elem); } -static int nft_setelem_parse_flags(const struct nft_set *set, - const struct nlattr *attr, u32 *flags) -{ - if (attr == NULL) - return 0; - - *flags = ntohl(nla_get_be32(attr)); - if (*flags & ~NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; - if (!(set->flags & NFT_SET_INTERVAL) && - *flags & NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; - - return 0; -} - static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bd0975d7dd6f..2647b895f4b0 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_SREG] = { .type = NLA_U32 }, }; -static int nft_ct_netns_get(struct net *net, uint8_t family) -{ - int err; - - if (family == NFPROTO_INET) { - err = nf_ct_netns_get(net, NFPROTO_IPV4); - if (err < 0) - goto err1; - err = nf_ct_netns_get(net, NFPROTO_IPV6); - if (err < 0) - goto err2; - } else { - err = nf_ct_netns_get(net, family); - if (err < 0) - goto err1; - } - return 0; - -err2: - nf_ct_netns_put(net, NFPROTO_IPV4); -err1: - return err; -} - -static void nft_ct_netns_put(struct net *net, uint8_t family) -{ - if (family == NFPROTO_INET) { - nf_ct_netns_put(net, NFPROTO_IPV4); - nf_ct_netns_put(net, NFPROTO_IPV6); - } else - nf_ct_netns_put(net, family); -} - #ifdef CONFIG_NF_CONNTRACK_ZONES static void nft_ct_tmpl_put_pcpu(void) { @@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) return err; @@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, if (err < 0) goto err1; - err = nft_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) goto err1; @@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv = nft_expr_priv(expr); __nft_ct_set_destroy(ctx, priv); - nft_ct_netns_put(ctx->net, ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->afi->family); } static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 734989c40579..45fb2752fb63 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -106,6 +106,23 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, return NULL; } +static void *nft_bitmap_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + struct nft_bitmap_elem *be; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) || + !nft_set_elem_active(&be->ext, genmask)) + continue; + + return be; + } + return ERR_PTR(-ENOENT); +} + static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) @@ -294,6 +311,7 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = { .activate = nft_bitmap_activate, .lookup = nft_bitmap_lookup, .walk = nft_bitmap_walk, + .get = nft_bitmap_get, }; static struct nft_set_type nft_bitmap_type __read_mostly = { diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 9c0d5a7ce5f9..f8166c1d5430 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -95,6 +95,24 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, return !!he; } +static void *nft_rhash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he; + struct nft_rhash_cmp_arg arg = { + .genmask = nft_genmask_cur(net), + .set = set, + .key = elem->key.val.data, + }; + + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); + if (he != NULL) + return he; + + return ERR_PTR(-ENOENT); +} + static bool nft_rhash_update(struct nft_set *set, const u32 *key, void *(*new)(struct nft_set *, const struct nft_expr *, @@ -409,6 +427,24 @@ static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, return false; } +static void *nft_hash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + struct nft_hash *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + struct nft_hash_elem *he; + u32 hash; + + hash = jhash(elem->key.val.data, set->klen, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry_rcu(he, &priv->table[hash], node) { + if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) && + nft_set_elem_active(&he->ext, genmask)) + return he; + } + return ERR_PTR(-ENOENT); +} + /* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */ static inline u32 nft_hash_key(const u32 *key, u32 klen) { @@ -494,7 +530,7 @@ static void *nft_hash_deactivate(const struct net *net, hash = reciprocal_scale(hash, priv->buckets); hlist_for_each_entry(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, - set->klen) || + set->klen) && nft_set_elem_active(&he->ext, genmask)) { nft_set_elem_change_active(net, set, &he->ext); return he; @@ -600,6 +636,7 @@ static struct nft_set_ops nft_rhash_ops __read_mostly = { .lookup = nft_rhash_lookup, .update = nft_rhash_update, .walk = nft_rhash_walk, + .get = nft_rhash_get, .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, }; @@ -617,6 +654,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .remove = nft_hash_remove, .lookup = nft_hash_lookup, .walk = nft_hash_walk, + .get = nft_hash_get, .features = NFT_SET_MAP | NFT_SET_OBJECT, }; @@ -634,6 +672,7 @@ static struct nft_set_ops nft_hash_fast_ops __read_mostly = { .remove = nft_hash_remove, .lookup = nft_hash_lookup_fast, .walk = nft_hash_walk, + .get = nft_hash_get, .features = NFT_SET_MAP | NFT_SET_OBJECT, }; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index d83a4ec5900d..e6f08bc5f359 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -113,6 +113,78 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, return ret; } +static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, + const u32 *key, struct nft_rbtree_elem **elem, + unsigned int seq, unsigned int flags, u8 genmask) +{ + struct nft_rbtree_elem *rbe, *interval = NULL; + struct nft_rbtree *priv = nft_set_priv(set); + const struct rb_node *parent; + const void *this; + int d; + + parent = rcu_dereference_raw(priv->root.rb_node); + while (parent != NULL) { + if (read_seqcount_retry(&priv->count, seq)) + return false; + + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + this = nft_set_ext_key(&rbe->ext); + d = memcmp(this, key, set->klen); + if (d < 0) { + parent = rcu_dereference_raw(parent->rb_left); + interval = rbe; + } else if (d > 0) { + parent = rcu_dereference_raw(parent->rb_right); + } else { + if (!nft_set_elem_active(&rbe->ext, genmask)) + parent = rcu_dereference_raw(parent->rb_left); + + if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || + (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == + (flags & NFT_SET_ELEM_INTERVAL_END)) { + *elem = rbe; + return true; + } + return false; + } + } + + if (set->flags & NFT_SET_INTERVAL && interval != NULL && + nft_set_elem_active(&interval->ext, genmask) && + !nft_rbtree_interval_end(interval)) { + *elem = interval; + return true; + } + + return false; +} + +static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + struct nft_rbtree *priv = nft_set_priv(set); + unsigned int seq = read_seqcount_begin(&priv->count); + struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT); + const u32 *key = (const u32 *)&elem->key.val; + u8 genmask = nft_genmask_cur(net); + bool ret; + + ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); + if (ret || !read_seqcount_retry(&priv->count, seq)) + return rbe; + + read_lock_bh(&priv->lock); + seq = read_seqcount_begin(&priv->count); + ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); + if (!ret) + rbe = ERR_PTR(-ENOENT); + read_unlock_bh(&priv->lock); + + return rbe; +} + static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, struct nft_set_ext **ext) @@ -336,6 +408,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, + .get = nft_rbtree_get, .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, }; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d8571f414208..a77dd514297c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1153,6 +1153,7 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; + unsigned int cpu; int ret; ret = xt_jumpstack_alloc(newinfo); @@ -1182,14 +1183,28 @@ xt_replace_table(struct xt_table *table, smp_wmb(); table->private = newinfo; + /* make sure all cpus see new ->private value */ + smp_wmb(); + /* * Even though table entries have now been swapped, other CPU's - * may still be using the old entries. This is okay, because - * resynchronization happens because of the locking done - * during the get_counters() routine. + * may still be using the old entries... */ local_bh_enable(); + /* ... so wait for even xt_recseq on all cpus */ + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); + u32 seq = raw_read_seqcount(s); + + if (seq & 1) { + do { + cond_resched(); + cpu_relax(); + } while (seq == raw_read_seqcount(s)); + } + } + #ifdef CONFIG_AUDIT if (audit_enabled) { audit_log(current->audit_context, GFP_KERNEL, diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index ffa8eec980e9..a6214f235333 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -46,7 +46,6 @@ struct xt_connlimit_conn { struct hlist_node node; struct nf_conntrack_tuple tuple; - union nf_inet_addr addr; }; struct xt_connlimit_rb { @@ -72,16 +71,9 @@ static inline unsigned int connlimit_iphash(__be32 addr) } static inline unsigned int -connlimit_iphash6(const union nf_inet_addr *addr, - const union nf_inet_addr *mask) +connlimit_iphash6(const union nf_inet_addr *addr) { - union nf_inet_addr res; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) - res.ip6[i] = addr->ip6[i] & mask->ip6[i]; - - return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), + return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), connlimit_rnd) % CONNLIMIT_SLOTS; } @@ -95,24 +87,13 @@ static inline bool already_closed(const struct nf_conn *conn) } static int -same_source_net(const union nf_inet_addr *addr, - const union nf_inet_addr *mask, - const union nf_inet_addr *u3, u_int8_t family) +same_source(const union nf_inet_addr *addr, + const union nf_inet_addr *u3, u_int8_t family) { - if (family == NFPROTO_IPV4) { - return ntohl(addr->ip & mask->ip) - - ntohl(u3->ip & mask->ip); - } else { - union nf_inet_addr lh, rh; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) { - lh.ip6[i] = addr->ip6[i] & mask->ip6[i]; - rh.ip6[i] = u3->ip6[i] & mask->ip6[i]; - } + if (family == NFPROTO_IPV4) + return ntohl(addr->ip) - ntohl(u3->ip); - return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)); - } + return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6)); } static bool add_hlist(struct hlist_head *head, @@ -125,7 +106,6 @@ static bool add_hlist(struct hlist_head *head, if (conn == NULL) return false; conn->tuple = *tuple; - conn->addr = *addr; hlist_add_head(&conn->node, head); return true; } @@ -196,7 +176,7 @@ static void tree_nodes_free(struct rb_root *root, static unsigned int count_tree(struct net *net, struct rb_root *root, const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr, const union nf_inet_addr *mask, + const union nf_inet_addr *addr, u8 family, const struct nf_conntrack_zone *zone) { struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; @@ -217,7 +197,7 @@ count_tree(struct net *net, struct rb_root *root, rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node); parent = *rbnode; - diff = same_source_net(addr, mask, &rbconn->addr, family); + diff = same_source(addr, &rbconn->addr, family); if (diff < 0) { rbnode = &((*rbnode)->rb_left); } else if (diff > 0) { @@ -270,7 +250,6 @@ count_tree(struct net *net, struct rb_root *root, } conn->tuple = *tuple; - conn->addr = *addr; rbconn->addr = *addr; INIT_HLIST_HEAD(&rbconn->hhead); @@ -285,7 +264,6 @@ static int count_them(struct net *net, struct xt_connlimit_data *data, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, - const union nf_inet_addr *mask, u_int8_t family, const struct nf_conntrack_zone *zone) { @@ -294,14 +272,14 @@ static int count_them(struct net *net, u32 hash; if (family == NFPROTO_IPV6) - hash = connlimit_iphash6(addr, mask); + hash = connlimit_iphash6(addr); else - hash = connlimit_iphash(addr->ip & mask->ip); + hash = connlimit_iphash(addr->ip); root = &data->climit_root[hash]; spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); - count = count_tree(net, root, tuple, addr, mask, family, zone); + count = count_tree(net, root, tuple, addr, family, zone); spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); @@ -332,16 +310,23 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (xt_family(par) == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); + unsigned int i; + memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? &iph->daddr : &iph->saddr, sizeof(addr.ip6)); + + for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i) + addr.ip6[i] &= info->mask.ip6[i]; } else { const struct iphdr *iph = ip_hdr(skb); addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ? iph->daddr : iph->saddr; + + addr.ip &= info->mask.ip; } connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, xt_family(par), zone); + xt_family(par), zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index 58fb827439a8..d7da99a0b0b8 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -14,6 +14,66 @@ #include <net/nsh.h> #include <net/tun_proto.h> +int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh) +{ + struct nshhdr *nh; + size_t length = nsh_hdr_len(pushed_nh); + u8 next_proto; + + if (skb->mac_len) { + next_proto = TUN_P_ETHERNET; + } else { + next_proto = tun_p_from_eth_p(skb->protocol); + if (!next_proto) + return -EAFNOSUPPORT; + } + + /* Add the NSH header */ + if (skb_cow_head(skb, length) < 0) + return -ENOMEM; + + skb_push(skb, length); + nh = (struct nshhdr *)(skb->data); + memcpy(nh, pushed_nh, length); + nh->np = next_proto; + skb_postpush_rcsum(skb, nh, length); + + skb->protocol = htons(ETH_P_NSH); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + + return 0; +} +EXPORT_SYMBOL_GPL(nsh_push); + +int nsh_pop(struct sk_buff *skb) +{ + struct nshhdr *nh; + size_t length; + __be16 inner_proto; + + if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN)) + return -ENOMEM; + nh = (struct nshhdr *)(skb->data); + length = nsh_hdr_len(nh); + inner_proto = tun_p_to_eth_p(nh->np); + if (!pskb_may_pull(skb, length)) + return -ENOMEM; + + if (!inner_proto) + return -EAFNOSUPPORT; + + skb_pull_rcsum(skb, length); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + skb->protocol = inner_proto; + + return 0; +} +EXPORT_SYMBOL_GPL(nsh_pop); + static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, netdev_features_t features) { diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index ce947292ae77..2650205cdaf9 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -14,6 +14,7 @@ config OPENVSWITCH select MPLS select NET_MPLS_GSO select DST_CACHE + select NET_NSH ---help--- Open vSwitch is a multilayer Ethernet switch targeted at virtualized environments. In addition to supporting a variety of features diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index a551232daf61..9a6a6d51e421 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -43,6 +43,7 @@ #include "flow.h" #include "conntrack.h" #include "vport.h" +#include "flow_netlink.h" struct deferred_action { struct sk_buff *skb; @@ -380,6 +381,38 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key, return 0; } +static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key, + const struct nshhdr *nh) +{ + int err; + + err = nsh_push(skb, nh); + if (err) + return err; + + /* safe right before invalidate_flow_key */ + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + +static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key) +{ + int err; + + err = nsh_pop(skb); + if (err) + return err; + + /* safe right before invalidate_flow_key */ + if (skb->protocol == htons(ETH_P_TEB)) + key->mac_proto = MAC_PROTO_ETHERNET; + else + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, __be32 addr, __be32 new_addr) { @@ -602,6 +635,69 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } +static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct nlattr *a) +{ + struct nshhdr *nh; + size_t length; + int err; + u8 flags; + u8 ttl; + int i; + + struct ovs_key_nsh key; + struct ovs_key_nsh mask; + + err = nsh_key_from_nlattr(a, &key, &mask); + if (err) + return err; + + /* Make sure the NSH base header is there */ + if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN)) + return -ENOMEM; + + nh = nsh_hdr(skb); + length = nsh_hdr_len(nh); + + /* Make sure the whole NSH header is there */ + err = skb_ensure_writable(skb, skb_network_offset(skb) + + length); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + skb_postpull_rcsum(skb, nh, length); + flags = nsh_get_flags(nh); + flags = OVS_MASKED(flags, key.base.flags, mask.base.flags); + flow_key->nsh.base.flags = flags; + ttl = nsh_get_ttl(nh); + ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl); + flow_key->nsh.base.ttl = ttl; + nsh_set_flags_and_ttl(nh, flags, ttl); + nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr, + mask.base.path_hdr); + flow_key->nsh.base.path_hdr = nh->path_hdr; + switch (nh->mdtype) { + case NSH_M_TYPE1: + for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) { + nh->md1.context[i] = + OVS_MASKED(nh->md1.context[i], key.context[i], + mask.context[i]); + } + memcpy(flow_key->nsh.context, nh->md1.context, + sizeof(nh->md1.context)); + break; + case NSH_M_TYPE2: + memset(flow_key->nsh.context, 0, + sizeof(flow_key->nsh.context)); + break; + default: + return -EINVAL; + } + skb_postpush_rcsum(skb, nh, length); + return 0; +} + /* Must follow skb_ensure_writable() since that can move the skb data. */ static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) @@ -1024,6 +1120,10 @@ static int execute_masked_set_action(struct sk_buff *skb, get_mask(a, struct ovs_key_ethernet *)); break; + case OVS_KEY_ATTR_NSH: + err = set_nsh(skb, flow_key, a); + break; + case OVS_KEY_ATTR_IPV4: err = set_ipv4(skb, flow_key, nla_data(a), get_mask(a, struct ovs_key_ipv4 *)); @@ -1214,6 +1314,22 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_POP_ETH: err = pop_eth(skb, key); break; + + case OVS_ACTION_ATTR_PUSH_NSH: { + u8 buffer[NSH_HDR_MAX_LEN]; + struct nshhdr *nh = (struct nshhdr *)buffer; + + err = nsh_hdr_from_nlattr(nla_data(a), nh, + NSH_HDR_MAX_LEN); + if (unlikely(err)) + break; + err = push_nsh(skb, key, nh); + break; + } + + case OVS_ACTION_ATTR_POP_NSH: + err = pop_nsh(skb, key); + break; } if (unlikely(err)) { diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 8c94cef25a72..864ddb1e3642 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -46,6 +46,7 @@ #include <net/ipv6.h> #include <net/mpls.h> #include <net/ndisc.h> +#include <net/nsh.h> #include "conntrack.h" #include "datapath.h" @@ -490,6 +491,52 @@ invalid: return 0; } +static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key) +{ + struct nshhdr *nh; + unsigned int nh_ofs = skb_network_offset(skb); + u8 version, length; + int err; + + err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + version = nsh_get_ver(nh); + length = nsh_hdr_len(nh); + + if (version != 0) + return -EINVAL; + + err = check_header(skb, nh_ofs + length); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + key->nsh.base.flags = nsh_get_flags(nh); + key->nsh.base.ttl = nsh_get_ttl(nh); + key->nsh.base.mdtype = nh->mdtype; + key->nsh.base.np = nh->np; + key->nsh.base.path_hdr = nh->path_hdr; + switch (key->nsh.base.mdtype) { + case NSH_M_TYPE1: + if (length != NSH_M_TYPE1_LEN) + return -EINVAL; + memcpy(key->nsh.context, nh->md1.context, + sizeof(nh->md1)); + break; + case NSH_M_TYPE2: + memset(key->nsh.context, 0, + sizeof(nh->md1)); + break; + default: + return -EINVAL; + } + + return 0; +} + /** * key_extract - extracts a flow key from an Ethernet frame. * @skb: sk_buff that contains the frame, with skb->data pointing to the @@ -735,6 +782,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->tp, 0, sizeof(key->tp)); } } + } else if (key->eth.type == htons(ETH_P_NSH)) { + error = parse_nsh(skb, key); + if (error) + return error; } return 0; } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 1875bba4f865..c670dd24b8b7 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -35,6 +35,7 @@ #include <net/inet_ecn.h> #include <net/ip_tunnels.h> #include <net/dst_metadata.h> +#include <net/nsh.h> struct sk_buff; @@ -66,6 +67,11 @@ struct vlan_head { (offsetof(struct sw_flow_key, recirc_id) + \ FIELD_SIZEOF(struct sw_flow_key, recirc_id)) +struct ovs_key_nsh { + struct ovs_nsh_key_base base; + __be32 context[NSH_MD1_CONTEXT_SIZE]; +}; + struct sw_flow_key { u8 tun_opts[IP_TUNNEL_OPTS_MAX]; u8 tun_opts_len; @@ -143,6 +149,7 @@ struct sw_flow_key { } nd; }; } ipv6; + struct ovs_key_nsh nsh; /* network service header */ }; struct { /* Connection tracking fields not packed above. */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index dc0d79092e74..4201f9293af3 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -48,6 +48,7 @@ #include <net/ndisc.h> #include <net/mpls.h> #include <net/vxlan.h> +#include <net/tun_proto.h> #include <net/erspan.h> #include "flow_netlink.h" @@ -80,9 +81,11 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_HASH: case OVS_ACTION_ATTR_POP_ETH: case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_POP_NSH: case OVS_ACTION_ATTR_POP_VLAN: case OVS_ACTION_ATTR_PUSH_ETH: case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_PUSH_NSH: case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_SET: @@ -175,7 +178,8 @@ static bool match_validate(const struct sw_flow_match *match, | (1 << OVS_KEY_ATTR_ICMPV6) | (1 << OVS_KEY_ATTR_ARP) | (1 << OVS_KEY_ATTR_ND) - | (1 << OVS_KEY_ATTR_MPLS)); + | (1 << OVS_KEY_ATTR_MPLS) + | (1 << OVS_KEY_ATTR_NSH)); /* Always allowed mask fields. */ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) @@ -284,6 +288,14 @@ static bool match_validate(const struct sw_flow_match *match, } } + if (match->key->eth.type == htons(ETH_P_NSH)) { + key_expected |= 1 << OVS_KEY_ATTR_NSH; + if (match->mask && + match->mask->key.eth.type == htons(0xffff)) { + mask_allowed |= 1 << OVS_KEY_ATTR_NSH; + } + } + if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", @@ -325,12 +337,25 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */ } +size_t ovs_nsh_key_attr_size(void) +{ + /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider + * updating this function. + */ + return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */ + /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are + * mutually exclusive, so the bigger one can cover + * the small one. + */ + + nla_total_size(NSH_CTX_HDRS_MAX_LEN); +} + size_t ovs_key_attr_size(void) { /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -344,6 +369,8 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ + + nla_total_size(0) /* OVS_KEY_ATTR_NSH */ + + ovs_nsh_key_attr_size() + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -377,6 +404,13 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) }, }; +static const struct ovs_len_tbl +ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = { + [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) }, + [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) }, + [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE }, +}; + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, @@ -409,6 +443,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, + [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED, + .next = ovs_nsh_key_attr_lens, }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -1227,6 +1263,221 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, return 0; } +int nsh_hdr_from_nlattr(const struct nlattr *attr, + struct nshhdr *nh, size_t size) +{ + struct nlattr *a; + int rem; + u8 flags = 0; + u8 ttl = 0; + int mdlen = 0; + + /* validate_nsh has check this, so we needn't do duplicate check here + */ + if (size < NSH_BASE_HDR_LEN) + return -ENOBUFS; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + + flags = base->flags; + ttl = base->ttl; + nh->np = base->np; + nh->mdtype = base->mdtype; + nh->path_hdr = base->path_hdr; + break; + } + case OVS_NSH_KEY_ATTR_MD1: + mdlen = nla_len(a); + if (mdlen > size - NSH_BASE_HDR_LEN) + return -ENOBUFS; + memcpy(&nh->md1, nla_data(a), mdlen); + break; + + case OVS_NSH_KEY_ATTR_MD2: + mdlen = nla_len(a); + if (mdlen > size - NSH_BASE_HDR_LEN) + return -ENOBUFS; + memcpy(&nh->md2, nla_data(a), mdlen); + break; + + default: + return -EINVAL; + } + } + + /* nsh header length = NSH_BASE_HDR_LEN + mdlen */ + nh->ver_flags_ttl_len = 0; + nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen); + + return 0; +} + +int nsh_key_from_nlattr(const struct nlattr *attr, + struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask) +{ + struct nlattr *a; + int rem; + + /* validate_nsh has check this, so we needn't do duplicate check here + */ + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + const struct ovs_nsh_key_base *base_mask = base + 1; + + nsh->base = *base; + nsh_mask->base = *base_mask; + break; + } + case OVS_NSH_KEY_ATTR_MD1: { + const struct ovs_nsh_key_md1 *md1 = nla_data(a); + const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; + + memcpy(nsh->context, md1->context, sizeof(*md1)); + memcpy(nsh_mask->context, md1_mask->context, + sizeof(*md1_mask)); + break; + } + case OVS_NSH_KEY_ATTR_MD2: + /* Not supported yet */ + return -ENOTSUPP; + default: + return -EINVAL; + } + } + + return 0; +} + +static int nsh_key_put_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask, + bool is_push_nsh, bool log) +{ + struct nlattr *a; + int rem; + bool has_base = false; + bool has_md1 = false; + bool has_md2 = false; + u8 mdtype = 0; + int mdlen = 0; + + if (WARN_ON(is_push_nsh && is_mask)) + return -EINVAL; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + int i; + + if (type > OVS_NSH_KEY_ATTR_MAX) { + OVS_NLERR(log, "nsh attr %d is out of range max %d", + type, OVS_NSH_KEY_ATTR_MAX); + return -EINVAL; + } + + if (!check_attr_len(nla_len(a), + ovs_nsh_key_attr_lens[type].len)) { + OVS_NLERR( + log, + "nsh attr %d has unexpected len %d expected %d", + type, + nla_len(a), + ovs_nsh_key_attr_lens[type].len + ); + return -EINVAL; + } + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + + has_base = true; + mdtype = base->mdtype; + SW_FLOW_KEY_PUT(match, nsh.base.flags, + base->flags, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.ttl, + base->ttl, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.mdtype, + base->mdtype, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.np, + base->np, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.path_hdr, + base->path_hdr, is_mask); + break; + } + case OVS_NSH_KEY_ATTR_MD1: { + const struct ovs_nsh_key_md1 *md1 = nla_data(a); + + has_md1 = true; + for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) + SW_FLOW_KEY_PUT(match, nsh.context[i], + md1->context[i], is_mask); + break; + } + case OVS_NSH_KEY_ATTR_MD2: + if (!is_push_nsh) /* Not supported MD type 2 yet */ + return -ENOTSUPP; + + has_md2 = true; + mdlen = nla_len(a); + if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) { + OVS_NLERR( + log, + "Invalid MD length %d for MD type %d", + mdlen, + mdtype + ); + return -EINVAL; + } + break; + default: + OVS_NLERR(log, "Unknown nsh attribute %d", + type); + return -EINVAL; + } + } + + if (rem > 0) { + OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem); + return -EINVAL; + } + + if (has_md1 && has_md2) { + OVS_NLERR( + 1, + "invalid nsh attribute: md1 and md2 are exclusive." + ); + return -EINVAL; + } + + if (!is_mask) { + if ((has_md1 && mdtype != NSH_M_TYPE1) || + (has_md2 && mdtype != NSH_M_TYPE2)) { + OVS_NLERR(1, "nsh attribute has unmatched MD type %d.", + mdtype); + return -EINVAL; + } + + if (is_push_nsh && + (!has_base || (!has_md1 && !has_md2))) { + OVS_NLERR( + 1, + "push_nsh: missing base or metadata attributes" + ); + return -EINVAL; + } + } + + return 0; +} + static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 attrs, const struct nlattr **a, bool is_mask, bool log) @@ -1354,6 +1605,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, attrs &= ~(1 << OVS_KEY_ATTR_ARP); } + if (attrs & (1 << OVS_KEY_ATTR_NSH)) { + if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match, + is_mask, false, log) < 0) + return -EINVAL; + attrs &= ~(1 << OVS_KEY_ATTR_NSH); + } + if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { const struct ovs_key_mpls *mpls_key; @@ -1670,6 +1928,34 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, return 0; } +static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask, + struct sk_buff *skb) +{ + struct nlattr *start; + + start = nla_nest_start(skb, OVS_KEY_ATTR_NSH); + if (!start) + return -EMSGSIZE; + + if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base)) + goto nla_put_failure; + + if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) { + if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1, + sizeof(nsh->context), nsh->context)) + goto nla_put_failure; + } + + /* Don't support MD type 2 yet */ + + nla_nest_end(skb, start); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int __ovs_nla_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, bool is_mask, struct sk_buff *skb) @@ -1798,6 +2084,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ipv6_key->ipv6_tclass = output->ip.tos; ipv6_key->ipv6_hlimit = output->ip.ttl; ipv6_key->ipv6_frag = output->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_NSH)) { + if (nsh_key_to_nlattr(&output->nsh, is_mask, skb)) + goto nla_put_failure; } else if (swkey->eth.type == htons(ETH_P_ARP) || swkey->eth.type == htons(ETH_P_RARP)) { struct ovs_key_arp *arp_key; @@ -2292,6 +2581,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } +static bool validate_nsh(const struct nlattr *attr, bool is_mask, + bool is_push_nsh, bool log) +{ + struct sw_flow_match match; + struct sw_flow_key key; + int ret = 0; + + ovs_match_init(&match, &key, true, NULL); + ret = nsh_key_put_from_nlattr(attr, &match, is_mask, + is_push_nsh, log); + return !ret; +} + /* Return false if there are any non-masked bits set. * Mask follows data immediately, before any netlink padding. */ @@ -2434,6 +2736,13 @@ static int validate_set(const struct nlattr *a, break; + case OVS_KEY_ATTR_NSH: + if (eth_type != htons(ETH_P_NSH)) + return -EINVAL; + if (!validate_nsh(nla_data(a), masked, false, log)) + return -EINVAL; + break; + default: return -EINVAL; } @@ -2533,6 +2842,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), [OVS_ACTION_ATTR_POP_ETH] = 0, + [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1, + [OVS_ACTION_ATTR_POP_NSH] = 0, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -2690,6 +3001,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, mac_proto = MAC_PROTO_ETHERNET; break; + case OVS_ACTION_ATTR_PUSH_NSH: + if (mac_proto != MAC_PROTO_ETHERNET) { + u8 next_proto; + + next_proto = tun_p_from_eth_p(eth_type); + if (!next_proto) + return -EINVAL; + } + mac_proto = MAC_PROTO_NONE; + if (!validate_nsh(nla_data(a), false, true, true)) + return -EINVAL; + break; + + case OVS_ACTION_ATTR_POP_NSH: { + __be16 inner_proto; + + if (eth_type != htons(ETH_P_NSH)) + return -EINVAL; + inner_proto = tun_p_to_eth_p(key->nsh.base.np); + if (!inner_proto) + return -EINVAL; + if (key->nsh.base.np == TUN_P_ETHERNET) + mac_proto = MAC_PROTO_ETHERNET; + else + mac_proto = MAC_PROTO_NONE; + break; + } + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 929c665ac3aa..6657606b2b47 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -79,4 +79,9 @@ int ovs_nla_put_actions(const struct nlattr *attr, void ovs_nla_free_flow_actions(struct sw_flow_actions *); void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); +int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh, + struct ovs_key_nsh *nsh_mask); +int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh, + size_t size); + #endif /* flow_netlink.h */ diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index e458ece96d3d..77ab05e23001 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1120,7 +1120,7 @@ static int __init qrtr_proto_init(void) return 0; } -module_init(qrtr_proto_init); +postcore_initcall(qrtr_proto_init); static void __exit qrtr_proto_fini(void) { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index ff4d69082376..4d33a50a8a6d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -80,7 +80,6 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) spin_lock_bh(&idrinfo->lock); idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); spin_unlock_bh(&idrinfo->lock); - put_net(idrinfo->net); gen_kill_estimator(&p->tcfa_rate_est); free_tcf(p); } @@ -339,7 +338,6 @@ err3: p->idrinfo = idrinfo; p->ops = ops; INIT_LIST_HEAD(&p->list); - get_net(idrinfo->net); *a = p; return 0; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 035ed268290b..5ef8ce8c83d4 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -398,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops, net); + return tc_action_net_init(tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct net *net) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 34e52d01a5dd..10b7a8855a6c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -206,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tc_action_net_init(tn, &act_connmark_ops, net); + return tc_action_net_init(tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct net *net) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 35171df2ebef..1c40caadcff9 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -626,7 +626,7 @@ static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops, net); + return tc_action_net_init(tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct net *net) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ef7f7f39d26d..e29a48ef7fc3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -232,7 +232,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops, net); + return tc_action_net_init(tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct net *net) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index eca272533418..3007cb1310ea 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -855,7 +855,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops, net); + return tc_action_net_init(tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct net *net) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index dbdf3b2470d5..d9e399a7e3d5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops, net); + return tc_action_net_init(tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct net *net) @@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops, net); + return tc_action_net_init(tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct net *net) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 8df5775bbf22..8b3e59388480 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -340,7 +340,7 @@ static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops, net); + return tc_action_net_init(tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 7eeaaf9217b6..c365d01b99c8 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -307,7 +307,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops, net); + return tc_action_net_init(tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct net *net) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b3d82c334a5f..491fe5deb09e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -450,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tc_action_net_init(tn, &act_pedit_ops, net); + return tc_action_net_init(tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 9ec42b26e4b9..3bb2ebf9e9ae 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -331,7 +331,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops, net); + return tc_action_net_init(tn, &act_police_ops); } static void __net_exit police_exit_net(struct net *net) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index e69a1e3a39bf..8b5abcd2f32f 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -240,7 +240,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops, net); + return tc_action_net_init(tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct net *net) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index a8d0ea95f894..e7b57e5071a3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -201,7 +201,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops, net); + return tc_action_net_init(tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct net *net) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index fbac62472e09..59949d61f20d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -238,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops, net); + return tc_action_net_init(tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct net *net) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 8e12d8897d2f..b642ad3d39dd 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -263,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops, net); + return tc_action_net_init(tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct net *net) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index c33faa373cf2..30c96274c638 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -322,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tc_action_net_init(tn, &act_tunnel_key_ops, net); + return tc_action_net_init(tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct net *net) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 115fc33cc6d8..97f717a13ad5 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -26,14 +26,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p; int action; int err; u16 tci; - spin_lock(&v->tcf_lock); tcf_lastuse_update(&v->tcf_tm); - bstats_update(&v->tcf_bstats, skb); - action = v->tcf_action; + bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb); /* Ensure 'data' points at mac_header prior calling vlan manipulating * functions. @@ -41,15 +40,21 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, if (skb_at_tc_ingress(skb)) skb_push_rcsum(skb, skb->mac_len); - switch (v->tcfv_action) { + rcu_read_lock(); + + action = READ_ONCE(v->tcf_action); + + p = rcu_dereference(v->vlan_p); + + switch (p->tcfv_action) { case TCA_VLAN_ACT_POP: err = skb_vlan_pop(skb); if (err) goto drop; break; case TCA_VLAN_ACT_PUSH: - err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid | - (v->tcfv_push_prio << VLAN_PRIO_SHIFT)); + err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid | + (p->tcfv_push_prio << VLAN_PRIO_SHIFT)); if (err) goto drop; break; @@ -68,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, goto drop; } /* replace the vid */ - tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; + tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid; /* replace prio bits, if tcfv_push_prio specified */ - if (v->tcfv_push_prio) { + if (p->tcfv_push_prio) { tci &= ~VLAN_PRIO_MASK; - tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; + tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT; } /* put updated tci as hwaccel tag */ - __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); + __vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci); break; default: BUG(); @@ -85,12 +90,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, drop: action = TC_ACT_SHOT; - v->tcf_qstats.drops++; + qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats)); + unlock: + rcu_read_unlock(); if (skb_at_tc_ingress(skb)) skb_pull_rcsum(skb, skb->mac_len); - spin_unlock(&v->tcf_lock); return action; } @@ -107,6 +113,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; + struct tcf_vlan_params *p, *p_old; struct tc_vlan *parm; struct tcf_vlan *v; int action; @@ -172,7 +179,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, parm->index, est, a, - &act_vlan_ops, bind, false); + &act_vlan_ops, bind, true); if (ret) return ret; @@ -185,46 +192,67 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, v = to_vlan(*a); - spin_lock_bh(&v->tcf_lock); - - v->tcfv_action = action; - v->tcfv_push_vid = push_vid; - v->tcfv_push_prio = push_prio; - v->tcfv_push_proto = push_proto; + ASSERT_RTNL(); + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + if (ovr) + tcf_idr_release(*a, bind); + return -ENOMEM; + } v->tcf_action = parm->action; - spin_unlock_bh(&v->tcf_lock); + p_old = rtnl_dereference(v->vlan_p); + + p->tcfv_action = action; + p->tcfv_push_vid = push_vid; + p->tcfv_push_prio = push_prio; + p->tcfv_push_proto = push_proto; + + rcu_assign_pointer(v->vlan_p, p); + + if (p_old) + kfree_rcu(p_old, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; } +static void tcf_vlan_cleanup(struct tc_action *a, int bind) +{ + struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p; + + p = rcu_dereference_protected(v->vlan_p, 1); + kfree_rcu(p, rcu); +} + static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p); struct tc_vlan opt = { .index = v->tcf_index, .refcnt = v->tcf_refcnt - ref, .bindcnt = v->tcf_bindcnt - bind, .action = v->tcf_action, - .v_action = v->tcfv_action, + .v_action = p->tcfv_action, }; struct tcf_t t; if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || - v->tcfv_action == TCA_VLAN_ACT_MODIFY) && - (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || + if ((p->tcfv_action == TCA_VLAN_ACT_PUSH || + p->tcfv_action == TCA_VLAN_ACT_MODIFY) && + (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, - v->tcfv_push_proto) || + p->tcfv_push_proto) || (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, - v->tcfv_push_prio)))) + p->tcfv_push_prio)))) goto nla_put_failure; tcf_tm_dump(&t, &v->tcf_tm); @@ -260,6 +288,7 @@ static struct tc_action_ops act_vlan_ops = { .act = tcf_vlan, .dump = tcf_vlan_dump, .init = tcf_vlan_init, + .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, .lookup = tcf_vlan_search, .size = sizeof(struct tcf_vlan), @@ -269,7 +298,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops, net); + return tc_action_net_init(tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct net *net) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 206e19f4fc01..ab255b421781 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1110,6 +1110,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, exts->actions[i++] = act; exts->nr_actions = i; } + exts->net = net; } #else if ((exts->action && tb[exts->action]) || diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 871351358c10..5f169ded347e 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -87,16 +87,21 @@ static int basic_init(struct tcf_proto *tp) return 0; } +static void __basic_delete_filter(struct basic_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void basic_delete_filter_work(struct work_struct *work) { struct basic_filter *f = container_of(work, struct basic_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - tcf_em_tree_destroy(&f->ematches); + __basic_delete_filter(f); rtnl_unlock(); - - kfree(f); } static void basic_delete_filter(struct rcu_head *head) @@ -116,7 +121,10 @@ static void basic_destroy(struct tcf_proto *tp) list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); idr_remove_ext(&head->handle_idr, f->handle); - call_rcu(&f->rcu, basic_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, basic_delete_filter); + else + __basic_delete_filter(f); } idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); @@ -130,6 +138,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); idr_remove_ext(&head->handle_idr, f->handle); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, basic_delete_filter); *last = list_empty(&head->flist); return 0; @@ -225,6 +234,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, basic_delete_filter); } else { list_add_rcu(&fnew->link, &head->flist); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index dc9bd9a0070b..fb680dafac5a 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -261,6 +261,7 @@ static int cls_bpf_init(struct tcf_proto *tp) static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) { tcf_exts_destroy(&prog->exts); + tcf_exts_put_net(&prog->exts); if (cls_bpf_is_ebpf(prog)) bpf_prog_put(prog->filter); @@ -297,7 +298,10 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); - call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + if (tcf_exts_get_net(&prog->exts)) + call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + else + __cls_bpf_delete_prog(prog); } static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) @@ -526,6 +530,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, idr_replace_ext(&head->handle_idr, prog, handle); list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); + tcf_exts_get_net(&oldprog->exts); call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); } else { list_add_rcu(&prog->link, &head->plist); diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index a97e069bee89..309d5899265f 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -60,15 +60,21 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; +static void __cls_cgroup_destroy(struct cls_cgroup_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_em_tree_destroy(&head->ematches); + tcf_exts_put_net(&head->exts); + kfree(head); +} + static void cls_cgroup_destroy_work(struct work_struct *work) { struct cls_cgroup_head *head = container_of(work, struct cls_cgroup_head, work); rtnl_lock(); - tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(&head->ematches); - kfree(head); + __cls_cgroup_destroy(head); rtnl_unlock(); } @@ -124,8 +130,10 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; rcu_assign_pointer(tp->root, new); - if (head) + if (head) { + tcf_exts_get_net(&head->exts); call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + } return 0; errout: tcf_exts_destroy(&new->exts); @@ -138,8 +146,12 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) struct cls_cgroup_head *head = rtnl_dereference(tp->root); /* Head can still be NULL due to cls_cgroup_init(). */ - if (head) - call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + if (head) { + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + else + __cls_cgroup_destroy(head); + } } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 9c08fcdaf41c..25c2a888e1f0 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -372,15 +372,21 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static void flow_destroy_filter_work(struct work_struct *work) +static void __flow_destroy_filter(struct flow_filter *f) { - struct flow_filter *f = container_of(work, struct flow_filter, work); - - rtnl_lock(); del_timer_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); kfree(f); +} + +static void flow_destroy_filter_work(struct work_struct *work) +{ + struct flow_filter *f = container_of(work, struct flow_filter, work); + + rtnl_lock(); + __flow_destroy_filter(f); rtnl_unlock(); } @@ -554,8 +560,10 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, *arg = fnew; - if (fold) + if (fold) { + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, flow_destroy_filter); + } return 0; err2: @@ -572,6 +580,7 @@ static int flow_delete(struct tcf_proto *tp, void *arg, bool *last) struct flow_filter *f = arg; list_del_rcu(&f->list); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, flow_destroy_filter); *last = list_empty(&head->filters); return 0; @@ -596,7 +605,10 @@ static void flow_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); - call_rcu(&f->rcu, flow_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, flow_destroy_filter); + else + __flow_destroy_filter(f); } kfree_rcu(head, rcu); } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index c99fa9e5be46..543a3e875d05 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -193,13 +193,19 @@ static int fl_init(struct tcf_proto *tp) return 0; } +static void __fl_destroy_filter(struct cls_fl_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void fl_destroy_filter_work(struct work_struct *work) { struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __fl_destroy_filter(f); rtnl_unlock(); } @@ -282,7 +288,10 @@ static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fl_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fl_destroy_filter); + else + __fl_destroy_filter(f); } static void fl_destroy_sleepable(struct work_struct *work) @@ -952,6 +961,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, fl_destroy_filter); } else { list_add_tail_rcu(&fnew->list, &head->filters); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 5908f56f76da..20f0de1a960a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -125,13 +125,19 @@ static int fw_init(struct tcf_proto *tp) return 0; } +static void __fw_delete_filter(struct fw_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void fw_delete_filter_work(struct work_struct *work) { struct fw_filter *f = container_of(work, struct fw_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __fw_delete_filter(f); rtnl_unlock(); } @@ -157,7 +163,10 @@ static void fw_destroy(struct tcf_proto *tp) RCU_INIT_POINTER(head->ht[h], rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fw_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fw_delete_filter); + else + __fw_delete_filter(f); } } kfree_rcu(head, rcu); @@ -182,6 +191,7 @@ static int fw_delete(struct tcf_proto *tp, void *arg, bool *last) if (pfp == f) { RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); ret = 0; break; @@ -302,6 +312,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next)); rcu_assign_pointer(*fp, fnew); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); *arg = fnew; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 95dc997873e8..66d4e0099158 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -44,13 +44,19 @@ static int mall_init(struct tcf_proto *tp) return 0; } +static void __mall_destroy(struct cls_mall_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_exts_put_net(&head->exts); + kfree(head); +} + static void mall_destroy_work(struct work_struct *work) { struct cls_mall_head *head = container_of(work, struct cls_mall_head, work); rtnl_lock(); - tcf_exts_destroy(&head->exts); - kfree(head); + __mall_destroy(head); rtnl_unlock(); } @@ -116,7 +122,10 @@ static void mall_destroy(struct tcf_proto *tp) if (!tc_skip_hw(head->flags)) mall_destroy_hw_filter(tp, head, (unsigned long) head); - call_rcu(&head->rcu, mall_destroy_rcu); + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, mall_destroy_rcu); + else + __mall_destroy(head); } static void *mall_get(struct tcf_proto *tp, u32 handle) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 4b14ccd8b8f2..ac9a5b8825b9 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -257,13 +257,19 @@ static int route4_init(struct tcf_proto *tp) return 0; } +static void __route4_delete_filter(struct route4_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void route4_delete_filter_work(struct work_struct *work) { struct route4_filter *f = container_of(work, struct route4_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __route4_delete_filter(f); rtnl_unlock(); } @@ -297,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp) next = rtnl_dereference(f->next); RCU_INIT_POINTER(b->ht[h2], next); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, route4_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, route4_delete_filter); + else + __route4_delete_filter(f); } } RCU_INIT_POINTER(head->table[h1], NULL); @@ -338,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, void *arg, bool *last) /* Delete it */ tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, route4_delete_filter); /* Strip RTNL protected tree */ @@ -541,6 +551,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, *arg = f; if (fold) { tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, route4_delete_filter); } return 0; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index bdbc541787f8..cf325625c99d 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -285,13 +285,19 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } +static void __rsvp_delete_filter(struct rsvp_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void rsvp_delete_filter_work(struct work_struct *work) { struct rsvp_filter *f = container_of(work, struct rsvp_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __rsvp_delete_filter(f); rtnl_unlock(); } @@ -310,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - call_rcu(&f->rcu, rsvp_delete_filter_rcu); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, rsvp_delete_filter_rcu); + else + __rsvp_delete_filter(f); } static void rsvp_destroy(struct tcf_proto *tp) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index d6abfa6757f2..67467ae24c97 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -142,13 +142,19 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } +static void __tcindex_destroy_rexts(struct tcindex_filter_result *r) +{ + tcf_exts_destroy(&r->exts); + tcf_exts_put_net(&r->exts); +} + static void tcindex_destroy_rexts_work(struct work_struct *work) { struct tcindex_filter_result *r; r = container_of(work, struct tcindex_filter_result, work); rtnl_lock(); - tcf_exts_destroy(&r->exts); + __tcindex_destroy_rexts(r); rtnl_unlock(); } @@ -161,14 +167,20 @@ static void tcindex_destroy_rexts(struct rcu_head *head) tcf_queue_work(&r->work); } +static void __tcindex_destroy_fexts(struct tcindex_filter *f) +{ + tcf_exts_destroy(&f->result.exts); + tcf_exts_put_net(&f->result.exts); + kfree(f); +} + static void tcindex_destroy_fexts_work(struct work_struct *work) { struct tcindex_filter *f = container_of(work, struct tcindex_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->result.exts); - kfree(f); + __tcindex_destroy_fexts(f); rtnl_unlock(); } @@ -213,10 +225,17 @@ found: * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - if (f) - call_rcu(&f->rcu, tcindex_destroy_fexts); - else - call_rcu(&r->rcu, tcindex_destroy_rexts); + if (f) { + if (tcf_exts_get_net(&f->result.exts)) + call_rcu(&f->rcu, tcindex_destroy_fexts); + else + __tcindex_destroy_fexts(f); + } else { + if (tcf_exts_get_net(&r->exts)) + call_rcu(&r->rcu, tcindex_destroy_rexts); + else + __tcindex_destroy_rexts(r); + } *last = false; return 0; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 2737b71854c9..ac152b4f4247 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -399,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); + tcf_exts_put_net(&n->exts); if (n->ht_down) n->ht_down->refcnt--; #ifdef CONFIG_CLS_U32_PERF @@ -476,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) RCU_INIT_POINTER(*kp, key->next); tcf_unbind_filter(tp, &key->res); + tcf_exts_get_net(&key->exts); call_rcu(&key->rcu, u32_delete_key_freepf_rcu); return 0; } @@ -590,7 +592,10 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n->handle); idr_remove_ext(&ht->handle_idr, n->handle); - call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + if (tcf_exts_get_net(&n->exts)) + call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + else + u32_destroy_key(n->tp, n, true); } } } @@ -949,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); + tcf_exts_get_net(&n->exts); call_rcu(&n->rcu, u32_delete_key_rcu); return 0; } diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index bdb533b7fb8c..7a72980c1509 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -212,7 +212,7 @@ static void cbs_disable_offload(struct net_device *dev, cbs.queue = q->queue; cbs.enable = 0; - err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs); + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); if (err < 0) pr_warn("Couldn't disable CBS offload for queue %d\n", cbs.queue); @@ -236,7 +236,7 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, cbs.idleslope = opt->idleslope; cbs.sendslope = opt->sendslope; - err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs); + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); if (err < 0) return err; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 4d5ed45123f0..b85885a9d8a1 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -50,7 +50,8 @@ static void mqprio_destroy(struct Qdisc *sch) switch (priv->mode) { case TC_MQPRIO_MODE_DCB: case TC_MQPRIO_MODE_CHANNEL: - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, + dev->netdev_ops->ndo_setup_tc(dev, + TC_SETUP_QDISC_MQPRIO, &mqprio); break; default: @@ -265,7 +266,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; } err = dev->netdev_ops->ndo_setup_tc(dev, - TC_SETUP_MQPRIO, + TC_SETUP_QDISC_MQPRIO, &mqprio); if (err) return err; diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index fdfdb56aaae2..007dd8ef8aac 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/inet_ecn.h> #include <net/red.h> @@ -148,11 +149,37 @@ static void red_reset(struct Qdisc *sch) red_restart(&q->vars); } +static int red_offload(struct Qdisc *sch, bool enable) +{ + struct red_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_red_qopt_offload opt = { + .handle = sch->handle, + .parent = sch->parent, + }; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + if (enable) { + opt.command = TC_RED_REPLACE; + opt.set.min = q->parms.qth_min >> q->parms.Wlog; + opt.set.max = q->parms.qth_max >> q->parms.Wlog; + opt.set.probability = q->parms.max_P; + opt.set.is_ecn = red_use_ecn(q); + } else { + opt.command = TC_RED_DESTROY; + } + + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); +} + static void red_destroy(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); del_timer_sync(&q->adapt_timer); + red_offload(sch, false); qdisc_destroy(q->qdisc); } @@ -219,6 +246,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) red_start_of_idle_period(&q->vars); sch_tree_unlock(sch); + red_offload(sch, true); return 0; } @@ -244,6 +272,33 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt) return red_change(sch, opt); } +static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_red_qopt_offload hw_stats = { + .handle = sch->handle, + .parent = sch->parent, + .command = TC_RED_STATS, + .stats.bstats = &sch->bstats, + .stats.qstats = &sch->qstats, + }; + int err; + + opt->flags &= ~TC_RED_OFFLOADED; + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return 0; + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, + &hw_stats); + if (err == -EOPNOTSUPP) + return 0; + + if (!err) + opt->flags |= TC_RED_OFFLOADED; + + return err; +} + static int red_dump(struct Qdisc *sch, struct sk_buff *skb) { struct red_sched_data *q = qdisc_priv(sch); @@ -257,8 +312,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) .Plog = q->parms.Plog, .Scell_log = q->parms.Scell_log, }; + int err; sch->qstats.backlog = q->qdisc->qstats.backlog; + err = red_dump_offload(sch, &opt); + if (err) + goto nla_put_failure; + opts = nla_nest_start(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; @@ -275,6 +335,7 @@ nla_put_failure: static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct red_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); struct tc_red_xstats st = { .early = q->stats.prob_drop + q->stats.forced_drop, .pdrop = q->stats.pdrop, @@ -282,6 +343,24 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .marked = q->stats.prob_mark + q->stats.forced_mark, }; + if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) { + struct red_stats hw_stats = {0}; + struct tc_red_qopt_offload hw_stats_request = { + .handle = sch->handle, + .parent = sch->parent, + .command = TC_RED_XSTATS, + .xstats = &hw_stats, + }; + if (!dev->netdev_ops->ndo_setup_tc(dev, + TC_SETUP_QDISC_RED, + &hw_stats_request)) { + st.early += hw_stats.prob_drop + hw_stats.forced_drop; + st.pdrop += hw_stats.pdrop; + st.other += hw_stats.other; + st.marked += hw_stats.prob_mark + hw_stats.forced_mark; + } + } + return gnet_stats_copy_app(d, &st, sizeof(st)); } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 0531b41d1f2d..74b9d916a58b 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -345,6 +345,8 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) return sizeof(struct switchdev_obj_port_vlan); case SWITCHDEV_OBJ_ID_PORT_MDB: return sizeof(struct switchdev_obj_port_mdb); + case SWITCHDEV_OBJ_ID_HOST_MDB: + return sizeof(struct switchdev_obj_port_mdb); default: BUG(); } diff --git a/net/tipc/link.c b/net/tipc/link.c index 870b9b8f877a..6bce0b1117bd 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -239,7 +239,8 @@ static int link_is_up(struct tipc_link *l) static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, - u16 rcvgap, int tolerance, int priority, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, struct sk_buff_head *xmitq); static void link_print(struct tipc_link *l, const char *str); static int tipc_link_build_nack_msg(struct tipc_link *l, @@ -773,7 +774,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) } if (state || probe || setup) - tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq); return rc; } @@ -1174,7 +1175,7 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) /* Unicast ACK */ l->rcv_unacked = 0; l->stats.sent_acks++; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } @@ -1188,7 +1189,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; - tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq); /* Inform peer that this endpoint is going down if applicable */ skb = skb_peek_tail(xmitq); @@ -1215,7 +1216,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, } if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } @@ -1289,7 +1290,8 @@ drop: } static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, - u16 rcvgap, int tolerance, int priority, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, struct sk_buff_head *xmitq) { struct tipc_link *bcl = l->bc_rcvlink; @@ -1337,6 +1339,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_seq_gap(hdr, rcvgap); msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); msg_set_probe(hdr, probe); + msg_set_is_keepalive(hdr, probe || probe_reply); tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + dlen); skb_trim(skb, INT_H_SIZE + dlen); @@ -1442,6 +1445,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 rcv_nxt = l->rcv_nxt; u16 dlen = msg_data_sz(hdr); int mtyp = msg_type(hdr); + bool reply = msg_probe(hdr); void *data; char *if_name; int rc = 0; @@ -1528,9 +1532,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* Send NACK if peer has sent pkts we haven't received yet */ if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) rcvgap = peers_snd_nxt - l->rcv_nxt; - if (rcvgap || (msg_probe(hdr))) - tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, - 0, 0, xmitq); + if (rcvgap || reply) + tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, + rcvgap, 0, 0, xmitq); tipc_link_release_pkts(l, ack); /* If NACK, retransmit will now start at right position */ @@ -2122,14 +2126,14 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } void tipc_link_set_prio(struct tipc_link *l, u32 prio, struct sk_buff_head *xmitq) { l->priority = prio; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq); } void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index cedf811317fb..bf8f57ccc70c 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -226,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d) msg_set_bits(m, 0, 19, 1, d); } +static inline int msg_is_keepalive(struct tipc_msg *m) +{ + return msg_bits(m, 0, 19, 1); +} + +static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 19, 1, d); +} + static inline int msg_src_droppable(struct tipc_msg *m) { return msg_bits(m, 0, 18, 1); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 82d20ee34581..347ab31574d5 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -266,8 +266,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto lock; } - daddr = (xfrm_address_t *)(skb_network_header(skb) + - XFRM_SPI_SKB_CB(skb)->daddroff); family = XFRM_SPI_SKB_CB(skb)->family; /* if tunnel is present override skb->mark value with tunnel i_key */ @@ -294,6 +292,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } + daddr = (xfrm_address_t *)(skb_network_header(skb) + + XFRM_SPI_SKB_CB(skb)->daddroff); do { if (skb->sp->len == XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f02b1743b239..2f57722f5d03 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1360,36 +1360,29 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, struct net *net = xp_net(policy); int nx; int i, error; - xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); - xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); xfrm_address_t tmp; for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; - xfrm_address_t *remote = daddr; - xfrm_address_t *local = saddr; + xfrm_address_t *local; + xfrm_address_t *remote; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode == XFRM_MODE_TUNNEL || - tmpl->mode == XFRM_MODE_BEET) { - remote = &tmpl->id.daddr; - local = &tmpl->saddr; - if (xfrm_addr_any(local, tmpl->encap_family)) { - error = xfrm_get_saddr(net, fl->flowi_oif, - &tmp, remote, - tmpl->encap_family, 0); - if (error) - goto fail; - local = &tmp; - } + remote = &tmpl->id.daddr; + local = &tmpl->saddr; + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, fl->flowi_oif, + &tmp, remote, + tmpl->encap_family, 0); + if (error) + goto fail; + local = &tmp; } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); if (x && x->km.state == XFRM_STATE_VALID) { xfrm[nx++] = x; - daddr = remote; - saddr = local; continue; } if (x) { @@ -1786,19 +1779,23 @@ void xfrm_policy_cache_flush(void) put_online_cpus(); } -static bool xfrm_pol_dead(struct xfrm_dst *xdst) +static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst, + struct xfrm_state * const xfrm[], + int num) { - unsigned int num_pols = xdst->num_pols; - unsigned int pol_dead = 0, i; + const struct dst_entry *dst = &xdst->u.dst; + int i; - for (i = 0; i < num_pols; i++) - pol_dead |= xdst->pols[i]->walk.dead; + if (xdst->num_xfrms != num) + return false; - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - if (pol_dead) - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + for (i = 0; i < num; i++) { + if (!dst || dst->xfrm != xfrm[i]) + return false; + dst = dst->child; + } - return pol_dead; + return xfrm_bundle_ok(xdst); } static struct xfrm_dst * @@ -1812,26 +1809,28 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, struct dst_entry *dst; int err; + /* Try to instantiate a bundle */ + err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); + if (err <= 0) { + if (err != 0 && err != -EAGAIN) + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); + return ERR_PTR(err); + } + xdst = this_cpu_read(xfrm_last_dst); if (xdst && xdst->u.dst.dev == dst_orig->dev && xdst->num_pols == num_pols && - !xfrm_pol_dead(xdst) && memcmp(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols) == 0 && - xfrm_bundle_ok(xdst)) { + xfrm_xdst_can_reuse(xdst, xfrm, err)) { dst_hold(&xdst->u.dst); + while (err > 0) + xfrm_state_put(xfrm[--err]); return xdst; } old = xdst; - /* Try to instantiate a bundle */ - err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); - if (err <= 0) { - if (err != 0 && err != -EAGAIN) - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); - return ERR_PTR(err); - } dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); if (IS_ERR(dst)) { diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5994075b080d..3b4945c1eab0 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -29,6 +29,7 @@ hostprogs-y += test_cgrp2_sock hostprogs-y += test_cgrp2_sock2 hostprogs-y += xdp1 hostprogs-y += xdp2 +hostprogs-y += xdp_router_ipv4 hostprogs-y += test_current_task_under_cgroup hostprogs-y += trace_event hostprogs-y += sampleip @@ -76,6 +77,7 @@ test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o +xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o @@ -118,6 +120,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o always += xdp1_kern.o always += xdp2_kern.o +always += xdp_router_ipv4_kern.o always += test_current_task_under_cgroup_kern.o always += trace_event_kern.o always += sampleip_kern.o @@ -166,6 +169,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_xdp1 += -lelf HOSTLOADLIBES_xdp2 += -lelf +HOSTLOADLIBES_xdp_router_ipv4 += -lelf HOSTLOADLIBES_test_current_task_under_cgroup += -lelf HOSTLOADLIBES_trace_event += -lelf HOSTLOADLIBES_sampleip += -lelf diff --git a/samples/bpf/tcp_bbf.readme b/samples/bpf/tcp_bpf.readme index 831fb601e3c9..831fb601e3c9 100644 --- a/samples/bpf/tcp_bbf.readme +++ b/samples/bpf/tcp_bpf.readme diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c index ee83bbabd17c..0566b7fa38a1 100644 --- a/samples/bpf/tcp_bufs_kern.c +++ b/samples/bpf/tcp_bufs_kern.c @@ -41,8 +41,10 @@ int bpf_bufs(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != 55601 && - skops->local_port != 55601) - return -1; + skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; @@ -61,8 +63,8 @@ int bpf_bufs(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of active connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, - &bufsize, sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); break; case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: /* Nothing to do */ @@ -71,8 +73,8 @@ int bpf_bufs(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of passive connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, - &bufsize, sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); break; default: rv = -1; diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c index d68eadd9ca2d..f4225c9d2c0c 100644 --- a/samples/bpf/tcp_clamp_kern.c +++ b/samples/bpf/tcp_clamp_kern.c @@ -41,8 +41,10 @@ int bpf_clamp(struct bpf_sock_ops *skops) /* For testing purposes, only execute rest of BPF program * if neither port numberis 55601 */ - if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) - return -1; + if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) { + skops->reply = -1; + return 0; + } op = (int) skops->op; @@ -66,9 +68,9 @@ int bpf_clamp(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of active connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, - SO_RCVBUF, &bufsize, - sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); break; case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: rv = bpf_setsockopt(skops, SOL_TCP, @@ -80,12 +82,12 @@ int bpf_clamp(struct bpf_sock_ops *skops) rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SNDCWND_CLAMP, &clamp, sizeof(clamp)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, - SO_SNDBUF, &bufsize, - sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, - SO_RCVBUF, &bufsize, - sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, + SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); break; default: rv = -1; diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c index dac15bce1fa9..ad0f1ba8206a 100644 --- a/samples/bpf/tcp_cong_kern.c +++ b/samples/bpf/tcp_cong_kern.c @@ -39,8 +39,10 @@ int bpf_cong(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != 55601 && - skops->local_port != 55601) - return -1; + skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c index 23c5122ef819..4ca5ecc9f580 100644 --- a/samples/bpf/tcp_iw_kern.c +++ b/samples/bpf/tcp_iw_kern.c @@ -42,8 +42,10 @@ int bpf_iw(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != 55601 && - skops->local_port != 55601) - return -1; + skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; @@ -62,8 +64,8 @@ int bpf_iw(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of active connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, - &bufsize, sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); break; case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw, @@ -73,8 +75,8 @@ int bpf_iw(struct bpf_sock_ops *skops) /* Set sndbuf and rcvbuf of passive connections */ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, - &bufsize, sizeof(bufsize)); + rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); break; default: rv = -1; diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c index 3f2a228f81ce..09ff65b40b31 100644 --- a/samples/bpf/tcp_rwnd_kern.c +++ b/samples/bpf/tcp_rwnd_kern.c @@ -38,8 +38,10 @@ int bpf_rwnd(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != - 55601 && skops->local_port != 55601) - return -1; + 55601 && skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c index 3c3fc83d81cb..232bb242823e 100644 --- a/samples/bpf/tcp_synrto_kern.c +++ b/samples/bpf/tcp_synrto_kern.c @@ -38,8 +38,10 @@ int bpf_synrto(struct bpf_sock_ops *skops) * if neither port numberis 55601 */ if (bpf_ntohl(skops->remote_port) != 55601 && - skops->local_port != 55601) - return -1; + skops->local_port != 55601) { + skops->reply = -1; + return 1; + } op = (int) skops->op; diff --git a/samples/bpf/xdp_router_ipv4_kern.c b/samples/bpf/xdp_router_ipv4_kern.c new file mode 100644 index 000000000000..993f56bc7b9a --- /dev/null +++ b/samples/bpf/xdp_router_ipv4_kern.c @@ -0,0 +1,186 @@ +/* Copyright (C) 2017 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" +#include <linux/slab.h> +#include <net/ip_fib.h> + +struct trie_value { + __u8 prefix[4]; + __be64 value; + int ifindex; + int metric; + __be32 gw; +}; + +/* Key for lpm_trie*/ +union key_4 { + u32 b32[2]; + u8 b8[8]; +}; + +struct arp_entry { + __be64 mac; + __be32 dst; +}; + +struct direct_map { + struct arp_entry arp; + int ifindex; + __be64 mac; +}; + +/* Map for trie implementation*/ +struct bpf_map_def SEC("maps") lpm_map = { + .type = BPF_MAP_TYPE_LPM_TRIE, + .key_size = 8, + .value_size = sizeof(struct trie_value), + .max_entries = 50, + .map_flags = BPF_F_NO_PREALLOC, +}; + +/* Map for counter*/ +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = 256, +}; + +/* Map for ARP table*/ +struct bpf_map_def SEC("maps") arp_table = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__be32), + .value_size = sizeof(__be64), + .max_entries = 50, +}; + +/* Map to keep the exact match entries in the route table*/ +struct bpf_map_def SEC("maps") exact_match = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__be32), + .value_size = sizeof(struct direct_map), + .max_entries = 50, +}; + +struct bpf_map_def SEC("maps") tx_port = { + .type = BPF_MAP_TYPE_DEVMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 100, +}; + +/* Function to set source and destination mac of the packet */ +static inline void set_src_dst_mac(void *data, void *src, void *dst) +{ + unsigned short *source = src; + unsigned short *dest = dst; + unsigned short *p = data; + + __builtin_memcpy(p, dest, 6); + __builtin_memcpy(p + 3, source, 6); +} + +/* Parse IPV4 packet to get SRC, DST IP and protocol */ +static inline int parse_ipv4(void *data, u64 nh_off, void *data_end, + __be32 *src, __be32 *dest) +{ + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + *src = iph->saddr; + *dest = iph->daddr; + return iph->protocol; +} + +SEC("xdp_router_ipv4") +int xdp_router_ipv4_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + __be64 *dest_mac = NULL, *src_mac = NULL; + void *data = (void *)(long)ctx->data; + struct trie_value *prefix_value; + int rc = XDP_DROP, forward_to; + struct ethhdr *eth = data; + union key_4 key4; + long *value; + u16 h_proto; + u32 ipproto; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + if (h_proto == htons(ETH_P_ARP)) { + return XDP_PASS; + } else if (h_proto == htons(ETH_P_IP)) { + struct direct_map *direct_entry; + __be32 src_ip = 0, dest_ip = 0; + + ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip); + direct_entry = bpf_map_lookup_elem(&exact_match, &dest_ip); + /* Check for exact match, this would give a faster lookup*/ + if (direct_entry && direct_entry->mac && direct_entry->arp.mac) { + src_mac = &direct_entry->mac; + dest_mac = &direct_entry->arp.mac; + forward_to = direct_entry->ifindex; + } else { + /* Look up in the trie for lpm*/ + key4.b32[0] = 32; + key4.b8[4] = dest_ip & 0xff; + key4.b8[5] = (dest_ip >> 8) & 0xff; + key4.b8[6] = (dest_ip >> 16) & 0xff; + key4.b8[7] = (dest_ip >> 24) & 0xff; + prefix_value = bpf_map_lookup_elem(&lpm_map, &key4); + if (!prefix_value) + return XDP_DROP; + src_mac = &prefix_value->value; + if (!src_mac) + return XDP_DROP; + dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip); + if (!dest_mac) { + if (!prefix_value->gw) + return XDP_DROP; + dest_ip = prefix_value->gw; + dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip); + } + forward_to = prefix_value->ifindex; + } + } else { + ipproto = 0; + } + if (src_mac && dest_mac) { + set_src_dst_mac(data, src_mac, dest_mac); + value = bpf_map_lookup_elem(&rxcnt, &ipproto); + if (value) + *value += 1; + return bpf_redirect_map(&tx_port, forward_to, 0); + } + return rc; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c new file mode 100644 index 000000000000..2c1fe3f4b1a4 --- /dev/null +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -0,0 +1,659 @@ +/* Copyright (C) 2017 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> +#include "bpf_load.h" +#include "libbpf.h" +#include <arpa/inet.h> +#include <fcntl.h> +#include <poll.h> +#include <net/if.h> +#include <netdb.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include "bpf_util.h" + +int sock, sock_arp, flags = 0; +static int total_ifindex; +int *ifindex_list; +char buf[8192]; + +static int get_route_table(int rtm_family); +static void int_exit(int sig) +{ + int i = 0; + + for (i = 0; i < total_ifindex; i++) + set_link_xdp_fd(ifindex_list[i], -1, flags); + exit(0); +} + +static void close_and_exit(int sig) +{ + int i = 0; + + close(sock); + close(sock_arp); + + for (i = 0; i < total_ifindex; i++) + set_link_xdp_fd(ifindex_list[i], -1, flags); + exit(0); +} + +/* Get the mac address of the interface given interface name */ +static __be64 getmac(char *iface) +{ + struct ifreq ifr; + __be64 mac = 0; + int fd, i; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + ifr.ifr_addr.sa_family = AF_INET; + strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1); + if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) { + printf("ioctl failed leaving....\n"); + return -1; + } + for (i = 0; i < 6 ; i++) + *((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i]; + close(fd); + return mac; +} + +static int recv_msg(struct sockaddr_nl sock_addr, int sock) +{ + struct nlmsghdr *nh; + int len, nll = 0; + char *buf_ptr; + + buf_ptr = buf; + while (1) { + len = recv(sock, buf_ptr, sizeof(buf) - nll, 0); + if (len < 0) + return len; + + nh = (struct nlmsghdr *)buf_ptr; + + if (nh->nlmsg_type == NLMSG_DONE) + break; + buf_ptr += len; + nll += len; + if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH) + break; + + if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE) + break; + } + return nll; +} + +/* Function to parse the route entry returned by netlink + * Updates the route entry related map entries + */ +static void read_route(struct nlmsghdr *nh, int nll) +{ + char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24]; + struct bpf_lpm_trie_key *prefix_key; + struct rtattr *rt_attr; + struct rtmsg *rt_msg; + int rtm_family; + int rtl; + int i; + struct route_table { + int dst_len, iface, metric; + char *iface_name; + __be32 dst, gw; + __be64 mac; + } route; + struct arp_table { + __be64 mac; + __be32 dst; + }; + + struct direct_map { + struct arp_table arp; + int ifindex; + __be64 mac; + } direct_entry; + + if (nh->nlmsg_type == RTM_DELROUTE) + printf("DELETING Route entry\n"); + else if (nh->nlmsg_type == RTM_GETROUTE) + printf("READING Route entry\n"); + else if (nh->nlmsg_type == RTM_NEWROUTE) + printf("NEW Route entry\n"); + else + printf("%d\n", nh->nlmsg_type); + + memset(&route, 0, sizeof(route)); + printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n"); + for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { + rt_msg = (struct rtmsg *)NLMSG_DATA(nh); + rtm_family = rt_msg->rtm_family; + if (rtm_family == AF_INET) + if (rt_msg->rtm_table != RT_TABLE_MAIN) + continue; + rt_attr = (struct rtattr *)RTM_RTA(rt_msg); + rtl = RTM_PAYLOAD(nh); + + for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) { + switch (rt_attr->rta_type) { + case NDA_DST: + sprintf(dsts, "%u", + (*((__be32 *)RTA_DATA(rt_attr)))); + break; + case RTA_GATEWAY: + sprintf(gws, "%u", + *((__be32 *)RTA_DATA(rt_attr))); + break; + case RTA_OIF: + sprintf(ifs, "%u", + *((int *)RTA_DATA(rt_attr))); + break; + case RTA_METRICS: + sprintf(metrics, "%u", + *((int *)RTA_DATA(rt_attr))); + default: + break; + } + } + sprintf(dsts_len, "%d", rt_msg->rtm_dst_len); + route.dst = atoi(dsts); + route.dst_len = atoi(dsts_len); + route.gw = atoi(gws); + route.iface = atoi(ifs); + route.metric = atoi(metrics); + route.iface_name = alloca(sizeof(char *) * IFNAMSIZ); + route.iface_name = if_indextoname(route.iface, route.iface_name); + route.mac = getmac(route.iface_name); + if (route.mac == -1) { + int i = 0; + + for (i = 0; i < total_ifindex; i++) + set_link_xdp_fd(ifindex_list[i], -1, flags); + exit(0); + } + assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0); + if (rtm_family == AF_INET) { + struct trie_value { + __u8 prefix[4]; + __be64 value; + int ifindex; + int metric; + __be32 gw; + } *prefix_value; + + prefix_key = alloca(sizeof(*prefix_key) + 3); + prefix_value = alloca(sizeof(*prefix_value)); + + prefix_key->prefixlen = 32; + prefix_key->prefixlen = route.dst_len; + direct_entry.mac = route.mac & 0xffffffffffff; + direct_entry.ifindex = route.iface; + direct_entry.arp.mac = 0; + direct_entry.arp.dst = 0; + if (route.dst_len == 32) { + if (nh->nlmsg_type == RTM_DELROUTE) + assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0); + else + if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0) + direct_entry.arp.dst = route.dst; + assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0); + } + for (i = 0; i < 4; i++) + prefix_key->data[i] = (route.dst >> i * 8) & 0xff; + + printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n", + (int)prefix_key->data[0], + (int)prefix_key->data[1], + (int)prefix_key->data[2], + (int)prefix_key->data[3], + route.gw, route.dst_len, + route.metric, + route.iface_name); + if (bpf_map_lookup_elem(map_fd[0], prefix_key, + prefix_value) < 0) { + for (i = 0; i < 4; i++) + prefix_value->prefix[i] = prefix_key->data[i]; + prefix_value->value = route.mac & 0xffffffffffff; + prefix_value->ifindex = route.iface; + prefix_value->gw = route.gw; + prefix_value->metric = route.metric; + + assert(bpf_map_update_elem(map_fd[0], + prefix_key, + prefix_value, 0 + ) == 0); + } else { + if (nh->nlmsg_type == RTM_DELROUTE) { + printf("deleting entry\n"); + printf("prefix key=%d.%d.%d.%d/%d", + prefix_key->data[0], + prefix_key->data[1], + prefix_key->data[2], + prefix_key->data[3], + prefix_key->prefixlen); + assert(bpf_map_delete_elem(map_fd[0], + prefix_key + ) == 0); + /* Rereading the route table to check if + * there is an entry with the same + * prefix but a different metric as the + * deleted enty. + */ + get_route_table(AF_INET); + } else if (prefix_key->data[0] == + prefix_value->prefix[0] && + prefix_key->data[1] == + prefix_value->prefix[1] && + prefix_key->data[2] == + prefix_value->prefix[2] && + prefix_key->data[3] == + prefix_value->prefix[3] && + route.metric >= prefix_value->metric) { + continue; + } else { + for (i = 0; i < 4; i++) + prefix_value->prefix[i] = + prefix_key->data[i]; + prefix_value->value = + route.mac & 0xffffffffffff; + prefix_value->ifindex = route.iface; + prefix_value->gw = route.gw; + prefix_value->metric = route.metric; + assert(bpf_map_update_elem( + map_fd[0], + prefix_key, + prefix_value, + 0) == 0); + } + } + } + memset(&route, 0, sizeof(route)); + memset(dsts, 0, sizeof(dsts)); + memset(dsts_len, 0, sizeof(dsts_len)); + memset(gws, 0, sizeof(gws)); + memset(ifs, 0, sizeof(ifs)); + memset(&route, 0, sizeof(route)); + } +} + +/* Function to read the existing route table when the process is launched*/ +static int get_route_table(int rtm_family) +{ + struct sockaddr_nl sa; + struct nlmsghdr *nh; + int sock, seq = 0; + struct msghdr msg; + struct iovec iov; + int ret = 0; + int nll; + + struct { + struct nlmsghdr nl; + struct rtmsg rt; + char buf[8192]; + } req; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + memset(&req, 0, sizeof(req)); + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nl.nlmsg_type = RTM_GETROUTE; + + req.rt.rtm_family = rtm_family; + req.rt.rtm_table = RT_TABLE_MAIN; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++seq; + memset(&msg, 0, sizeof(msg)); + iov.iov_base = (void *)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + ret = sendmsg(sock, &msg, 0); + if (ret < 0) { + printf("send to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + memset(buf, 0, sizeof(buf)); + nll = recv_msg(sa, sock); + if (nll < 0) { + printf("recv from netlink: %s\n", strerror(nll)); + ret = -1; + goto cleanup; + } + nh = (struct nlmsghdr *)buf; + read_route(nh, nll); +cleanup: + close(sock); + return ret; +} + +/* Function to parse the arp entry returned by netlink + * Updates the arp entry related map entries + */ +static void read_arp(struct nlmsghdr *nh, int nll) +{ + struct rtattr *rt_attr; + char dsts[24], mac[24]; + struct ndmsg *rt_msg; + int rtl, ndm_family; + + struct arp_table { + __be64 mac; + __be32 dst; + } arp_entry; + struct direct_map { + struct arp_table arp; + int ifindex; + __be64 mac; + } direct_entry; + + if (nh->nlmsg_type == RTM_GETNEIGH) + printf("READING arp entry\n"); + printf("Address\tHwAddress\n"); + for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { + rt_msg = (struct ndmsg *)NLMSG_DATA(nh); + rt_attr = (struct rtattr *)RTM_RTA(rt_msg); + ndm_family = rt_msg->ndm_family; + rtl = RTM_PAYLOAD(nh); + for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) { + switch (rt_attr->rta_type) { + case NDA_DST: + sprintf(dsts, "%u", + *((__be32 *)RTA_DATA(rt_attr))); + break; + case NDA_LLADDR: + sprintf(mac, "%lld", + *((__be64 *)RTA_DATA(rt_attr))); + break; + default: + break; + } + } + arp_entry.dst = atoi(dsts); + arp_entry.mac = atol(mac); + printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac); + if (ndm_family == AF_INET) { + if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst, + &direct_entry) == 0) { + if (nh->nlmsg_type == RTM_DELNEIGH) { + direct_entry.arp.dst = 0; + direct_entry.arp.mac = 0; + } else if (nh->nlmsg_type == RTM_NEWNEIGH) { + direct_entry.arp.dst = arp_entry.dst; + direct_entry.arp.mac = arp_entry.mac; + } + assert(bpf_map_update_elem(map_fd[3], + &arp_entry.dst, + &direct_entry, 0 + ) == 0); + memset(&direct_entry, 0, sizeof(direct_entry)); + } + if (nh->nlmsg_type == RTM_DELNEIGH) { + assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0); + } else if (nh->nlmsg_type == RTM_NEWNEIGH) { + assert(bpf_map_update_elem(map_fd[2], + &arp_entry.dst, + &arp_entry.mac, 0 + ) == 0); + } + } + memset(&arp_entry, 0, sizeof(arp_entry)); + memset(dsts, 0, sizeof(dsts)); + } +} + +/* Function to read the existing arp table when the process is launched*/ +static int get_arp_table(int rtm_family) +{ + struct sockaddr_nl sa; + struct nlmsghdr *nh; + int sock, seq = 0; + struct msghdr msg; + struct iovec iov; + int ret = 0; + int nll; + struct { + struct nlmsghdr nl; + struct ndmsg rt; + char buf[8192]; + } req; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + memset(&req, 0, sizeof(req)); + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nl.nlmsg_type = RTM_GETNEIGH; + req.rt.ndm_state = NUD_REACHABLE; + req.rt.ndm_family = rtm_family; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++seq; + memset(&msg, 0, sizeof(msg)); + iov.iov_base = (void *)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + ret = sendmsg(sock, &msg, 0); + if (ret < 0) { + printf("send to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + memset(buf, 0, sizeof(buf)); + nll = recv_msg(sa, sock); + if (nll < 0) { + printf("recv from netlink: %s\n", strerror(nll)); + ret = -1; + goto cleanup; + } + nh = (struct nlmsghdr *)buf; + read_arp(nh, nll); +cleanup: + close(sock); + return ret; +} + +/* Function to keep track and update changes in route and arp table + * Give regular statistics of packets forwarded + */ +static int monitor_route(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + const unsigned int nr_keys = 256; + struct pollfd fds_route, fds_arp; + __u64 prev[nr_keys][nr_cpus]; + struct sockaddr_nl la, lr; + __u64 values[nr_cpus]; + struct nlmsghdr *nh; + int nll, ret = 0; + int interval = 5; + __u32 key; + int i; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + + fcntl(sock, F_SETFL, O_NONBLOCK); + memset(&lr, 0, sizeof(lr)); + lr.nl_family = AF_NETLINK; + lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY; + if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + fds_route.fd = sock; + fds_route.events = POLL_IN; + + sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock_arp < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + + fcntl(sock_arp, F_SETFL, O_NONBLOCK); + memset(&la, 0, sizeof(la)); + la.nl_family = AF_NETLINK; + la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY; + if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + ret = -1; + goto cleanup; + } + fds_arp.fd = sock_arp; + fds_arp.events = POLL_IN; + + memset(prev, 0, sizeof(prev)); + do { + signal(SIGINT, close_and_exit); + signal(SIGTERM, close_and_exit); + + sleep(interval); + for (key = 0; key < nr_keys; key++) { + __u64 sum = 0; + + assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[key][i]); + if (sum) + printf("proto %u: %10llu pkt/s\n", + key, sum / interval); + memcpy(prev[key], values, sizeof(values)); + } + + memset(buf, 0, sizeof(buf)); + if (poll(&fds_route, 1, 3) == POLL_IN) { + nll = recv_msg(lr, sock); + if (nll < 0) { + printf("recv from netlink: %s\n", strerror(nll)); + ret = -1; + goto cleanup; + } + + nh = (struct nlmsghdr *)buf; + printf("Routing table updated.\n"); + read_route(nh, nll); + } + memset(buf, 0, sizeof(buf)); + if (poll(&fds_arp, 1, 3) == POLL_IN) { + nll = recv_msg(la, sock_arp); + if (nll < 0) { + printf("recv from netlink: %s\n", strerror(nll)); + ret = -1; + goto cleanup; + } + + nh = (struct nlmsghdr *)buf; + read_arp(nh, nll); + } + + } while (1); +cleanup: + close(sock); + return ret; +} + +int main(int ac, char **argv) +{ + char filename[256]; + char **ifname_list; + int i = 1; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (ac < 2) { + printf("usage: %s [-S] Interface name list\n", argv[0]); + return 1; + } + if (!strcmp(argv[1], "-S")) { + flags = XDP_FLAGS_SKB_MODE; + total_ifindex = ac - 2; + ifname_list = (argv + 2); + } else { + flags = 0; + total_ifindex = ac - 1; + ifname_list = (argv + 1); + } + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + printf("\n**************loading bpf file*********************\n\n\n"); + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + ifindex_list = (int *)malloc(total_ifindex * sizeof(int *)); + for (i = 0; i < total_ifindex; i++) { + ifindex_list[i] = if_nametoindex(ifname_list[i]); + if (!ifindex_list[i]) { + printf("Couldn't translate interface name: %s", + strerror(errno)); + return 1; + } + } + for (i = 0; i < total_ifindex; i++) { + if (set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) { + printf("link set xdp fd failed\n"); + int recovery_index = i; + + for (i = 0; i < recovery_index; i++) + set_link_xdp_fd(ifindex_list[i], -1, flags); + + return 1; + } + printf("Attached to %d\n", ifindex_list[i]); + } + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + printf("*******************ROUTE TABLE*************************\n\n\n"); + get_route_table(AF_INET); + printf("*******************ARP TABLE***************************\n\n\n"); + get_arp_table(AF_INET); + if (monitor_route() < 0) { + printf("Error in receiving route update"); + return 1; + } + + return 0; +} diff --git a/scripts/leaking_addresses.pl b/scripts/leaking_addresses.pl new file mode 100755 index 000000000000..2977371b2956 --- /dev/null +++ b/scripts/leaking_addresses.pl @@ -0,0 +1,305 @@ +#!/usr/bin/env perl +# +# (c) 2017 Tobin C. Harding <me@tobin.cc> +# Licensed under the terms of the GNU GPL License version 2 +# +# leaking_addresses.pl: Scan 64 bit kernel for potential leaking addresses. +# - Scans dmesg output. +# - Walks directory tree and parses each file (for each directory in @DIRS). +# +# You can configure the behaviour of the script; +# +# - By adding paths, for directories you do not want to walk; +# absolute paths: @skip_walk_dirs_abs +# directory names: @skip_walk_dirs_any +# +# - By adding paths, for files you do not want to parse; +# absolute paths: @skip_parse_files_abs +# file names: @skip_parse_files_any +# +# The use of @skip_xxx_xxx_any causes files to be skipped where ever they occur. +# For example adding 'fd' to @skip_walk_dirs_any causes the fd/ directory to be +# skipped for all PID sub-directories of /proc +# +# The same thing can be achieved by passing command line options to --dont-walk +# and --dont-parse. If absolute paths are supplied to these options they are +# appended to the @skip_xxx_xxx_abs arrays. If file names are supplied to these +# options, they are appended to the @skip_xxx_xxx_any arrays. +# +# Use --debug to output path before parsing, this is useful to find files that +# cause the script to choke. +# +# You may like to set kptr_restrict=2 before running script +# (see Documentation/sysctl/kernel.txt). + +use warnings; +use strict; +use POSIX; +use File::Basename; +use File::Spec; +use Cwd 'abs_path'; +use Term::ANSIColor qw(:constants); +use Getopt::Long qw(:config no_auto_abbrev); + +my $P = $0; +my $V = '0.01'; + +# Directories to scan. +my @DIRS = ('/proc', '/sys'); + +# Command line options. +my $help = 0; +my $debug = 0; +my @dont_walk = (); +my @dont_parse = (); + +# Do not parse these files (absolute path). +my @skip_parse_files_abs = ('/proc/kmsg', + '/proc/kcore', + '/proc/fs/ext4/sdb1/mb_groups', + '/proc/1/fd/3', + '/sys/kernel/debug/tracing/trace_pipe', + '/sys/kernel/security/apparmor/revision'); + +# Do not parse thes files under any subdirectory. +my @skip_parse_files_any = ('0', + '1', + '2', + 'pagemap', + 'events', + 'access', + 'registers', + 'snapshot_raw', + 'trace_pipe_raw', + 'ptmx', + 'trace_pipe'); + +# Do not walk these directories (absolute path). +my @skip_walk_dirs_abs = (); + +# Do not walk these directories under any subdirectory. +my @skip_walk_dirs_any = ('self', + 'thread-self', + 'cwd', + 'fd', + 'stderr', + 'stdin', + 'stdout'); + +sub help +{ + my ($exitcode) = @_; + + print << "EOM"; +Usage: $P [OPTIONS] +Version: $V + +Options: + + --dont-walk=<dir> Don't walk tree starting at <dir>. + --dont-parse=<file> Don't parse <file>. + -d, --debug Display debugging output. + -h, --help, --version Display this help and exit. + +If an absolute path is passed to --dont_XXX then this path is skipped. If a +single filename is passed then this file/directory will be skipped when +appearing under any subdirectory. + +Example: + + # Just scan dmesg output. + scripts/leaking_addresses.pl --dont_walk_abs /proc --dont_walk_abs /sys + +Scans the running (64 bit) kernel for potential leaking addresses. + +EOM + exit($exitcode); +} + +GetOptions( + 'dont-walk=s' => \@dont_walk, + 'dont-parse=s' => \@dont_parse, + 'd|debug' => \$debug, + 'h|help' => \$help, + 'version' => \$help +) or help(1); + +help(0) if ($help); + +push_to_global(); + +parse_dmesg(); +walk(@DIRS); + +exit 0; + +sub debug_arrays +{ + print 'dirs_any: ' . join(", ", @skip_walk_dirs_any) . "\n"; + print 'dirs_abs: ' . join(", ", @skip_walk_dirs_abs) . "\n"; + print 'parse_any: ' . join(", ", @skip_parse_files_any) . "\n"; + print 'parse_abs: ' . join(", ", @skip_parse_files_abs) . "\n"; +} + +sub dprint +{ + printf(STDERR @_) if $debug; +} + +sub push_in_abs_any +{ + my ($in, $abs, $any) = @_; + + foreach my $path (@$in) { + if (File::Spec->file_name_is_absolute($path)) { + push @$abs, $path; + } elsif (index($path,'/') == -1) { + push @$any, $path; + } else { + print 'path error: ' . $path; + } + } +} + +# Push command line options to global arrays. +sub push_to_global +{ + push_in_abs_any(\@dont_walk, \@skip_walk_dirs_abs, \@skip_walk_dirs_any); + push_in_abs_any(\@dont_parse, \@skip_parse_files_abs, \@skip_parse_files_any); +} + +sub is_false_positive +{ + my ($match) = @_; + + if ($match =~ '\b(0x)?(f|F){16}\b' or + $match =~ '\b(0x)?0{16}\b') { + return 1; + } + + # vsyscall memory region, we should probably check against a range here. + if ($match =~ '\bf{10}600000\b' or + $match =~ '\bf{10}601000\b') { + return 1; + } + + return 0; +} + +# True if argument potentially contains a kernel address. +sub may_leak_address +{ + my ($line) = @_; + my $address = '\b(0x)?ffff[[:xdigit:]]{12}\b'; + + # Signal masks. + if ($line =~ '^SigBlk:' or + $line =~ '^SigCgt:') { + return 0; + } + + if ($line =~ '\bKEY=[[:xdigit:]]{14} [[:xdigit:]]{16} [[:xdigit:]]{16}\b' or + $line =~ '\b[[:xdigit:]]{14} [[:xdigit:]]{16} [[:xdigit:]]{16}\b') { + return 0; + } + + while (/($address)/g) { + if (!is_false_positive($1)) { + return 1; + } + } + + return 0; +} + +sub parse_dmesg +{ + open my $cmd, '-|', 'dmesg'; + while (<$cmd>) { + if (may_leak_address($_)) { + print 'dmesg: ' . $_; + } + } + close $cmd; +} + +# True if we should skip this path. +sub skip +{ + my ($path, $paths_abs, $paths_any) = @_; + + foreach (@$paths_abs) { + return 1 if (/^$path$/); + } + + my($filename, $dirs, $suffix) = fileparse($path); + foreach (@$paths_any) { + return 1 if (/^$filename$/); + } + + return 0; +} + +sub skip_parse +{ + my ($path) = @_; + return skip($path, \@skip_parse_files_abs, \@skip_parse_files_any); +} + +sub parse_file +{ + my ($file) = @_; + + if (! -R $file) { + return; + } + + if (skip_parse($file)) { + dprint "skipping file: $file\n"; + return; + } + dprint "parsing: $file\n"; + + open my $fh, "<", $file or return; + while ( <$fh> ) { + if (may_leak_address($_)) { + print $file . ': ' . $_; + } + } + close $fh; +} + + +# True if we should skip walking this directory. +sub skip_walk +{ + my ($path) = @_; + return skip($path, \@skip_walk_dirs_abs, \@skip_walk_dirs_any) +} + +# Recursively walk directory tree. +sub walk +{ + my @dirs = @_; + my %seen; + + while (my $pwd = shift @dirs) { + next if (skip_walk($pwd)); + next if (!opendir(DIR, $pwd)); + my @files = readdir(DIR); + closedir(DIR); + + foreach my $file (@files) { + next if ($file eq '.' or $file eq '..'); + + my $path = "$pwd/$file"; + next if (-l $path); + + if (-d $path) { + push @dirs, $path; + } else { + parse_file($path); + } + } + } +} diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 66fb9ede9447..7ca0032e7ba9 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -128,7 +128,7 @@ static inline int map_signal_num(int sig) return SIGUNKNOWN; else if (sig >= SIGRTMIN) return sig - SIGRTMIN + 128; /* rt sigs mapped to 128 */ - else if (sig <= MAXMAPPED_SIG) + else if (sig < MAXMAPPED_SIG) return sig_map[sig]; return SIGUNKNOWN; } @@ -163,7 +163,7 @@ static void audit_signal_cb(struct audit_buffer *ab, void *va) audit_signal_mask(ab, aad(sa)->denied); } } - if (aad(sa)->signal <= MAXMAPPED_SIG) + if (aad(sa)->signal < MAXMAPPED_SIG) audit_log_format(ab, " signal=%s", sig_names[aad(sa)->signal]); else audit_log_format(ab, " signal=rtmin+%d", diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index 1ac0c423903e..6e47b823bcaa 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -159,6 +159,7 @@ static int __init snd_hrtimer_init(void) timer->hw = hrtimer_hw; timer->hw.resolution = resolution; timer->hw.ticks = NANO_SEC / resolution; + timer->max_instances = 100; /* lower the limit */ err = snd_timer_global_register(timer); if (err < 0) { diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index aaff9ee32695..b30b2139e3f0 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -612,9 +612,7 @@ send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, struct seq if (!dp->timer->running) len = snd_seq_oss_timer_start(dp->timer); if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { - if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) == SNDRV_SEQ_EVENT_LENGTH_VARIABLE) - snd_seq_oss_readq_puts(dp->readq, mdev->seq_device, - ev->data.ext.ptr, ev->data.ext.len); + snd_seq_oss_readq_sysex(dp->readq, mdev->seq_device, ev); } else { len = snd_midi_event_decode(mdev->coder, msg, sizeof(msg), ev); if (len > 0) diff --git a/sound/core/seq/oss/seq_oss_readq.c b/sound/core/seq/oss/seq_oss_readq.c index 046cb586fb2f..06b21226b4e7 100644 --- a/sound/core/seq/oss/seq_oss_readq.c +++ b/sound/core/seq/oss/seq_oss_readq.c @@ -118,6 +118,35 @@ snd_seq_oss_readq_puts(struct seq_oss_readq *q, int dev, unsigned char *data, in } /* + * put MIDI sysex bytes; the event buffer may be chained, thus it has + * to be expanded via snd_seq_dump_var_event(). + */ +struct readq_sysex_ctx { + struct seq_oss_readq *readq; + int dev; +}; + +static int readq_dump_sysex(void *ptr, void *buf, int count) +{ + struct readq_sysex_ctx *ctx = ptr; + + return snd_seq_oss_readq_puts(ctx->readq, ctx->dev, buf, count); +} + +int snd_seq_oss_readq_sysex(struct seq_oss_readq *q, int dev, + struct snd_seq_event *ev) +{ + struct readq_sysex_ctx ctx = { + .readq = q, + .dev = dev + }; + + if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) + return 0; + return snd_seq_dump_var_event(ev, readq_dump_sysex, &ctx); +} + +/* * copy an event to input queue: * return zero if enqueued */ diff --git a/sound/core/seq/oss/seq_oss_readq.h b/sound/core/seq/oss/seq_oss_readq.h index f1463f1f449e..8d033ca2d23f 100644 --- a/sound/core/seq/oss/seq_oss_readq.h +++ b/sound/core/seq/oss/seq_oss_readq.h @@ -44,6 +44,8 @@ void snd_seq_oss_readq_delete(struct seq_oss_readq *q); void snd_seq_oss_readq_clear(struct seq_oss_readq *readq); unsigned int snd_seq_oss_readq_poll(struct seq_oss_readq *readq, struct file *file, poll_table *wait); int snd_seq_oss_readq_puts(struct seq_oss_readq *readq, int dev, unsigned char *data, int len); +int snd_seq_oss_readq_sysex(struct seq_oss_readq *q, int dev, + struct snd_seq_event *ev); int snd_seq_oss_readq_put_event(struct seq_oss_readq *readq, union evrec *ev); int snd_seq_oss_readq_put_timestamp(struct seq_oss_readq *readq, unsigned long curt, int seq_mode); int snd_seq_oss_readq_pick(struct seq_oss_readq *q, union evrec *rec); diff --git a/sound/core/timer.c b/sound/core/timer.c index 6cdd04a45962..15e82a656d96 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -180,7 +180,7 @@ static void snd_timer_request(struct snd_timer_id *tid) * * call this with register_mutex down. */ -static void snd_timer_check_slave(struct snd_timer_instance *slave) +static int snd_timer_check_slave(struct snd_timer_instance *slave) { struct snd_timer *timer; struct snd_timer_instance *master; @@ -190,16 +190,21 @@ static void snd_timer_check_slave(struct snd_timer_instance *slave) list_for_each_entry(master, &timer->open_list_head, open_list) { if (slave->slave_class == master->slave_class && slave->slave_id == master->slave_id) { + if (master->timer->num_instances >= + master->timer->max_instances) + return -EBUSY; list_move_tail(&slave->open_list, &master->slave_list_head); + master->timer->num_instances++; spin_lock_irq(&slave_active_lock); slave->master = master; slave->timer = master->timer; spin_unlock_irq(&slave_active_lock); - return; + return 0; } } } + return 0; } /* @@ -208,7 +213,7 @@ static void snd_timer_check_slave(struct snd_timer_instance *slave) * * call this with register_mutex down. */ -static void snd_timer_check_master(struct snd_timer_instance *master) +static int snd_timer_check_master(struct snd_timer_instance *master) { struct snd_timer_instance *slave, *tmp; @@ -216,7 +221,11 @@ static void snd_timer_check_master(struct snd_timer_instance *master) list_for_each_entry_safe(slave, tmp, &snd_timer_slave_list, open_list) { if (slave->slave_class == master->slave_class && slave->slave_id == master->slave_id) { + if (master->timer->num_instances >= + master->timer->max_instances) + return -EBUSY; list_move_tail(&slave->open_list, &master->slave_list_head); + master->timer->num_instances++; spin_lock_irq(&slave_active_lock); spin_lock(&master->timer->lock); slave->master = master; @@ -228,8 +237,11 @@ static void snd_timer_check_master(struct snd_timer_instance *master) spin_unlock_irq(&slave_active_lock); } } + return 0; } +static int snd_timer_close_locked(struct snd_timer_instance *timeri); + /* * open a timer instance * when opening a master, the slave id must be here given. @@ -240,6 +252,7 @@ int snd_timer_open(struct snd_timer_instance **ti, { struct snd_timer *timer; struct snd_timer_instance *timeri = NULL; + int err; if (tid->dev_class == SNDRV_TIMER_CLASS_SLAVE) { /* open a slave instance */ @@ -259,10 +272,14 @@ int snd_timer_open(struct snd_timer_instance **ti, timeri->slave_id = tid->device; timeri->flags |= SNDRV_TIMER_IFLG_SLAVE; list_add_tail(&timeri->open_list, &snd_timer_slave_list); - snd_timer_check_slave(timeri); + err = snd_timer_check_slave(timeri); + if (err < 0) { + snd_timer_close_locked(timeri); + timeri = NULL; + } mutex_unlock(®ister_mutex); *ti = timeri; - return 0; + return err; } /* open a master instance */ @@ -288,6 +305,10 @@ int snd_timer_open(struct snd_timer_instance **ti, return -EBUSY; } } + if (timer->num_instances >= timer->max_instances) { + mutex_unlock(®ister_mutex); + return -EBUSY; + } timeri = snd_timer_instance_new(owner, timer); if (!timeri) { mutex_unlock(®ister_mutex); @@ -314,25 +335,27 @@ int snd_timer_open(struct snd_timer_instance **ti, } list_add_tail(&timeri->open_list, &timer->open_list_head); - snd_timer_check_master(timeri); + timer->num_instances++; + err = snd_timer_check_master(timeri); + if (err < 0) { + snd_timer_close_locked(timeri); + timeri = NULL; + } mutex_unlock(®ister_mutex); *ti = timeri; - return 0; + return err; } EXPORT_SYMBOL(snd_timer_open); /* * close a timer instance + * call this with register_mutex down. */ -int snd_timer_close(struct snd_timer_instance *timeri) +static int snd_timer_close_locked(struct snd_timer_instance *timeri) { struct snd_timer *timer = NULL; struct snd_timer_instance *slave, *tmp; - if (snd_BUG_ON(!timeri)) - return -ENXIO; - - mutex_lock(®ister_mutex); list_del(&timeri->open_list); /* force to stop the timer */ @@ -340,6 +363,7 @@ int snd_timer_close(struct snd_timer_instance *timeri) timer = timeri->timer; if (timer) { + timer->num_instances--; /* wait, until the active callback is finished */ spin_lock_irq(&timer->lock); while (timeri->flags & SNDRV_TIMER_IFLG_CALLBACK) { @@ -355,6 +379,7 @@ int snd_timer_close(struct snd_timer_instance *timeri) list_for_each_entry_safe(slave, tmp, &timeri->slave_list_head, open_list) { list_move_tail(&slave->open_list, &snd_timer_slave_list); + timer->num_instances--; slave->master = NULL; slave->timer = NULL; list_del_init(&slave->ack_list); @@ -382,9 +407,24 @@ int snd_timer_close(struct snd_timer_instance *timeri) module_put(timer->module); } - mutex_unlock(®ister_mutex); return 0; } + +/* + * close a timer instance + */ +int snd_timer_close(struct snd_timer_instance *timeri) +{ + int err; + + if (snd_BUG_ON(!timeri)) + return -ENXIO; + + mutex_lock(®ister_mutex); + err = snd_timer_close_locked(timeri); + mutex_unlock(®ister_mutex); + return err; +} EXPORT_SYMBOL(snd_timer_close); unsigned long snd_timer_resolution(struct snd_timer_instance *timeri) @@ -856,6 +896,7 @@ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid, spin_lock_init(&timer->lock); tasklet_init(&timer->task_queue, snd_timer_tasklet, (unsigned long)timer); + timer->max_instances = 1000; /* default limit per timer */ if (card != NULL) { timer->module = card->module; err = snd_device_new(card, SNDRV_DEV_TIMER, timer, &ops); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 546d515f3c1f..dce0682c5001 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6544,6 +6544,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x14, 0x90170110}, {0x1b, 0x90a70130}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0xb7a60130}, + {0x13, 0xb8a61140}, + {0x16, 0x90170110}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4, {0x12, 0x90a60130}, {0x14, 0x90170110}, diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 4f5f18f22974..20624320b753 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1375,6 +1375,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x199: return SNDRV_PCM_FMTBIT_DSD_U32_LE; case 0x19b: + case 0x203: return SNDRV_PCM_FMTBIT_DSD_U32_BE; default: break; diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index c1a6d5d0da0d..c10c9128f54e 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -1,4 +1,3 @@ -/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_DISABLED_FEATURES_H #define _ASM_X86_DISABLED_FEATURES_H diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 59ac6baafb6a..d91ba04dd007 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -1,4 +1,3 @@ -/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_REQUIRED_FEATURES_H #define _ASM_X86_REQUIRED_FEATURES_H diff --git a/tools/arch/x86/include/uapi/asm/unistd.h b/tools/arch/x86/include/uapi/asm/unistd.h index a26df0d75cd0..30d7d04d72d6 100644 --- a/tools/arch/x86/include/uapi/asm/unistd.h +++ b/tools/arch/x86/include/uapi/asm/unistd.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_ASM_X86_UNISTD_H #define _UAPI_ASM_X86_UNISTD_H diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index ecf2c2067281..9a53a06e5a3e 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -1,10 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2002 Andi Kleen */ #include <linux/linkage.h> #include <asm/errno.h> #include <asm/cpufeatures.h> #include <asm/alternative-asm.h> +#include <asm/export.h> /* * We build a jump to memcpy_orig by default which gets NOPped out on @@ -41,6 +41,8 @@ ENTRY(memcpy) ret ENDPROC(memcpy) ENDPROC(__memcpy) +EXPORT_SYMBOL(memcpy) +EXPORT_SYMBOL(__memcpy) /* * memcpy_erms() - enhanced fast string memcpy. This is faster and @@ -275,6 +277,7 @@ ENTRY(memcpy_mcsafe_unrolled) xorq %rax, %rax ret ENDPROC(memcpy_mcsafe_unrolled) +EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) .section .fixup, "ax" /* Return -EFAULT for any failure */ diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index abb9ee940b15..9f51a268eb06 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **map** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** @@ -86,6 +86,9 @@ OPTIONS -p, --pretty Generate human-readable JSON output. Implies **-j**. + -f, --bpffs + Show file names of pinned maps. + EXAMPLES ======== **# bpftool map show** diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 0f25d3c39e05..36e8d1c3c40d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **prog** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := { **show** | **dump xlated** | **dump jited** | **pin** | **help** } @@ -75,6 +75,9 @@ OPTIONS -p, --pretty Generate human-readable JSON output. Implies **-j**. + -f, --bpffs + Show file names of pinned programs. + EXAMPLES ======== **# bpftool prog show** diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index aa7017098b2a..2bd3b280e6dd 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -34,7 +34,9 @@ /* Author: Jakub Kicinski <kubakici@wp.pl> */ #include <errno.h> +#include <fts.h> #include <libgen.h> +#include <mntent.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -122,9 +124,8 @@ static int mnt_bpffs(const char *target, char *buff, size_t bufflen) return 0; } -int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) +int open_obj_pinned(char *path) { - enum bpf_obj_type type; int fd; fd = bpf_obj_get(path); @@ -136,6 +137,18 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) return -1; } + return fd; +} + +int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) +{ + enum bpf_obj_type type; + int fd; + + fd = open_obj_pinned(path); + if (fd < 0) + return -1; + type = get_fd_type(fd); if (type < 0) { close(fd); @@ -310,3 +323,83 @@ void print_hex_data_json(uint8_t *data, size_t len) jsonw_printf(json_wtr, "\"0x%02hhx\"", data[i]); jsonw_end_array(json_wtr); } + +int build_pinned_obj_table(struct pinned_obj_table *tab, + enum bpf_obj_type type) +{ + struct bpf_prog_info pinned_info = {}; + struct pinned_obj *obj_node = NULL; + __u32 len = sizeof(pinned_info); + struct mntent *mntent = NULL; + enum bpf_obj_type objtype; + FILE *mntfile = NULL; + FTSENT *ftse = NULL; + FTS *fts = NULL; + int fd, err; + + mntfile = setmntent("/proc/mounts", "r"); + if (!mntfile) + return -1; + + while ((mntent = getmntent(mntfile))) { + char *path[] = { mntent->mnt_dir, NULL }; + + if (strncmp(mntent->mnt_type, "bpf", 3) != 0) + continue; + + fts = fts_open(path, 0, NULL); + if (!fts) + continue; + + while ((ftse = fts_read(fts))) { + if (!(ftse->fts_info & FTS_F)) + continue; + fd = open_obj_pinned(ftse->fts_path); + if (fd < 0) + continue; + + objtype = get_fd_type(fd); + if (objtype != type) { + close(fd); + continue; + } + memset(&pinned_info, 0, sizeof(pinned_info)); + err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len); + if (err) { + close(fd); + continue; + } + + obj_node = malloc(sizeof(*obj_node)); + if (!obj_node) { + close(fd); + fts_close(fts); + fclose(mntfile); + return -1; + } + + memset(obj_node, 0, sizeof(*obj_node)); + obj_node->id = pinned_info.id; + obj_node->path = strdup(ftse->fts_path); + hash_add(tab->table, &obj_node->hash, obj_node->id); + + close(fd); + } + fts_close(fts); + } + fclose(mntfile); + return 0; +} + +void delete_pinned_obj_table(struct pinned_obj_table *tab) +{ + struct pinned_obj *obj; + struct hlist_node *tmp; + unsigned int bkt; + + hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { + hash_del(&obj->hash); + free(obj->path); + free(obj); + } +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 78d9afb74ef4..d6e4762170a4 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -54,6 +54,9 @@ static int (*last_do_help)(int argc, char **argv); json_writer_t *json_wtr; bool pretty_output; bool json_output; +bool show_pinned; +struct pinned_obj_table prog_table; +struct pinned_obj_table map_table; void usage(void) { @@ -263,6 +266,7 @@ int main(int argc, char **argv) { "help", no_argument, NULL, 'h' }, { "pretty", no_argument, NULL, 'p' }, { "version", no_argument, NULL, 'V' }, + { "bpffs", no_argument, NULL, 'f' }, { 0 } }; int opt, ret; @@ -270,9 +274,13 @@ int main(int argc, char **argv) last_do_help = do_help; pretty_output = false; json_output = false; + show_pinned = false; bin_name = argv[0]; - while ((opt = getopt_long(argc, argv, "Vhpj", + hash_init(prog_table.table); + hash_init(map_table.table); + + while ((opt = getopt_long(argc, argv, "Vhpjf", options, NULL)) >= 0) { switch (opt) { case 'V': @@ -285,6 +293,9 @@ int main(int argc, char **argv) case 'j': json_output = true; break; + case 'f': + show_pinned = true; + break; default: usage(); } @@ -311,5 +322,10 @@ int main(int argc, char **argv) if (json_output) jsonw_destroy(&json_wtr); + if (show_pinned) { + delete_pinned_obj_table(&prog_table); + delete_pinned_obj_table(&map_table); + } + return ret; } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index ff5ad05b137b..9c191e222d6f 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -42,6 +42,7 @@ #include <stdio.h> #include <linux/bpf.h> #include <linux/kernel.h> +#include <linux/hashtable.h> #include "json_writer.h" @@ -58,7 +59,7 @@ #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] }" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} }" enum bpf_obj_type { BPF_OBJ_UNKNOWN, @@ -70,6 +71,9 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; +extern bool show_pinned; +extern struct pinned_obj_table prog_table; +extern struct pinned_obj_table map_table; void p_err(const char *fmt, ...); void p_info(const char *fmt, ...); @@ -78,6 +82,20 @@ bool is_prefix(const char *pfx, const char *str); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __attribute__((noreturn)); +struct pinned_obj_table { + DECLARE_HASHTABLE(table, 16); +}; + +struct pinned_obj { + __u32 id; + char *path; + struct hlist_node hash; +}; + +int build_pinned_obj_table(struct pinned_obj_table *table, + enum bpf_obj_type type); +void delete_pinned_obj_table(struct pinned_obj_table *tab); + struct cmd { const char *cmd; int (*func)(int argc, char **argv); @@ -89,6 +107,7 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv, int get_fd_type(int fd); const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); +int open_obj_pinned(char *path); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e978ab23a77f..e2450c8e88e6 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -436,6 +436,18 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_int_field(json_wtr, "bytes_memlock", atoi(memlock)); free(memlock); + if (!hash_empty(map_table.table)) { + struct pinned_obj *obj; + + jsonw_name(json_wtr, "pinned"); + jsonw_start_array(json_wtr); + hash_for_each_possible(map_table.table, obj, hash, info->id) { + if (obj->id == info->id) + jsonw_string(json_wtr, obj->path); + } + jsonw_end_array(json_wtr); + } + jsonw_end_object(json_wtr); return 0; @@ -466,7 +478,14 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) free(memlock); printf("\n"); + if (!hash_empty(map_table.table)) { + struct pinned_obj *obj; + hash_for_each_possible(map_table.table, obj, hash, info->id) { + if (obj->id == info->id) + printf("\tpinned %s\n", obj->path); + } + } return 0; } @@ -478,6 +497,9 @@ static int do_show(int argc, char **argv) int err; int fd; + if (show_pinned) + build_pinned_obj_table(&map_table, BPF_OBJ_MAP); + if (argc == 2) { fd = map_parse_fd_and_info(&argc, &argv, &info, &len); if (fd < 0) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index d3ab808dc882..f45c44ef9bec 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -272,6 +272,18 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); + if (!hash_empty(prog_table.table)) { + struct pinned_obj *obj; + + jsonw_name(json_wtr, "pinned"); + jsonw_start_array(json_wtr); + hash_for_each_possible(prog_table.table, obj, hash, info->id) { + if (obj->id == info->id) + jsonw_string(json_wtr, obj->path); + } + jsonw_end_array(json_wtr); + } + jsonw_end_object(json_wtr); } @@ -331,6 +343,16 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); + if (!hash_empty(prog_table.table)) { + struct pinned_obj *obj; + + printf("\n"); + hash_for_each_possible(prog_table.table, obj, hash, info->id) { + if (obj->id == info->id) + printf("\tpinned %s\n", obj->path); + } + } + printf("\n"); } @@ -360,6 +382,9 @@ static int do_show(int argc, char **argv) int err; int fd; + if (show_pinned) + build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); + if (argc == 2) { fd = prog_parse_fd(&argc, &argv); if (fd < 0) diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h index a60a7ccb6782..03f721a8a2b1 100644 --- a/tools/include/asm-generic/bitops/__fls.h +++ b/tools/include/asm-generic/bitops/__fls.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_BITOPS___FLS_H_ #define _ASM_GENERIC_BITOPS___FLS_H_ diff --git a/tools/include/asm-generic/bitops/arch_hweight.h b/tools/include/asm-generic/bitops/arch_hweight.h index 6a211f40665c..c2705e1d220d 100644 --- a/tools/include/asm-generic/bitops/arch_hweight.h +++ b/tools/include/asm-generic/bitops/arch_hweight.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ #define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ diff --git a/tools/include/asm-generic/bitops/const_hweight.h b/tools/include/asm-generic/bitops/const_hweight.h index 0a7e06623470..149faeeeeaf2 100644 --- a/tools/include/asm-generic/bitops/const_hweight.h +++ b/tools/include/asm-generic/bitops/const_hweight.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ #define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h index 0576d1f42f43..753aecaab641 100644 --- a/tools/include/asm-generic/bitops/fls.h +++ b/tools/include/asm-generic/bitops/fls.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_BITOPS_FLS_H_ #define _ASM_GENERIC_BITOPS_FLS_H_ diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h index b097cf8444e3..866f2b2304ff 100644 --- a/tools/include/asm-generic/bitops/fls64.h +++ b/tools/include/asm-generic/bitops/fls64.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_BITOPS_FLS64_H_ #define _ASM_GENERIC_BITOPS_FLS64_H_ diff --git a/tools/include/asm/export.h b/tools/include/asm/export.h new file mode 100644 index 000000000000..2cb1a0d83035 --- /dev/null +++ b/tools/include/asm/export.h @@ -0,0 +1,7 @@ +#ifndef _TOOLS_ASM_EXPORT_H +#define _TOOLS_ASM_EXPORT_H + +#define EXPORT_SYMBOL(x) +#define EXPORT_SYMBOL_GPL(x) + +#endif /* _TOOLS_ASM_EXPORT_H */ diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h index 2c4183bbc504..ad6fa21d977b 100644 --- a/tools/include/linux/hash.h +++ b/tools/include/linux/hash.h @@ -1,4 +1,3 @@ -/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HASH_H #define _LINUX_HASH_H /* Fast hashing routine for ints, longs and pointers. diff --git a/tools/include/uapi/asm-generic/ioctls.h b/tools/include/uapi/asm-generic/ioctls.h index 14baf9f23a14..040651735662 100644 --- a/tools/include/uapi/asm-generic/ioctls.h +++ b/tools/include/uapi/asm-generic/ioctls.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef __ASM_GENERIC_IOCTLS_H #define __ASM_GENERIC_IOCTLS_H diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 203268f9231e..6d319c46fd90 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef __ASM_GENERIC_MMAN_COMMON_H #define __ASM_GENERIC_MMAN_COMMON_H diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h index f7c7b4355e56..2dffcbf705b3 100644 --- a/tools/include/uapi/asm-generic/mman.h +++ b/tools/include/uapi/asm-generic/mman.h @@ -1,8 +1,8 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef __ASM_GENERIC_MMAN_H #define __ASM_GENERIC_MMAN_H -#include <uapi/asm-generic/mman-common.h> +#include <asm-generic/mman-common.h> #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b280f37cd057..e880ae6434ee 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * * This program is free software; you can redistribute it and/or diff --git a/tools/include/uapi/linux/bpf_common.h b/tools/include/uapi/linux/bpf_common.h index 64ba734aba80..18be90725ab0 100644 --- a/tools/include/uapi/linux/bpf_common.h +++ b/tools/include/uapi/linux/bpf_common.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI__LINUX_BPF_COMMON_H__ #define _UAPI__LINUX_BPF_COMMON_H__ diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index b6705247afe8..6448cdd9a350 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_FCNTL_H #define _UAPI_LINUX_FCNTL_H diff --git a/tools/include/uapi/linux/hw_breakpoint.h b/tools/include/uapi/linux/hw_breakpoint.h index 6394ea9d5524..965e4d8606d8 100644 --- a/tools/include/uapi/linux/hw_breakpoint.h +++ b/tools/include/uapi/linux/hw_breakpoint.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_HW_BREAKPOINT_H #define _UAPI_LINUX_HW_BREAKPOINT_H diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 838887587411..7e99999d6236 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef __LINUX_KVM_H #define __LINUX_KVM_H diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index 64d2b4e556e5..bfd5938fede6 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_MMAN_H #define _UAPI_LINUX_MMAN_H diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 140ae638cfd6..362493a2f950 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * Performance events: * diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h index e2a6c7b3510b..30a9e51bbb1e 100644 --- a/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_SCHED_H #define _UAPI_LINUX_SCHED_H diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 9eac599afd91..7b35e98d3c58 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_STAT_H #define _UAPI_LINUX_STAT_H diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 60180c0b5dc6..c51f8e5cc608 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _LINUX_VHOST_H #define _LINUX_VHOST_H /* Userspace interface for in-kernel virtio accelerators. */ diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 87bf30b182df..c227ccba60ae 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Advanced Linux Sound Architecture - ALSA - Driver * Copyright (c) 1994-2003 by Jaroslav Kysela <perex@perex.cz>, diff --git a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk index a3d2c62fd805..b02a36b2c14f 100644 --- a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk +++ b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk @@ -1,4 +1,5 @@ #!/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 # gen-insn-attr-x86.awk: Instruction attribute table generator # Written by Masami Hiramatsu <mhiramat@redhat.com> # diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4397a8b6e6cd..aa66791b1bfc 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -606,9 +606,19 @@ static struct arch *arch__find(const char *name) int symbol__alloc_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); - const size_t size = symbol__size(sym); + size_t size = symbol__size(sym); size_t sizeof_sym_hist; + /* + * Add buffer of one element for zero length symbol. + * When sample is taken from first instruction of + * zero length symbol, perf still resolves it and + * shows symbol name in perf report and allows to + * annotate it. + */ + if (size == 0) + size = 1; + /* Check for overflow when calculating sizeof_sym_hist */ if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry)) return -1; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index dcfdafdc2f1c..6680e4fb7967 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -154,6 +154,10 @@ do { \ yycolumn += yyleng; \ } while (0); +#define USER_REJECT \ + yycolumn -= yyleng; \ + REJECT + %} %x mem @@ -335,8 +339,8 @@ r{num_raw_hex} { return raw(yyscanner); } {num_hex} { return value(yyscanner, 16); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } -{bpf_object} { if (!isbpf(yyscanner)) REJECT; return str(yyscanner, PE_BPF_OBJECT); } -{bpf_source} { if (!isbpf(yyscanner)) REJECT; return str(yyscanner, PE_BPF_SOURCE); } +{bpf_object} { if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_OBJECT); } +{bpf_source} { if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_SOURCE); } {name} { return pmu_str_check(yyscanner); } "/" { BEGIN(config); return '/'; } - { return '-'; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index b9f68e4add71..95cba0799828 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1326,21 +1326,12 @@ static void teardown_hyp_mode(void) { int cpu; - if (is_kernel_in_hyp_mode()) - return; - free_hyp_pgds(); for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); hyp_cpu_pm_exit(); } -static int init_vhe_mode(void) -{ - kvm_info("VHE mode initialized successfully\n"); - return 0; -} - /** * Inits Hyp-mode on all online CPUs */ @@ -1421,8 +1412,6 @@ static int init_hyp_mode(void) } } - kvm_info("Hyp mode initialized successfully\n"); - return 0; out_err: @@ -1456,6 +1445,7 @@ int kvm_arch_init(void *opaque) { int err; int ret, cpu; + bool in_hyp_mode; if (!is_hyp_mode_available()) { kvm_err("HYP mode not available\n"); @@ -1474,21 +1464,28 @@ int kvm_arch_init(void *opaque) if (err) return err; - if (is_kernel_in_hyp_mode()) - err = init_vhe_mode(); - else + in_hyp_mode = is_kernel_in_hyp_mode(); + + if (!in_hyp_mode) { err = init_hyp_mode(); - if (err) - goto out_err; + if (err) + goto out_err; + } err = init_subsystems(); if (err) goto out_hyp; + if (in_hyp_mode) + kvm_info("VHE mode initialized successfully\n"); + else + kvm_info("Hyp mode initialized successfully\n"); + return 0; out_hyp: - teardown_hyp_mode(); + if (!in_hyp_mode) + teardown_hyp_mode(); out_err: teardown_common_resources(); return err; diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index f51c1e1b3f70..547f12dc4d54 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1466,6 +1466,16 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, { mutex_lock(&its->cmd_lock); + /* + * It is UNPREDICTABLE to enable the ITS if any of the CBASER or + * device/collection BASER are invalid + */ + if (!its->enabled && (val & GITS_CTLR_ENABLE) && + (!(its->baser_device_table & GITS_BASER_VALID) || + !(its->baser_coll_table & GITS_BASER_VALID) || + !(its->cbaser & GITS_CBASER_VALID))) + goto out; + its->enabled = !!(val & GITS_CTLR_ENABLE); /* @@ -1474,6 +1484,7 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, */ vgic_its_process_commands(kvm, its); +out: mutex_unlock(&its->cmd_lock); } @@ -1801,37 +1812,33 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz, int start_id, entry_fn_t fn, void *opaque) { - void *entry = kzalloc(esz, GFP_KERNEL); struct kvm *kvm = its->dev->kvm; unsigned long len = size; int id = start_id; gpa_t gpa = base; + char entry[esz]; int ret; + memset(entry, 0, esz); + while (len > 0) { int next_offset; size_t byte_offset; ret = kvm_read_guest(kvm, gpa, entry, esz); if (ret) - goto out; + return ret; next_offset = fn(its, id, entry, opaque); - if (next_offset <= 0) { - ret = next_offset; - goto out; - } + if (next_offset <= 0) + return next_offset; byte_offset = next_offset * esz; id += next_offset; gpa += byte_offset; len -= byte_offset; } - ret = 1; - -out: - kfree(entry); - return ret; + return 1; } /** @@ -1940,6 +1947,14 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) return 0; } +/** + * vgic_its_restore_itt - restore the ITT of a device + * + * @its: its handle + * @dev: device handle + * + * Return 0 on success, < 0 on error + */ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); @@ -1951,6 +1966,10 @@ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) ret = scan_its_table(its, base, max_size, ite_esz, 0, vgic_its_restore_ite, dev); + /* scan_its_table returns +1 if all ITEs are invalid */ + if (ret > 0) + ret = 0; + return ret; } @@ -2048,11 +2067,12 @@ static int vgic_its_device_cmp(void *priv, struct list_head *a, static int vgic_its_save_device_tables(struct vgic_its *its) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_device_table; struct its_device *dev; int dte_esz = abi->dte_esz; - u64 baser; - baser = its->baser_device_table; + if (!(baser & GITS_BASER_VALID)) + return 0; list_sort(NULL, &its->device_list, vgic_its_device_cmp); @@ -2107,10 +2127,7 @@ static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr, ret = scan_its_table(its, gpa, SZ_64K, dte_esz, l2_start_id, vgic_its_restore_dte, NULL); - if (ret <= 0) - return ret; - - return 1; + return ret; } /** @@ -2140,8 +2157,9 @@ static int vgic_its_restore_device_tables(struct vgic_its *its) vgic_its_restore_dte, NULL); } + /* scan_its_table returns +1 if all entries are invalid */ if (ret > 0) - ret = -EINVAL; + ret = 0; return ret; } @@ -2198,17 +2216,17 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) static int vgic_its_save_collection_table(struct vgic_its *its) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_coll_table; + gpa_t gpa = BASER_ADDRESS(baser); struct its_collection *collection; u64 val; - gpa_t gpa; size_t max_size, filled = 0; int ret, cte_esz = abi->cte_esz; - gpa = BASER_ADDRESS(its->baser_coll_table); - if (!gpa) + if (!(baser & GITS_BASER_VALID)) return 0; - max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K; + max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; list_for_each_entry(collection, &its->collection_list, coll_list) { ret = vgic_its_save_cte(its, collection, gpa, cte_esz); @@ -2239,17 +2257,18 @@ static int vgic_its_save_collection_table(struct vgic_its *its) static int vgic_its_restore_collection_table(struct vgic_its *its) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_coll_table; int cte_esz = abi->cte_esz; size_t max_size, read = 0; gpa_t gpa; int ret; - if (!(its->baser_coll_table & GITS_BASER_VALID)) + if (!(baser & GITS_BASER_VALID)) return 0; - gpa = BASER_ADDRESS(its->baser_coll_table); + gpa = BASER_ADDRESS(baser); - max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K; + max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; while (read < max_size) { ret = vgic_its_restore_cte(its, gpa, cte_esz); @@ -2258,6 +2277,10 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) gpa += cte_esz; read += cte_esz; } + + if (ret > 0) + return 0; + return ret; } |