diff options
932 files changed, 16234 insertions, 6803 deletions
diff --git a/Documentation/ABI/stable/sysfs-driver-mlxreg-io b/Documentation/ABI/stable/sysfs-driver-mlxreg-io index 9b642669cb16..169fe08a649b 100644 --- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io +++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io @@ -24,7 +24,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/ cpld3_version Date: November 2018 -KernelVersion: 4.21 +KernelVersion: 5.0 Contact: Vadim Pasternak <vadimpmellanox.com> Description: These files show with which CPLD versions have been burned on LED board. @@ -35,7 +35,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/ jtag_enable Date: November 2018 -KernelVersion: 4.21 +KernelVersion: 5.0 Contact: Vadim Pasternak <vadimpmellanox.com> Description: These files enable and disable the access to the JTAG domain. By default access to the JTAG domain is disabled. @@ -105,7 +105,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/ reset_voltmon_upgrade_fail Date: November 2018 -KernelVersion: 4.21 +KernelVersion: 5.0 Contact: Vadim Pasternak <vadimpmellanox.com> Description: These files show the system reset cause, as following: ComEx power fail, reset from ComEx, system platform reset, reset diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7afb2fedde0a..bcf2cd519d1e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1701,12 +1701,11 @@ By default, super page will be supported if Intel IOMMU has the capability. With this option, super page will not be supported. - sm_off [Default Off] - By default, scalable mode will be supported if the + sm_on [Default Off] + By default, scalable mode will be disabled even if the hardware advertises that it has support for the scalable mode translation. With this option set, scalable mode - will not be used even on hardware which claims to support - it. + will be used on hardware which claims to support it. tboot_noforce [Default Off] Do not force the Intel IOMMU enabled under tboot. By default, tboot will force Intel IOMMU on, which diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile index 6e5cef0ed6fb..50daa0b3b032 100644 --- a/Documentation/devicetree/bindings/Makefile +++ b/Documentation/devicetree/bindings/Makefile @@ -17,7 +17,11 @@ extra-y += $(DT_TMP_SCHEMA) quiet_cmd_mk_schema = SCHEMA $@ cmd_mk_schema = $(DT_MK_SCHEMA) $(DT_MK_SCHEMA_FLAGS) -o $@ $(filter-out FORCE, $^) -DT_DOCS = $(shell cd $(srctree)/$(src) && find * -name '*.yaml') +DT_DOCS = $(shell \ + cd $(srctree)/$(src) && \ + find * \( -name '*.yaml' ! -name $(DT_TMP_SCHEMA) \) \ + ) + DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS)) extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES)) diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index 3e17ac1d5d58..174f292d8a3e 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -3,8 +3,8 @@ Required properties: - compatible: Should be "cdns,[<chip>-]{macb|gem}" Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC. - Use "cdns,at91sam9260-macb" for Atmel at91sam9 SoCs or the 10/100Mbit IP - available on sama5d3 SoCs. + Use "cdns,at91sam9260-macb" for Atmel at91sam9 SoCs. + Use "cdns,sam9x60-macb" for Microchip sam9x60 SoC. Use "cdns,np4-macb" for NP4 SoC devices. Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb". Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on diff --git a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt index 8e7f8551d190..454c937076a2 100644 --- a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt +++ b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt @@ -19,6 +19,9 @@ Clock Properties: - fsl,max-adj Maximum frequency adjustment in parts per billion. - fsl,extts-fifo The presence of this property indicates hardware support for the external trigger stamp FIFO. + - little-endian The presence of this property indicates the 1588 timer + IP block is little-endian mode. The default endian mode + is big-endian. These properties set the operational parameters for the PTP clock. You must choose these carefully for the clock to work right. diff --git a/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt b/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt index 36603419d6f8..0e72183f52bc 100644 --- a/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt +++ b/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt @@ -4,14 +4,10 @@ Required properties: - compatible : "olpc,ap-sp" - reg : base address and length of SoC's WTM registers - interrupts : SP-AP interrupt -- clocks : phandle + clock-specifier for the clock that drives the WTM -- clock-names: should be "sp" Example: ap-sp@d4290000 { compatible = "olpc,ap-sp"; reg = <0xd4290000 0x1000>; interrupts = <40>; - clocks = <&soc_clocks MMP2_CLK_SP>; - clock-names = "sp"; } diff --git a/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst b/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst index a188466b6698..5045df990a4c 100644 --- a/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst +++ b/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst @@ -27,11 +27,12 @@ Driver Overview The DPIO driver is bound to DPIO objects discovered on the fsl-mc bus and provides services that: - A) allow other drivers, such as the Ethernet driver, to enqueue and dequeue + + A. allow other drivers, such as the Ethernet driver, to enqueue and dequeue frames for their respective objects - B) allow drivers to register callbacks for data availability notifications + B. allow drivers to register callbacks for data availability notifications when data becomes available on a queue or channel - C) allow drivers to manage hardware buffer pools + C. allow drivers to manage hardware buffer pools The Linux DPIO driver consists of 3 primary components-- DPIO object driver-- fsl-mc driver that manages the DPIO object @@ -140,11 +141,10 @@ QBman portal interface (qbman-portal.c) The qbman-portal component provides APIs to do the low level hardware bit twiddling for operations such as: - -initializing Qman software portals - - -building and sending portal commands - -portal interrupt configuration and processing + - initializing Qman software portals + - building and sending portal commands + - portal interrupt configuration and processing The qbman-portal APIs are not public to other drivers, and are only used by dpio-service. diff --git a/Documentation/networking/devlink-info-versions.rst b/Documentation/networking/devlink-info-versions.rst index 7d4ecf6b6f34..c79ad8593383 100644 --- a/Documentation/networking/devlink-info-versions.rst +++ b/Documentation/networking/devlink-info-versions.rst @@ -14,6 +14,11 @@ board.rev Board design revision. +board.manufacture +================= + +An identifier of the company or the facility which produced the part. + fw.mgmt ======= diff --git a/Documentation/networking/devlink-params-mlxsw.txt b/Documentation/networking/devlink-params-mlxsw.txt index 2c5c67a920c9..c63ea9fc7009 100644 --- a/Documentation/networking/devlink-params-mlxsw.txt +++ b/Documentation/networking/devlink-params-mlxsw.txt @@ -1,2 +1,10 @@ fw_load_policy [DEVICE, GENERIC] Configuration mode: driverinit + +acl_region_rehash_interval [DEVICE, DRIVER-SPECIFIC] + Sets an interval for periodic ACL region rehashes. + The value is in milliseconds, minimal value is "3000". + Value "0" disables the periodic work. + The first rehash will be run right after value is set. + Type: u32 + Configuration mode: runtime diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt index 355c6d8ef8ad..b203d1334822 100644 --- a/Documentation/networking/operstates.txt +++ b/Documentation/networking/operstates.txt @@ -22,8 +22,9 @@ and changeable from userspace under certain rules. 2. Querying from userspace Both admin and operational state can be queried via the netlink -operation RTM_GETLINK. It is also possible to subscribe to RTMGRP_LINK -to be notified of updates. This is important for setting from userspace. +operation RTM_GETLINK. It is also possible to subscribe to RTNLGRP_LINK +to be notified of updates while the interface is admin up. This is +important for setting from userspace. These values contain interface state: @@ -101,8 +102,9 @@ because some driver controlled protocol establishment has to complete. Corresponding functions are netif_dormant_on() to set the flag, netif_dormant_off() to clear it and netif_dormant() to query. -On device allocation, networking core sets the flags equivalent to -netif_carrier_ok() and !netif_dormant(). +On device allocation, both flags __LINK_STATE_NOCARRIER and +__LINK_STATE_DORMANT are cleared, so the effective state is equivalent +to netif_carrier_ok() and !netif_dormant(). Whenever the driver CHANGES one of these flags, a workqueue event is @@ -133,11 +135,11 @@ netif_carrier_ok() && !netif_dormant() is set by the driver. Afterwards, the userspace application can set IFLA_OPERSTATE to IF_OPER_DORMANT or IF_OPER_UP as long as the driver does not set netif_carrier_off() or netif_dormant_on(). Changes made by userspace -are multicasted on the netlink group RTMGRP_LINK. +are multicasted on the netlink group RTNLGRP_LINK. So basically a 802.1X supplicant interacts with the kernel like this: --subscribe to RTMGRP_LINK +-subscribe to RTNLGRP_LINK -set IFLA_LINKMODE to 1 via RTM_SETLINK -query RTM_GETLINK once to get initial state -if initial flags are not (IFF_LOWER_UP && !IFF_DORMANT), wait until diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst index c5642f430d2e..52b026be028f 100644 --- a/Documentation/networking/snmp_counter.rst +++ b/Documentation/networking/snmp_counter.rst @@ -367,16 +367,19 @@ to the accept queue. TCP Fast Open ============= * TcpEstabResets + Defined in `RFC1213 tcpEstabResets`_. .. _RFC1213 tcpEstabResets: https://tools.ietf.org/html/rfc1213#page-48 * TcpAttemptFails + Defined in `RFC1213 tcpAttemptFails`_. .. _RFC1213 tcpAttemptFails: https://tools.ietf.org/html/rfc1213#page-48 * TcpOutRsts + Defined in `RFC1213 tcpOutRsts`_. The RFC says this counter indicates the 'segments sent containing the RST flag', but in linux kernel, this couner indicates the segments kerenl tried to send. The sending @@ -384,6 +387,30 @@ process might be failed due to some errors (e.g. memory alloc failed). .. _RFC1213 tcpOutRsts: https://tools.ietf.org/html/rfc1213#page-52 +* TcpExtTCPSpuriousRtxHostQueues + +When the TCP stack wants to retransmit a packet, and finds that packet +is not lost in the network, but the packet is not sent yet, the TCP +stack would give up the retransmission and update this counter. It +might happen if a packet stays too long time in a qdisc or driver +queue. + +* TcpEstabResets + +The socket receives a RST packet in Establish or CloseWait state. + +* TcpExtTCPKeepAlive + +This counter indicates many keepalive packets were sent. The keepalive +won't be enabled by default. A userspace program could enable it by +setting the SO_KEEPALIVE socket option. + +* TcpExtTCPSpuriousRTOs + +The spurious retransmission timeout detected by the `F-RTO`_ +algorithm. + +.. _F-RTO: https://tools.ietf.org/html/rfc5682 TCP Fast Path ============ @@ -609,6 +636,29 @@ packet yet, the sender would know packet 4 is out of order. The TCP stack of kernel will increase TcpExtTCPSACKReorder for both of the above scenarios. +* TcpExtTCPSlowStartRetrans + +The TCP stack wants to retransmit a packet and the congestion control +state is 'Loss'. + +* TcpExtTCPFastRetrans + +The TCP stack wants to retransmit a packet and the congestion control +state is not 'Loss'. + +* TcpExtTCPLostRetransmit + +A SACK points out that a retransmission packet is lost again. + +* TcpExtTCPRetransFail + +The TCP stack tries to deliver a retransmission packet to lower layers +but the lower layers return an error. + +* TcpExtTCPSynRetrans + +The TCP stack retransmits a SYN packet. + DSACK ===== The DSACK is defined in `RFC2883`_. The receiver uses DSACK to report @@ -790,8 +840,9 @@ unacknowledged number (more strict than `RFC 5961 section 5.2`_). .. _RFC 5961 section 5.2: https://tools.ietf.org/html/rfc5961#page-11 TCP receive window -================= +================== * TcpExtTCPWantZeroWindowAdv + Depending on current memory usage, the TCP stack tries to set receive window to zero. But the receive window might still be a no-zero value. For example, if the previous window size is 10, and the TCP @@ -799,14 +850,16 @@ stack receives 3 bytes, the current window size would be 7 even if the window size calculated by the memory usage is zero. * TcpExtTCPToZeroWindowAdv + The TCP receive window is set to zero from a no-zero value. * TcpExtTCPFromZeroWindowAdv + The TCP receive window is set to no-zero value from zero. Delayed ACK -========== +=========== The TCP Delayed ACK is a technique which is used for reducing the packet count in the network. For more details, please refer the `Delayed ACK wiki`_ @@ -814,10 +867,12 @@ packet count in the network. For more details, please refer the .. _Delayed ACK wiki: https://en.wikipedia.org/wiki/TCP_delayed_acknowledgment * TcpExtDelayedACKs + A delayed ACK timer expires. The TCP stack will send a pure ACK packet and exit the delayed ACK mode. * TcpExtDelayedACKLocked + A delayed ACK timer expires, but the TCP stack can't send an ACK immediately due to the socket is locked by a userspace program. The TCP stack will send a pure ACK later (after the userspace program @@ -826,24 +881,147 @@ TCP stack will also update TcpExtDelayedACKs and exit the delayed ACK mode. * TcpExtDelayedACKLost + It will be updated when the TCP stack receives a packet which has been ACKed. A Delayed ACK loss might cause this issue, but it would also be triggered by other reasons, such as a packet is duplicated in the network. Tail Loss Probe (TLP) -=================== +===================== TLP is an algorithm which is used to detect TCP packet loss. For more details, please refer the `TLP paper`_. .. _TLP paper: https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01 * TcpExtTCPLossProbes + A TLP probe packet is sent. * TcpExtTCPLossProbeRecovery + A packet loss is detected and recovered by TLP. +TCP Fast Open +============= +TCP Fast Open is a technology which allows data transfer before the +3-way handshake complete. Please refer the `TCP Fast Open wiki`_ for a +general description. + +.. _TCP Fast Open wiki: https://en.wikipedia.org/wiki/TCP_Fast_Open + +* TcpExtTCPFastOpenActive + +When the TCP stack receives an ACK packet in the SYN-SENT status, and +the ACK packet acknowledges the data in the SYN packet, the TCP stack +understand the TFO cookie is accepted by the other side, then it +updates this counter. + +* TcpExtTCPFastOpenActiveFail + +This counter indicates that the TCP stack initiated a TCP Fast Open, +but it failed. This counter would be updated in three scenarios: (1) +the other side doesn't acknowledge the data in the SYN packet. (2) The +SYN packet which has the TFO cookie is timeout at least once. (3) +after the 3-way handshake, the retransmission timeout happens +net.ipv4.tcp_retries1 times, because some middle-boxes may black-hole +fast open after the handshake. + +* TcpExtTCPFastOpenPassive + +This counter indicates how many times the TCP stack accepts the fast +open request. + +* TcpExtTCPFastOpenPassiveFail + +This counter indicates how many times the TCP stack rejects the fast +open request. It is caused by either the TFO cookie is invalid or the +TCP stack finds an error during the socket creating process. + +* TcpExtTCPFastOpenListenOverflow + +When the pending fast open request number is larger than +fastopenq->max_qlen, the TCP stack will reject the fast open request +and update this counter. When this counter is updated, the TCP stack +won't update TcpExtTCPFastOpenPassive or +TcpExtTCPFastOpenPassiveFail. The fastopenq->max_qlen is set by the +TCP_FASTOPEN socket operation and it could not be larger than +net.core.somaxconn. For example: + +setsockopt(sfd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)); + +* TcpExtTCPFastOpenCookieReqd + +This counter indicates how many times a client wants to request a TFO +cookie. + +SYN cookies +=========== +SYN cookies are used to mitigate SYN flood, for details, please refer +the `SYN cookies wiki`_. + +.. _SYN cookies wiki: https://en.wikipedia.org/wiki/SYN_cookies + +* TcpExtSyncookiesSent + +It indicates how many SYN cookies are sent. + +* TcpExtSyncookiesRecv + +How many reply packets of the SYN cookies the TCP stack receives. + +* TcpExtSyncookiesFailed + +The MSS decoded from the SYN cookie is invalid. When this counter is +updated, the received packet won't be treated as a SYN cookie and the +TcpExtSyncookiesRecv counter wont be updated. + +Challenge ACK +============= +For details of challenge ACK, please refer the explaination of +TcpExtTCPACKSkippedChallenge. + +* TcpExtTCPChallengeACK + +The number of challenge acks sent. + +* TcpExtTCPSYNChallenge + +The number of challenge acks sent in response to SYN packets. After +updates this counter, the TCP stack might send a challenge ACK and +update the TcpExtTCPChallengeACK counter, or it might also skip to +send the challenge and update the TcpExtTCPACKSkippedChallenge. + +prune +===== +When a socket is under memory pressure, the TCP stack will try to +reclaim memory from the receiving queue and out of order queue. One of +the reclaiming method is 'collapse', which means allocate a big sbk, +copy the contiguous skbs to the single big skb, and free these +contiguous skbs. + +* TcpExtPruneCalled + +The TCP stack tries to reclaim memory for a socket. After updates this +counter, the TCP stack will try to collapse the out of order queue and +the receiving queue. If the memory is still not enough, the TCP stack +will try to discard packets from the out of order queue (and update the +TcpExtOfoPruned counter) + +* TcpExtOfoPruned + +The TCP stack tries to discard packet on the out of order queue. + +* TcpExtRcvPruned + +After 'collapse' and discard packets from the out of order queue, if +the actually used memory is still larger than the max allowed memory, +this counter will be updated. It means the 'prune' fails. + +* TcpExtTCPRcvCollapsed + +This counter indicates how many skbs are freed during 'collapse'. + examples ======== diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 819caf8ca05f..ebc679bcb2dc 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -56,26 +56,34 @@ of any kernel data structures. dentry-state: -From linux/fs/dentry.c: +From linux/include/linux/dcache.h: -------------------------------------------------------------- -struct { +struct dentry_stat_t dentry_stat { int nr_dentry; int nr_unused; int age_limit; /* age in seconds */ int want_pages; /* pages requested by system */ - int dummy[2]; -} dentry_stat = {0, 0, 45, 0,}; --------------------------------------------------------------- - -Dentries are dynamically allocated and deallocated, and -nr_dentry seems to be 0 all the time. Hence it's safe to -assume that only nr_unused, age_limit and want_pages are -used. Nr_unused seems to be exactly what its name says. + int nr_negative; /* # of unused negative dentries */ + int dummy; /* Reserved for future use */ +}; +-------------------------------------------------------------- + +Dentries are dynamically allocated and deallocated. + +nr_dentry shows the total number of dentries allocated (active ++ unused). nr_unused shows the number of dentries that are not +actively used, but are saved in the LRU list for future reuse. + Age_limit is the age in seconds after which dcache entries can be reclaimed when memory is short and want_pages is nonzero when shrink_dcache_pages() has been called and the dcache isn't pruned yet. +nr_negative shows the number of unused dentries that are also +negative dentries which do not map to any files. Instead, +they help speeding up rejection of non-existing files provided +by the users. + ============================================================== dquot-max & dquot-nr: diff --git a/Documentation/x86/resctrl_ui.txt b/Documentation/x86/resctrl_ui.txt index e8e8d14d3c4e..c1f95b59e14d 100644 --- a/Documentation/x86/resctrl_ui.txt +++ b/Documentation/x86/resctrl_ui.txt @@ -9,7 +9,7 @@ Fenghua Yu <fenghua.yu@intel.com> Tony Luck <tony.luck@intel.com> Vikas Shivappa <vikas.shivappa@intel.com> -This feature is enabled by the CONFIG_X86_RESCTRL and the x86 /proc/cpuinfo +This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo flag bits: RDT (Resource Director Technology) Allocation - "rdt_a" CAT (Cache Allocation Technology) - "cat_l3", "cat_l2" diff --git a/MAINTAINERS b/MAINTAINERS index b4491132b9ce..8c8f509844c5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2848,6 +2848,9 @@ F: include/uapi/linux/if_bonding.h BPF (Safe dynamic programs and tools) M: Alexei Starovoitov <ast@kernel.org> M: Daniel Borkmann <daniel@iogearbox.net> +R: Martin KaFai Lau <kafai@fb.com> +R: Song Liu <songliubraving@fb.com> +R: Yonghong Song <yhs@fb.com> L: netdev@vger.kernel.org L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git @@ -2873,6 +2876,8 @@ F: samples/bpf/ F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ +K: bpf +N: bpf BPF JIT for ARM M: Shubham Bansal <illusionist.neo@gmail.com> @@ -5187,7 +5192,7 @@ DRM DRIVERS FOR XEN M: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> T: git git://anongit.freedesktop.org/drm/drm-misc L: dri-devel@lists.freedesktop.org -L: xen-devel@lists.xen.org +L: xen-devel@lists.xenproject.org (moderated for non-subscribers) S: Supported F: drivers/gpu/drm/xen/ F: Documentation/gpu/xen-front.rst @@ -6099,6 +6104,7 @@ FREESCALE QORIQ PTP CLOCK DRIVER M: Yangbo Lu <yangbo.lu@nxp.com> L: netdev@vger.kernel.org S: Maintained +F: drivers/net/ethernet/freescale/enetc/enetc_ptp.c F: drivers/ptp/ptp_qoriq.c F: drivers/ptp/ptp_qoriq_debugfs.c F: include/linux/fsl/ptp_qoriq.h @@ -6159,7 +6165,7 @@ FREESCALE SOC SOUND DRIVERS M: Timur Tabi <timur@kernel.org> M: Nicolin Chen <nicoleotsuka@gmail.com> M: Xiubo Li <Xiubo.Lee@gmail.com> -R: Fabio Estevam <fabio.estevam@nxp.com> +R: Fabio Estevam <festevam@gmail.com> L: alsa-devel@alsa-project.org (moderated for non-subscribers) L: linuxppc-dev@lists.ozlabs.org S: Maintained @@ -10907,7 +10913,7 @@ F: include/linux/nvmem-consumer.h F: include/linux/nvmem-provider.h NXP SGTL5000 DRIVER -M: Fabio Estevam <fabio.estevam@nxp.com> +M: Fabio Estevam <festevam@gmail.com> L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/sgtl5000.txt @@ -11321,10 +11327,12 @@ F: include/dt-bindings/ OPENCORES I2C BUS DRIVER M: Peter Korsgaard <peter@korsgaard.com> +M: Andrew Lunn <andrew@lunn.ch> L: linux-i2c@vger.kernel.org S: Maintained F: Documentation/i2c/busses/i2c-ocores F: drivers/i2c/busses/i2c-ocores.c +F: include/linux/platform_data/i2c-ocores.h OPENRISC ARCHITECTURE M: Jonas Bonn <jonas@southpole.se> @@ -12890,6 +12898,13 @@ F: Documentation/devicetree/bindings/net/dsa/realtek-smi.txt F: drivers/net/dsa/realtek-smi* F: drivers/net/dsa/rtl83* +REDPINE WIRELESS DRIVER +M: Amitkumar Karwar <amitkarwar@gmail.com> +M: Siva Rebbagondla <siva8118@gmail.com> +L: linux-wireless@vger.kernel.org +S: Maintained +F: drivers/net/wireless/rsi/ + REGISTER MAP ABSTRACTION M: Mark Brown <broonie@kernel.org> L: linux-kernel@vger.kernel.org @@ -13718,6 +13733,15 @@ L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/sfc/ +SFF/SFP/SFP+ MODULE SUPPORT +M: Russell King <linux@armlinux.org.uk> +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/phy/phylink.c +F: drivers/net/phy/sfp* +F: include/linux/phylink.h +F: include/linux/sfp.h + SGI GRU DRIVER M: Dimitri Sivanich <sivanich@sgi.com> S: Maintained @@ -13739,6 +13763,7 @@ F: drivers/misc/sgi-xp/ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS M: Ursula Braun <ubraun@linux.ibm.com> +M: Karsten Graul <kgraul@linux.ibm.com> L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported @@ -16663,6 +16688,15 @@ S: Maintained F: drivers/platform/x86/ F: drivers/platform/olpc/ +X86 PLATFORM DRIVERS - ARCH +R: Darren Hart <dvhart@infradead.org> +R: Andy Shevchenko <andy@infradead.org> +L: platform-driver-x86@vger.kernel.org +L: x86@kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core +S: Maintained +F: arch/x86/platform + X86 VDSO M: Andy Lutomirski <luto@kernel.org> L: linux-kernel@vger.kernel.org @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc6 NAME = Shy Crocodile # *DOCUMENTATION* diff --git a/arch/alpha/include/asm/irq.h b/arch/alpha/include/asm/irq.h index 4d17cacd1462..432402c8e47f 100644 --- a/arch/alpha/include/asm/irq.h +++ b/arch/alpha/include/asm/irq.h @@ -56,15 +56,15 @@ #elif defined(CONFIG_ALPHA_DP264) || \ defined(CONFIG_ALPHA_LYNX) || \ - defined(CONFIG_ALPHA_SHARK) || \ - defined(CONFIG_ALPHA_EIGER) + defined(CONFIG_ALPHA_SHARK) # define NR_IRQS 64 #elif defined(CONFIG_ALPHA_TITAN) #define NR_IRQS 80 #elif defined(CONFIG_ALPHA_RAWHIDE) || \ - defined(CONFIG_ALPHA_TAKARA) + defined(CONFIG_ALPHA_TAKARA) || \ + defined(CONFIG_ALPHA_EIGER) # define NR_IRQS 128 #elif defined(CONFIG_ALPHA_WILDFIRE) diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index d73dc473fbb9..188fc9256baf 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -78,7 +78,7 @@ __load_new_mm_context(struct mm_struct *next_mm) /* Macro for exception fixup code to access integer registers. */ #define dpf_reg(r) \ (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ - (r) <= 18 ? (r)+8 : (r)-10]) + (r) <= 18 ? (r)+10 : (r)-10]) asmlinkage void do_page_fault(unsigned long address, unsigned long mmcsr, diff --git a/arch/arm/boot/dts/am335x-shc.dts b/arch/arm/boot/dts/am335x-shc.dts index d0fd68873689..5b250060f6dd 100644 --- a/arch/arm/boot/dts/am335x-shc.dts +++ b/arch/arm/boot/dts/am335x-shc.dts @@ -215,7 +215,7 @@ pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; bus-width = <0x4>; - cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>; cd-inverted; max-frequency = <26000000>; vmmc-supply = <&vmmcsd_fixed>; diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi index 47aa53ba6b92..559659b399d0 100644 --- a/arch/arm/boot/dts/da850.dtsi +++ b/arch/arm/boot/dts/da850.dtsi @@ -476,7 +476,7 @@ clocksource: timer@20000 { compatible = "ti,da830-timer"; reg = <0x20000 0x1000>; - interrupts = <12>, <13>; + interrupts = <21>, <22>; interrupt-names = "tint12", "tint34"; clocks = <&pll0_auxclk>; }; diff --git a/arch/arm/boot/dts/imx6q-pistachio.dts b/arch/arm/boot/dts/imx6q-pistachio.dts index 5edf858c8b86..a31b17eaf51c 100644 --- a/arch/arm/boot/dts/imx6q-pistachio.dts +++ b/arch/arm/boot/dts/imx6q-pistachio.dts @@ -103,7 +103,7 @@ power { label = "Power Button"; gpios = <&gpio2 12 GPIO_ACTIVE_LOW>; - gpio-key,wakeup; + wakeup-source; linux,code = <KEY_POWER>; }; }; diff --git a/arch/arm/boot/dts/imx6sll-evk.dts b/arch/arm/boot/dts/imx6sll-evk.dts index d8163705363e..4a31a415f88e 100644 --- a/arch/arm/boot/dts/imx6sll-evk.dts +++ b/arch/arm/boot/dts/imx6sll-evk.dts @@ -309,7 +309,7 @@ pinctrl-2 = <&pinctrl_usdhc3_200mhz>; cd-gpios = <&gpio3 22 GPIO_ACTIVE_LOW>; keep-power-in-suspend; - enable-sdio-wakeup; + wakeup-source; vmmc-supply = <®_sd3_vmmc>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 272ff6133ec1..d1375d3650fd 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -467,7 +467,7 @@ }; gpt: gpt@2098000 { - compatible = "fsl,imx6sx-gpt", "fsl,imx31-gpt"; + compatible = "fsl,imx6sx-gpt", "fsl,imx6dl-gpt"; reg = <0x02098000 0x4000>; interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SX_CLK_GPT_BUS>, diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index e4645f612712..2ab74860d962 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -274,7 +274,7 @@ compatible = "amlogic,meson6-dwmac", "snps,dwmac"; reg = <0xc9410000 0x10000 0xc1108108 0x4>; - interrupts = <GIC_SPI 8 IRQ_TYPE_EDGE_RISING>; + interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/meson8b-ec100.dts b/arch/arm/boot/dts/meson8b-ec100.dts index 0872f6e3abf5..d50fc2f60fa3 100644 --- a/arch/arm/boot/dts/meson8b-ec100.dts +++ b/arch/arm/boot/dts/meson8b-ec100.dts @@ -205,8 +205,7 @@ cap-sd-highspeed; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vcc_3v3>; }; diff --git a/arch/arm/boot/dts/meson8b-odroidc1.dts b/arch/arm/boot/dts/meson8b-odroidc1.dts index 58669abda259..0f0a46ddf3ff 100644 --- a/arch/arm/boot/dts/meson8b-odroidc1.dts +++ b/arch/arm/boot/dts/meson8b-odroidc1.dts @@ -221,7 +221,6 @@ /* Realtek RTL8211F (0x001cc916) */ eth_phy: ethernet-phy@0 { reg = <0>; - eee-broken-1000t; interrupt-parent = <&gpio_intc>; /* GPIOH_3 */ interrupts = <17 IRQ_TYPE_LEVEL_LOW>; @@ -273,8 +272,7 @@ cap-sd-highspeed; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&tflash_vdd>; vqmmc-supply = <&tf_io>; diff --git a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts index f5853610b20b..6ac02beb5fa7 100644 --- a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts +++ b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts @@ -206,8 +206,7 @@ cap-sd-highspeed; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vcc_3v3>; }; diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi index ddc7a7bb33c0..f57acf8f66b9 100644 --- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi +++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi @@ -105,7 +105,7 @@ interrupts-extended = < &cpcap 15 0 &cpcap 14 0 &cpcap 28 0 &cpcap 19 0 &cpcap 18 0 &cpcap 17 0 &cpcap 16 0 &cpcap 49 0 - &cpcap 48 1 + &cpcap 48 0 >; interrupt-names = "id_ground", "id_float", "se0conn", "vbusvld", diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index e53d32691308..93b420934e8e 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -714,11 +714,7 @@ vdda-supply = <&vdac>; - #address-cells = <1>; - #size-cells = <0>; - port { - reg = <0>; venc_out: endpoint { remote-endpoint = <&opa_in>; ti,channels = <1>; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 182a53991c90..826920e6b878 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -814,7 +814,7 @@ /* For debugging, it is often good idea to remove this GPIO. It means you can remove back cover (to reboot by removing battery) and still use the MMC card. */ - cd-gpios = <&gpio6 0 GPIO_ACTIVE_HIGH>; /* 160 */ + cd-gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* 160 */ }; /* most boards use vaux3, only some old versions use vmmc2 instead */ diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index 0d9b85317529..e142e6c70a59 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -370,6 +370,19 @@ compatible = "ti,omap2-onenand"; reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */ + /* + * These timings are based on CONFIG_OMAP_GPMC_DEBUG=y reported + * bootloader set values when booted with v4.19 using both N950 + * and N9 devices (OneNAND Manufacturer: Samsung): + * + * gpmc cs0 before gpmc_cs_program_settings: + * cs0 GPMC_CS_CONFIG1: 0xfd001202 + * cs0 GPMC_CS_CONFIG2: 0x00181800 + * cs0 GPMC_CS_CONFIG3: 0x00030300 + * cs0 GPMC_CS_CONFIG4: 0x18001804 + * cs0 GPMC_CS_CONFIG5: 0x03171d1d + * cs0 GPMC_CS_CONFIG6: 0x97080000 + */ gpmc,sync-read; gpmc,sync-write; gpmc,burst-length = <16>; @@ -379,26 +392,27 @@ gpmc,device-width = <2>; gpmc,mux-add-data = <2>; gpmc,cs-on-ns = <0>; - gpmc,cs-rd-off-ns = <87>; - gpmc,cs-wr-off-ns = <87>; + gpmc,cs-rd-off-ns = <122>; + gpmc,cs-wr-off-ns = <122>; gpmc,adv-on-ns = <0>; - gpmc,adv-rd-off-ns = <10>; - gpmc,adv-wr-off-ns = <10>; - gpmc,oe-on-ns = <15>; - gpmc,oe-off-ns = <87>; + gpmc,adv-rd-off-ns = <15>; + gpmc,adv-wr-off-ns = <15>; + gpmc,oe-on-ns = <20>; + gpmc,oe-off-ns = <122>; gpmc,we-on-ns = <0>; - gpmc,we-off-ns = <87>; - gpmc,rd-cycle-ns = <112>; - gpmc,wr-cycle-ns = <112>; - gpmc,access-ns = <81>; + gpmc,we-off-ns = <122>; + gpmc,rd-cycle-ns = <148>; + gpmc,wr-cycle-ns = <148>; + gpmc,access-ns = <117>; gpmc,page-burst-access-ns = <15>; gpmc,bus-turnaround-ns = <0>; gpmc,cycle2cycle-delay-ns = <0>; gpmc,wait-monitoring-ns = <0>; - gpmc,clk-activation-ns = <5>; - gpmc,wr-data-mux-bus-ns = <30>; - gpmc,wr-access-ns = <81>; - gpmc,sync-clk-ps = <15000>; + gpmc,clk-activation-ns = <10>; + gpmc,wr-data-mux-bus-ns = <40>; + gpmc,wr-access-ns = <117>; + + gpmc,sync-clk-ps = <15000>; /* TBC; Where this value came? */ /* * MTD partition table corresponding to Nokia's MeeGo 1.2 diff --git a/arch/arm/boot/dts/omap5-l4.dtsi b/arch/arm/boot/dts/omap5-l4.dtsi index 9c7e309d9c2c..0960348002ad 100644 --- a/arch/arm/boot/dts/omap5-l4.dtsi +++ b/arch/arm/boot/dts/omap5-l4.dtsi @@ -1046,8 +1046,6 @@ <SYSC_IDLE_SMART>, <SYSC_IDLE_SMART_WKUP>; ti,syss-mask = <1>; - ti,no-reset-on-init; - ti,no-idle-on-init; /* Domains (V, P, C): core, core_pwrdm, l4per_clkdm */ clocks = <&l4per_clkctrl OMAP5_UART3_CLKCTRL 0>; clock-names = "fck"; diff --git a/arch/arm/boot/dts/r8a7743.dtsi b/arch/arm/boot/dts/r8a7743.dtsi index 3cc33f7ff7fe..3adc158a40bb 100644 --- a/arch/arm/boot/dts/r8a7743.dtsi +++ b/arch/arm/boot/dts/r8a7743.dtsi @@ -1681,15 +1681,12 @@ du: display@feb00000 { compatible = "renesas,du-r8a7743"; - reg = <0 0xfeb00000 0 0x40000>, - <0 0xfeb90000 0 0x1c>; - reg-names = "du", "lvds.0"; + reg = <0 0xfeb00000 0 0x40000>; interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cpg CPG_MOD 724>, - <&cpg CPG_MOD 723>, - <&cpg CPG_MOD 726>; - clock-names = "du.0", "du.1", "lvds.0"; + <&cpg CPG_MOD 723>; + clock-names = "du.0", "du.1"; status = "disabled"; ports { @@ -1704,6 +1701,33 @@ port@1 { reg = <1>; du_out_lvds0: endpoint { + remote-endpoint = <&lvds0_in>; + }; + }; + }; + }; + + lvds0: lvds@feb90000 { + compatible = "renesas,r8a7743-lvds"; + reg = <0 0xfeb90000 0 0x1c>; + clocks = <&cpg CPG_MOD 726>; + power-domains = <&sysc R8A7743_PD_ALWAYS_ON>; + resets = <&cpg 726>; + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds0_in: endpoint { + remote-endpoint = <&du_out_lvds0>; + }; + }; + port@1 { + reg = <1>; + lvds0_out: endpoint { }; }; }; diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 353d90f99b40..13304b8c5139 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -216,6 +216,7 @@ #clock-cells = <0>; compatible = "fixed-clock"; clock-frequency = <24000000>; + clock-output-names = "osc24M"; }; osc32k: clk-32k { diff --git a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts index 5d23667dc2d2..25540b7694d5 100644 --- a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts +++ b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts @@ -53,7 +53,7 @@ aliases { serial0 = &uart0; - /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */ + ethernet0 = &emac; ethernet1 = &sdiowifi; }; diff --git a/arch/arm/boot/dts/vf610-bk4.dts b/arch/arm/boot/dts/vf610-bk4.dts index 689c8930dce3..b08d561d6748 100644 --- a/arch/arm/boot/dts/vf610-bk4.dts +++ b/arch/arm/boot/dts/vf610-bk4.dts @@ -110,11 +110,11 @@ bus-num = <3>; status = "okay"; spi-slave; + #address-cells = <0>; - slave@0 { + slave { compatible = "lwn,bk4"; spi-max-frequency = <30000000>; - reg = <0>; }; }; diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c index 318394ed5c7a..95a11d5b3587 100644 --- a/arch/arm/mach-cns3xxx/pcie.c +++ b/arch/arm/mach-cns3xxx/pcie.c @@ -83,7 +83,7 @@ static void __iomem *cns3xxx_pci_map_bus(struct pci_bus *bus, } else /* remote PCI bus */ base = cnspci->cfg1_regs + ((busno & 0xf) << 20); - return base + (where & 0xffc) + (devfn << 12); + return base + where + (devfn << 12); } static int cns3xxx_pci_read_config(struct pci_bus *bus, unsigned int devfn, @@ -93,7 +93,7 @@ static int cns3xxx_pci_read_config(struct pci_bus *bus, unsigned int devfn, u32 mask = (0x1ull << (size * 8)) - 1; int shift = (where % 4) * 8; - ret = pci_generic_config_read32(bus, devfn, where, size, val); + ret = pci_generic_config_read(bus, devfn, where, size, val); if (ret == PCIBIOS_SUCCESSFUL && !bus->number && !devfn && (where & 0xffc) == PCI_CLASS_REVISION) diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c index 3b73813c6b04..23e8c93515d4 100644 --- a/arch/arm/mach-iop32x/n2100.c +++ b/arch/arm/mach-iop32x/n2100.c @@ -75,8 +75,7 @@ void __init n2100_map_io(void) /* * N2100 PCI. */ -static int __init -n2100_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int n2100_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq; diff --git a/arch/arm/mach-tango/pm.c b/arch/arm/mach-tango/pm.c index 028e50c6383f..a32c3b631484 100644 --- a/arch/arm/mach-tango/pm.c +++ b/arch/arm/mach-tango/pm.c @@ -3,6 +3,7 @@ #include <linux/suspend.h> #include <asm/suspend.h> #include "smc.h" +#include "pm.h" static int tango_pm_powerdown(unsigned long arg) { @@ -24,10 +25,7 @@ static const struct platform_suspend_ops tango_pm_ops = { .valid = suspend_valid_only_mem, }; -static int __init tango_pm_init(void) +void __init tango_pm_init(void) { suspend_set_ops(&tango_pm_ops); - return 0; } - -late_initcall(tango_pm_init); diff --git a/arch/arm/mach-tango/pm.h b/arch/arm/mach-tango/pm.h new file mode 100644 index 000000000000..35ea705a0ee2 --- /dev/null +++ b/arch/arm/mach-tango/pm.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifdef CONFIG_SUSPEND +void __init tango_pm_init(void); +#else +#define tango_pm_init NULL +#endif diff --git a/arch/arm/mach-tango/setup.c b/arch/arm/mach-tango/setup.c index 677dd7b5efd9..824f90737b04 100644 --- a/arch/arm/mach-tango/setup.c +++ b/arch/arm/mach-tango/setup.c @@ -2,6 +2,7 @@ #include <asm/mach/arch.h> #include <asm/hardware/cache-l2x0.h> #include "smc.h" +#include "pm.h" static void tango_l2c_write(unsigned long val, unsigned int reg) { @@ -15,4 +16,5 @@ DT_MACHINE_START(TANGO_DT, "Sigma Tango DT") .dt_compat = tango_dt_compat, .l2c_aux_mask = ~0, .l2c_write_sec = tango_l2c_write, + .init_late = tango_pm_init, MACHINE_END diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index ed36dcab80f1..f51919974183 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -190,8 +190,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) if (ssp == NULL) return -ENODEV; - iounmap(ssp->mmio_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); @@ -201,7 +199,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) list_del(&ssp->node); mutex_unlock(&ssp_lock); - kfree(ssp); return 0; } diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index cb44aa290e73..e1d44b903dfc 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -7,7 +7,6 @@ #include <linux/of_address.h> #include <linux/slab.h> #include <linux/types.h> -#include <linux/dma-mapping.h> #include <linux/vmalloc.h> #include <linux/swiotlb.h> diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts index b0c64f75792c..8974b5a1d3b1 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts @@ -188,6 +188,7 @@ reg = <0x3a3>; interrupt-parent = <&r_intc>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; + x-powers,drive-vbus-en; /* set N_VBUSEN as output pin */ }; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 837a03dee875..2abb335145a6 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -390,7 +390,7 @@ }; video-codec@1c0e000 { - compatible = "allwinner,sun50i-h5-video-engine"; + compatible = "allwinner,sun50i-a64-video-engine"; reg = <0x01c0e000 0x1000>; clocks = <&ccu CLK_BUS_VE>, <&ccu CLK_VE>, <&ccu CLK_DRAM_VE>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi index e14e0ce7e89f..016641a41694 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi @@ -187,8 +187,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_boot>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index 8cd50b75171d..ade2ee09ae96 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -305,8 +305,7 @@ max-frequency = <200000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddio_ao3v3>; vqmmc-supply = <&vddio_tf>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts index 4cf7f6e80c6a..25105ac96d55 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts @@ -238,8 +238,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_card>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index 2e1cd5e3a246..1cc9dc68ef00 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -258,8 +258,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&tflash_vdd>; vqmmc-supply = <&tf_io>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi index ce862266b9aa..0be0f2a5d2fe 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi @@ -196,8 +196,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_card>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi index 93a4acf2c46c..ad4d50bd9d77 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi @@ -154,8 +154,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vcc_3v3>; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi index ec09bb5792b7..2d2db783c44c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi @@ -211,8 +211,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vcc_3v3>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts index f1c410e2da2b..796baea7a0bf 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts @@ -131,8 +131,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_card>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index db293440e4ca..255cede7b447 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -238,8 +238,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vcc_3v3>; vqmmc-supply = <&vcc_card>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts index 6739697be1de..9cbdb85fb591 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts @@ -183,8 +183,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_card>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi index a1b31013ab6e..bc811a2faf42 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi @@ -137,8 +137,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_boot>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts index 3c3a667a8df8..3f086ed7de05 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts @@ -356,8 +356,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_boot>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts index f7a1cffab4a8..8acfd40090d2 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts @@ -147,8 +147,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_boot>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts index 7212dc4531e4..7fa20a8ede17 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts @@ -170,8 +170,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_boot>; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 99b7495455a6..838e32cc14c9 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -404,7 +404,7 @@ }; intc: interrupt-controller@9bc0000 { - compatible = "arm,gic-v3"; + compatible = "qcom,msm8996-gic-v3", "arm,gic-v3"; #interrupt-cells = <3>; interrupt-controller; #redistributor-regions = <1>; diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi index 20745a8528c5..719ed9d9067d 100644 --- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi @@ -1011,6 +1011,9 @@ <&cpg CPG_CORE R8A774A1_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled"; diff --git a/arch/arm64/boot/dts/renesas/r8a7796.dtsi b/arch/arm64/boot/dts/renesas/r8a7796.dtsi index afedbf5728ec..0648d12778ed 100644 --- a/arch/arm64/boot/dts/renesas/r8a7796.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a7796.dtsi @@ -1262,6 +1262,9 @@ <&cpg CPG_CORE R8A7796_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A7796_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled"; diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi index 6dc9b1fef830..4b3730f640ef 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi @@ -1068,6 +1068,9 @@ <&cpg CPG_CORE R8A77965_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A77965_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled"; diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 29cdc99688f3..9859e1178e6b 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -299,8 +299,10 @@ int swsusp_arch_suspend(void) dcache_clean_range(__idmap_text_start, __idmap_text_end); /* Clean kvm setup code to PoC? */ - if (el2_reset_needed()) + if (el2_reset_needed()) { dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); + dcache_clean_range(__hyp_text_start, __hyp_text_end); + } /* make the crash dump kernel image protected again */ crash_post_resume(); diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index e1261fbaa374..17f325ba831e 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -28,6 +28,8 @@ #include <asm/virt.h> .text + .pushsection .hyp.text, "ax" + .align 11 ENTRY(__hyp_stub_vectors) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index ba6b41790fcd..b09b6f75f759 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -88,6 +88,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * we end up running with module randomization disabled. */ module_alloc_base = (u64)_etext - MODULES_VSIZE; + __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); /* * Try to map the FDT early. If this fails, we simply bail, diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index f2c211a6229b..58871333737a 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -120,10 +120,12 @@ static int create_dtb(struct kimage *image, { void *buf; size_t buf_size; + size_t cmdline_len; int ret; + cmdline_len = cmdline ? strlen(cmdline) : 0; buf_size = fdt_totalsize(initial_boot_params) - + strlen(cmdline) + DTB_EXTRA_SPACE; + + cmdline_len + DTB_EXTRA_SPACE; for (;;) { buf = vmalloc(buf_size); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 2a5b338b2542..f17afb99890c 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -478,13 +478,13 @@ bool arch_within_kprobe_blacklist(unsigned long addr) addr < (unsigned long)__entry_text_end) || (addr >= (unsigned long)__idmap_text_start && addr < (unsigned long)__idmap_text_end) || + (addr >= (unsigned long)__hyp_text_start && + addr < (unsigned long)__hyp_text_end) || !!search_exception_tables(addr)) return true; if (!is_kernel_in_hyp_mode()) { - if ((addr >= (unsigned long)__hyp_text_start && - addr < (unsigned long)__hyp_text_end) || - (addr >= (unsigned long)__hyp_idmap_text_start && + if ((addr >= (unsigned long)__hyp_idmap_text_start && addr < (unsigned long)__hyp_idmap_text_end)) return true; } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index fcb1f2a6d7c6..99bb8facb5cb 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -286,74 +286,73 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, } -static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start) +static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start, + unsigned long end) { - pte_t *ptep = pte_offset_kernel(pmdp, 0UL); - unsigned long addr; - unsigned i; + unsigned long addr = start; + pte_t *ptep = pte_offset_kernel(pmdp, start); - for (i = 0; i < PTRS_PER_PTE; i++, ptep++) { - addr = start + i * PAGE_SIZE; + do { note_page(st, addr, 4, READ_ONCE(pte_val(*ptep))); - } + } while (ptep++, addr += PAGE_SIZE, addr != end); } -static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start) +static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start, + unsigned long end) { - pmd_t *pmdp = pmd_offset(pudp, 0UL); - unsigned long addr; - unsigned i; + unsigned long next, addr = start; + pmd_t *pmdp = pmd_offset(pudp, start); - for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) { + do { pmd_t pmd = READ_ONCE(*pmdp); + next = pmd_addr_end(addr, end); - addr = start + i * PMD_SIZE; if (pmd_none(pmd) || pmd_sect(pmd)) { note_page(st, addr, 3, pmd_val(pmd)); } else { BUG_ON(pmd_bad(pmd)); - walk_pte(st, pmdp, addr); + walk_pte(st, pmdp, addr, next); } - } + } while (pmdp++, addr = next, addr != end); } -static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start) +static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start, + unsigned long end) { - pud_t *pudp = pud_offset(pgdp, 0UL); - unsigned long addr; - unsigned i; + unsigned long next, addr = start; + pud_t *pudp = pud_offset(pgdp, start); - for (i = 0; i < PTRS_PER_PUD; i++, pudp++) { + do { pud_t pud = READ_ONCE(*pudp); + next = pud_addr_end(addr, end); - addr = start + i * PUD_SIZE; if (pud_none(pud) || pud_sect(pud)) { note_page(st, addr, 2, pud_val(pud)); } else { BUG_ON(pud_bad(pud)); - walk_pmd(st, pudp, addr); + walk_pmd(st, pudp, addr, next); } - } + } while (pudp++, addr = next, addr != end); } static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) { - pgd_t *pgdp = pgd_offset(mm, 0UL); - unsigned i; - unsigned long addr; + unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0; + unsigned long next, addr = start; + pgd_t *pgdp = pgd_offset(mm, start); - for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) { + do { pgd_t pgd = READ_ONCE(*pgdp); + next = pgd_addr_end(addr, end); - addr = start + i * PGDIR_SIZE; if (pgd_none(pgd)) { note_page(st, addr, 1, pgd_val(pgd)); } else { BUG_ON(pgd_bad(pgd)); - walk_pud(st, pgdp, addr); + walk_pud(st, pgdp, addr, next); } - } + } while (pgdp++, addr = next, addr != end); } void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 30695a868107..5c9073bace83 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -33,7 +33,11 @@ void sync_icache_aliases(void *kaddr, unsigned long len) __clean_dcache_area_pou(kaddr, len); __flush_icache_all(); } else { - flush_icache_range(addr, addr + len); + /* + * Don't issue kick_all_cpus_sync() after I-cache invalidation + * for user mappings. + */ + __flush_icache_range(addr, addr + len); } } diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 33a2c94fed0d..63b4a1705182 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -30,6 +30,7 @@ generic-y += pgalloc.h generic-y += preempt.h generic-y += segment.h generic-y += serial.h +generic-y += shmparam.h generic-y += tlbflush.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild index 6c6f6301012e..0febf1a07c30 100644 --- a/arch/c6x/include/uapi/asm/Kbuild +++ b/arch/c6x/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += kvm_para.h -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index cd400d353d18..961c1dc064e1 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -40,6 +40,7 @@ generic-y += preempt.h generic-y += scatterlist.h generic-y += sections.h generic-y += serial.h +generic-y += shmparam.h generic-y += sizes.h generic-y += spinlock.h generic-y += timex.h diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild index 6c6f6301012e..0febf1a07c30 100644 --- a/arch/h8300/include/uapi/asm/Kbuild +++ b/arch/h8300/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += kvm_para.h -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 47c4da3d64a4..b25fd42aa0f4 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -30,6 +30,7 @@ generic-y += rwsem.h generic-y += sections.h generic-y += segment.h generic-y += serial.h +generic-y += shmparam.h generic-y += sizes.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild index 61d955c1747a..c1b06dcf6cf8 100644 --- a/arch/hexagon/include/uapi/asm/Kbuild +++ b/arch/hexagon/include/uapi/asm/Kbuild @@ -1,4 +1,3 @@ include include/uapi/asm-generic/Kbuild.asm -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 38049357d6d3..40712e49381b 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -155,18 +155,22 @@ out: static int __init nfhd_init(void) { u32 blocks, bsize; + int ret; int i; nfhd_id = nf_get_id("XHDI"); if (!nfhd_id) return -ENODEV; - major_num = register_blkdev(major_num, "nfhd"); - if (major_num <= 0) { + ret = register_blkdev(major_num, "nfhd"); + if (ret < 0) { pr_warn("nfhd: unable to get major number\n"); - return major_num; + return ret; } + if (!major_num) + major_num = ret; + for (i = NFHD_DEV_OFFSET; i < 24; i++) { if (nfhd_get_capacity(i, 0, &blocks, &bsize)) continue; diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 9f1dd26903e3..95f8f631c4df 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -20,6 +20,7 @@ generic-y += mm-arch-hooks.h generic-y += percpu.h generic-y += preempt.h generic-y += sections.h +generic-y += shmparam.h generic-y += spinlock.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild index b8b3525271fa..960bf1e4be53 100644 --- a/arch/m68k/include/uapi/asm/Kbuild +++ b/arch/m68k/include/uapi/asm/Kbuild @@ -2,4 +2,3 @@ include include/uapi/asm-generic/Kbuild.asm generated-y += unistd_32.h generic-y += kvm_para.h -generic-y += shmparam.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 9c7d1d25bf3d..791cc8d54d0a 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -26,6 +26,7 @@ generic-y += parport.h generic-y += percpu.h generic-y += preempt.h generic-y += serial.h +generic-y += shmparam.h generic-y += syscalls.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild index 28823e3db825..97823ec46e97 100644 --- a/arch/microblaze/include/uapi/asm/Kbuild +++ b/arch/microblaze/include/uapi/asm/Kbuild @@ -2,5 +2,4 @@ include include/uapi/asm-generic/Kbuild.asm generated-y += unistd_32.h generic-y += kvm_para.h -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 0d14f51d0002..a84c24d894aa 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1403,6 +1403,21 @@ config LOONGSON3_ENHANCEMENT please say 'N' here. If you want a high-performance kernel to run on new Loongson 3 machines only, please say 'Y' here. +config CPU_LOONGSON3_WORKAROUNDS + bool "Old Loongson 3 LLSC Workarounds" + default y if SMP + depends on CPU_LOONGSON3 + help + Loongson 3 processors have the llsc issues which require workarounds. + Without workarounds the system may hang unexpectedly. + + Newer Loongson 3 will fix these issues and no workarounds are needed. + The workarounds have no significant side effect on them but may + decrease the performance of the system so this option should be + disabled unless the kernel is intended to be run on old systems. + + If unsure, please say Y. + config CPU_LOONGSON2E bool "Loongson 2E" depends on SYS_HAS_CPU_LOONGSON2E diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 50cff3cbcc6d..4f7b1fa31cf5 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -76,7 +76,7 @@ status = "okay"; pinctrl-names = "default"; - pinctrl-0 = <&pins_uart2>; + pinctrl-0 = <&pins_uart3>; }; &uart4 { @@ -196,9 +196,9 @@ bias-disable; }; - pins_uart2: uart2 { - function = "uart2"; - groups = "uart2-data", "uart2-hwflow"; + pins_uart3: uart3 { + function = "uart3"; + groups = "uart3-data", "uart3-hwflow"; bias-disable; }; diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi index 6fb16fd24035..2beb78a62b7d 100644 --- a/arch/mips/boot/dts/ingenic/jz4740.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi @@ -161,7 +161,7 @@ #dma-cells = <2>; interrupt-parent = <&intc>; - interrupts = <29>; + interrupts = <20>; clocks = <&cgu JZ4740_CLK_DMA>; diff --git a/arch/mips/boot/dts/xilfpga/nexys4ddr.dts b/arch/mips/boot/dts/xilfpga/nexys4ddr.dts index 2152b7ba65fb..cc8dbea0911f 100644 --- a/arch/mips/boot/dts/xilfpga/nexys4ddr.dts +++ b/arch/mips/boot/dts/xilfpga/nexys4ddr.dts @@ -90,11 +90,11 @@ interrupts = <0>; }; - axi_i2c: i2c@10A00000 { + axi_i2c: i2c@10a00000 { compatible = "xlnx,xps-iic-2.00.a"; interrupt-parent = <&axi_intc>; interrupts = <4>; - reg = < 0x10A00000 0x10000 >; + reg = < 0x10a00000 0x10000 >; clocks = <&ext>; xlnx,clk-freq = <0x5f5e100>; xlnx,family = "Artix7"; @@ -106,9 +106,9 @@ #address-cells = <1>; #size-cells = <0>; - ad7420@4B { + ad7420@4b { compatible = "adi,adt7420"; - reg = <0x4B>; + reg = <0x4b>; }; } ; }; diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 43fcd35e2957..94096299fc56 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ if (kernel_uses_llsc) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ @@ -85,6 +86,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ if (kernel_uses_llsc) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ @@ -118,6 +120,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ if (kernel_uses_llsc) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ @@ -256,6 +259,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ if (kernel_uses_llsc) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ @@ -283,6 +287,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ if (kernel_uses_llsc) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ @@ -316,6 +321,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ if (kernel_uses_llsc) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index a5eb1bb199a7..b7f6ac5e513c 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -222,6 +222,42 @@ #define __smp_mb__before_atomic() __smp_mb__before_llsc() #define __smp_mb__after_atomic() smp_llsc_mb() +/* + * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load, + * store or pref) in between an ll & sc can cause the sc instruction to + * erroneously succeed, breaking atomicity. Whilst it's unusual to write code + * containing such sequences, this bug bites harder than we might otherwise + * expect due to reordering & speculation: + * + * 1) A memory access appearing prior to the ll in program order may actually + * be executed after the ll - this is the reordering case. + * + * In order to avoid this we need to place a memory barrier (ie. a sync + * instruction) prior to every ll instruction, in between it & any earlier + * memory access instructions. Many of these cases are already covered by + * smp_mb__before_llsc() but for the remaining cases, typically ones in + * which multiple CPUs may operate on a memory location but ordering is not + * usually guaranteed, we use loongson_llsc_mb() below. + * + * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later. + * + * 2) If a conditional branch exists between an ll & sc with a target outside + * of the ll-sc loop, for example an exit upon value mismatch in cmpxchg() + * or similar, then misprediction of the branch may allow speculative + * execution of memory accesses from outside of the ll-sc loop. + * + * In order to avoid this we need a memory barrier (ie. a sync instruction) + * at each affected branch target, for which we also use loongson_llsc_mb() + * defined below. + * + * This case affects all current Loongson 3 CPUs. + */ +#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */ +#define loongson_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") +#else +#define loongson_llsc_mb() do { } while (0) +#endif + #include <asm-generic/barrier.h> #endif /* __ASM_BARRIER_H */ diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index c4675957b21b..830c93a010c3 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -69,6 +69,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m)); #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { + loongson_llsc_mb(); do { __asm__ __volatile__( " " __LL "%0, %1 # set_bit \n" @@ -79,6 +80,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) } while (unlikely(!temp)); #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ } else if (kernel_uses_llsc) { + loongson_llsc_mb(); do { __asm__ __volatile__( " .set push \n" @@ -123,6 +125,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) : "ir" (~(1UL << bit))); #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { + loongson_llsc_mb(); do { __asm__ __volatile__( " " __LL "%0, %1 # clear_bit \n" @@ -133,6 +136,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) } while (unlikely(!temp)); #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ } else if (kernel_uses_llsc) { + loongson_llsc_mb(); do { __asm__ __volatile__( " .set push \n" @@ -193,6 +197,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; + loongson_llsc_mb(); do { __asm__ __volatile__( " .set push \n" diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index c14d798f3888..b83b0397462d 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -50,6 +50,7 @@ "i" (-EFAULT) \ : "memory"); \ } else if (cpu_has_llsc) { \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ @@ -163,6 +164,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "i" (-EFAULT) : "memory"); } else if (cpu_has_llsc) { + loongson_llsc_mb(); __asm__ __volatile__( "# futex_atomic_cmpxchg_inatomic \n" " .set push \n" @@ -192,6 +194,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory"); + loongson_llsc_mb(); } else return -ENOSYS; diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 57933fc8fd98..910851c62db3 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -228,6 +228,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); } else if (kernel_uses_llsc) { + loongson_llsc_mb(); __asm__ __volatile__ ( " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" @@ -242,6 +243,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) " .set pop \n" : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); + loongson_llsc_mb(); } #else /* !CONFIG_SMP */ if (pte_none(*buddy)) diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index 8f5bd04f320a..7f3f136572de 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -457,5 +457,5 @@ void mips_cm_error_report(void) } /* reprime cause register */ - write_gcr_error_cause(0); + write_gcr_error_cause(cm_error); } diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 6829a064aac8..339870ed92f7 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -371,7 +371,7 @@ static inline int is_sp_move_ins(union mips_instruction *ip, int *frame_size) static int get_frame_info(struct mips_frame_info *info) { bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); - union mips_instruction insn, *ip, *ip_end; + union mips_instruction insn, *ip; const unsigned int max_insns = 128; unsigned int last_insn_size = 0; unsigned int i; @@ -384,10 +384,9 @@ static int get_frame_info(struct mips_frame_info *info) if (!ip) goto err; - ip_end = (void *)ip + info->func_size; - - for (i = 0; i < max_insns && ip < ip_end; i++) { + for (i = 0; i < max_insns; i++) { ip = (void *)ip + last_insn_size; + if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.word = ip->halfword[0] << 16; last_insn_size = 2; diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index 0fce4608aa88..c1a4d4dc4665 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -23,6 +23,29 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS endif cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap + +# +# Some versions of binutils, not currently mainline as of 2019/02/04, support +# an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction +# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a +# description). +# +# We disable this in order to prevent the assembler meddling with the +# instruction that labels refer to, ie. if we label an ll instruction: +# +# 1: ll v0, 0(a0) +# +# ...then with the assembler fix applied the label may actually point at a sync +# instruction inserted by the assembler, and if we were using the label in an +# exception table the table would no longer contain the address of the ll +# instruction. +# +# Avoid this by explicitly disabling that assembler behaviour. If upstream +# binutils does not merge support for the flag then we can revisit & remove +# this later - for now it ensures vendor toolchains don't cause problems. +# +cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) + # # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a # as MIPS64 R2; older versions as just R1. This leaves the possibility open diff --git a/arch/mips/loongson64/common/reset.c b/arch/mips/loongson64/common/reset.c index a60715e11306..b26892ce871c 100644 --- a/arch/mips/loongson64/common/reset.c +++ b/arch/mips/loongson64/common/reset.c @@ -59,7 +59,12 @@ static void loongson_poweroff(void) { #ifndef CONFIG_LEFI_FIRMWARE_INTERFACE mach_prepare_shutdown(); - unreachable(); + + /* + * It needs a wait loop here, but mips/kernel/reset.c already calls + * a generic delay loop, machine_hang(), so simply return. + */ + return; #else void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr; diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 37b1cb246332..65b6e85447b1 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -932,6 +932,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, * to mimic that here by taking a load/istream page * fault. */ + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0); uasm_i_jr(p, ptr); @@ -1646,6 +1648,8 @@ static void iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) { #ifdef CONFIG_SMP + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); # ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_lld(p, pte, 0, ptr); @@ -2259,6 +2263,8 @@ static void build_r4000_tlb_load_handler(void) #endif uasm_l_nopage_tlbl(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_0 & 1) { @@ -2313,6 +2319,8 @@ static void build_r4000_tlb_store_handler(void) #endif uasm_l_nopage_tlbs(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_1 & 1) { @@ -2368,6 +2376,8 @@ static void build_r4000_tlb_modify_handler(void) #endif uasm_l_nopage_tlbm(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_1 & 1) { diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index 5017d5843c5a..fc29b85cfa92 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -568,6 +568,11 @@ static int __init octeon_pci_setup(void) if (octeon_has_feature(OCTEON_FEATURE_PCIE)) return 0; + if (!octeon_is_pci_host()) { + pr_notice("Not in host mode, PCI Controller not initialized\n"); + return 0; + } + /* Point pcibios_map_irq() to the PCI version of it */ octeon_pcibios_map_irq = octeon_pci_pcibios_map_irq; @@ -579,11 +584,6 @@ static int __init octeon_pci_setup(void) else octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_BIG; - if (!octeon_is_pci_host()) { - pr_notice("Not in host mode, PCI Controller not initialized\n"); - return 0; - } - /* PCI I/O and PCI MEM values */ set_io_port_base(OCTEON_PCI_IOSPACE_BASE); ioport_resource.start = 0; diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index f6fd340e39c2..0ede4deb8181 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -8,6 +8,7 @@ ccflags-vdso := \ $(filter -E%,$(KBUILD_CFLAGS)) \ $(filter -mmicromips,$(KBUILD_CFLAGS)) \ $(filter -march=%,$(KBUILD_CFLAGS)) \ + $(filter -m%-float,$(KBUILD_CFLAGS)) \ -D__VDSO__ ifdef CONFIG_CC_IS_CLANG @@ -129,7 +130,7 @@ $(obj)/%-o32.o: $(src)/%.c FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) -$(obj)/vdso-o32.lds: KBUILD_CPPFLAGS := -mabi=32 +$(obj)/vdso-o32.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) -mabi=32 $(obj)/vdso-o32.lds: $(src)/vdso.lds.S FORCE $(call if_changed_dep,cpp_lds_S) @@ -169,7 +170,7 @@ $(obj)/%-n32.o: $(src)/%.c FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) -$(obj)/vdso-n32.lds: KBUILD_CPPFLAGS := -mabi=n32 +$(obj)/vdso-n32.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) -mabi=n32 $(obj)/vdso-n32.lds: $(src)/vdso.lds.S FORCE $(call if_changed_dep,cpp_lds_S) diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index eb87cd8327c8..1f04844b6b82 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -34,6 +34,7 @@ generic-y += qrwlock_types.h generic-y += qrwlock.h generic-y += sections.h generic-y += segment.h +generic-y += shmparam.h generic-y += string.h generic-y += switch_to.h generic-y += topology.h diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild index 6c6f6301012e..0febf1a07c30 100644 --- a/arch/openrisc/include/uapi/asm/Kbuild +++ b/arch/openrisc/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += kvm_para.h -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 2e6ada28da64..c9bfe526ca9d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1258,21 +1258,13 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; -static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, - struct spinlock *old_pmd_ptl, - struct vm_area_struct *vma) -{ - if (radix_enabled()) - return false; - /* - * Archs like ppc64 use pgtable to store per pmd - * specific information. So when we switch the pmd, - * we should also withdraw and deposit the pgtable - */ - return true; -} - - +extern int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, + struct spinlock *old_pmd_ptl, + struct vm_area_struct *vma); +/* + * Hash translation mode use the deposited table to store hash pte + * slot information. + */ #define arch_needs_pgtable_deposit arch_needs_pgtable_deposit static inline bool arch_needs_pgtable_deposit(void) { diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index f3c31f5e1026..ecd31569a120 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -400,3 +400,25 @@ void arch_report_meminfo(struct seq_file *m) atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20); } #endif /* CONFIG_PROC_FS */ + +/* + * For hash translation mode, we use the deposited table to store hash slot + * information and they are stored at PTRS_PER_PMD offset from related pmd + * location. Hence a pmd move requires deposit and withdraw. + * + * For radix translation with split pmd ptl, we store the deposited table in the + * pmd page. Hence if we have different pmd page we need to withdraw during pmd + * move. + * + * With hash we use deposited table always irrespective of anon or not. + * With radix we use deposited table only for anonymous mapping. + */ +int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, + struct spinlock *old_pmd_ptl, + struct vm_area_struct *vma) +{ + if (radix_enabled()) + return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma); + + return true; +} diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 7d6457ab5d34..bba281b1fe1b 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -43,6 +43,7 @@ static int drc_pmem_bind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; uint64_t rc, token; + uint64_t saved = 0; /* * When the hypervisor cannot map all the requested memory in a single @@ -56,6 +57,8 @@ static int drc_pmem_bind(struct papr_scm_priv *p) rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0, p->blocks, BIND_ANY_ADDR, token); token = ret[0]; + if (!saved) + saved = ret[1]; cond_resched(); } while (rc == H_BUSY); @@ -64,7 +67,7 @@ static int drc_pmem_bind(struct papr_scm_priv *p) return -ENXIO; } - p->bound_addr = ret[1]; + p->bound_addr = saved; dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e64c657060bb..bd149905a5b5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -104,7 +104,7 @@ choice prompt "Base ISA" default ARCH_RV64I help - This selects the base ISA that this kernel will traget and must match + This selects the base ISA that this kernel will target and must match the target platform. config ARCH_RV32I diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index f399659d3b8d..2fd3461e50ab 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -13,8 +13,6 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y CONFIG_SMP=y -CONFIG_PCI=y -CONFIG_PCIE_XILINX=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_NET=y @@ -28,6 +26,10 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y +CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCIE_XILINX=y CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y @@ -63,7 +65,6 @@ CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_VIRTIO_MMIO=y CONFIG_SIFIVE_PLIC=y -CONFIG_RAS=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -77,5 +78,6 @@ CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y # CONFIG_RCU_TRACE is not set diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 06cfbb3aacbb..2a546a52f02a 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -80,7 +80,7 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) -#ifdef CONFIG_64BITS +#ifdef CONFIG_64BIT #define PTE_FMT "%016lx" #else #define PTE_FMT "%08lx" diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 2fa2942be221..470755cb7558 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -35,6 +35,12 @@ #define _PAGE_SPECIAL _PAGE_SOFT #define _PAGE_TABLE _PAGE_PRESENT +/* + * _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to + * distinguish them from swapped out pages + */ +#define _PAGE_PROT_NONE _PAGE_READ + #define _PAGE_PFN_SHIFT 10 /* Set of bits to preserve across pte_modify() */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 16301966d65b..a8179a8c1491 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -44,7 +44,7 @@ /* Page protection bits */ #define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER) -#define PAGE_NONE __pgprot(0) +#define PAGE_NONE __pgprot(_PAGE_PROT_NONE) #define PAGE_READ __pgprot(_PAGE_BASE | _PAGE_READ) #define PAGE_WRITE __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE) #define PAGE_EXEC __pgprot(_PAGE_BASE | _PAGE_EXEC) @@ -98,7 +98,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; static inline int pmd_present(pmd_t pmd) { - return (pmd_val(pmd) & _PAGE_PRESENT); + return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); } static inline int pmd_none(pmd_t pmd) @@ -178,7 +178,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr) static inline int pte_present(pte_t pte) { - return (pte_val(pte) & _PAGE_PRESENT); + return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); } static inline int pte_none(pte_t pte) @@ -380,7 +380,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, * * Format of swap PTE: * bit 0: _PAGE_PRESENT (zero) - * bit 1: reserved for future use (zero) + * bit 1: _PAGE_PROT_NONE (zero) * bits 2 to 6: swap type * bits 7 to XLEN-1: swap offset */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 0531f49af5c3..ce70bceb8872 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -22,7 +22,7 @@ * This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE >> 1) +#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 6a92a2fe198e..dac98348c6a3 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -39,6 +39,7 @@ void asm_offsets(void) OFFSET(TASK_STACK, task_struct, stack); OFFSET(TASK_TI, task_struct, thread_info); OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags); + OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 355166f57205..fd9b57c8b4ce 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -144,6 +144,10 @@ _save_context: REG_L x2, PT_SP(sp) .endm +#if !IS_ENABLED(CONFIG_PREEMPT) +.set resume_kernel, restore_all +#endif + ENTRY(handle_exception) SAVE_ALL @@ -228,7 +232,7 @@ ret_from_exception: REG_L s0, PT_SSTATUS(sp) csrc sstatus, SR_SIE andi s0, s0, SR_SPP - bnez s0, restore_all + bnez s0, resume_kernel resume_userspace: /* Interrupts must be disabled here so flags are checked atomically */ @@ -250,6 +254,18 @@ restore_all: RESTORE_ALL sret +#if IS_ENABLED(CONFIG_PREEMPT) +resume_kernel: + REG_L s0, TASK_TI_PREEMPT_COUNT(tp) + bnez s0, restore_all +need_resched: + REG_L s0, TASK_TI_FLAGS(tp) + andi s0, s0, _TIF_NEED_RESCHED + beqz s0, restore_all + call preempt_schedule_irq + j need_resched +#endif + work_pending: /* Enter slow path for supplementary processing */ la ra, ret_from_exception diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 6e079e94b638..77564310235f 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -181,7 +181,7 @@ static void __init setup_bootmem(void) BUG_ON(mem_size == 0); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = memblock_end_of_DRAM(); + max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index fc185ecabb0a..18cda0e8cf94 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -57,15 +57,12 @@ void __init setup_smp(void) while ((dn = of_find_node_by_type(dn, "cpu"))) { hart = riscv_of_processor_hartid(dn); - if (hart < 0) { - of_node_put(dn); + if (hart < 0) continue; - } if (hart == cpuid_to_hartid_map(0)) { BUG_ON(found_boot_cpu); found_boot_cpu = 1; - of_node_put(dn); continue; } @@ -73,7 +70,6 @@ void __init setup_smp(void) set_cpu_possible(cpuid, true); set_cpu_present(cpuid, true); cpuid++; - of_node_put(dn); } BUG_ON(!found_boot_cpu); diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 1e1395d63dab..65df1dfdc303 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -18,8 +18,6 @@ #include <asm/cache.h> #include <asm/thread_info.h> -#define MAX_BYTES_PER_LONG 0x10 - OUTPUT_ARCH(riscv) ENTRY(_start) @@ -76,6 +74,8 @@ SECTIONS *(.sbss*) } + BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) + EXCEPTION_TABLE(0x10) NOTES @@ -83,10 +83,6 @@ SECTIONS *(.rel.dyn*) } - BSS_SECTION(MAX_BYTES_PER_LONG, - MAX_BYTES_PER_LONG, - MAX_BYTES_PER_LONG) - _end = .; STABS_DEBUG diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 1d9bfaff60bc..658ebf645f42 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -28,7 +28,8 @@ static void __init zone_sizes_init(void) unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, max_low_pfn)); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, + (unsigned long) PFN_PHYS(max_low_pfn))); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index 537f97fde37f..b6796e616812 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -30,10 +30,10 @@ .section .text ENTRY(swsusp_arch_suspend) lg %r1,__LC_NODAT_STACK - aghi %r1,-STACK_FRAME_OVERHEAD stmg %r6,%r15,__SF_GPRS(%r1) + aghi %r1,-STACK_FRAME_OVERHEAD stg %r15,__SF_BACKCHAIN(%r1) - lgr %r1,%r15 + lgr %r15,%r1 /* Store FPU registers */ brasl %r14,save_fpu_regs diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index a966d7bfac57..4266a4de3160 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -382,7 +382,9 @@ static void zpci_irq_handler(struct airq_struct *airq) if (ai == -1UL) break; inc_irq_stat(IRQIO_MSI); + airq_iv_lock(aibv, ai); generic_handle_irq(airq_iv_get_data(aibv, ai)); + airq_iv_unlock(aibv, ai); } } } @@ -408,7 +410,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) zdev->aisb = aisb; /* Create adapter interrupt vector */ - zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA); + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); if (!zdev->aibv) return -ENOMEM; diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 1372553dc0a9..1d1544b6ca74 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -28,6 +28,7 @@ generic-y += preempt.h generic-y += sections.h generic-y += segment.h generic-y += serial.h +generic-y += shmparam.h generic-y += sizes.h generic-y += syscalls.h generic-y += topology.h diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild index 6c6f6301012e..0febf1a07c30 100644 --- a/arch/unicore32/include/uapi/asm/Kbuild +++ b/arch/unicore32/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += kvm_para.h -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 26387c7bf305..68261430fe6e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -446,12 +446,12 @@ config RETPOLINE branches. Requires a compiler with -mindirect-branch=thunk-extern support for full protection. The kernel may run slower. -config X86_RESCTRL - bool "Resource Control support" +config X86_CPU_RESCTRL + bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) select KERNFS help - Enable Resource Control support. + Enable x86 CPU resource control support. Provide support for the allocation and monitoring of system resources usage by the CPU. diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 64037895b085..f62e347862cc 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -600,6 +600,16 @@ ENTRY(trampoline_32bit_src) leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax movl %eax, %cr3 3: + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ + pushl %ecx + pushl %edx + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + popl %edx + popl %ecx + /* Enable PAE and LA57 (if required) paging modes */ movl $X86_CR4_PAE, %eax cmpl $0, %edx diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index 91f75638f6e6..6ff7e81b5628 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -6,7 +6,7 @@ #define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE -#define TRAMPOLINE_32BIT_CODE_SIZE 0x60 +#define TRAMPOLINE_32BIT_CODE_SIZE 0x70 #define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 40e12cfc87f6..daafb893449b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3559,6 +3559,14 @@ static void free_excl_cntrs(int cpu) static void intel_pmu_cpu_dying(int cpu) { + fini_debug_store_on_cpu(cpu); + + if (x86_pmu.counter_freezing) + disable_counter_freeze(); +} + +static void intel_pmu_cpu_dead(int cpu) +{ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); struct intel_shared_regs *pc; @@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu) } free_excl_cntrs(cpu); - - fini_debug_store_on_cpu(cpu); - - if (x86_pmu.counter_freezing) - disable_counter_freeze(); } static void intel_pmu_sched_task(struct perf_event_context *ctx, @@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = { .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, + .cpu_dead = intel_pmu_cpu_dead, }; static struct attribute *intel_pmu_attrs[]; @@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, + .cpu_dead = intel_pmu_cpu_dead, + .guest_get_msrs = intel_guest_get_msrs, .sched_task = intel_pmu_sched_task, }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index c07bee31abe8..b10e04387f38 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1222,6 +1222,8 @@ static struct pci_driver snbep_uncore_pci_driver = { .id_table = snbep_uncore_pci_ids, }; +#define NODE_ID_MASK 0x7 + /* * build pci bus to socket mapping */ @@ -1243,7 +1245,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); if (err) break; - nodeid = config; + nodeid = config & NODE_ID_MASK; /* get the Node ID mapping */ err = pci_read_config_dword(ubox_dev, idmap_loc, &config); if (err) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 0dd6b0f4000e..d9a9993af882 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -6,7 +6,7 @@ * "Big Core" Processors (Branded as Core, Xeon, etc...) * * The "_X" parts are generally the EP and EX Xeons, or the - * "Extreme" ones, like Broadwell-E. + * "Extreme" ones, like Broadwell-E, or Atom microserver. * * While adding a new CPUID for a new microarchitecture, add a new * group to keep logically sorted out in chronological order. Within @@ -71,6 +71,7 @@ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ #define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ +#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 8f657286d599..0ce558a8150d 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -7,7 +7,11 @@ #endif #ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_EXTRA +#define KASAN_STACK_ORDER 2 +#else #define KASAN_STACK_ORDER 1 +#endif #else #define KASAN_STACK_ORDER 0 #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 40616e805292..2779ace16d23 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1065,7 +1065,7 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - native_set_pmd(pmdp, pmd); + set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h index 40ebddde6ac2..f6b7fe2833cc 100644 --- a/arch/x86/include/asm/resctrl_sched.h +++ b/arch/x86/include/asm/resctrl_sched.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_RESCTRL_SCHED_H #define _ASM_X86_RESCTRL_SCHED_H -#ifdef CONFIG_X86_RESCTRL +#ifdef CONFIG_X86_CPU_RESCTRL #include <linux/sched.h> #include <linux/jump_label.h> @@ -88,6 +88,6 @@ static inline void resctrl_sched_in(void) static inline void resctrl_sched_in(void) {} -#endif /* CONFIG_X86_RESCTRL */ +#endif /* CONFIG_X86_CPU_RESCTRL */ #endif /* _ASM_X86_RESCTRL_SCHED_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index b6fa0869f7aa..cfd24f9f7614 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mce/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ -obj-$(CONFIG_X86_RESCTRL) += resctrl/ +obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1de0f4170178..01874d54f4fd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -71,7 +71,7 @@ void __init check_bugs(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_check_topology_early(); + cpu_smt_check_topology(); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 672c7225cb1b..6ce290c506d9 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -784,6 +784,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, quirk_no_way_out(i, m, regs); if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + m->bank = i; mce_read_aux(m, i); *msg = tmp; return 1; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 51adde0a0f1a..e1f3ba19ba54 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -855,7 +855,7 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) if (!p) { return ret; } else { - if (boot_cpu_data.microcode == p->patch_id) + if (boot_cpu_data.microcode >= p->patch_id) return ret; ret = UCODE_NEW; diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile index 1cabe6fd8e11..4a06c37b9cf1 100644 --- a/arch/x86/kernel/cpu/resctrl/Makefile +++ b/arch/x86/kernel/cpu/resctrl/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_X86_RESCTRL) += core.o rdtgroup.o monitor.o -obj-$(CONFIG_X86_RESCTRL) += ctrlmondata.o pseudo_lock.o +obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o +obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o pseudo_lock.o CFLAGS_pseudo_lock.o = -I$(src) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 0d5efa34f359..53917a3ebf94 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -167,6 +167,9 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, struct efi_info *current_ei = &boot_params.efi_info; struct efi_info *ei = ¶ms->efi_info; + if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return 0; + if (!current_ei->efi_memmap_size) return 0; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 8ff20523661b..d8ea4ebd79e7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -211,6 +211,7 @@ static void free_nested(struct kvm_vcpu *vcpu) if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; + hrtimer_cancel(&vmx->nested.preemption_timer); vmx->nested.vmxon = false; vmx->nested.smm.vmxon = false; free_vpid(vmx->nested.vpid02); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4341175339f3..95d618045001 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -26,6 +26,7 @@ #include <linux/mod_devicetable.h> #include <linux/mm.h> #include <linux/sched.h> +#include <linux/sched/smt.h> #include <linux/slab.h> #include <linux/tboot.h> #include <linux/trace_events.h> @@ -6823,7 +6824,7 @@ static int vmx_vm_init(struct kvm *kvm) * Warn upon starting the first VM in a potentially * insecure environment. */ - if (cpu_smt_control == CPU_SMT_ENABLED) + if (sched_smt_active()) pr_warn_once(L1TF_MSG_SMT); if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) pr_warn_once(L1TF_MSG_L1D); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3d27206f6c01..e67ecf25e690 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5116,6 +5116,13 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu, { u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + /* + * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED + * is returned, but our callers are not ready for that and they blindly + * call kvm_inject_page_fault. Ensure that they at least do not leak + * uninitialized kernel stack memory into cr2 and error code. + */ + memset(exception, 0, sizeof(*exception)); return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c index 66894675f3c8..df50451d94ef 100644 --- a/arch/x86/lib/iomem.c +++ b/arch/x86/lib/iomem.c @@ -2,8 +2,11 @@ #include <linux/module.h> #include <linux/io.h> +#define movs(type,to,from) \ + asm volatile("movs" type:"=&D" (to), "=&S" (from):"0" (to), "1" (from):"memory") + /* Originally from i386/string.h */ -static __always_inline void __iomem_memcpy(void *to, const void *from, size_t n) +static __always_inline void rep_movs(void *to, const void *from, size_t n) { unsigned long d0, d1, d2; asm volatile("rep ; movsl\n\t" @@ -21,13 +24,37 @@ static __always_inline void __iomem_memcpy(void *to, const void *from, size_t n) void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) { - __iomem_memcpy(to, (const void *)from, n); + if (unlikely(!n)) + return; + + /* Align any unaligned source IO */ + if (unlikely(1 & (unsigned long)from)) { + movs("b", to, from); + n--; + } + if (n > 1 && unlikely(2 & (unsigned long)from)) { + movs("w", to, from); + n-=2; + } + rep_movs(to, (const void *)from, n); } EXPORT_SYMBOL(memcpy_fromio); void memcpy_toio(volatile void __iomem *to, const void *from, size_t n) { - __iomem_memcpy((void *)to, (const void *) from, n); + if (unlikely(!n)) + return; + + /* Align any unaligned destination IO */ + if (unlikely(1 & (unsigned long)to)) { + movs("b", to, from); + n--; + } + if (n > 1 && unlikely(2 & (unsigned long)to)) { + movs("w", to, from); + n-=2; + } + rep_movs((void *)to, (const void *) from, n); } EXPORT_SYMBOL(memcpy_toio); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2ff25ad33233..9d5c75f02295 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -595,7 +595,7 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) return; } - addr = desc.base0 | (desc.base1 << 16) | (desc.base2 << 24); + addr = desc.base0 | (desc.base1 << 16) | ((unsigned long)desc.base2 << 24); #ifdef CONFIG_X86_64 addr |= ((u64)desc.base3 << 32); #endif diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4f8972311a77..14e6119838a6 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -230,6 +230,29 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn) #endif +/* + * See set_mce_nospec(). + * + * Machine check recovery code needs to change cache mode of poisoned pages to + * UC to avoid speculative access logging another error. But passing the + * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a + * speculative access. So we cheat and flip the top bit of the address. This + * works fine for the code that updates the page tables. But at the end of the + * process we need to flush the TLB and cache and the non-canonical address + * causes a #GP fault when used by the INVLPG and CLFLUSH instructions. + * + * But in the common case we already have a canonical address. This code + * will fix the top bit if needed and is a no-op otherwise. + */ +static inline unsigned long fix_addr(unsigned long addr) +{ +#ifdef CONFIG_X86_64 + return (long)(addr << 1) >> 1; +#else + return addr; +#endif +} + static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx) { if (cpa->flags & CPA_PAGES_ARRAY) { @@ -313,7 +336,7 @@ void __cpa_flush_tlb(void *data) unsigned int i; for (i = 0; i < cpa->numpages; i++) - __flush_tlb_one_kernel(__cpa_addr(cpa, i)); + __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); } static void cpa_flush(struct cpa_data *data, int cache) @@ -347,7 +370,7 @@ static void cpa_flush(struct cpa_data *data, int cache) * Only flush present addresses: */ if (pte && (pte_val(*pte) & _PAGE_PRESENT)) - clflush_cache_range_opt((void *)addr, PAGE_SIZE); + clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE); } mb(); } @@ -1627,29 +1650,6 @@ out: return ret; } -/* - * Machine check recovery code needs to change cache mode of poisoned - * pages to UC to avoid speculative access logging another error. But - * passing the address of the 1:1 mapping to set_memory_uc() is a fine - * way to encourage a speculative access. So we cheat and flip the top - * bit of the address. This works fine for the code that updates the - * page tables. But at the end of the process we need to flush the cache - * and the non-canonical address causes a #GP fault when used by the - * CLFLUSH instruction. - * - * But in the common case we already have a canonical address. This code - * will fix the top bit if needed and is a no-op otherwise. - */ -static inline unsigned long make_addr_canonical_again(unsigned long addr) -{ -#ifdef CONFIG_X86_64 - return (long)(addr << 1) >> 1; -#else - return addr; -#endif -} - - static int change_page_attr_set_clr(unsigned long *addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr, int force_split, int in_flag, diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 20a0756f27ef..ce91682770cb 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -164,7 +164,7 @@ config XTENSA_FAKE_NMI If unsure, say N. config XTENSA_UNALIGNED_USER - bool "Unaligned memory access in use space" + bool "Unaligned memory access in user space" help The Xtensa architecture currently does not handle unaligned memory accesses in hardware but through an exception handler. @@ -451,7 +451,7 @@ config USE_OF help Include support for flattened device tree machine descriptions. -config BUILTIN_DTB +config BUILTIN_DTB_SOURCE string "DTB to build into the kernel image" depends on OF diff --git a/arch/xtensa/boot/dts/Makefile b/arch/xtensa/boot/dts/Makefile index f8052ba5aea8..0b8d00cdae7c 100644 --- a/arch/xtensa/boot/dts/Makefile +++ b/arch/xtensa/boot/dts/Makefile @@ -7,9 +7,9 @@ # # -BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB)).dtb.o -ifneq ($(CONFIG_BUILTIN_DTB),"") -obj-$(CONFIG_OF) += $(BUILTIN_DTB) +BUILTIN_DTB_SOURCE := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o +ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"") +obj-$(CONFIG_OF) += $(BUILTIN_DTB_SOURCE) endif # for CONFIG_OF_ALL_DTBS test diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig index 2bf964df37ba..f378e56f9ce6 100644 --- a/arch/xtensa/configs/audio_kc705_defconfig +++ b/arch/xtensa/configs/audio_kc705_defconfig @@ -34,7 +34,7 @@ CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=0x38000000@0" CONFIG_USE_OF=y -CONFIG_BUILTIN_DTB="kc705" +CONFIG_BUILTIN_DTB_SOURCE="kc705" # CONFIG_COMPACTION is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_PM=y diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig index 3221b7053fa3..62f32a902568 100644 --- a/arch/xtensa/configs/cadence_csp_defconfig +++ b/arch/xtensa/configs/cadence_csp_defconfig @@ -38,7 +38,7 @@ CONFIG_HIGHMEM=y # CONFIG_PCI is not set CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_USE_OF=y -CONFIG_BUILTIN_DTB="csp" +CONFIG_BUILTIN_DTB_SOURCE="csp" # CONFIG_COMPACTION is not set CONFIG_XTFPGA_LCD=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig index 985fa8546e4e..8bebe07f1060 100644 --- a/arch/xtensa/configs/generic_kc705_defconfig +++ b/arch/xtensa/configs/generic_kc705_defconfig @@ -33,7 +33,7 @@ CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=0x38000000@0" CONFIG_USE_OF=y -CONFIG_BUILTIN_DTB="kc705" +CONFIG_BUILTIN_DTB_SOURCE="kc705" # CONFIG_COMPACTION is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_NET=y diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index f3fc4f970ca8..933ab2adf434 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -39,7 +39,7 @@ CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0x9d050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=256M@0x60000000" CONFIG_USE_OF=y -CONFIG_BUILTIN_DTB="kc705_nommu" +CONFIG_BUILTIN_DTB_SOURCE="kc705_nommu" CONFIG_BINFMT_FLAT=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 11fed6c06a7c..e29c5b179a5b 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -33,11 +33,12 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y # CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is not set # CONFIG_PCI is not set +CONFIG_VECTORS_OFFSET=0x00002000 CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=96M@0" CONFIG_USE_OF=y -CONFIG_BUILTIN_DTB="lx200mx" +CONFIG_BUILTIN_DTB_SOURCE="lx200mx" # CONFIG_COMPACTION is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_NET=y diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index da08e75100ab..7f009719304e 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -276,12 +276,13 @@ should_never_return: movi a2, cpu_start_ccount 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b movi a3, 0 s32i a3, a2, 0 - memw 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b wsr a3, ccount @@ -317,11 +318,13 @@ ENTRY(cpu_restart) rsr a0, prid neg a2, a0 movi a3, cpu_start_id + memw s32i a2, a3, 0 #if XCHAL_DCACHE_IS_WRITEBACK dhwbi a3, 0 #endif 1: + memw l32i a2, a3, 0 dhi a3, 0 bne a2, a0, 1b diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 932d64689bac..be1f280c322c 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -83,7 +83,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned i; - for (i = 0; i < max_cpus; ++i) + for_each_possible_cpu(i) set_cpu_present(i, true); } @@ -96,6 +96,11 @@ void __init smp_init_cpus(void) pr_info("%s: Core Count = %d\n", __func__, ncpus); pr_info("%s: Core Id = %d\n", __func__, core_id); + if (ncpus > NR_CPUS) { + ncpus = NR_CPUS; + pr_info("%s: limiting core count by %d\n", __func__, ncpus); + } + for (i = 0; i < ncpus; ++i) set_cpu_possible(i, true); } @@ -195,9 +200,11 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) int i; #ifdef CONFIG_HOTPLUG_CPU - cpu_start_id = cpu; - system_flush_invalidate_dcache_range( - (unsigned long)&cpu_start_id, sizeof(cpu_start_id)); + WRITE_ONCE(cpu_start_id, cpu); + /* Pairs with the third memw in the cpu_restart */ + mb(); + system_flush_invalidate_dcache_range((unsigned long)&cpu_start_id, + sizeof(cpu_start_id)); #endif smp_call_function_single(0, mx_cpu_start, (void *)cpu, 1); @@ -206,18 +213,21 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) ccount = get_ccount(); while (!ccount); - cpu_start_ccount = ccount; + WRITE_ONCE(cpu_start_ccount, ccount); - while (time_before(jiffies, timeout)) { + do { + /* + * Pairs with the first two memws in the + * .Lboot_secondary. + */ mb(); - if (!cpu_start_ccount) - break; - } + ccount = READ_ONCE(cpu_start_ccount); + } while (ccount && time_before(jiffies, timeout)); - if (cpu_start_ccount) { + if (ccount) { smp_call_function_single(0, mx_cpu_stop, - (void *)cpu, 1); - cpu_start_ccount = 0; + (void *)cpu, 1); + WRITE_ONCE(cpu_start_ccount, 0); return -EIO; } } @@ -237,6 +247,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_debug("%s: Calling wakeup_secondary(cpu:%d, idle:%p, sp: %08lx)\n", __func__, cpu, idle, start_info.stack); + init_completion(&cpu_running); ret = boot_secondary(cpu, idle); if (ret == 0) { wait_for_completion_timeout(&cpu_running, @@ -298,8 +309,10 @@ void __cpu_die(unsigned int cpu) unsigned long timeout = jiffies + msecs_to_jiffies(1000); while (time_before(jiffies, timeout)) { system_invalidate_dcache_range((unsigned long)&cpu_start_id, - sizeof(cpu_start_id)); - if (cpu_start_id == -cpu) { + sizeof(cpu_start_id)); + /* Pairs with the second memw in the cpu_restart */ + mb(); + if (READ_ONCE(cpu_start_id) == -cpu) { platform_cpu_kill(cpu); return; } diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index fd524a54d2ab..378186b5eb40 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -89,7 +89,7 @@ static int ccount_timer_shutdown(struct clock_event_device *evt) container_of(evt, struct ccount_timer, evt); if (timer->irq_enabled) { - disable_irq(evt->irq); + disable_irq_nosync(evt->irq); timer->irq_enabled = 0; } return 0; diff --git a/block/blk-core.c b/block/blk-core.c index 3c5f61ceeb67..6b78ec56a4f2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -462,6 +462,10 @@ static void blk_rq_timed_out_timer(struct timer_list *t) kblockd_schedule_work(&q->timeout_work); } +static void blk_timeout_work(struct work_struct *work) +{ +} + /** * blk_alloc_queue_node - allocate a request queue * @gfp_mask: memory allocation flags @@ -505,7 +509,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) timer_setup(&q->backing_dev_info->laptop_mode_wb_timer, laptop_mode_timer_fn, 0); timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); - INIT_WORK(&q->timeout_work, NULL); + INIT_WORK(&q->timeout_work, blk_timeout_work); INIT_LIST_HEAD(&q->icq_list); #ifdef CONFIG_BLK_CGROUP INIT_LIST_HEAD(&q->blkg_list); diff --git a/block/blk-flush.c b/block/blk-flush.c index a3fc7191c694..6e0f2d97fc6d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -335,7 +335,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error); spin_unlock_irqrestore(&fq->mq_flush_lock, flags); - blk_mq_run_hw_queue(hctx, true); + blk_mq_sched_restart(hctx); } /** diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index fc714ef402a6..2620baa1f699 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -72,6 +72,7 @@ #include <linux/sched/loadavg.h> #include <linux/sched/signal.h> #include <trace/events/block.h> +#include <linux/blk-mq.h> #include "blk-rq-qos.h" #include "blk-stat.h" @@ -591,6 +592,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) u64 now = ktime_to_ns(ktime_get()); bool issue_as_root = bio_issue_as_root_blkg(bio); bool enabled = false; + int inflight = 0; blkg = bio->bi_blkg; if (!blkg || !bio_flagged(bio, BIO_TRACKED)) @@ -601,6 +603,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) return; enabled = blk_iolatency_enabled(iolat->blkiolat); + if (!enabled) + return; + while (blkg && blkg->parent) { iolat = blkg_to_lat(blkg); if (!iolat) { @@ -609,8 +614,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) } rqw = &iolat->rq_wait; - atomic_dec(&rqw->inflight); - if (!enabled || iolat->min_lat_nsec == 0) + inflight = atomic_dec_return(&rqw->inflight); + WARN_ON_ONCE(inflight < 0); + if (iolat->min_lat_nsec == 0) goto next; iolatency_record_time(iolat, &bio->bi_issue, now, issue_as_root); @@ -754,10 +760,13 @@ int blk_iolatency_init(struct request_queue *q) return 0; } -static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) +/* + * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise + * return 0. + */ +static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) { struct iolatency_grp *iolat = blkg_to_lat(blkg); - struct blk_iolatency *blkiolat = iolat->blkiolat; u64 oldval = iolat->min_lat_nsec; iolat->min_lat_nsec = val; @@ -766,9 +775,10 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) BLKIOLATENCY_MAX_WIN_SIZE); if (!oldval && val) - atomic_inc(&blkiolat->enabled); + return 1; if (oldval && !val) - atomic_dec(&blkiolat->enabled); + return -1; + return 0; } static void iolatency_clear_scaling(struct blkcg_gq *blkg) @@ -800,6 +810,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, u64 lat_val = 0; u64 oldval; int ret; + int enable = 0; ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); if (ret) @@ -834,7 +845,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, blkg = ctx.blkg; oldval = iolat->min_lat_nsec; - iolatency_set_min_lat_nsec(blkg, lat_val); + enable = iolatency_set_min_lat_nsec(blkg, lat_val); + if (enable) { + WARN_ON_ONCE(!blk_get_queue(blkg->q)); + blkg_get(blkg); + } + if (oldval != iolat->min_lat_nsec) { iolatency_clear_scaling(blkg); } @@ -842,6 +858,24 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, ret = 0; out: blkg_conf_finish(&ctx); + if (ret == 0 && enable) { + struct iolatency_grp *tmp = blkg_to_lat(blkg); + struct blk_iolatency *blkiolat = tmp->blkiolat; + + blk_mq_freeze_queue(blkg->q); + + if (enable == 1) + atomic_inc(&blkiolat->enabled); + else if (enable == -1) + atomic_dec(&blkiolat->enabled); + else + WARN_ON_ONCE(1); + + blk_mq_unfreeze_queue(blkg->q); + + blkg_put(blkg); + blk_put_queue(blkg->q); + } return ret ?: nbytes; } @@ -977,8 +1011,14 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); + struct blk_iolatency *blkiolat = iolat->blkiolat; + int ret; - iolatency_set_min_lat_nsec(blkg, 0); + ret = iolatency_set_min_lat_nsec(blkg, 0); + if (ret == 1) + atomic_inc(&blkiolat->enabled); + if (ret == -1) + atomic_dec(&blkiolat->enabled); iolatency_clear_scaling(blkg); } diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index f8120832ca7b..7921573aebbc 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -839,6 +839,9 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { static bool debugfs_create_files(struct dentry *parent, void *data, const struct blk_mq_debugfs_attr *attr) { + if (IS_ERR_OR_NULL(parent)) + return false; + d_inode(parent)->i_private = data; for (; attr->name; attr++) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 8f5b533764ca..9437a5eb07cf 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -737,12 +737,20 @@ static void blk_mq_requeue_work(struct work_struct *work) spin_unlock_irq(&q->requeue_lock); list_for_each_entry_safe(rq, next, &rq_list, queuelist) { - if (!(rq->rq_flags & RQF_SOFTBARRIER)) + if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP))) continue; rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); - blk_mq_sched_insert_request(rq, true, false, false); + /* + * If RQF_DONTPREP, rq has contained some driver specific + * data, so insert it to hctx dispatch list to avoid any + * merge. + */ + if (rq->rq_flags & RQF_DONTPREP) + blk_mq_request_bypass_insert(rq, false); + else + blk_mq_sched_insert_request(rq, true, false, false); } while (!list_empty(&rq_list)) { diff --git a/block/blk-mq.h b/block/blk-mq.h index d943d46b0785..d0b3dd54ef8d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -36,7 +36,6 @@ struct blk_mq_ctx { struct kobject kobj; } ____cacheline_aligned_in_smp; -void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 5c093ce01bcd..147f6c7ea59c 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1029,6 +1029,9 @@ void __init acpi_early_init(void) acpi_permanent_mmap = true; + /* Initialize debug output. Linux does not use ACPICA defaults */ + acpi_dbg_level = ACPI_LV_INFO | ACPI_LV_REPAIR; + #ifdef CONFIG_X86 /* * If the machine falls into the DMI check table, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index cdfc87629efb..4d2b2ad1ee0e 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5854,9 +5854,10 @@ static int __init init_binder_device(const char *name) static int __init binder_init(void) { int ret; - char *device_name, *device_names, *device_tmp; + char *device_name, *device_tmp; struct binder_device *device; struct hlist_node *tmp; + char *device_names = NULL; ret = binder_alloc_shrinker_init(); if (ret) @@ -5898,23 +5899,29 @@ static int __init binder_init(void) &transaction_log_fops); } - /* - * Copy the module_parameter string, because we don't want to - * tokenize it in-place. - */ - device_names = kstrdup(binder_devices_param, GFP_KERNEL); - if (!device_names) { - ret = -ENOMEM; - goto err_alloc_device_names_failed; - } + if (strcmp(binder_devices_param, "") != 0) { + /* + * Copy the module_parameter string, because we don't want to + * tokenize it in-place. + */ + device_names = kstrdup(binder_devices_param, GFP_KERNEL); + if (!device_names) { + ret = -ENOMEM; + goto err_alloc_device_names_failed; + } - device_tmp = device_names; - while ((device_name = strsep(&device_tmp, ","))) { - ret = init_binder_device(device_name); - if (ret) - goto err_init_binder_device_failed; + device_tmp = device_names; + while ((device_name = strsep(&device_tmp, ","))) { + ret = init_binder_device(device_name); + if (ret) + goto err_init_binder_device_failed; + } } + ret = init_binderfs(); + if (ret) + goto err_init_binder_device_failed; + return ret; err_init_binder_device_failed: diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 7fb97f503ef2..045b3e42d98b 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -46,4 +46,13 @@ static inline bool is_binderfs_device(const struct inode *inode) } #endif +#ifdef CONFIG_ANDROID_BINDERFS +extern int __init init_binderfs(void); +#else +static inline int __init init_binderfs(void) +{ + return 0; +} +#endif + #endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 6a2185eb66c5..e773f45d19d9 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -395,6 +395,11 @@ static int binderfs_binder_ctl_create(struct super_block *sb) struct inode *inode = NULL; struct dentry *root = sb->s_root; struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) + bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) @@ -413,7 +418,10 @@ static int binderfs_binder_ctl_create(struct super_block *sb) /* Reserve a new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); - minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, GFP_KERNEL); + minor = ida_alloc_max(&binderfs_minors, + use_reserve ? BINDERFS_MAX_MINOR : + BINDERFS_MAX_MINOR_CAPPED, + GFP_KERNEL); mutex_unlock(&binderfs_minors_mutex); if (minor < 0) { ret = minor; @@ -542,7 +550,7 @@ static struct file_system_type binder_fs_type = { .fs_flags = FS_USERNS_MOUNT, }; -static int __init init_binderfs(void) +int __init init_binderfs(void) { int ret; @@ -560,5 +568,3 @@ static int __init init_binderfs(void) return ret; } - -device_initcall(init_binderfs); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index b8c3f9e6af89..adf28788cab5 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4554,6 +4554,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, }, { "SAMSUNG MZ7TD256HAFV-000L9", NULL, ATA_HORKAGE_NOLPM, }, + { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM, }, /* devices that don't properly handle queued TRIM commands */ { "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index cf78fa6d470d..a7359535caf5 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -79,8 +79,7 @@ static void cache_size(struct cacheinfo *this_leaf, struct device_node *np) ct_idx = get_cacheinfo_idx(this_leaf->type); propname = cache_type_info[ct_idx].size_prop; - if (of_property_read_u32(np, propname, &this_leaf->size)) - this_leaf->size = 0; + of_property_read_u32(np, propname, &this_leaf->size); } /* not cache_line_size() because that's a macro in include/linux/cache.h */ @@ -114,8 +113,7 @@ static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np) ct_idx = get_cacheinfo_idx(this_leaf->type); propname = cache_type_info[ct_idx].nr_sets_prop; - if (of_property_read_u32(np, propname, &this_leaf->number_of_sets)) - this_leaf->number_of_sets = 0; + of_property_read_u32(np, propname, &this_leaf->number_of_sets); } static void cache_associativity(struct cacheinfo *this_leaf) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 457be03b744d..0ea2139c50d8 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -130,7 +130,7 @@ u64 pm_runtime_autosuspend_expiration(struct device *dev) { int autosuspend_delay; u64 last_busy, expires = 0; - u64 now = ktime_to_ns(ktime_get()); + u64 now = ktime_get_mono_fast_ns(); if (!dev->power.use_autosuspend) goto out; @@ -909,7 +909,7 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) * If 'expires' is after the current time, we've been called * too early. */ - if (expires > 0 && expires < ktime_to_ns(ktime_get())) { + if (expires > 0 && expires < ktime_get_mono_fast_ns()) { dev->power.timer_expires = 0; rpm_suspend(dev, dev->power.timer_autosuspends ? (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC); @@ -928,7 +928,7 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) int pm_schedule_suspend(struct device *dev, unsigned int delay) { unsigned long flags; - ktime_t expires; + u64 expires; int retval; spin_lock_irqsave(&dev->power.lock, flags); @@ -945,8 +945,8 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay) /* Other scheduled or pending requests need to be canceled. */ pm_runtime_cancel_pending(dev); - expires = ktime_add(ktime_get(), ms_to_ktime(delay)); - dev->power.timer_expires = ktime_to_ns(expires); + expires = ktime_get_mono_fast_ns() + (u64)delay * NSEC_PER_MSEC; + dev->power.timer_expires = expires; dev->power.timer_autosuspends = 0; hrtimer_start(&dev->power.suspend_timer, expires, HRTIMER_MODE_ABS); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 6f2856c6d0f2..55481b40df9a 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4075,7 +4075,7 @@ static unsigned int floppy_check_events(struct gendisk *disk, if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) { if (lock_fdc(drive)) - return -EINTR; + return 0; poll_drive(false, 0); process_fd_request(); } diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 6ccdbedb02f3..d2477a5058ac 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1513,9 +1513,19 @@ static int clk_fetch_parent_index(struct clk_core *core, if (!parent) return -EINVAL; - for (i = 0; i < core->num_parents; i++) - if (clk_core_get_parent_by_index(core, i) == parent) + for (i = 0; i < core->num_parents; i++) { + if (core->parents[i] == parent) + return i; + + if (core->parents[i]) + continue; + + /* Fallback to comparing globally unique names */ + if (!strcmp(parent->name, core->parent_names[i])) { + core->parents[i] = parent; return i; + } + } return -EINVAL; } diff --git a/drivers/clk/imx/clk-frac-pll.c b/drivers/clk/imx/clk-frac-pll.c index 0026c3969b1e..76b9eb15604e 100644 --- a/drivers/clk/imx/clk-frac-pll.c +++ b/drivers/clk/imx/clk-frac-pll.c @@ -155,13 +155,14 @@ static int clk_pll_set_rate(struct clk_hw *hw, unsigned long rate, { struct clk_frac_pll *pll = to_clk_frac_pll(hw); u32 val, divfi, divff; - u64 temp64 = parent_rate; + u64 temp64; int ret; parent_rate *= 8; rate *= 2; divfi = rate / parent_rate; - temp64 *= rate - divfi; + temp64 = parent_rate * divfi; + temp64 = rate - temp64; temp64 *= PLL_FRAC_DENOM; do_div(temp64, parent_rate); divff = temp64; diff --git a/drivers/clk/mmp/clk-of-mmp2.c b/drivers/clk/mmp/clk-of-mmp2.c index 61fefc046ec5..d083b860f083 100644 --- a/drivers/clk/mmp/clk-of-mmp2.c +++ b/drivers/clk/mmp/clk-of-mmp2.c @@ -53,7 +53,6 @@ #define APMU_DISP1 0x110 #define APMU_CCIC0 0x50 #define APMU_CCIC1 0xf4 -#define APMU_SP 0x68 #define MPMU_UART_PLL 0x14 struct mmp2_clk_unit { @@ -210,8 +209,6 @@ static struct mmp_clk_mix_config ccic1_mix_config = { .reg_info = DEFINE_MIX_REG_INFO(4, 16, 2, 6, 32), }; -static DEFINE_SPINLOCK(sp_lock); - static struct mmp_param_mux_clk apmu_mux_clks[] = { {MMP2_CLK_DISP0_MUX, "disp0_mux", disp_parent_names, ARRAY_SIZE(disp_parent_names), CLK_SET_RATE_PARENT, APMU_DISP0, 6, 2, 0, &disp0_lock}, {MMP2_CLK_DISP1_MUX, "disp1_mux", disp_parent_names, ARRAY_SIZE(disp_parent_names), CLK_SET_RATE_PARENT, APMU_DISP1, 6, 2, 0, &disp1_lock}, @@ -242,7 +239,6 @@ static struct mmp_param_gate_clk apmu_gate_clks[] = { {MMP2_CLK_CCIC1, "ccic1_clk", "ccic1_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x1b, 0x1b, 0x0, 0, &ccic1_lock}, {MMP2_CLK_CCIC1_PHY, "ccic1_phy_clk", "ccic1_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x24, 0x24, 0x0, 0, &ccic1_lock}, {MMP2_CLK_CCIC1_SPHY, "ccic1_sphy_clk", "ccic1_sphy_div", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x300, 0x300, 0x0, 0, &ccic1_lock}, - {MMP2_CLK_SP, "sp_clk", NULL, CLK_SET_RATE_PARENT, APMU_SP, 0x1b, 0x1b, 0x0, 0, &sp_lock}, }; static void mmp2_axi_periph_clk_init(struct mmp2_clk_unit *pxa_unit) diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index c782e62dd98b..58fa5c247af1 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -115,8 +115,8 @@ static const char * const gcc_parent_names_6[] = { "core_bi_pll_test_se", }; -static const char * const gcc_parent_names_7[] = { - "bi_tcxo", +static const char * const gcc_parent_names_7_ao[] = { + "bi_tcxo_ao", "gpll0", "gpll0_out_even", "core_bi_pll_test_se", @@ -128,6 +128,12 @@ static const char * const gcc_parent_names_8[] = { "core_bi_pll_test_se", }; +static const char * const gcc_parent_names_8_ao[] = { + "bi_tcxo_ao", + "gpll0", + "core_bi_pll_test_se", +}; + static const struct parent_map gcc_parent_map_10[] = { { P_BI_TCXO, 0 }, { P_GPLL0_OUT_MAIN, 1 }, @@ -210,7 +216,7 @@ static struct clk_rcg2 gcc_cpuss_ahb_clk_src = { .freq_tbl = ftbl_gcc_cpuss_ahb_clk_src, .clkr.hw.init = &(struct clk_init_data){ .name = "gcc_cpuss_ahb_clk_src", - .parent_names = gcc_parent_names_7, + .parent_names = gcc_parent_names_7_ao, .num_parents = 4, .ops = &clk_rcg2_ops, }, @@ -229,7 +235,7 @@ static struct clk_rcg2 gcc_cpuss_rbcpr_clk_src = { .freq_tbl = ftbl_gcc_cpuss_rbcpr_clk_src, .clkr.hw.init = &(struct clk_init_data){ .name = "gcc_cpuss_rbcpr_clk_src", - .parent_names = gcc_parent_names_8, + .parent_names = gcc_parent_names_8_ao, .num_parents = 3, .ops = &clk_rcg2_ops, }, diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c index 8d77090ad94a..0241450f3eb3 100644 --- a/drivers/clk/ti/divider.c +++ b/drivers/clk/ti/divider.c @@ -403,8 +403,10 @@ int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div, num_dividers = i; tmp = kcalloc(valid_div + 1, sizeof(*tmp), GFP_KERNEL); - if (!tmp) + if (!tmp) { + *table = ERR_PTR(-ENOMEM); return -ENOMEM; + } valid_div = 0; *width = 0; @@ -439,6 +441,7 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup) { struct clk_omap_divider *div; struct clk_omap_reg *reg; + int ret; if (!setup) return NULL; @@ -458,6 +461,12 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup) div->flags |= CLK_DIVIDER_POWER_OF_TWO; div->table = _get_div_table_from_setup(setup, &div->width); + if (IS_ERR(div->table)) { + ret = PTR_ERR(div->table); + kfree(div); + return ERR_PTR(ret); + } + div->shift = setup->bit_shift; div->latch = -EINVAL; diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index b17d153e724f..23a1b27579a5 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -21,7 +21,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, local_irq_enable(); if (!current_set_polling_and_test()) { unsigned int loop_count = 0; - u64 limit = TICK_USEC; + u64 limit = TICK_NSEC; int i; for (i = 1; i < drv->state_count; i++) { diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index fe070d75c842..4c97478d44bd 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -537,6 +537,8 @@ static void process_response_list(struct nitrox_cmdq *cmdq) struct nitrox_device *ndev = cmdq->ndev; struct nitrox_softreq *sr; int req_completed = 0, err = 0, budget; + completion_t callback; + void *cb_arg; /* check all pending requests */ budget = atomic_read(&cmdq->pending_count); @@ -564,13 +566,13 @@ static void process_response_list(struct nitrox_cmdq *cmdq) smp_mb__after_atomic(); /* remove from response list */ response_list_del(sr, cmdq); - /* ORH error code */ err = READ_ONCE(*sr->resp.orh) & 0xff; - - if (sr->callback) - sr->callback(sr->cb_arg, err); + callback = sr->callback; + cb_arg = sr->cb_arg; softreq_destroy(sr); + if (callback) + callback(cb_arg, err); req_completed++; } diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 8ada308d72ee..b0125ad65825 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -380,7 +380,7 @@ static int init_cc_resources(struct platform_device *plat_dev) rc = cc_ivgen_init(new_drvdata); if (rc) { dev_err(dev, "cc_ivgen_init failed\n"); - goto post_power_mgr_err; + goto post_buf_mgr_err; } /* Allocate crypto algs */ @@ -403,6 +403,9 @@ static int init_cc_resources(struct platform_device *plat_dev) goto post_hash_err; } + /* All set, we can allow autosuspend */ + cc_pm_go(new_drvdata); + /* If we got here and FIPS mode is enabled * it means all FIPS test passed, so let TEE * know we're good. @@ -417,8 +420,6 @@ post_cipher_err: cc_cipher_free(new_drvdata); post_ivgen_err: cc_ivgen_fini(new_drvdata); -post_power_mgr_err: - cc_pm_fini(new_drvdata); post_buf_mgr_err: cc_buffer_mgr_fini(new_drvdata); post_req_mgr_err: diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index d990f472e89f..6ff7e75ad90e 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -100,20 +100,19 @@ int cc_pm_put_suspend(struct device *dev) int cc_pm_init(struct cc_drvdata *drvdata) { - int rc = 0; struct device *dev = drvdata_to_dev(drvdata); /* must be before the enabling to avoid resdundent suspending */ pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); pm_runtime_use_autosuspend(dev); /* activate the PM module */ - rc = pm_runtime_set_active(dev); - if (rc) - return rc; - /* enable the PM module*/ - pm_runtime_enable(dev); + return pm_runtime_set_active(dev); +} - return rc; +/* enable the PM module*/ +void cc_pm_go(struct cc_drvdata *drvdata) +{ + pm_runtime_enable(drvdata_to_dev(drvdata)); } void cc_pm_fini(struct cc_drvdata *drvdata) diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h index 020a5403c58b..f62624357020 100644 --- a/drivers/crypto/ccree/cc_pm.h +++ b/drivers/crypto/ccree/cc_pm.h @@ -16,6 +16,7 @@ extern const struct dev_pm_ops ccree_pm; int cc_pm_init(struct cc_drvdata *drvdata); +void cc_pm_go(struct cc_drvdata *drvdata); void cc_pm_fini(struct cc_drvdata *drvdata); int cc_pm_suspend(struct device *dev); int cc_pm_resume(struct device *dev); @@ -29,6 +30,8 @@ static inline int cc_pm_init(struct cc_drvdata *drvdata) return 0; } +static void cc_pm_go(struct cc_drvdata *drvdata) {} + static inline void cc_pm_fini(struct cc_drvdata *drvdata) {} static inline int cc_pm_suspend(struct device *dev) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 4e557684f792..fe69dccfa0c0 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -203,6 +203,7 @@ struct at_xdmac_chan { u32 save_cim; u32 save_cnda; u32 save_cndc; + u32 irq_status; unsigned long status; struct tasklet_struct tasklet; struct dma_slave_config sconfig; @@ -1580,8 +1581,8 @@ static void at_xdmac_tasklet(unsigned long data) struct at_xdmac_desc *desc; u32 error_mask; - dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n", - __func__, atchan->status); + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status); error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS @@ -1589,15 +1590,15 @@ static void at_xdmac_tasklet(unsigned long data) if (at_xdmac_chan_is_cyclic(atchan)) { at_xdmac_handle_cyclic(atchan); - } else if ((atchan->status & AT_XDMAC_CIS_LIS) - || (atchan->status & error_mask)) { + } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS) + || (atchan->irq_status & error_mask)) { struct dma_async_tx_descriptor *txd; - if (atchan->status & AT_XDMAC_CIS_RBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_RBEIS) dev_err(chan2dev(&atchan->chan), "read bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_WBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_WBEIS) dev_err(chan2dev(&atchan->chan), "write bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_ROIS) + if (atchan->irq_status & AT_XDMAC_CIS_ROIS) dev_err(chan2dev(&atchan->chan), "request overflow error!!!"); spin_lock(&atchan->lock); @@ -1652,7 +1653,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) atchan = &atxdmac->chan[i]; chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS); - atchan->status = chan_status & chan_imr; + atchan->irq_status = chan_status & chan_imr; dev_vdbg(atxdmac->dma.dev, "%s: chan%d: imr=0x%x, status=0x%x\n", __func__, i, chan_imr, chan_status); @@ -1666,7 +1667,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) at_xdmac_chan_read(atchan, AT_XDMAC_CDA), at_xdmac_chan_read(atchan, AT_XDMAC_CUBC)); - if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) + if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask); tasklet_schedule(&atchan->tasklet); diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 1a44c8086d77..ae10f5614f95 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -406,38 +406,32 @@ static void bcm2835_dma_fill_cb_chain_with_sg( } } -static int bcm2835_dma_abort(void __iomem *chan_base) +static int bcm2835_dma_abort(struct bcm2835_chan *c) { - unsigned long cs; + void __iomem *chan_base = c->chan_base; long int timeout = 10000; - cs = readl(chan_base + BCM2835_DMA_CS); - if (!(cs & BCM2835_DMA_ACTIVE)) + /* + * A zero control block address means the channel is idle. + * (The ACTIVE flag in the CS register is not a reliable indicator.) + */ + if (!readl(chan_base + BCM2835_DMA_ADDR)) return 0; /* Write 0 to the active bit - Pause the DMA */ writel(0, chan_base + BCM2835_DMA_CS); /* Wait for any current AXI transfer to complete */ - while ((cs & BCM2835_DMA_ISPAUSED) && --timeout) { + while ((readl(chan_base + BCM2835_DMA_CS) & + BCM2835_DMA_WAITING_FOR_WRITES) && --timeout) cpu_relax(); - cs = readl(chan_base + BCM2835_DMA_CS); - } - /* We'll un-pause when we set of our next DMA */ + /* Peripheral might be stuck and fail to signal AXI write responses */ if (!timeout) - return -ETIMEDOUT; - - if (!(cs & BCM2835_DMA_ACTIVE)) - return 0; - - /* Terminate the control block chain */ - writel(0, chan_base + BCM2835_DMA_NEXTCB); - - /* Abort the whole DMA */ - writel(BCM2835_DMA_ABORT | BCM2835_DMA_ACTIVE, - chan_base + BCM2835_DMA_CS); + dev_err(c->vc.chan.device->dev, + "failed to complete outstanding writes\n"); + writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS); return 0; } @@ -476,8 +470,15 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) spin_lock_irqsave(&c->vc.lock, flags); - /* Acknowledge interrupt */ - writel(BCM2835_DMA_INT, c->chan_base + BCM2835_DMA_CS); + /* + * Clear the INT flag to receive further interrupts. Keep the channel + * active in case the descriptor is cyclic or in case the client has + * already terminated the descriptor and issued a new one. (May happen + * if this IRQ handler is threaded.) If the channel is finished, it + * will remain idle despite the ACTIVE flag being set. + */ + writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE, + c->chan_base + BCM2835_DMA_CS); d = c->desc; @@ -485,11 +486,7 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) if (d->cyclic) { /* call the cyclic callback */ vchan_cyclic_callback(&d->vd); - - /* Keep the DMA engine running */ - writel(BCM2835_DMA_ACTIVE, - c->chan_base + BCM2835_DMA_CS); - } else { + } else if (!readl(c->chan_base + BCM2835_DMA_ADDR)) { vchan_cookie_complete(&c->desc->vd); bcm2835_dma_start_desc(c); } @@ -779,7 +776,6 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); struct bcm2835_dmadev *d = to_bcm2835_dma_dev(c->vc.chan.device); unsigned long flags; - int timeout = 10000; LIST_HEAD(head); spin_lock_irqsave(&c->vc.lock, flags); @@ -789,27 +785,11 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) list_del_init(&c->node); spin_unlock(&d->lock); - /* - * Stop DMA activity: we assume the callback will not be called - * after bcm_dma_abort() returns (even if it does, it will see - * c->desc is NULL and exit.) - */ + /* stop DMA activity */ if (c->desc) { vchan_terminate_vdesc(&c->desc->vd); c->desc = NULL; - bcm2835_dma_abort(c->chan_base); - - /* Wait for stopping */ - while (--timeout) { - if (!(readl(c->chan_base + BCM2835_DMA_CS) & - BCM2835_DMA_ACTIVE)) - break; - - cpu_relax(); - } - - if (!timeout) - dev_err(d->ddev.dev, "DMA transfer could not be terminated\n"); + bcm2835_dma_abort(c); } vchan_get_all_descriptors(&c->vc, &head); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 2eea4ef72915..6511928b4cdf 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -711,11 +711,9 @@ static int dmatest_func(void *data) srcs[i] = um->addr[i] + src_off; ret = dma_mapping_error(dev->dev, um->addr[i]); if (ret) { - dmaengine_unmap_put(um); result("src mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->to_cnt++; } @@ -730,11 +728,9 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); ret = dma_mapping_error(dev->dev, dsts[i]); if (ret) { - dmaengine_unmap_put(um); result("dst mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->bidi_cnt++; } @@ -762,12 +758,10 @@ static int dmatest_func(void *data) } if (!tx) { - dmaengine_unmap_put(um); result("prep error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } done->done = false; @@ -776,12 +770,10 @@ static int dmatest_func(void *data) cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { - dmaengine_unmap_put(um); result("submit error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } dma_async_issue_pending(chan); @@ -790,22 +782,20 @@ static int dmatest_func(void *data) status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); - dmaengine_unmap_put(um); - if (!done->done) { result("test timed out", total_tests, src_off, dst_off, len, 0); - failed_tests++; - continue; + goto error_unmap_continue; } else if (status != DMA_COMPLETE) { result(status == DMA_ERROR ? "completion error status" : "completion busy status", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } + dmaengine_unmap_put(um); + if (params->noverify) { verbose_result("test passed", total_tests, src_off, dst_off, len, 0); @@ -846,6 +836,12 @@ static int dmatest_func(void *data) verbose_result("test passed", total_tests, src_off, dst_off, len, 0); } + + continue; + +error_unmap_continue: + dmaengine_unmap_put(um); + failed_tests++; } ktime = ktime_sub(ktime_get(), ktime); ktime = ktime_sub(ktime, comparetime); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index c2fff3f6c9ca..4a09af3cd546 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -618,7 +618,7 @@ static void imxdma_tasklet(unsigned long data) { struct imxdma_channel *imxdmac = (void *)data; struct imxdma_engine *imxdma = imxdmac->imxdma; - struct imxdma_desc *desc; + struct imxdma_desc *desc, *next_desc; unsigned long flags; spin_lock_irqsave(&imxdma->lock, flags); @@ -648,10 +648,10 @@ static void imxdma_tasklet(unsigned long data) list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free); if (!list_empty(&imxdmac->ld_queue)) { - desc = list_first_entry(&imxdmac->ld_queue, struct imxdma_desc, - node); + next_desc = list_first_entry(&imxdmac->ld_queue, + struct imxdma_desc, node); list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active); - if (imxdma_xfer_desc(desc) < 0) + if (imxdma_xfer_desc(next_desc) < 0) dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n", __func__, imxdmac->channel); } diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 472c88ae1c0f..92f843eaf1e0 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -119,6 +119,11 @@ void scmi_driver_unregister(struct scmi_driver *driver) } EXPORT_SYMBOL_GPL(scmi_driver_unregister); +static void scmi_device_release(struct device *dev) +{ + kfree(to_scmi_dev(dev)); +} + struct scmi_device * scmi_device_create(struct device_node *np, struct device *parent, int protocol) { @@ -138,6 +143,7 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol) scmi_dev->dev.parent = parent; scmi_dev->dev.of_node = np; scmi_dev->dev.bus = &scmi_bus_type; + scmi_dev->dev.release = scmi_device_release; dev_set_name(&scmi_dev->dev, "scmi_dev.%d", id); retval = device_register(&scmi_dev->dev); @@ -156,9 +162,8 @@ free_mem: void scmi_device_destroy(struct scmi_device *scmi_dev) { scmi_handle_put(scmi_dev->handle); - device_unregister(&scmi_dev->dev); ida_simple_remove(&scmi_bus_id, scmi_dev->id); - kfree(scmi_dev); + device_unregister(&scmi_dev->dev); } void scmi_set_handle(struct scmi_device *scmi_dev) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 23ea1ed409d1..352bd2473162 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -37,8 +37,9 @@ extern u64 efi_system_table; static struct ptdump_info efi_ptdump_info = { .mm = &efi_mm, .markers = (struct addr_marker[]){ - { 0, "UEFI runtime start" }, - { DEFAULT_MAP_WINDOW_64, "UEFI runtime end" } + { 0, "UEFI runtime start" }, + { DEFAULT_MAP_WINDOW_64, "UEFI runtime end" }, + { -1, NULL } }, .base_addr = 0, }; diff --git a/drivers/fpga/stratix10-soc.c b/drivers/fpga/stratix10-soc.c index a1a09e04fab8..13851b3d1c56 100644 --- a/drivers/fpga/stratix10-soc.c +++ b/drivers/fpga/stratix10-soc.c @@ -508,14 +508,11 @@ static int __init s10_init(void) return -ENODEV; np = of_find_matching_node(fw_np, s10_of_match); - if (!np) { - of_node_put(fw_np); + if (!np) return -ENODEV; - } of_node_put(np); ret = of_platform_populate(fw_np, s10_of_match, NULL, NULL); - of_node_put(fw_np); if (ret) return ret; diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c index 6b11f1314248..7f9e0304b510 100644 --- a/drivers/gpio/gpio-altera-a10sr.c +++ b/drivers/gpio/gpio-altera-a10sr.c @@ -66,8 +66,10 @@ static int altr_a10sr_gpio_direction_input(struct gpio_chip *gc, static int altr_a10sr_gpio_direction_output(struct gpio_chip *gc, unsigned int nr, int value) { - if (nr <= (ALTR_A10SR_OUT_VALID_RANGE_HI - ALTR_A10SR_LED_VALID_SHIFT)) + if (nr <= (ALTR_A10SR_OUT_VALID_RANGE_HI - ALTR_A10SR_LED_VALID_SHIFT)) { + altr_a10sr_gpio_set(gc, nr, value); return 0; + } return -EINVAL; } diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index e0d6a0a7bc69..e41223c05f6e 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -180,7 +180,18 @@ static void sprd_eic_free(struct gpio_chip *chip, unsigned int offset) static int sprd_eic_get(struct gpio_chip *chip, unsigned int offset) { - return sprd_eic_read(chip, offset, SPRD_EIC_DBNC_DATA); + struct sprd_eic *sprd_eic = gpiochip_get_data(chip); + + switch (sprd_eic->type) { + case SPRD_EIC_DEBOUNCE: + return sprd_eic_read(chip, offset, SPRD_EIC_DBNC_DATA); + case SPRD_EIC_ASYNC: + return sprd_eic_read(chip, offset, SPRD_EIC_ASYNC_DATA); + case SPRD_EIC_SYNC: + return sprd_eic_read(chip, offset, SPRD_EIC_SYNC_DATA); + default: + return -ENOTSUPP; + } } static int sprd_eic_direction_input(struct gpio_chip *chip, unsigned int offset) @@ -368,6 +379,7 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 1); irq_set_handler_locked(data, handle_edge_irq); break; diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index adf72dda25a2..68a35b65925a 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -84,6 +84,7 @@ MODULE_DEVICE_TABLE(of, pcf857x_of_table); */ struct pcf857x { struct gpio_chip chip; + struct irq_chip irqchip; struct i2c_client *client; struct mutex lock; /* protect 'out' */ unsigned out; /* software latch */ @@ -252,18 +253,6 @@ static void pcf857x_irq_bus_sync_unlock(struct irq_data *data) mutex_unlock(&gpio->lock); } -static struct irq_chip pcf857x_irq_chip = { - .name = "pcf857x", - .irq_enable = pcf857x_irq_enable, - .irq_disable = pcf857x_irq_disable, - .irq_ack = noop, - .irq_mask = noop, - .irq_unmask = noop, - .irq_set_wake = pcf857x_irq_set_wake, - .irq_bus_lock = pcf857x_irq_bus_lock, - .irq_bus_sync_unlock = pcf857x_irq_bus_sync_unlock, -}; - /*-------------------------------------------------------------------------*/ static int pcf857x_probe(struct i2c_client *client, @@ -376,8 +365,17 @@ static int pcf857x_probe(struct i2c_client *client, /* Enable irqchip if we have an interrupt */ if (client->irq) { + gpio->irqchip.name = "pcf857x", + gpio->irqchip.irq_enable = pcf857x_irq_enable, + gpio->irqchip.irq_disable = pcf857x_irq_disable, + gpio->irqchip.irq_ack = noop, + gpio->irqchip.irq_mask = noop, + gpio->irqchip.irq_unmask = noop, + gpio->irqchip.irq_set_wake = pcf857x_irq_set_wake, + gpio->irqchip.irq_bus_lock = pcf857x_irq_bus_lock, + gpio->irqchip.irq_bus_sync_unlock = pcf857x_irq_bus_sync_unlock, status = gpiochip_irqchip_add_nested(&gpio->chip, - &pcf857x_irq_chip, + &gpio->irqchip, 0, handle_level_irq, IRQ_TYPE_NONE); if (status) { @@ -392,7 +390,7 @@ static int pcf857x_probe(struct i2c_client *client, if (status) goto fail; - gpiochip_set_nested_irqchip(&gpio->chip, &pcf857x_irq_chip, + gpiochip_set_nested_irqchip(&gpio->chip, &gpio->irqchip, client->irq); gpio->irq_parent = client->irq; } diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 1b79ebcfce3e..541fa6ac399d 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -253,6 +253,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) struct vf610_gpio_port *port; struct resource *iores; struct gpio_chip *gc; + int i; int ret; port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL); @@ -319,6 +320,10 @@ static int vf610_gpio_probe(struct platform_device *pdev) if (ret < 0) return ret; + /* Mask all GPIO interrupts */ + for (i = 0; i < gc->ngpio; i++) + vf610_gpio_writel(0, port->base + PORT_PCR(i)); + /* Clear the interrupt status register for all GPIO's */ vf610_gpio_writel(~0, port->base + PORT_ISFR); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 1651d7f0a303..d1adfdf50fb3 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -828,7 +828,14 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p) /* Do not leak kernel stack to userspace */ memset(&ge, 0, sizeof(ge)); - ge.timestamp = le->timestamp; + /* + * We may be running from a nested threaded interrupt in which case + * we didn't get the timestamp from lineevent_irq_handler(). + */ + if (!le->timestamp) + ge.timestamp = ktime_get_real_ns(); + else + ge.timestamp = le->timestamp; if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE && le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 6896dec97fc7..0ed41a9d2d77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1686,7 +1686,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR; if ((adev->flags & AMD_IS_APU) && - (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + (attr == &sensor_dev_attr_power1_average.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| attr == &sensor_dev_attr_power1_cap.dev_attr.attr)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 71913a18d142..a38e0fb4a6fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -38,6 +38,7 @@ #include "amdgpu_gem.h" #include <drm/amdgpu_drm.h> #include <linux/dma-buf.h> +#include <linux/dma-fence-array.h> /** * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table @@ -187,6 +188,48 @@ error: return ERR_PTR(ret); } +static int +__reservation_object_make_exclusive(struct reservation_object *obj) +{ + struct dma_fence **fences; + unsigned int count; + int r; + + if (!reservation_object_get_list(obj)) /* no shared fences to convert */ + return 0; + + r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences); + if (r) + return r; + + if (count == 0) { + /* Now that was unexpected. */ + } else if (count == 1) { + reservation_object_add_excl_fence(obj, fences[0]); + dma_fence_put(fences[0]); + kfree(fences); + } else { + struct dma_fence_array *array; + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), 0, + false); + if (!array) + goto err_fences_put; + + reservation_object_add_excl_fence(obj, &array->base); + dma_fence_put(&array->base); + } + + return 0; + +err_fences_put: + while (count--) + dma_fence_put(fences[count]); + kfree(fences); + return -ENOMEM; +} + /** * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation * @dma_buf: Shared DMA buffer @@ -218,16 +261,16 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, if (attach->dev->driver != adev->dev->driver) { /* - * Wait for all shared fences to complete before we switch to future - * use of exclusive fence on this prime shared bo. + * We only create shared fences for internal use, but importers + * of the dmabuf rely on exclusive fences for implicitly + * tracking write hazards. As any of the current fences may + * correspond to a write, we need to convert all existing + * fences on the reservation object into a single exclusive + * fence. */ - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, - true, false, - MAX_SCHEDULE_TIMEOUT); - if (unlikely(r < 0)) { - DRM_DEBUG_PRIME("Fence wait failed: %li\n", r); + r = __reservation_object_make_exclusive(bo->tbo.resv); + if (r) goto error_unreserve; - } } /* pin buffer into GTT */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 8fab0d637ee5..3a9b48b227ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -90,8 +90,10 @@ static int psp_sw_fini(void *handle) adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); adev->psp.asd_fw = NULL; - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; + if (adev->psp.ta_fw) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + } return 0; } @@ -435,6 +437,9 @@ static int psp_xgmi_initialize(struct psp_context *psp) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; + if (!psp->adev->psp.ta_fw) + return -ENOENT; + if (!psp->xgmi_context.initialized) { ret = psp_xgmi_init_shared_buf(psp); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d2ea5ce2cefb..7c108e687683 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3363,14 +3363,15 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, struct amdgpu_task_info *task_info) { struct amdgpu_vm *vm; + unsigned long flags; - spin_lock(&adev->vm_manager.pasid_lock); + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); vm = idr_find(&adev->vm_manager.pasid_idr, pasid); if (vm) *task_info = vm->task_info; - spin_unlock(&adev->vm_manager.pasid_lock); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 4cd31a276dcd..186db182f924 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -93,7 +93,20 @@ static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev, static void nbio_v7_4_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, bool enable) { + u32 tmp = 0; + if (enable) { + tmp = REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_CNTL, tmp); } static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 0c6e7f9b143f..189fcb004579 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -152,18 +152,22 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); - if (err) - goto out2; - - err = amdgpu_ucode_validate(adev->psp.ta_fw); - if (err) - goto out2; - - ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); - adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); - adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + - le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + if (err) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + dev_info(adev->dev, + "psp v11.0: Failed to load firmware \"%s\"\n", fw_name); + } else { + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; + + ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; + adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); + adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); + adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8849b74078d6..9b639974c70c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -729,11 +729,13 @@ static int soc15_common_early_init(void *handle) case CHIP_RAVEN: adev->asic_funcs = &soc15_asic_funcs; if (adev->rev_id >= 0x8) - adev->external_rev_id = adev->rev_id + 0x81; + adev->external_rev_id = adev->rev_id + 0x79; else if (adev->pdev->device == 0x15d8) adev->external_rev_id = adev->rev_id + 0x41; + else if (adev->rev_id == 1) + adev->external_rev_id = adev->rev_id + 0x20; else - adev->external_rev_id = 0x1; + adev->external_rev_id = adev->rev_id + 0x01; if (adev->rev_id >= 0x8) { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 5d85ff341385..2e7c44955f43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -863,7 +863,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, return 0; } -#if CONFIG_X86_64 +#ifdef CONFIG_X86_64 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, uint32_t *num_entries, struct crat_subtype_iolink *sub_type_hdr) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f4fa40c387d3..0b392bfca284 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4082,7 +4082,8 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, } if (connector_type == DRM_MODE_CONNECTOR_HDMIA || - connector_type == DRM_MODE_CONNECTOR_DisplayPort) { + connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector_type == DRM_MODE_CONNECTOR_eDP) { drm_connector_attach_vrr_capable_property( &aconnector->base); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 9a7ac58eb18e..ddd75a4d8ba5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -671,6 +671,25 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us return bytes_from_user; } +/* + * Returns the min and max vrr vfreq through the connector's debugfs file. + * Example usage: cat /sys/kernel/debug/dri/0/DP-1/vrr_range + */ +static int vrr_range_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + + if (connector->status != connector_status_connected) + return -ENODEV; + + seq_printf(m, "Min: %u\n", (unsigned int)aconnector->min_vfreq); + seq_printf(m, "Max: %u\n", (unsigned int)aconnector->max_vfreq); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(vrr_range); + static const struct file_operations dp_link_settings_debugfs_fops = { .owner = THIS_MODULE, .read = dp_link_settings_read, @@ -697,7 +716,8 @@ static const struct { } dp_debugfs_entries[] = { {"link_settings", &dp_link_settings_debugfs_fops}, {"phy_settings", &dp_phy_settings_debugfs_fop}, - {"test_pattern", &dp_phy_test_pattern_fops} + {"test_pattern", &dp_phy_test_pattern_fops}, + {"vrr_range", &vrr_range_fops} }; int connector_debugfs_init(struct amdgpu_dm_connector *connector) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index afd287f08bc9..19801bdba0d2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -591,7 +591,15 @@ static void dce11_pplib_apply_display_requirements( dc, context->bw.dce.sclk_khz); - pp_display_cfg->min_dcfclock_khz = pp_display_cfg->min_engine_clock_khz; + /* + * As workaround for >4x4K lightup set dcfclock to min_engine_clock value. + * This is not required for less than 5 displays, + * thus don't request decfclk in dc to avoid impact + * on power saving. + * + */ + pp_display_cfg->min_dcfclock_khz = (context->stream_count > 4)? + pp_display_cfg->min_engine_clock_khz : 0; pp_display_cfg->min_engine_clock_deep_sleep_khz = context->bw.dce.sclk_deep_sleep_khz; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index f95c5f50eb0f..5273de3c5b98 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -1033,6 +1033,7 @@ static int smu10_get_clock_by_type_with_latency(struct pp_hwmgr *hwmgr, break; case amd_pp_dpp_clock: pclk_vol_table = pinfo->vdd_dep_on_dppclk; + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 99cba8ea5d82..5df1256618cc 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -528,7 +528,8 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, object_count = cl->object_count; - object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), object_count * sizeof(__u32)); + object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), + array_size(object_count, sizeof(__u32))); if (IS_ERR(object_ids)) return PTR_ERR(object_ids); diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 24a750436559..f91e02c87fd8 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -758,7 +758,7 @@ int drm_mode_hsync(const struct drm_display_mode *mode) if (mode->hsync) return mode->hsync; - if (mode->htotal < 0) + if (mode->htotal <= 0) return 0; calc_val = (mode->clock * 1000) / mode->htotal; /* hsync in Hz */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 216f52b744a6..c882ea94172c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1824,6 +1824,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, return 0; } +static inline bool +__vma_matches(struct vm_area_struct *vma, struct file *filp, + unsigned long addr, unsigned long size) +{ + if (vma->vm_file != filp) + return false; + + return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size; +} + /** * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address * it is mapped to. @@ -1882,7 +1892,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, return -EINTR; } vma = find_vma(mm, addr); - if (vma) + if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); else diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d6c8f8fdfda5..017fc602a10e 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -594,7 +594,8 @@ static void i915_pmu_enable(struct perf_event *event) * Update the bitmask of enabled events and increment * the event reference counter. */ - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS); + GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count)); GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); i915->pmu.enable |= BIT_ULL(bit); i915->pmu.enable_count[bit]++; @@ -615,11 +616,16 @@ static void i915_pmu_enable(struct perf_event *event) engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); - GEM_BUG_ON(!engine); - engine->pmu.enable |= BIT(sample); - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) != + I915_ENGINE_SAMPLE_COUNT); + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) != + I915_ENGINE_SAMPLE_COUNT); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); + + engine->pmu.enable |= BIT(sample); engine->pmu.enable_count[sample]++; } @@ -649,9 +655,11 @@ static void i915_pmu_disable(struct perf_event *event) engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); - GEM_BUG_ON(!engine); - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); + /* * Decrement the reference count and clear the enabled * bitmask when the last listener on an event goes away. @@ -660,7 +668,7 @@ static void i915_pmu_disable(struct perf_event *event) engine->pmu.enable &= ~BIT(sample); } - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count)); GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); /* * Decrement the reference count and clear the enabled diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 7f164ca3db12..b3728c5f13e7 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -31,6 +31,8 @@ enum { ((1 << I915_PMU_SAMPLE_BITS) + \ (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0))) +#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1) + struct i915_pmu_sample { u64 cur; }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0a7d60509ca7..067054cf4a86 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1790,7 +1790,7 @@ enum i915_power_well_id { #define _CNL_PORT_TX_C_LN0_OFFSET 0x162C40 #define _CNL_PORT_TX_D_LN0_OFFSET 0x162E40 #define _CNL_PORT_TX_F_LN0_OFFSET 0x162840 -#define _CNL_PORT_TX_DW_GRP(port, dw) (_PICK((port), \ +#define _CNL_PORT_TX_DW_GRP(dw, port) (_PICK((port), \ _CNL_PORT_TX_AE_GRP_OFFSET, \ _CNL_PORT_TX_B_GRP_OFFSET, \ _CNL_PORT_TX_B_GRP_OFFSET, \ @@ -1798,7 +1798,7 @@ enum i915_power_well_id { _CNL_PORT_TX_AE_GRP_OFFSET, \ _CNL_PORT_TX_F_GRP_OFFSET) + \ 4 * (dw)) -#define _CNL_PORT_TX_DW_LN0(port, dw) (_PICK((port), \ +#define _CNL_PORT_TX_DW_LN0(dw, port) (_PICK((port), \ _CNL_PORT_TX_AE_LN0_OFFSET, \ _CNL_PORT_TX_B_LN0_OFFSET, \ _CNL_PORT_TX_B_LN0_OFFSET, \ @@ -1834,9 +1834,9 @@ enum i915_power_well_id { #define _CNL_PORT_TX_DW4_LN0_AE 0x162450 #define _CNL_PORT_TX_DW4_LN1_AE 0x1624D0 -#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 4)) -#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4)) -#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4) + \ +#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(4, (port))) +#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port))) +#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port)) + \ ((ln) * (_CNL_PORT_TX_DW4_LN1_AE - \ _CNL_PORT_TX_DW4_LN0_AE))) #define ICL_PORT_TX_DW4_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(4, port)) @@ -1864,8 +1864,12 @@ enum i915_power_well_id { #define RTERM_SELECT(x) ((x) << 3) #define RTERM_SELECT_MASK (0x7 << 3) -#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 7)) -#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 7)) +#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(7, (port))) +#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(7, (port))) +#define ICL_PORT_TX_DW7_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(7, port)) +#define ICL_PORT_TX_DW7_GRP(port) _MMIO(_ICL_PORT_TX_DW_GRP(7, port)) +#define ICL_PORT_TX_DW7_LN0(port) _MMIO(_ICL_PORT_TX_DW_LN(7, 0, port)) +#define ICL_PORT_TX_DW7_LN(port, ln) _MMIO(_ICL_PORT_TX_DW_LN(7, ln, port)) #define N_SCALAR(x) ((x) << 24) #define N_SCALAR_MASK (0x7F << 24) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index f3e1d6a0b7dd..7edce1b7b348 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -494,103 +494,58 @@ static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_1_05V[] = { { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ }; -struct icl_combo_phy_ddi_buf_trans { - u32 dw2_swing_select; - u32 dw2_swing_scalar; - u32 dw4_scaling; -}; - -/* Voltage Swing Programming for VccIO 0.85V for DP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_85V[] = { - /* Voltage mV db */ - { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ - { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ - { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ - { 0x2, 0x98, 0x900F }, /* 400 9.5 */ - { 0xB, 0x70, 0x0018 }, /* 600 0.0 */ - { 0xB, 0x70, 0x3015 }, /* 600 3.5 */ - { 0xB, 0x70, 0x6012 }, /* 600 6.0 */ - { 0x5, 0x00, 0x0018 }, /* 800 0.0 */ - { 0x5, 0x00, 0x3015 }, /* 800 3.5 */ - { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ -}; - -/* FIXME - After table is updated in Bspec */ -/* Voltage Swing Programming for VccIO 0.85V for eDP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_85V[] = { - /* Voltage mV db */ - { 0x0, 0x00, 0x00 }, /* 200 0.0 */ - { 0x0, 0x00, 0x00 }, /* 200 1.5 */ - { 0x0, 0x00, 0x00 }, /* 200 4.0 */ - { 0x0, 0x00, 0x00 }, /* 200 6.0 */ - { 0x0, 0x00, 0x00 }, /* 250 0.0 */ - { 0x0, 0x00, 0x00 }, /* 250 1.5 */ - { 0x0, 0x00, 0x00 }, /* 250 4.0 */ - { 0x0, 0x00, 0x00 }, /* 300 0.0 */ - { 0x0, 0x00, 0x00 }, /* 300 1.5 */ - { 0x0, 0x00, 0x00 }, /* 350 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.95V for DP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_95V[] = { - /* Voltage mV db */ - { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ - { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ - { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ - { 0x2, 0x98, 0x900F }, /* 400 9.5 */ - { 0x4, 0x98, 0x0018 }, /* 600 0.0 */ - { 0x4, 0x98, 0x3015 }, /* 600 3.5 */ - { 0x4, 0x98, 0x6012 }, /* 600 6.0 */ - { 0x5, 0x76, 0x0018 }, /* 800 0.0 */ - { 0x5, 0x76, 0x3015 }, /* 800 3.5 */ - { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ +/* icl_combo_phy_ddi_translations */ +static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hbr2[] = { + /* NT mV Trans mV db */ + { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ + { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ + { 0xC, 0x71, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ + { 0x6, 0x7F, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ + { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ + { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ + { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ + { 0xC, 0x6C, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ + { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ }; -/* FIXME - After table is updated in Bspec */ -/* Voltage Swing Programming for VccIO 0.95V for eDP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_95V[] = { - /* Voltage mV db */ - { 0x0, 0x00, 0x00 }, /* 200 0.0 */ - { 0x0, 0x00, 0x00 }, /* 200 1.5 */ - { 0x0, 0x00, 0x00 }, /* 200 4.0 */ - { 0x0, 0x00, 0x00 }, /* 200 6.0 */ - { 0x0, 0x00, 0x00 }, /* 250 0.0 */ - { 0x0, 0x00, 0x00 }, /* 250 1.5 */ - { 0x0, 0x00, 0x00 }, /* 250 4.0 */ - { 0x0, 0x00, 0x00 }, /* 300 0.0 */ - { 0x0, 0x00, 0x00 }, /* 300 1.5 */ - { 0x0, 0x00, 0x00 }, /* 350 0.0 */ +static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_edp_hbr2[] = { + /* NT mV Trans mV db */ + { 0x0, 0x7F, 0x3F, 0x00, 0x00 }, /* 200 200 0.0 */ + { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 200 250 1.9 */ + { 0x1, 0x7F, 0x33, 0x00, 0x0C }, /* 200 300 3.5 */ + { 0x9, 0x7F, 0x31, 0x00, 0x0E }, /* 200 350 4.9 */ + { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 250 250 0.0 */ + { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 250 300 1.6 */ + { 0x9, 0x7F, 0x35, 0x00, 0x0A }, /* 250 350 2.9 */ + { 0x1, 0x7F, 0x3F, 0x00, 0x00 }, /* 300 300 0.0 */ + { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 300 350 1.3 */ + { 0x9, 0x7F, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ }; -/* Voltage Swing Programming for VccIO 1.05V for DP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_1_05V[] = { - /* Voltage mV db */ - { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ - { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ - { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ - { 0x2, 0x98, 0x900F }, /* 400 9.5 */ - { 0x4, 0x98, 0x0018 }, /* 600 0.0 */ - { 0x4, 0x98, 0x3015 }, /* 600 3.5 */ - { 0x4, 0x98, 0x6012 }, /* 600 6.0 */ - { 0x5, 0x71, 0x0018 }, /* 800 0.0 */ - { 0x5, 0x71, 0x3015 }, /* 800 3.5 */ - { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ +static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_edp_hbr3[] = { + /* NT mV Trans mV db */ + { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ + { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ + { 0xC, 0x71, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ + { 0x6, 0x7F, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ + { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ + { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ + { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ + { 0xC, 0x6C, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ + { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ }; -/* FIXME - After table is updated in Bspec */ -/* Voltage Swing Programming for VccIO 1.05V for eDP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_1_05V[] = { - /* Voltage mV db */ - { 0x0, 0x00, 0x00 }, /* 200 0.0 */ - { 0x0, 0x00, 0x00 }, /* 200 1.5 */ - { 0x0, 0x00, 0x00 }, /* 200 4.0 */ - { 0x0, 0x00, 0x00 }, /* 200 6.0 */ - { 0x0, 0x00, 0x00 }, /* 250 0.0 */ - { 0x0, 0x00, 0x00 }, /* 250 1.5 */ - { 0x0, 0x00, 0x00 }, /* 250 4.0 */ - { 0x0, 0x00, 0x00 }, /* 300 0.0 */ - { 0x0, 0x00, 0x00 }, /* 300 1.5 */ - { 0x0, 0x00, 0x00 }, /* 350 0.0 */ +static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_hdmi[] = { + /* NT mV Trans mV db */ + { 0xA, 0x60, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ + { 0xB, 0x73, 0x36, 0x00, 0x09 }, /* 450 650 3.2 */ + { 0x6, 0x7F, 0x31, 0x00, 0x0E }, /* 450 850 5.5 */ + { 0xB, 0x73, 0x3F, 0x00, 0x00 }, /* 650 650 0.0 ALS */ + { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 650 850 2.3 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 850 850 0.0 */ + { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ }; struct icl_mg_phy_ddi_buf_trans { @@ -871,43 +826,23 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } } -static const struct icl_combo_phy_ddi_buf_trans * +static const struct cnl_ddi_buf_trans * icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port, - int type, int *n_entries) + int type, int rate, int *n_entries) { - u32 voltage = I915_READ(ICL_PORT_COMP_DW3(port)) & VOLTAGE_INFO_MASK; - - if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { - switch (voltage) { - case VOLTAGE_INFO_0_85V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_85V); - return icl_combo_phy_ddi_translations_edp_0_85V; - case VOLTAGE_INFO_0_95V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_95V); - return icl_combo_phy_ddi_translations_edp_0_95V; - case VOLTAGE_INFO_1_05V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_1_05V); - return icl_combo_phy_ddi_translations_edp_1_05V; - default: - MISSING_CASE(voltage); - return NULL; - } - } else { - switch (voltage) { - case VOLTAGE_INFO_0_85V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_85V); - return icl_combo_phy_ddi_translations_dp_hdmi_0_85V; - case VOLTAGE_INFO_0_95V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_95V); - return icl_combo_phy_ddi_translations_dp_hdmi_0_95V; - case VOLTAGE_INFO_1_05V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_1_05V); - return icl_combo_phy_ddi_translations_dp_hdmi_1_05V; - default: - MISSING_CASE(voltage); - return NULL; - } + if (type == INTEL_OUTPUT_HDMI) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); + return icl_combo_phy_ddi_translations_hdmi; + } else if (rate > 540000 && type == INTEL_OUTPUT_EDP) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3); + return icl_combo_phy_ddi_translations_edp_hbr3; + } else if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); + return icl_combo_phy_ddi_translations_edp_hbr2; } + + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hbr2); + return icl_combo_phy_ddi_translations_dp_hbr2; } static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) @@ -918,8 +853,8 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por if (IS_ICELAKE(dev_priv)) { if (intel_port_is_combophy(dev_priv, port)) - icl_get_combo_buf_trans(dev_priv, port, - INTEL_OUTPUT_HDMI, &n_entries); + icl_get_combo_buf_trans(dev_priv, port, INTEL_OUTPUT_HDMI, + 0, &n_entries); else n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); default_entry = n_entries - 1; @@ -1086,7 +1021,7 @@ static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, return DDI_CLK_SEL_TBT_810; default: MISSING_CASE(clock); - break; + return DDI_CLK_SEL_NONE; } case DPLL_ID_ICL_MGPLL1: case DPLL_ID_ICL_MGPLL2: @@ -2275,13 +2210,14 @@ static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder, u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = encoder->port; int n_entries; if (IS_ICELAKE(dev_priv)) { if (intel_port_is_combophy(dev_priv, port)) icl_get_combo_buf_trans(dev_priv, port, encoder->type, - &n_entries); + intel_dp->link_rate, &n_entries); else n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); } else if (IS_CANNONLAKE(dev_priv)) { @@ -2462,14 +2398,15 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, } static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) + u32 level, enum port port, int type, + int rate) { - const struct icl_combo_phy_ddi_buf_trans *ddi_translations = NULL; + const struct cnl_ddi_buf_trans *ddi_translations = NULL; u32 n_entries, val; int ln; ddi_translations = icl_get_combo_buf_trans(dev_priv, port, type, - &n_entries); + rate, &n_entries); if (!ddi_translations) return; @@ -2478,34 +2415,23 @@ static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, level = n_entries - 1; } - /* Set PORT_TX_DW5 Rterm Sel to 110b. */ + /* Set PORT_TX_DW5 */ val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); - val &= ~RTERM_SELECT_MASK; + val &= ~(SCALING_MODE_SEL_MASK | RTERM_SELECT_MASK | + TAP2_DISABLE | TAP3_DISABLE); + val |= SCALING_MODE_SEL(0x2); val |= RTERM_SELECT(0x6); - I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); - - /* Program PORT_TX_DW5 */ - val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); - /* Set DisableTap2 and DisableTap3 if MIPI DSI - * Clear DisableTap2 and DisableTap3 for all other Ports - */ - if (type == INTEL_OUTPUT_DSI) { - val |= TAP2_DISABLE; - val |= TAP3_DISABLE; - } else { - val &= ~TAP2_DISABLE; - val &= ~TAP3_DISABLE; - } + val |= TAP3_DISABLE; I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); /* Program PORT_TX_DW2 */ val = I915_READ(ICL_PORT_TX_DW2_LN0(port)); val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | RCOMP_SCALAR_MASK); - val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_select); - val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_select); + val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_sel); + val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_sel); /* Program Rcomp scalar for every table entry */ - val |= RCOMP_SCALAR(ddi_translations[level].dw2_swing_scalar); + val |= RCOMP_SCALAR(0x98); I915_WRITE(ICL_PORT_TX_DW2_GRP(port), val); /* Program PORT_TX_DW4 */ @@ -2514,9 +2440,17 @@ static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | CURSOR_COEFF_MASK); - val |= ddi_translations[level].dw4_scaling; + val |= POST_CURSOR_1(ddi_translations[level].dw4_post_cursor_1); + val |= POST_CURSOR_2(ddi_translations[level].dw4_post_cursor_2); + val |= CURSOR_COEFF(ddi_translations[level].dw4_cursor_coeff); I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); } + + /* Program PORT_TX_DW7 */ + val = I915_READ(ICL_PORT_TX_DW7_LN0(port)); + val &= ~N_SCALAR_MASK; + val |= N_SCALAR(ddi_translations[level].dw7_n_scalar); + I915_WRITE(ICL_PORT_TX_DW7_GRP(port), val); } static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, @@ -2581,7 +2515,7 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); /* 5. Program swing and de-emphasis */ - icl_ddi_combo_vswing_program(dev_priv, level, port, type); + icl_ddi_combo_vswing_program(dev_priv, level, port, type, rate); /* 6. Set training enable to trigger update */ val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3da9c0f9e948..248128126422 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15415,16 +15415,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, } } +static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + /* + * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram + * the hardware when a high res displays plugged in. DPLL P + * divider is zero, and the pipe timings are bonkers. We'll + * try to disable everything in that case. + * + * FIXME would be nice to be able to sanitize this state + * without several WARNs, but for now let's take the easy + * road. + */ + return IS_GEN6(dev_priv) && + crtc_state->base.active && + crtc_state->shared_dpll && + crtc_state->port_clock == 0; +} + static void intel_sanitize_encoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_connector *connector; + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc_state *crtc_state = crtc ? + to_intel_crtc_state(crtc->base.state) : NULL; /* We need to check both for a crtc link (meaning that the * encoder is active and trying to read from a pipe) and the * pipe itself being active. */ - bool has_active_crtc = encoder->base.crtc && - to_intel_crtc(encoder->base.crtc)->active; + bool has_active_crtc = crtc_state && + crtc_state->base.active; + + if (crtc_state && has_bogus_dpll_config(crtc_state)) { + DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n", + pipe_name(crtc->pipe)); + has_active_crtc = false; + } connector = intel_encoder_find_connector(encoder); if (connector && !has_active_crtc) { @@ -15435,16 +15464,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) /* Connector is active, but has no active pipe. This is * fallout from our resume register restoring. Disable * the encoder manually again. */ - if (encoder->base.crtc) { - struct drm_crtc_state *crtc_state = encoder->base.crtc->state; + if (crtc_state) { + struct drm_encoder *best_encoder; DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n", encoder->base.base.id, encoder->base.name); + + /* avoid oopsing in case the hooks consult best_encoder */ + best_encoder = connector->base.state->best_encoder; + connector->base.state->best_encoder = &encoder->base; + if (encoder->disable) - encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->disable(encoder, crtc_state, + connector->base.state); if (encoder->post_disable) - encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->post_disable(encoder, crtc_state, + connector->base.state); + + connector->base.state->best_encoder = best_encoder; } encoder->base.crtc = NULL; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index fdd2cbc56fa3..22a74608c6e4 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -304,9 +304,11 @@ static int cnl_max_source_rate(struct intel_dp *intel_dp) static int icl_max_source_rate(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum port port = dig_port->base.port; - if (port == PORT_B) + if (intel_port_is_combophy(dev_priv, port) && + !intel_dp_is_edp(intel_dp)) return 540000; return 810000; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f94a04b4ad87..e9ddeaf05a14 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -209,6 +209,16 @@ struct intel_fbdev { unsigned long vma_flags; async_cookie_t cookie; int preferred_bpp; + + /* Whether or not fbdev hpd processing is temporarily suspended */ + bool hpd_suspended : 1; + /* Set when a hotplug was received while HPD processing was + * suspended + */ + bool hpd_waiting : 1; + + /* Protects hpd_suspended */ + struct mutex hpd_lock; }; struct intel_encoder { diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index fb5bb5b32a60..7f365ac0b549 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -679,6 +679,7 @@ int intel_fbdev_init(struct drm_device *dev) if (ifbdev == NULL) return -ENOMEM; + mutex_init(&ifbdev->hpd_lock); drm_fb_helper_prepare(dev, &ifbdev->helper, &intel_fb_helper_funcs); if (!intel_fbdev_init_bios(dev, ifbdev)) @@ -752,6 +753,26 @@ void intel_fbdev_fini(struct drm_i915_private *dev_priv) intel_fbdev_destroy(ifbdev); } +/* Suspends/resumes fbdev processing of incoming HPD events. When resuming HPD + * processing, fbdev will perform a full connector reprobe if a hotplug event + * was received while HPD was suspended. + */ +static void intel_fbdev_hpd_set_suspend(struct intel_fbdev *ifbdev, int state) +{ + bool send_hpd = false; + + mutex_lock(&ifbdev->hpd_lock); + ifbdev->hpd_suspended = state == FBINFO_STATE_SUSPENDED; + send_hpd = !ifbdev->hpd_suspended && ifbdev->hpd_waiting; + ifbdev->hpd_waiting = false; + mutex_unlock(&ifbdev->hpd_lock); + + if (send_hpd) { + DRM_DEBUG_KMS("Handling delayed fbcon HPD event\n"); + drm_fb_helper_hotplug_event(&ifbdev->helper); + } +} + void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -773,6 +794,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous */ if (state != FBINFO_STATE_RUNNING) flush_work(&dev_priv->fbdev_suspend_work); + console_lock(); } else { /* @@ -800,17 +822,26 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous drm_fb_helper_set_suspend(&ifbdev->helper, state); console_unlock(); + + intel_fbdev_hpd_set_suspend(ifbdev, state); } void intel_fbdev_output_poll_changed(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + bool send_hpd; if (!ifbdev) return; intel_fbdev_sync(ifbdev); - if (ifbdev->vma || ifbdev->helper.deferred_setup) + + mutex_lock(&ifbdev->hpd_lock); + send_hpd = !ifbdev->hpd_suspended; + ifbdev->hpd_waiting = true; + mutex_unlock(&ifbdev->hpd_lock); + + if (send_hpd && (ifbdev->vma || ifbdev->helper.deferred_setup)) drm_fb_helper_hotplug_event(&ifbdev->helper); } diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index b8f106d9ecf8..3ac20153705a 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -55,7 +55,12 @@ struct opregion_header { u8 signature[16]; u32 size; - u32 opregion_ver; + struct { + u8 rsvd; + u8 revision; + u8 minor; + u8 major; + } __packed over; u8 bios_ver[32]; u8 vbios_ver[16]; u8 driver_ver[16]; @@ -119,7 +124,8 @@ struct opregion_asle { u64 fdss; u32 fdsp; u32 stat; - u64 rvda; /* Physical address of raw vbt data */ + u64 rvda; /* Physical (2.0) or relative from opregion (2.1+) + * address of raw VBT data. */ u32 rvds; /* Size of raw vbt data */ u8 rsvd[58]; } __packed; @@ -925,6 +931,11 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) opregion->header = base; opregion->lid_state = base + ACPI_CLID; + DRM_DEBUG_DRIVER("ACPI OpRegion version %u.%u.%u\n", + opregion->header->over.major, + opregion->header->over.minor, + opregion->header->over.revision); + mboxes = opregion->header->mboxes; if (mboxes & MBOX_ACPI) { DRM_DEBUG_DRIVER("Public ACPI methods supported\n"); @@ -953,11 +964,26 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) if (dmi_check_system(intel_no_opregion_vbt)) goto out; - if (opregion->header->opregion_ver >= 2 && opregion->asle && + if (opregion->header->over.major >= 2 && opregion->asle && opregion->asle->rvda && opregion->asle->rvds) { - opregion->rvda = memremap(opregion->asle->rvda, - opregion->asle->rvds, + resource_size_t rvda = opregion->asle->rvda; + + /* + * opregion 2.0: rvda is the physical VBT address. + * + * opregion 2.1+: rvda is unsigned, relative offset from + * opregion base, and should never point within opregion. + */ + if (opregion->header->over.major > 2 || + opregion->header->over.minor >= 1) { + WARN_ON(rvda < OPREGION_SIZE); + + rvda += asls; + } + + opregion->rvda = memremap(rvda, opregion->asle->rvds, MEMREMAP_WB); + vbt = opregion->rvda; vbt_size = opregion->asle->rvds; if (intel_bios_is_valid_vbt(vbt, vbt_size)) { @@ -967,6 +993,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) goto out; } else { DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (RVDA)\n"); + memunmap(opregion->rvda); + opregion->rvda = NULL; } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 72edaa7ff411..a1a7cc29fdd1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -415,16 +415,17 @@ struct intel_engine_cs { /** * @enable_count: Reference count for the enabled samplers. * - * Index number corresponds to the bit number from @enable. + * Index number corresponds to @enum drm_i915_pmu_engine_sample. */ - unsigned int enable_count[I915_PMU_SAMPLE_BITS]; + unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; /** * @sample: Counter values for sampling events. * * Our internal timer stores the current counters in this field. + * + * Index number corresponds to @enum drm_i915_pmu_engine_sample. */ -#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1) - struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; } pmu; /* diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index d2e003d8f3db..5170a0f5fe7b 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -494,7 +494,7 @@ skl_program_plane(struct intel_plane *plane, keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha); - keymsk = key->channel_mask & 0x3ffffff; + keymsk = key->channel_mask & 0x7ffffff; if (alpha < 0xff) keymsk |= PLANE_KEYMSK_ALPHA_ENABLE; diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index 2c5bbe317353..e31e263cf86b 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -643,8 +643,10 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) int bus_format; ret = of_property_read_u32(child, "reg", &i); - if (ret || i < 0 || i > 1) - return -EINVAL; + if (ret || i < 0 || i > 1) { + ret = -EINVAL; + goto free_child; + } if (!of_device_is_available(child)) continue; @@ -657,7 +659,6 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) channel = &imx_ldb->channel[i]; channel->ldb = imx_ldb; channel->chno = i; - channel->child = child; /* * The output port is port@4 with an external 4-port mux or @@ -667,13 +668,13 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) imx_ldb->lvds_mux ? 4 : 2, 0, &channel->panel, &channel->bridge); if (ret && ret != -ENODEV) - return ret; + goto free_child; /* panel ddc only if there is no bridge */ if (!channel->bridge) { ret = imx_ldb_panel_ddc(dev, channel, child); if (ret) - return ret; + goto free_child; } bus_format = of_get_bus_format(dev, child); @@ -689,18 +690,26 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) if (bus_format < 0) { dev_err(dev, "could not determine data mapping: %d\n", bus_format); - return bus_format; + ret = bus_format; + goto free_child; } channel->bus_format = bus_format; + channel->child = child; ret = imx_ldb_register(drm, channel); - if (ret) - return ret; + if (ret) { + channel->child = NULL; + goto free_child; + } } dev_set_drvdata(dev, imx_ldb); return 0; + +free_child: + of_node_put(child); + return ret; } static void imx_ldb_unbind(struct device *dev, struct device *master, diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index c390924de93d..21e964f6ab5c 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -370,9 +370,9 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, if (ret) return ret; - /* CRTC should be enabled */ + /* nothing to check when disabling or disabled */ if (!crtc_state->enable) - return -EINVAL; + return 0; switch (plane->type) { case DRM_PLANE_TYPE_PRIMARY: diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 00a9c2ab9e6c..64fb788b6647 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -1406,7 +1406,7 @@ static void dsi_pll_disable(struct dss_pll *pll) static int dsi_dump_dsi_clocks(struct seq_file *s, void *p) { - struct dsi_data *dsi = p; + struct dsi_data *dsi = s->private; struct dss_pll_clock_info *cinfo = &dsi->pll.cinfo; enum dss_clk_source dispc_clk_src, dsi_clk_src; int dsi_module = dsi->module_id; @@ -1467,7 +1467,7 @@ static int dsi_dump_dsi_clocks(struct seq_file *s, void *p) #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS static int dsi_dump_dsi_irqs(struct seq_file *s, void *p) { - struct dsi_data *dsi = p; + struct dsi_data *dsi = s->private; unsigned long flags; struct dsi_irq_stats stats; @@ -1558,7 +1558,7 @@ static int dsi_dump_dsi_irqs(struct seq_file *s, void *p) static int dsi_dump_dsi_regs(struct seq_file *s, void *p) { - struct dsi_data *dsi = p; + struct dsi_data *dsi = s->private; if (dsi_runtime_get(dsi)) return 0; @@ -4751,6 +4751,17 @@ static int dsi_set_config(struct omap_dss_device *dssdev, dsi->vm.flags |= DISPLAY_FLAGS_HSYNC_HIGH; dsi->vm.flags &= ~DISPLAY_FLAGS_VSYNC_LOW; dsi->vm.flags |= DISPLAY_FLAGS_VSYNC_HIGH; + /* + * HACK: These flags should be handled through the omap_dss_device bus + * flags, but this will only be possible when the DSI encoder will be + * converted to the omapdrm-managed encoder model. + */ + dsi->vm.flags &= ~DISPLAY_FLAGS_PIXDATA_NEGEDGE; + dsi->vm.flags |= DISPLAY_FLAGS_PIXDATA_POSEDGE; + dsi->vm.flags &= ~DISPLAY_FLAGS_DE_LOW; + dsi->vm.flags |= DISPLAY_FLAGS_DE_HIGH; + dsi->vm.flags &= ~DISPLAY_FLAGS_SYNC_POSEDGE; + dsi->vm.flags |= DISPLAY_FLAGS_SYNC_NEGEDGE; dss_mgr_set_timings(&dsi->output, &dsi->vm); @@ -5083,15 +5094,15 @@ static int dsi_bind(struct device *dev, struct device *master, void *data) snprintf(name, sizeof(name), "dsi%u_regs", dsi->module_id + 1); dsi->debugfs.regs = dss_debugfs_create_file(dss, name, - dsi_dump_dsi_regs, &dsi); + dsi_dump_dsi_regs, dsi); #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS snprintf(name, sizeof(name), "dsi%u_irqs", dsi->module_id + 1); dsi->debugfs.irqs = dss_debugfs_create_file(dss, name, - dsi_dump_dsi_irqs, &dsi); + dsi_dump_dsi_irqs, dsi); #endif snprintf(name, sizeof(name), "dsi%u_clks", dsi->module_id + 1); dsi->debugfs.clks = dss_debugfs_create_file(dss, name, - dsi_dump_dsi_clocks, &dsi); + dsi_dump_dsi_clocks, dsi); return 0; } @@ -5104,8 +5115,6 @@ static void dsi_unbind(struct device *dev, struct device *master, void *data) dss_debugfs_remove_file(dsi->debugfs.irqs); dss_debugfs_remove_file(dsi->debugfs.regs); - of_platform_depopulate(dev); - WARN_ON(dsi->scp_clk_refcount > 0); dss_pll_unregister(&dsi->pll); @@ -5457,6 +5466,8 @@ static int dsi_remove(struct platform_device *pdev) dsi_uninit_output(dsi); + of_platform_depopulate(&pdev->dev); + pm_runtime_disable(&pdev->dev); if (dsi->vdds_dsi_reg != NULL && dsi->vdds_dsi_enabled) { diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index d587779a80b4..a97294ac96d5 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5676,7 +5676,7 @@ int ci_dpm_init(struct radeon_device *rdev) u16 data_offset, size; u8 frev, crev; struct ci_power_info *pi; - enum pci_bus_speed speed_cap; + enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN; struct pci_dev *root = rdev->pdev->bus->self; int ret; @@ -5685,7 +5685,8 @@ int ci_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi; - speed_cap = pcie_get_speed_cap(root); + if (!pci_is_root_bus(rdev->pdev->bus)) + speed_cap = pcie_get_speed_cap(root); if (speed_cap == PCI_SPEED_UNKNOWN) { pi->sys_pcie_mask = 0; } else { diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 8fb60b3af015..0a785ef0ab66 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -6899,7 +6899,7 @@ int si_dpm_init(struct radeon_device *rdev) struct ni_power_info *ni_pi; struct si_power_info *si_pi; struct atom_clock_dividers dividers; - enum pci_bus_speed speed_cap; + enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN; struct pci_dev *root = rdev->pdev->bus->self; int ret; @@ -6911,7 +6911,8 @@ int si_dpm_init(struct radeon_device *rdev) eg_pi = &ni_pi->eg; pi = &eg_pi->rv7xx; - speed_cap = pcie_get_speed_cap(root); + if (!pci_is_root_bus(rdev->pdev->bus)) + speed_cap = pcie_get_speed_cap(root); if (speed_cap == PCI_SPEED_UNKNOWN) { si_pi->sys_pcie_mask = 0; } else { diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c index 37f93022a106..c0351abf83a3 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.c +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c @@ -1,17 +1,8 @@ -//SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd * Author: * Sandy Huang <hjc@rock-chips.com> - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <drm/drmP.h> diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.h b/drivers/gpu/drm/rockchip/rockchip_rgb.h index 38b52e63b2b0..27b9635124bc 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.h +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.h @@ -1,17 +1,8 @@ -//SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd * Author: * Sandy Huang <hjc@rock-chips.com> - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifdef CONFIG_ROCKCHIP_RGB diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 4463d3826ecb..e2942c9a11a7 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -440,13 +440,10 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) while ((entity->dependency = sched->ops->dependency(sched_job, entity))) { + trace_drm_sched_job_wait_dep(sched_job, entity->dependency); - if (drm_sched_entity_add_dependency_cb(entity)) { - - trace_drm_sched_job_wait_dep(sched_job, - entity->dependency); + if (drm_sched_entity_add_dependency_cb(entity)) return NULL; - } } /* skip jobs from entity that marked guilty */ diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 0420f5c978b9..cf45d0f940f9 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -761,6 +761,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, return PTR_ERR(tcon->sclk0); } } + clk_prepare_enable(tcon->sclk0); if (tcon->quirks->has_channel_1) { tcon->sclk1 = devm_clk_get(dev, "tcon-ch1"); @@ -775,6 +776,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon) { + clk_disable_unprepare(tcon->sclk0); clk_disable_unprepare(tcon->clk); } diff --git a/drivers/gpu/drm/vkms/vkms_crc.c b/drivers/gpu/drm/vkms/vkms_crc.c index 9d9e8146db90..d7b409a3c0f8 100644 --- a/drivers/gpu/drm/vkms/vkms_crc.c +++ b/drivers/gpu/drm/vkms/vkms_crc.c @@ -1,4 +1,5 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0+ + #include "vkms_drv.h" #include <linux/crc32.h> #include <drm/drm_atomic.h> diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index 177bbcb38306..eb56ee893761 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -1,10 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +// SPDX-License-Identifier: GPL-2.0+ #include "vkms_drv.h" #include <drm/drm_atomic_helper.h> diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index 83087877565c..7dcbecb5fac2 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -1,9 +1,4 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +// SPDX-License-Identifier: GPL-2.0+ /** * DOC: vkms (Virtual Kernel Modesetting) diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index e4469cd3d254..81f1cfbeb936 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + #ifndef _VKMS_DRV_H_ #define _VKMS_DRV_H_ diff --git a/drivers/gpu/drm/vkms/vkms_gem.c b/drivers/gpu/drm/vkms/vkms_gem.c index 80311daed47a..138b0bb325cf 100644 --- a/drivers/gpu/drm/vkms/vkms_gem.c +++ b/drivers/gpu/drm/vkms/vkms_gem.c @@ -1,10 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +// SPDX-License-Identifier: GPL-2.0+ #include <linux/shmem_fs.h> diff --git a/drivers/gpu/drm/vkms/vkms_output.c b/drivers/gpu/drm/vkms/vkms_output.c index 271a0eb9042c..4173e4f48334 100644 --- a/drivers/gpu/drm/vkms/vkms_output.c +++ b/drivers/gpu/drm/vkms/vkms_output.c @@ -1,10 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +// SPDX-License-Identifier: GPL-2.0+ #include "vkms_drv.h" #include <drm/drm_crtc_helper.h> diff --git a/drivers/gpu/drm/vkms/vkms_plane.c b/drivers/gpu/drm/vkms/vkms_plane.c index 418817600ad1..0e67d2d42f0c 100644 --- a/drivers/gpu/drm/vkms/vkms_plane.c +++ b/drivers/gpu/drm/vkms/vkms_plane.c @@ -1,10 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +// SPDX-License-Identifier: GPL-2.0+ #include "vkms_drv.h" #include <drm/drm_plane_helper.h> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 25afb1d594e3..7ef5dcb06104 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -26,6 +26,7 @@ **************************************************************************/ #include <linux/module.h> #include <linux/console.h> +#include <linux/dma-mapping.h> #include <drm/drmP.h> #include "vmwgfx_drv.h" @@ -34,7 +35,6 @@ #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_module.h> -#include <linux/intel-iommu.h> #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" #define VMWGFX_CHIP_SVGAII 0 @@ -546,6 +546,21 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv) } /** + * vmw_assume_iommu - Figure out whether coherent dma-remapping might be + * taking place. + * @dev: Pointer to the struct drm_device. + * + * Return: true if iommu present, false otherwise. + */ +static bool vmw_assume_iommu(struct drm_device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev->dev); + + return !dma_is_direct(ops) && ops && + ops->map_page != dma_direct_map_page; +} + +/** * vmw_dma_select_mode - Determine how DMA mappings should be set up for this * system. * @@ -565,55 +580,27 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_alloc_coherent] = "Using coherent TTM pages.", [vmw_dma_map_populate] = "Keeping DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; -#ifdef CONFIG_X86 - const struct dma_map_ops *dma_ops = get_dma_ops(dev_priv->dev->dev); -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_enabled) { + if (vmw_force_coherent) + dev_priv->map_mode = vmw_dma_alloc_coherent; + else if (vmw_assume_iommu(dev_priv->dev)) dev_priv->map_mode = vmw_dma_map_populate; - goto out_fixup; - } -#endif - - if (!(vmw_force_iommu || vmw_force_coherent)) { + else if (!vmw_force_iommu) dev_priv->map_mode = vmw_dma_phys; - DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); - return 0; - } - - dev_priv->map_mode = vmw_dma_map_populate; - - if (dma_ops && dma_ops->sync_single_for_cpu) + else if (IS_ENABLED(CONFIG_SWIOTLB) && swiotlb_nr_tbl()) dev_priv->map_mode = vmw_dma_alloc_coherent; -#ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl() == 0) + else dev_priv->map_mode = vmw_dma_map_populate; -#endif -#ifdef CONFIG_INTEL_IOMMU -out_fixup: -#endif - if (dev_priv->map_mode == vmw_dma_map_populate && - vmw_restrict_iommu) + if (dev_priv->map_mode == vmw_dma_map_populate && vmw_restrict_iommu) dev_priv->map_mode = vmw_dma_map_bind; - if (vmw_force_coherent) - dev_priv->map_mode = vmw_dma_alloc_coherent; - -#if !defined(CONFIG_SWIOTLB) && !defined(CONFIG_INTEL_IOMMU) - /* - * No coherent page pool - */ - if (dev_priv->map_mode == vmw_dma_alloc_coherent) + /* No TTM coherent page pool? FIXME: Ask TTM instead! */ + if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) && + (dev_priv->map_mode == vmw_dma_alloc_coherent)) return -EINVAL; -#endif - -#else /* CONFIG_X86 */ - dev_priv->map_mode = vmw_dma_map_populate; -#endif /* CONFIG_X86 */ DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); - return 0; } @@ -625,24 +612,20 @@ out_fixup: * With 32-bit we can only handle 32 bit PFNs. Optionally set that * restriction also for 64-bit systems. */ -#ifdef CONFIG_INTEL_IOMMU static int vmw_dma_masks(struct vmw_private *dev_priv) { struct drm_device *dev = dev_priv->dev; + int ret = 0; - if (intel_iommu_enabled && + ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)); + if (dev_priv->map_mode != vmw_dma_phys && (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) { DRM_INFO("Restricting DMA addresses to 44 bits.\n"); - return dma_set_mask(dev->dev, DMA_BIT_MASK(44)); + return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44)); } - return 0; -} -#else -static int vmw_dma_masks(struct vmw_private *dev_priv) -{ - return 0; + + return ret; } -#endif static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index f2d13a72c05d..88b8178d4687 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3570,7 +3570,7 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv, *p_fence = NULL; } - return 0; + return ret; } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index b351fb5214d3..ed2f67822f45 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1646,7 +1646,7 @@ static int vmw_kms_check_topology(struct drm_device *dev, struct drm_connector_state *conn_state; struct vmw_connector_state *vmw_conn_state; - if (!du->pref_active) { + if (!du->pref_active && new_crtc_state->enable) { ret = -EINVAL; goto clean; } @@ -2554,8 +2554,8 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, user_fence_rep) { struct vmw_fence_obj *fence = NULL; - uint32_t handle; - int ret; + uint32_t handle = 0; + int ret = 0; if (file_priv || user_fence_rep || vmw_validation_has_bos(ctx) || out_fence) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 474b00e19697..0a7d4395d427 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -898,8 +898,8 @@ static struct ipu_devtype ipu_type_imx51 = { .cpmem_ofs = 0x1f000000, .srm_ofs = 0x1f040000, .tpm_ofs = 0x1f060000, - .csi0_ofs = 0x1f030000, - .csi1_ofs = 0x1f038000, + .csi0_ofs = 0x1e030000, + .csi1_ofs = 0x1e038000, .ic_ofs = 0x1e020000, .disp0_ofs = 0x1e040000, .disp1_ofs = 0x1e048000, @@ -914,8 +914,8 @@ static struct ipu_devtype ipu_type_imx53 = { .cpmem_ofs = 0x07000000, .srm_ofs = 0x07040000, .tpm_ofs = 0x07060000, - .csi0_ofs = 0x07030000, - .csi1_ofs = 0x07038000, + .csi0_ofs = 0x06030000, + .csi1_ofs = 0x06038000, .ic_ofs = 0x06020000, .disp0_ofs = 0x06040000, .disp1_ofs = 0x06048000, diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c index 2f8db9d62551..4a28f3fbb0a2 100644 --- a/drivers/gpu/ipu-v3/ipu-pre.c +++ b/drivers/gpu/ipu-v3/ipu-pre.c @@ -106,6 +106,7 @@ struct ipu_pre { void *buffer_virt; bool in_use; unsigned int safe_window_end; + unsigned int last_bufaddr; }; static DEFINE_MUTEX(ipu_pre_list_mutex); @@ -185,6 +186,7 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, writel(bufaddr, pre->regs + IPU_PRE_CUR_BUF); writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); + pre->last_bufaddr = bufaddr; val = IPU_PRE_PREF_ENG_CTRL_INPUT_PIXEL_FORMAT(0) | IPU_PRE_PREF_ENG_CTRL_INPUT_ACTIVE_BPP(active_bpp) | @@ -242,7 +244,11 @@ void ipu_pre_update(struct ipu_pre *pre, unsigned int bufaddr) unsigned short current_yblock; u32 val; + if (bufaddr == pre->last_bufaddr) + return; + writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); + pre->last_bufaddr = bufaddr; do { if (time_after(jiffies, timeout)) { diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index c530476edba6..ac9fda1b5a72 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -30,6 +30,7 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/kfifo.h> #include <linux/sched/signal.h> #include <linux/export.h> #include <linux/slab.h> @@ -661,17 +662,12 @@ EXPORT_SYMBOL_GPL(hid_dump_device); /* enqueue string to 'events' ring buffer */ void hid_debug_event(struct hid_device *hdev, char *buf) { - unsigned i; struct hid_debug_list *list; unsigned long flags; spin_lock_irqsave(&hdev->debug_list_lock, flags); - list_for_each_entry(list, &hdev->debug_list, node) { - for (i = 0; buf[i]; i++) - list->hid_debug_buf[(list->tail + i) % HID_DEBUG_BUFSIZE] = - buf[i]; - list->tail = (list->tail + i) % HID_DEBUG_BUFSIZE; - } + list_for_each_entry(list, &hdev->debug_list, node) + kfifo_in(&list->hid_debug_fifo, buf, strlen(buf)); spin_unlock_irqrestore(&hdev->debug_list_lock, flags); wake_up_interruptible(&hdev->debug_wait); @@ -722,8 +718,7 @@ void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 valu hid_debug_event(hdev, buf); kfree(buf); - wake_up_interruptible(&hdev->debug_wait); - + wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_dump_input); @@ -1083,8 +1078,8 @@ static int hid_debug_events_open(struct inode *inode, struct file *file) goto out; } - if (!(list->hid_debug_buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_KERNEL))) { - err = -ENOMEM; + err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL); + if (err) { kfree(list); goto out; } @@ -1104,77 +1099,57 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hid_debug_list *list = file->private_data; - int ret = 0, len; + int ret = 0, copied; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); - while (ret == 0) { - if (list->head == list->tail) { - add_wait_queue(&list->hdev->debug_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); - - while (list->head == list->tail) { - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } + if (kfifo_is_empty(&list->hid_debug_fifo)) { + add_wait_queue(&list->hdev->debug_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + while (kfifo_is_empty(&list->hid_debug_fifo)) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } - if (!list->hdev || !list->hdev->debug) { - ret = -EIO; - set_current_state(TASK_RUNNING); - goto out; - } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } - /* allow O_NONBLOCK from other threads */ - mutex_unlock(&list->read_mutex); - schedule(); - mutex_lock(&list->read_mutex); - set_current_state(TASK_INTERRUPTIBLE); + /* if list->hdev is NULL we cannot remove_wait_queue(). + * if list->hdev->debug is 0 then hid_debug_unregister() + * was already called and list->hdev is being destroyed. + * if we add remove_wait_queue() here we can hit a race. + */ + if (!list->hdev || !list->hdev->debug) { + ret = -EIO; + set_current_state(TASK_RUNNING); + goto out; } - set_current_state(TASK_RUNNING); - remove_wait_queue(&list->hdev->debug_wait, &wait); + /* allow O_NONBLOCK from other threads */ + mutex_unlock(&list->read_mutex); + schedule(); + mutex_lock(&list->read_mutex); + set_current_state(TASK_INTERRUPTIBLE); } - if (ret) - goto out; + __set_current_state(TASK_RUNNING); + remove_wait_queue(&list->hdev->debug_wait, &wait); - /* pass the ringbuffer contents to userspace */ -copy_rest: - if (list->tail == list->head) + if (ret) goto out; - if (list->tail > list->head) { - len = list->tail - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - ret += len; - list->head += len; - } else { - len = HID_DEBUG_BUFSIZE - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - list->head = 0; - ret += len; - count -= len; - if (count > 0) - goto copy_rest; - } - } + + /* pass the fifo content to userspace, locking is not needed with only + * one concurrent reader and one concurrent writer + */ + ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied); + if (ret) + goto out; + ret = copied; out: mutex_unlock(&list->read_mutex); return ret; @@ -1185,7 +1160,7 @@ static __poll_t hid_debug_events_poll(struct file *file, poll_table *wait) struct hid_debug_list *list = file->private_data; poll_wait(file, &list->hdev->debug_wait, wait); - if (list->head != list->tail) + if (!kfifo_is_empty(&list->hid_debug_fifo)) return EPOLLIN | EPOLLRDNORM; if (!list->hdev->debug) return EPOLLERR | EPOLLHUP; @@ -1200,7 +1175,7 @@ static int hid_debug_events_release(struct inode *inode, struct file *file) spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); - kfree(list->hid_debug_buf); + kfifo_free(&list->hid_debug_fifo); kfree(list); return 0; @@ -1246,4 +1221,3 @@ void hid_debug_exit(void) { debugfs_remove_recursive(hid_debug_root); } - diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 4adec4ab7d06..59ee01f3d022 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -3594,7 +3594,8 @@ nct6775_check_fan_inputs(struct nct6775_data *data) fan5pin |= cr1b & BIT(5); fan5pin |= creb & BIT(5); - fan6pin = creb & BIT(3); + fan6pin = !dsw_en && (cr2d & BIT(1)); + fan6pin |= creb & BIT(3); pwm5pin |= cr2d & BIT(7); pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0)); diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index b1086bfb0465..cd9c65f3d404 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1500,8 +1500,7 @@ static int omap_i2c_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int omap_i2c_runtime_suspend(struct device *dev) +static int __maybe_unused omap_i2c_runtime_suspend(struct device *dev) { struct omap_i2c_dev *omap = dev_get_drvdata(dev); @@ -1527,7 +1526,7 @@ static int omap_i2c_runtime_suspend(struct device *dev) return 0; } -static int omap_i2c_runtime_resume(struct device *dev) +static int __maybe_unused omap_i2c_runtime_resume(struct device *dev) { struct omap_i2c_dev *omap = dev_get_drvdata(dev); @@ -1542,20 +1541,18 @@ static int omap_i2c_runtime_resume(struct device *dev) } static const struct dev_pm_ops omap_i2c_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(omap_i2c_runtime_suspend, omap_i2c_runtime_resume, NULL) }; -#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops) -#else -#define OMAP_I2C_PM_OPS NULL -#endif /* CONFIG_PM */ static struct platform_driver omap_i2c_driver = { .probe = omap_i2c_probe, .remove = omap_i2c_remove, .driver = { .name = "omap_i2c", - .pm = OMAP_I2C_PM_OPS, + .pm = &omap_i2c_pm_ops, .of_match_table = of_match_ptr(omap_i2c_of_match), }, }; diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index c39f89d2deba..2dc628d4f1ae 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -1828,7 +1828,7 @@ int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master, ret = i3c_master_retrieve_dev_info(newdev); if (ret) - goto err_free_dev; + goto err_detach_dev; olddev = i3c_master_search_i3c_dev_duplicate(newdev); if (olddev) { diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index f8c00b94817f..bb03079fbade 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -419,12 +419,9 @@ static void dw_i3c_master_enqueue_xfer(struct dw_i3c_master *master, spin_unlock_irqrestore(&master->xferqueue.lock, flags); } -static void dw_i3c_master_dequeue_xfer(struct dw_i3c_master *master, - struct dw_i3c_xfer *xfer) +static void dw_i3c_master_dequeue_xfer_locked(struct dw_i3c_master *master, + struct dw_i3c_xfer *xfer) { - unsigned long flags; - - spin_lock_irqsave(&master->xferqueue.lock, flags); if (master->xferqueue.cur == xfer) { u32 status; @@ -439,6 +436,15 @@ static void dw_i3c_master_dequeue_xfer(struct dw_i3c_master *master, } else { list_del_init(&xfer->node); } +} + +static void dw_i3c_master_dequeue_xfer(struct dw_i3c_master *master, + struct dw_i3c_xfer *xfer) +{ + unsigned long flags; + + spin_lock_irqsave(&master->xferqueue.lock, flags); + dw_i3c_master_dequeue_xfer_locked(master, xfer); spin_unlock_irqrestore(&master->xferqueue.lock, flags); } @@ -494,7 +500,7 @@ static void dw_i3c_master_end_xfer_locked(struct dw_i3c_master *master, u32 isr) complete(&xfer->comp); if (ret < 0) { - dw_i3c_master_dequeue_xfer(master, xfer); + dw_i3c_master_dequeue_xfer_locked(master, xfer); writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_RESUME, master->regs + DEVICE_CTRL); } diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index da58020a144e..33a28cde126c 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -235,21 +235,28 @@ EXPORT_SYMBOL_GPL(ide_prep_sense); int ide_queue_sense_rq(ide_drive_t *drive, void *special) { - struct request *sense_rq = drive->sense_rq; + ide_hwif_t *hwif = drive->hwif; + struct request *sense_rq; + unsigned long flags; + + spin_lock_irqsave(&hwif->lock, flags); /* deferred failure from ide_prep_sense() */ if (!drive->sense_rq_armed) { printk(KERN_WARNING PFX "%s: error queuing a sense request\n", drive->name); + spin_unlock_irqrestore(&hwif->lock, flags); return -ENOMEM; } + sense_rq = drive->sense_rq; ide_req(sense_rq)->special = special; drive->sense_rq_armed = false; drive->hwif->rq = NULL; ide_insert_request_head(drive, sense_rq); + spin_unlock_irqrestore(&hwif->lock, flags); return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 8445b484ae69..b137f27a34d5 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -68,8 +68,10 @@ int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error, } if (!blk_update_request(rq, error, nr_bytes)) { - if (rq == drive->sense_rq) + if (rq == drive->sense_rq) { drive->sense_rq = NULL; + drive->sense_rq_active = false; + } __blk_mq_end_request(rq, error); return 0; @@ -451,16 +453,11 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3); } -/* - * Issue a new request to a device. - */ -blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) +blk_status_t ide_issue_rq(ide_drive_t *drive, struct request *rq, + bool local_requeue) { - ide_drive_t *drive = hctx->queue->queuedata; - ide_hwif_t *hwif = drive->hwif; + ide_hwif_t *hwif = drive->hwif; struct ide_host *host = hwif->host; - struct request *rq = bd->rq; ide_startstop_t startstop; if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_DONTPREP)) { @@ -474,8 +471,6 @@ blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, if (ide_lock_host(host, hwif)) return BLK_STS_DEV_RESOURCE; - blk_mq_start_request(rq); - spin_lock_irq(&hwif->lock); if (!ide_lock_port(hwif)) { @@ -511,18 +506,6 @@ repeat: drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); /* - * we know that the queue isn't empty, but this can happen - * if ->prep_rq() decides to kill a request - */ - if (!rq) { - rq = bd->rq; - if (!rq) { - ide_unlock_port(hwif); - goto out; - } - } - - /* * Sanity: don't accept a request that isn't a PM request * if we are currently power managed. This is very important as * blk_stop_queue() doesn't prevent the blk_fetch_request() @@ -560,9 +543,12 @@ repeat: } } else { plug_device: + if (local_requeue) + list_add(&rq->queuelist, &drive->rq_list); spin_unlock_irq(&hwif->lock); ide_unlock_host(host); - ide_requeue_and_plug(drive, rq); + if (!local_requeue) + ide_requeue_and_plug(drive, rq); return BLK_STS_OK; } @@ -573,6 +559,26 @@ out: return BLK_STS_OK; } +/* + * Issue a new request to a device. + */ +blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + ide_drive_t *drive = hctx->queue->queuedata; + ide_hwif_t *hwif = drive->hwif; + + spin_lock_irq(&hwif->lock); + if (drive->sense_rq_active) { + spin_unlock_irq(&hwif->lock); + return BLK_STS_DEV_RESOURCE; + } + spin_unlock_irq(&hwif->lock); + + blk_mq_start_request(bd->rq); + return ide_issue_rq(drive, bd->rq, false); +} + static int drive_is_ready(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; @@ -893,13 +899,8 @@ EXPORT_SYMBOL_GPL(ide_pad_transfer); void ide_insert_request_head(ide_drive_t *drive, struct request *rq) { - ide_hwif_t *hwif = drive->hwif; - unsigned long flags; - - spin_lock_irqsave(&hwif->lock, flags); + drive->sense_rq_active = true; list_add_tail(&rq->queuelist, &drive->rq_list); - spin_unlock_irqrestore(&hwif->lock, flags); - kblockd_schedule_work(&drive->rq_work); } EXPORT_SYMBOL_GPL(ide_insert_request_head); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 102aa3bc3e7f..8af7af6001eb 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -54,7 +54,9 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; + spin_lock_irq(&hwif->lock); ide_insert_request_head(drive, rq); + spin_unlock_irq(&hwif->lock); out: return; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 63627be0811a..5aeaca24a28f 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1159,18 +1159,27 @@ static void drive_rq_insert_work(struct work_struct *work) ide_drive_t *drive = container_of(work, ide_drive_t, rq_work); ide_hwif_t *hwif = drive->hwif; struct request *rq; + blk_status_t ret; LIST_HEAD(list); - spin_lock_irq(&hwif->lock); - if (!list_empty(&drive->rq_list)) - list_splice_init(&drive->rq_list, &list); - spin_unlock_irq(&hwif->lock); + blk_mq_quiesce_queue(drive->queue); - while (!list_empty(&list)) { - rq = list_first_entry(&list, struct request, queuelist); + ret = BLK_STS_OK; + spin_lock_irq(&hwif->lock); + while (!list_empty(&drive->rq_list)) { + rq = list_first_entry(&drive->rq_list, struct request, queuelist); list_del_init(&rq->queuelist); - blk_execute_rq_nowait(drive->queue, rq->rq_disk, rq, true, NULL); + + spin_unlock_irq(&hwif->lock); + ret = ide_issue_rq(drive, rq, true); + spin_lock_irq(&hwif->lock); } + spin_unlock_irq(&hwif->lock); + + blk_mq_unquiesce_queue(drive->queue); + + if (ret != BLK_STS_OK) + kblockd_schedule_work(&drive->rq_work); } static const u8 ide_hwif_to_major[] = diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 031d568b4972..4e339cfd0c54 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -27,9 +27,18 @@ #include <linux/iio/machine.h> #include <linux/iio/driver.h> -#define AXP288_ADC_EN_MASK 0xF1 -#define AXP288_ADC_TS_PIN_GPADC 0xF2 -#define AXP288_ADC_TS_PIN_ON 0xF3 +/* + * This mask enables all ADCs except for the battery temp-sensor (TS), that is + * left as-is to avoid breaking charging on devices without a temp-sensor. + */ +#define AXP288_ADC_EN_MASK 0xF0 +#define AXP288_ADC_TS_ENABLE 0x01 + +#define AXP288_ADC_TS_CURRENT_ON_OFF_MASK GENMASK(1, 0) +#define AXP288_ADC_TS_CURRENT_OFF (0 << 0) +#define AXP288_ADC_TS_CURRENT_ON_WHEN_CHARGING (1 << 0) +#define AXP288_ADC_TS_CURRENT_ON_ONDEMAND (2 << 0) +#define AXP288_ADC_TS_CURRENT_ON (3 << 0) enum axp288_adc_id { AXP288_ADC_TS, @@ -44,6 +53,7 @@ enum axp288_adc_id { struct axp288_adc_info { int irq; struct regmap *regmap; + bool ts_enabled; }; static const struct iio_chan_spec axp288_adc_channels[] = { @@ -115,21 +125,33 @@ static int axp288_adc_read_channel(int *val, unsigned long address, return IIO_VAL_INT; } -static int axp288_adc_set_ts(struct regmap *regmap, unsigned int mode, - unsigned long address) +/* + * The current-source used for the battery temp-sensor (TS) is shared + * with the GPADC. For proper fuel-gauge and charger operation the TS + * current-source needs to be permanently on. But to read the GPADC we + * need to temporary switch the TS current-source to ondemand, so that + * the GPADC can use it, otherwise we will always read an all 0 value. + */ +static int axp288_adc_set_ts(struct axp288_adc_info *info, + unsigned int mode, unsigned long address) { int ret; - /* channels other than GPADC do not need to switch TS pin */ + /* No need to switch the current-source if the TS pin is disabled */ + if (!info->ts_enabled) + return 0; + + /* Channels other than GPADC do not need the current source */ if (address != AXP288_GP_ADC_H) return 0; - ret = regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode); + ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, mode); if (ret) return ret; /* When switching to the GPADC pin give things some time to settle */ - if (mode == AXP288_ADC_TS_PIN_GPADC) + if (mode == AXP288_ADC_TS_CURRENT_ON_ONDEMAND) usleep_range(6000, 10000); return 0; @@ -145,14 +167,14 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, mutex_lock(&indio_dev->mlock); switch (mask) { case IIO_CHAN_INFO_RAW: - if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_GPADC, + if (axp288_adc_set_ts(info, AXP288_ADC_TS_CURRENT_ON_ONDEMAND, chan->address)) { dev_err(&indio_dev->dev, "GPADC mode\n"); ret = -EINVAL; break; } ret = axp288_adc_read_channel(val, chan->address, info->regmap); - if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_ON, + if (axp288_adc_set_ts(info, AXP288_ADC_TS_CURRENT_ON, chan->address)) dev_err(&indio_dev->dev, "TS pin restore\n"); break; @@ -164,13 +186,35 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, return ret; } -static int axp288_adc_set_state(struct regmap *regmap) +static int axp288_adc_initialize(struct axp288_adc_info *info) { - /* ADC should be always enabled for internal FG to function */ - if (regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, AXP288_ADC_TS_PIN_ON)) - return -EIO; + int ret, adc_enable_val; + + /* + * Determine if the TS pin is enabled and set the TS current-source + * accordingly. + */ + ret = regmap_read(info->regmap, AXP20X_ADC_EN1, &adc_enable_val); + if (ret) + return ret; + + if (adc_enable_val & AXP288_ADC_TS_ENABLE) { + info->ts_enabled = true; + ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, + AXP288_ADC_TS_CURRENT_ON); + } else { + info->ts_enabled = false; + ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, + AXP288_ADC_TS_CURRENT_OFF); + } + if (ret) + return ret; - return regmap_write(regmap, AXP20X_ADC_EN1, AXP288_ADC_EN_MASK); + /* Turn on the ADC for all channels except TS, leave TS as is */ + return regmap_update_bits(info->regmap, AXP20X_ADC_EN1, + AXP288_ADC_EN_MASK, AXP288_ADC_EN_MASK); } static const struct iio_info axp288_adc_iio_info = { @@ -200,7 +244,7 @@ static int axp288_adc_probe(struct platform_device *pdev) * Set ADC to enabled state at all time, including system suspend. * otherwise internal fuel gauge functionality may be affected. */ - ret = axp288_adc_set_state(axp20x->regmap); + ret = axp288_adc_initialize(info); if (ret) { dev_err(&pdev->dev, "unable to enable ADC device\n"); return ret; diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c index 184d686ebd99..8b4568edd5cb 100644 --- a/drivers/iio/adc/ti-ads8688.c +++ b/drivers/iio/adc/ti-ads8688.c @@ -41,6 +41,7 @@ #define ADS8688_VREF_MV 4096 #define ADS8688_REALBITS 16 +#define ADS8688_MAX_CHANNELS 8 /* * enum ads8688_range - ADS8688 reference voltage range @@ -385,7 +386,7 @@ static irqreturn_t ads8688_trigger_handler(int irq, void *p) { struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; - u16 buffer[8]; + u16 buffer[ADS8688_MAX_CHANNELS + sizeof(s64)/sizeof(u16)]; int i, j = 0; for (i = 0; i < indio_dev->masklength; i++) { diff --git a/drivers/iio/chemical/atlas-ph-sensor.c b/drivers/iio/chemical/atlas-ph-sensor.c index a406ad31b096..3a20cb5d9bff 100644 --- a/drivers/iio/chemical/atlas-ph-sensor.c +++ b/drivers/iio/chemical/atlas-ph-sensor.c @@ -444,9 +444,8 @@ static int atlas_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: switch (chan->type) { case IIO_TEMP: - *val = 1; /* 0.01 */ - *val2 = 100; - break; + *val = 10; + return IIO_VAL_INT; case IIO_PH: *val = 1; /* 0.001 */ *val2 = 1000; @@ -477,7 +476,7 @@ static int atlas_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct atlas_data *data = iio_priv(indio_dev); - __be32 reg = cpu_to_be32(val); + __be32 reg = cpu_to_be32(val / 10); if (val2 != 0 || val < 0 || val > 20000) return -EINVAL; diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 3cd830d52967..616734313f0c 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -267,7 +267,6 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, #endif struct ib_device *ib_device_get_by_index(u32 ifindex); -void ib_device_put(struct ib_device *device); /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 8872453e26c0..238ec42778ef 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -156,19 +156,26 @@ struct ib_device *ib_device_get_by_index(u32 index) down_read(&lists_rwsem); device = __ib_device_get_by_index(index); if (device) { - /* Do not return a device if unregistration has started. */ - if (!refcount_inc_not_zero(&device->refcount)) + if (!ib_device_try_get(device)) device = NULL; } up_read(&lists_rwsem); return device; } +/** + * ib_device_put - Release IB device reference + * @device: device whose reference to be released + * + * ib_device_put() releases reference to the IB device to allow it to be + * unregistered and eventually free. + */ void ib_device_put(struct ib_device *device) { if (refcount_dec_and_test(&device->refcount)) complete(&device->unreg_completion); } +EXPORT_SYMBOL(ib_device_put); static struct ib_device *__ib_device_get_by_name(const char *name) { @@ -303,7 +310,6 @@ struct ib_device *ib_alloc_device(size_t size) rwlock_init(&device->client_data_lock); INIT_LIST_HEAD(&device->client_data_list); INIT_LIST_HEAD(&device->port_list); - refcount_set(&device->refcount, 1); init_completion(&device->unreg_completion); return device; @@ -620,6 +626,7 @@ int ib_register_device(struct ib_device *device, const char *name, goto cg_cleanup; } + refcount_set(&device->refcount, 1); device->reg_state = IB_DEV_REGISTERED; list_for_each_entry(client, &client_list, list) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index a4ec43093cb3..acb882f279cb 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -352,6 +352,8 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, umem->writable = 1; umem->is_odp = 1; odp_data->per_mm = per_mm; + umem->owning_mm = per_mm->mm; + mmgrab(umem->owning_mm); mutex_init(&odp_data->umem_mutex); init_completion(&odp_data->notifier_completion); @@ -384,6 +386,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, out_page_list: vfree(odp_data->page_list); out_odp_data: + mmdrop(umem->owning_mm); kfree(odp_data); return ERR_PTR(ret); } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 2890a77339e1..5f366838b7ff 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -204,6 +204,9 @@ void ib_uverbs_release_file(struct kref *ref) if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); + if (file->async_file) + kref_put(&file->async_file->ref, + ib_uverbs_release_async_event_file); put_device(&file->device->dev); kfree(file); } @@ -964,11 +967,19 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) /* Get an arbitrary mm pointer that hasn't been cleaned yet */ mutex_lock(&ufile->umap_lock); - if (!list_empty(&ufile->umaps)) { - mm = list_first_entry(&ufile->umaps, - struct rdma_umap_priv, list) - ->vma->vm_mm; - mmget(mm); + while (!list_empty(&ufile->umaps)) { + int ret; + + priv = list_first_entry(&ufile->umaps, + struct rdma_umap_priv, list); + mm = priv->vma->vm_mm; + ret = mmget_not_zero(mm); + if (!ret) { + list_del_init(&priv->list); + mm = NULL; + continue; + } + break; } mutex_unlock(&ufile->umap_lock); if (!mm) @@ -1096,10 +1107,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) list_del_init(&file->list); mutex_unlock(&file->device->lists_mutex); - if (file->async_file) - kref_put(&file->async_file->ref, - ib_uverbs_release_async_event_file); - kref_put(&file->ref, ib_uverbs_release_file); return 0; diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 5030ec480370..2a3f2f01028d 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -168,12 +168,18 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr, static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)( struct uverbs_attr_bundle *attrs) { - struct ib_device *ib_dev = attrs->ufile->device->ib_dev; + struct ib_device *ib_dev; struct ib_port_attr attr = {}; struct ib_uverbs_query_port_resp_ex resp = {}; + struct ib_ucontext *ucontext; int ret; u8 port_num; + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */ if (!ib_dev->ops.query_port) return -EOPNOTSUPP; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c22ebc774a6a..f9a7e9d29c8b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -488,7 +488,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vmf = 1; break; case STATUS: - if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) { + if (flags & VM_WRITE) { ret = -EPERM; goto done; } diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 88242fe95eaa..bf96067876c9 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -987,7 +987,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = packet->ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 960b1946c365..12deacf442cf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -210,6 +210,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct hns_roce_ib_create_srq_resp resp = {}; struct hns_roce_srq *srq; int srq_desc_size; int srq_buf_size; @@ -378,16 +379,21 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, srq->event = hns_roce_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->srqn; + resp.srqn = srq->srqn; if (udata) { - if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { + if (ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp)))) { ret = -EFAULT; - goto err_wrid; + goto err_srqc_alloc; } } return &srq->ibsrq; +err_srqc_alloc: + hns_roce_srq_free(hr_dev, srq); + err_wrid: kvfree(srq->wrid); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 25439da8976c..936ee1314bcd 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); if (sqp->tx_ring[wire_tx_ix].ah) - rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); + mlx4_ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1902,7 +1902,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) if (wc.status == IB_WC_SUCCESS) { switch (wc.opcode) { case IB_WC_SEND: - rdma_destroy_ah(sqp->tx_ring[wc.wr_id & + mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; @@ -1931,7 +1931,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) " status = %d, wrid = 0x%llx\n", ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { - rdma_destroy_ah(sqp->tx_ring[wc.wr_id & + mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 356bccc715ee..6bcc63aaa50b 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -345,3 +345,40 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, counter_set_id); return err; } + +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port) +{ + int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); + int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); + int err = -ENOMEM; + void *data; + void *resp; + u32 *out; + u32 *in; + + in = kzalloc(inlen, GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); + MLX5_SET(mad_ifc_in, in, op_mod, opmod); + MLX5_SET(mad_ifc_in, in, port, port); + + data = MLX5_ADDR_OF(mad_ifc_in, in, mad); + memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + if (err) + goto out; + + resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); + memcpy(outb, resp, + MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); + +out: + kfree(out); + kfree(in); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 1e76dc67a369..923a7b93f507 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -63,4 +63,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index e8a1e4498e3f..798591a18484 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -630,8 +630,7 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), UAPI_DEF_CHAIN_OBJ_TREE( UVERBS_OBJECT_FLOW, - &mlx5_ib_fs, - UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + &mlx5_ib_fs), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_actions), {}, diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 46a9ddc8ca56..4700cffb5a00 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -3,10 +3,11 @@ * Copyright (c) 2018 Mellanox Technologies. All rights reserved. */ +#include <linux/mlx5/vport.h> #include "ib_rep.h" #include "srq.h" -static const struct mlx5_ib_profile rep_profile = { +static const struct mlx5_ib_profile vf_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), @@ -46,30 +47,16 @@ static const struct mlx5_ib_profile rep_profile = { }; static int -mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) -{ - struct mlx5_ib_dev *ibdev; - - ibdev = mlx5_ib_rep_to_dev(rep); - if (!__mlx5_ib_add(ibdev, ibdev->profile)) - return -EINVAL; - return 0; -} - -static void -mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep) -{ - struct mlx5_ib_dev *ibdev; - - ibdev = mlx5_ib_rep_to_dev(rep); - __mlx5_ib_remove(ibdev, ibdev->profile, MLX5_IB_STAGE_MAX); -} - -static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { + const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; + if (rep->vport == MLX5_VPORT_UPLINK) + profile = &uplink_rep_profile; + else + profile = &vf_rep_profile; + ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev)); if (!ibdev) return -ENOMEM; @@ -78,7 +65,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->mdev = dev; ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), MLX5_CAP_GEN(dev, num_vhca_ports)); - if (!__mlx5_ib_add(ibdev, &rep_profile)) + if (!__mlx5_ib_add(ibdev, profile)) return -EINVAL; rep->rep_if[REP_IB].priv = ibdev; @@ -105,53 +92,23 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) return mlx5_ib_rep_to_dev(rep); } -static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev) -{ - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev); - int vport; - - for (vport = 1; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5_ib_vport_rep_load; - rep_if.unload = mlx5_ib_vport_rep_unload; - rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; - mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB); - } -} - -static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev) +void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev); - int vport; - - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB); -} - -void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) -{ - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + struct mlx5_eswitch *esw = mdev->priv.eswitch; struct mlx5_eswitch_rep_if rep_if = {}; - rep_if.load = mlx5_ib_nic_rep_load; - rep_if.unload = mlx5_ib_nic_rep_unload; + rep_if.load = mlx5_ib_vport_rep_load; + rep_if.unload = mlx5_ib_vport_rep_unload; rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; - rep_if.priv = dev; - - mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB); - mlx5_ib_rep_register_vf_vports(dev); + mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB); } -void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) +void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + struct mlx5_eswitch *esw = mdev->priv.eswitch; - mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */ - mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/ + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); } u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 2ba73636a2fb..798d41e61fb4 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -10,14 +10,16 @@ #include "mlx5_ib.h" #ifdef CONFIG_MLX5_ESWITCH +extern const struct mlx5_ib_profile uplink_rep_profile; + u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, int vport_index); struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw); struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport_index); -void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev); -void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev); +void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev); +void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev); int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq); struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, @@ -48,8 +50,8 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, return NULL; } -static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {} -static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {} +static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {} +static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {} static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq) { diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 558638468edb..6c529e6f3a01 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -36,6 +36,7 @@ #include <rdma/ib_smi.h> #include <rdma/ib_pma.h> #include "mlx5_ib.h" +#include "cmd.h" enum { MLX5_IB_VENDOR_CLASS1 = 0x9, @@ -51,9 +52,10 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, return dev->mdev->port_caps[port_num - 1].has_smi; } -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad) +static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, + int ignore_bkey, u8 port, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const void *in_mad, + void *response_mad) { u8 op_modifier = 0; @@ -68,7 +70,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, if (ignore_bkey || !in_wc) op_modifier |= 0x2; - return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); + return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, + port); } static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 94fe253d4956..581ae11e2fc9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -331,8 +331,8 @@ out: spin_unlock(&port->mp.mpi_lock); } -static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, - u8 *active_width) +static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, + u8 *active_width) { switch (eth_proto_oper) { case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII): @@ -389,10 +389,66 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } +static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, + u8 *active_width) +{ + switch (eth_proto_oper) { + case MLX5E_PROT_MASK(MLX5E_SGMII_100M): + case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_SDR; + break; + case MLX5E_PROT_MASK(MLX5E_5GBASE_R): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_DDR; + break; + case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_QDR; + break; + case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_QDR; + break; + case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_EDR; + break; + case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2): + case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_HDR; + break; + case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_HDR; + break; + case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_HDR; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, + u8 *active_width, bool ext) +{ + return ext ? + translate_eth_ext_proto_oper(eth_proto_oper, active_speed, + active_width) : + translate_eth_legacy_proto_oper(eth_proto_oper, active_speed, + active_width); +} + static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(device); + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; struct mlx5_core_dev *mdev; struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; @@ -400,6 +456,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, u16 qkey_viol_cntr; u32 eth_prot_oper; u8 mdev_port_num; + bool ext; int err; mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); @@ -416,16 +473,18 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, /* Possible bad flows are checked before filling out props so in case * of an error it will still be zeroed out. */ - err = mlx5_query_port_eth_proto_oper(mdev, ð_prot_oper, - mdev_port_num); + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, + mdev_port_num); if (err) goto out; + ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); + eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; translate_eth_proto_oper(eth_prot_oper, &props->active_speed, - &props->active_width); + &props->active_width, ext); props->port_cap_flags |= IB_PORT_CM_SUP; props->ip_gids = true; @@ -6386,7 +6445,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_delay_drop_cleanup), }; -static const struct mlx5_ib_profile nic_rep_profile = { +const struct mlx5_ib_profile uplink_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), @@ -6479,6 +6538,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) printk_once(KERN_INFO "%s", mlx5_version); + if (MLX5_ESWITCH_MANAGER(mdev) && + mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + mlx5_ib_register_vport_reps(mdev); + return mdev; + } + port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -6493,14 +6558,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), MLX5_CAP_GEN(mdev, num_vhca_ports)); - if (MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { - dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0); - dev->profile = &nic_rep_profile; - mlx5_ib_register_vport_reps(dev); - return dev; - } - return __mlx5_ib_add(dev, &pf_profile); } @@ -6509,6 +6566,11 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; + if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) { + mlx5_ib_unregister_vport_reps(mdev); + return; + } + if (mlx5_core_is_mp_slave(mdev)) { mpi = context; mutex_lock(&mlx5_ib_multiport_mutex); @@ -6520,10 +6582,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) } dev = context; - if (dev->profile == &nic_rep_profile) - mlx5_ib_unregister_vport_reps(dev); - else - __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); ib_dealloc_device((struct ib_device *)dev); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b06d3b1efea8..eedba0d2ec4b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -587,6 +587,7 @@ struct mlx5_ib_mr { struct mlx5_ib_mr *parent; atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; + struct mlx5_async_work cb_work; }; struct mlx5_ib_mw { @@ -944,6 +945,7 @@ struct mlx5_ib_dev { struct mlx5_memic memic; u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; + struct mlx5_async_ctx async_ctx; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1038,9 +1040,6 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd6ea1f75085..bf2b6ea23851 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -123,9 +123,10 @@ static void update_odp_mr(struct mlx5_ib_mr *mr) } #endif -static void reg_mr_callback(int status, void *context) +static void reg_mr_callback(int status, struct mlx5_async_work *context) { - struct mlx5_ib_mr *mr = context; + struct mlx5_ib_mr *mr = + container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); @@ -216,9 +217,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) ent->pending++; spin_unlock_irq(&ent->lock); err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, - in, inlen, + &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, mr); + reg_mr_callback, &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -679,6 +680,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return -ENOMEM; } + mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; @@ -725,33 +727,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; } -static void wait_for_async_commands(struct mlx5_ib_dev *dev) -{ - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; - int total = 0; - int i; - int j; - - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - for (j = 0 ; j < 1000; j++) { - if (!ent->pending) - break; - msleep(50); - } - } - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - total += ent->pending; - } - - if (total) - mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); - else - mlx5_ib_warn(dev, "done with all pending requests\n"); -} - int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -763,12 +738,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) flush_workqueue(dev->cache.wq); mlx5_mr_cache_debugfs_cleanup(dev); + mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); - wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 01e0f6200631..4ee32964e1dd 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1595,10 +1595,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work) struct prefetch_mr_work *w = container_of(work, struct prefetch_mr_work, work); - if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED) + if (ib_device_try_get(&w->dev->ib_dev)) { mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list, w->num_sge); - + ib_device_put(&w->dev->ib_dev); + } + put_device(&w->dev->ib_dev.dev); kfree(w); } @@ -1617,15 +1619,13 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list, num_sge); - if (dev->ib_dev.reg_state != IB_DEV_REGISTERED) - return -ENODEV; - work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); if (!work) return -ENOMEM; memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); + get_device(&dev->ib_dev.dev); work->dev = dev; work->pf_flags = pf_flags; work->num_sge = num_sge; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index dd2ae640bc84..7db778d96ef5 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1912,14 +1912,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } if (!check_flags_mask(ucmd.flags, + MLX5_QP_FLAG_ALLOW_SCATTER_CQE | + MLX5_QP_FLAG_BFREG_INDEX | + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE | + MLX5_QP_FLAG_SCATTER_CQE | MLX5_QP_FLAG_SIGNATURE | - MLX5_QP_FLAG_SCATTER_CQE | - MLX5_QP_FLAG_TUNNEL_OFFLOADS | - MLX5_QP_FLAG_BFREG_INDEX | - MLX5_QP_FLAG_TYPE_DCT | - MLX5_QP_FLAG_TYPE_DCI | - MLX5_QP_FLAG_ALLOW_SCATTER_CQE | - MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)) + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | + MLX5_QP_FLAG_TUNNEL_OFFLOADS | + MLX5_QP_FLAG_TYPE_DCI | + MLX5_QP_FLAG_TYPE_DCT)) return -EINVAL; err = get_qp_user_index(to_mucontext(pd->uobject->context), diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 868da0ece7ba..445ea19a2ec8 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -512,7 +512,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index a1bd8cfc2c25..c6cc3e4ab71d 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2910,6 +2910,8 @@ send: goto op_err; if (!ret) goto rnr_nak; + if (wqe->length > qp->r_len) + goto inv_err; break; case IB_WR_RDMA_WRITE_WITH_IMM: @@ -3078,7 +3080,10 @@ op_err: goto err; inv_err: - send_status = IB_WC_REM_INV_REQ_ERR; + send_status = + sqp->ibqp.qp_type == IB_QPT_RC ? + IB_WC_REM_INV_REQ_ERR : + IB_WC_SUCCESS; wc.status = IB_WC_LOC_QP_OP_ERR; goto err; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 1da119d901a9..73e808c1e6ad 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -248,7 +248,6 @@ struct ipoib_cm_tx { struct list_head list; struct net_device *dev; struct ipoib_neigh *neigh; - struct ipoib_path *path; struct ipoib_tx_buf *tx_ring; unsigned int tx_head; unsigned int tx_tail; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 0428e01e8f69..aa9dcfc36cd3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1312,7 +1312,6 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path neigh->cm = tx; tx->neigh = neigh; - tx->path = path; tx->dev = dev; list_add(&tx->list, &priv->cm.start_list); set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); @@ -1371,7 +1370,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) neigh->daddr + QPN_AND_OPTIONS_OFFSET); goto free_neigh; } - memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec)); + memcpy(&pathrec, &path->pathrec, sizeof(pathrec)); spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); diff --git a/drivers/input/serio/olpc_apsp.c b/drivers/input/serio/olpc_apsp.c index bae08226e3d9..a7cfab3db9ee 100644 --- a/drivers/input/serio/olpc_apsp.c +++ b/drivers/input/serio/olpc_apsp.c @@ -23,7 +23,6 @@ #include <linux/of.h> #include <linux/slab.h> #include <linux/delay.h> -#include <linux/clk.h> /* * The OLPC XO-1.75 and XO-4 laptops do not have a hardware PS/2 controller. @@ -75,7 +74,6 @@ struct olpc_apsp { struct serio *kbio; struct serio *padio; void __iomem *base; - struct clk *clk; int open_count; int irq; }; @@ -148,17 +146,11 @@ static int olpc_apsp_open(struct serio *port) struct olpc_apsp *priv = port->port_data; unsigned int tmp; unsigned long l; - int error; if (priv->open_count++ == 0) { - error = clk_prepare_enable(priv->clk); - if (error) - return error; - l = readl(priv->base + COMMAND_FIFO_STATUS); if (!(l & CMD_STS_MASK)) { dev_err(priv->dev, "SP cannot accept commands.\n"); - clk_disable_unprepare(priv->clk); return -EIO; } @@ -179,8 +171,6 @@ static void olpc_apsp_close(struct serio *port) /* Disable interrupt 0 */ tmp = readl(priv->base + PJ_INTERRUPT_MASK); writel(tmp | INT_0, priv->base + PJ_INTERRUPT_MASK); - - clk_disable_unprepare(priv->clk); } } @@ -208,10 +198,6 @@ static int olpc_apsp_probe(struct platform_device *pdev) if (priv->irq < 0) return priv->irq; - priv->clk = devm_clk_get(&pdev->dev, "sp"); - if (IS_ERR(priv->clk)) - return PTR_ERR(priv->clk); - /* KEYBOARD */ kb_serio = kzalloc(sizeof(struct serio), GFP_KERNEL); if (!kb_serio) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 87ba23a75b38..2a7b78bb98b4 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1991,16 +1991,13 @@ static void do_attach(struct iommu_dev_data *dev_data, static void do_detach(struct iommu_dev_data *dev_data) { + struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu; u16 alias; iommu = amd_iommu_rlookup_table[dev_data->devid]; alias = dev_data->alias; - /* decrease reference counters */ - dev_data->domain->dev_iommu[iommu->index] -= 1; - dev_data->domain->dev_cnt -= 1; - /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); @@ -2010,6 +2007,16 @@ static void do_detach(struct iommu_dev_data *dev_data) /* Flush the DTE entry */ device_flush_dte(dev_data); + + /* Flush IOTLB */ + domain_flush_tlb_pde(domain); + + /* Wait for the flushes to finish */ + domain_flush_complete(domain); + + /* decrease reference counters - needs to happen after the flushes */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; } /* @@ -2617,13 +2624,13 @@ out_unmap: bus_addr = address + s->dma_address + (j << PAGE_SHIFT); iommu_unmap_page(domain, bus_addr, PAGE_SIZE); - if (--mapped_pages) + if (--mapped_pages == 0) goto out_free_iova; } } out_free_iova: - free_iova_fast(&dma_dom->iovad, address, npages); + free_iova_fast(&dma_dom->iovad, address >> PAGE_SHIFT, npages); out_err: return 0; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2bd9ac285c0d..78188bf7e90d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -363,7 +363,7 @@ static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; -static int intel_iommu_sm = 1; +static int intel_iommu_sm; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 @@ -456,9 +456,9 @@ static int __init intel_iommu_setup(char *str) } else if (!strncmp(str, "sp_off", 6)) { pr_info("Disable supported super page\n"); intel_iommu_superpage = 0; - } else if (!strncmp(str, "sm_off", 6)) { - pr_info("Intel-IOMMU: disable scalable mode support\n"); - intel_iommu_sm = 0; + } else if (!strncmp(str, "sm_on", 5)) { + pr_info("Intel-IOMMU: scalable mode supported\n"); + intel_iommu_sm = 1; } else if (!strncmp(str, "tboot_noforce", 13)) { printk(KERN_INFO "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); @@ -5294,7 +5294,7 @@ static void intel_iommu_put_resv_regions(struct device *dev, struct iommu_resv_region *entry, *next; list_for_each_entry_safe(entry, next, head, list) { - if (entry->type == IOMMU_RESV_RESERVED) + if (entry->type == IOMMU_RESV_MSI) kfree(entry); } } diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 730f7dabcf37..7e0df67bd3e9 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -441,6 +441,10 @@ static int mtk_iommu_add_device(struct device *dev) iommu_spec.args_count = count; mtk_iommu_create_mapping(dev, &iommu_spec); + + /* dev->iommu_fwspec might have changed */ + fwspec = dev_iommu_fwspec_get(dev); + of_node_put(iommu_spec.np); } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 7f2a45445b00..c3aba3fc818d 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -97,9 +97,14 @@ struct its_device; * The ITS structure - contains most of the infrastructure, with the * top-level MSI domain, the command queue, the collections, and the * list of devices writing to it. + * + * dev_alloc_lock has to be taken for device allocations, while the + * spinlock must be taken to parse data structures such as the device + * list. */ struct its_node { raw_spinlock_t lock; + struct mutex dev_alloc_lock; struct list_head entry; void __iomem *base; phys_addr_t phys_base; @@ -156,6 +161,7 @@ struct its_device { void *itt; u32 nr_ites; u32 device_id; + bool shared; }; static struct { @@ -1580,6 +1586,9 @@ static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids) nr_irqs /= 2; } while (nr_irqs > 0); + if (!nr_irqs) + err = -ENOSPC; + if (err) goto out; @@ -2059,6 +2068,29 @@ static int __init allocate_lpi_tables(void) return 0; } +static u64 its_clear_vpend_valid(void __iomem *vlpi_base) +{ + u32 count = 1000000; /* 1s! */ + bool clean; + u64 val; + + val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + val &= ~GICR_VPENDBASER_Valid; + gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + do { + val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + clean = !(val & GICR_VPENDBASER_Dirty); + if (!clean) { + count--; + cpu_relax(); + udelay(1); + } + } while (!clean && count); + + return val; +} + static void its_cpu_init_lpis(void) { void __iomem *rbase = gic_data_rdist_rd_base(); @@ -2144,6 +2176,30 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR); + if (gic_rdists->has_vlpis) { + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + + /* + * It's possible for CPU to receive VLPIs before it is + * sheduled as a vPE, especially for the first CPU, and the + * VLPI with INTID larger than 2^(IDbits+1) will be considered + * as out of range and dropped by GIC. + * So we initialize IDbits to known value to avoid VLPI drop. + */ + val = (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; + pr_debug("GICv4: CPU%d: Init IDbits to 0x%llx for GICR_VPROPBASER\n", + smp_processor_id(), val); + gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + + /* + * Also clear Valid bit of GICR_VPENDBASER, in case some + * ancient programming gets left in and has possibility of + * corrupting memory. + */ + val = its_clear_vpend_valid(vlpi_base); + WARN_ON(val & GICR_VPENDBASER_Dirty); + } + /* Make sure the GIC has seen the above */ dsb(sy); out: @@ -2422,6 +2478,7 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, struct its_device *its_dev; struct msi_domain_info *msi_info; u32 dev_id; + int err = 0; /* * We ignore "dev" entierely, and rely on the dev_id that has @@ -2444,6 +2501,7 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, return -EINVAL; } + mutex_lock(&its->dev_alloc_lock); its_dev = its_find_device(its, dev_id); if (its_dev) { /* @@ -2451,18 +2509,22 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, * another alias (PCI bridge of some sort). No need to * create the device. */ + its_dev->shared = true; pr_debug("Reusing ITT for devID %x\n", dev_id); goto out; } its_dev = its_create_device(its, dev_id, nvec, true); - if (!its_dev) - return -ENOMEM; + if (!its_dev) { + err = -ENOMEM; + goto out; + } pr_debug("ITT %d entries, %d bits\n", nvec, ilog2(nvec)); out: + mutex_unlock(&its->dev_alloc_lock); info->scratchpad[0].ptr = its_dev; - return 0; + return err; } static struct msi_domain_ops its_msi_domain_ops = { @@ -2566,6 +2628,7 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, { struct irq_data *d = irq_domain_get_irq_data(domain, virq); struct its_device *its_dev = irq_data_get_irq_chip_data(d); + struct its_node *its = its_dev->its; int i; for (i = 0; i < nr_irqs; i++) { @@ -2580,8 +2643,14 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, irq_domain_reset_irq_data(data); } - /* If all interrupts have been freed, start mopping the floor */ - if (bitmap_empty(its_dev->event_map.lpi_map, + mutex_lock(&its->dev_alloc_lock); + + /* + * If all interrupts have been freed, start mopping the + * floor. This is conditionned on the device not being shared. + */ + if (!its_dev->shared && + bitmap_empty(its_dev->event_map.lpi_map, its_dev->event_map.nr_lpis)) { its_lpi_free(its_dev->event_map.lpi_map, its_dev->event_map.lpi_base, @@ -2593,6 +2662,8 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, its_free_device(its_dev); } + mutex_unlock(&its->dev_alloc_lock); + irq_domain_free_irqs_parent(domain, virq, nr_irqs); } @@ -2755,26 +2826,11 @@ static void its_vpe_schedule(struct its_vpe *vpe) static void its_vpe_deschedule(struct its_vpe *vpe) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); - u32 count = 1000000; /* 1s! */ - bool clean; u64 val; - /* We're being scheduled out */ - val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - val &= ~GICR_VPENDBASER_Valid; - gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - do { - val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - clean = !(val & GICR_VPENDBASER_Dirty); - if (!clean) { - count--; - cpu_relax(); - udelay(1); - } - } while (!clean && count); + val = its_clear_vpend_valid(vlpi_base); - if (unlikely(!clean && !count)) { + if (unlikely(val & GICR_VPENDBASER_Dirty)) { pr_err_ratelimited("ITS virtual pending table not cleaning\n"); vpe->idai = false; vpe->pending_last = true; @@ -3517,6 +3573,7 @@ static int __init its_probe_one(struct resource *res, } raw_spin_lock_init(&its->lock); + mutex_init(&its->dev_alloc_lock); INIT_LIST_HEAD(&its->entry); INIT_LIST_HEAD(&its->its_device_list); typer = gic_read_typer(its_base + GITS_TYPER); diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 25f32e1d7764..3496b61a312a 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -34,6 +34,9 @@ #define SEL_INT_PENDING (1 << 6) #define SEL_INT_NUM_MASK 0x3f +#define MMP2_ICU_INT_ROUTE_PJ4_IRQ (1 << 5) +#define MMP2_ICU_INT_ROUTE_PJ4_FIQ (1 << 6) + struct icu_chip_data { int nr_irqs; unsigned int virq_base; @@ -190,7 +193,8 @@ static const struct mmp_intc_conf mmp_conf = { static const struct mmp_intc_conf mmp2_conf = { .conf_enable = 0x20, .conf_disable = 0x0, - .conf_mask = 0x7f, + .conf_mask = MMP2_ICU_INT_ROUTE_PJ4_IRQ | + MMP2_ICU_INT_ROUTE_PJ4_FIQ, }; static void __exception_irq_entry mmp_handle_irq(struct pt_regs *regs) diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c index 5385f5768345..27933338f7b3 100644 --- a/drivers/irqchip/irq-xtensa-mx.c +++ b/drivers/irqchip/irq-xtensa-mx.c @@ -71,14 +71,17 @@ static void xtensa_mx_irq_mask(struct irq_data *d) unsigned int mask = 1u << d->hwirq; if (mask & (XCHAL_INTTYPE_MASK_EXTERN_EDGE | - XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) { - set_er(1u << (xtensa_get_ext_irq_no(d->hwirq) - - HW_IRQ_MX_BASE), MIENG); - } else { - mask = __this_cpu_read(cached_irq_mask) & ~mask; - __this_cpu_write(cached_irq_mask, mask); - xtensa_set_sr(mask, intenable); + XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) { + unsigned int ext_irq = xtensa_get_ext_irq_no(d->hwirq); + + if (ext_irq >= HW_IRQ_MX_BASE) { + set_er(1u << (ext_irq - HW_IRQ_MX_BASE), MIENG); + return; + } } + mask = __this_cpu_read(cached_irq_mask) & ~mask; + __this_cpu_write(cached_irq_mask, mask); + xtensa_set_sr(mask, intenable); } static void xtensa_mx_irq_unmask(struct irq_data *d) @@ -86,14 +89,17 @@ static void xtensa_mx_irq_unmask(struct irq_data *d) unsigned int mask = 1u << d->hwirq; if (mask & (XCHAL_INTTYPE_MASK_EXTERN_EDGE | - XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) { - set_er(1u << (xtensa_get_ext_irq_no(d->hwirq) - - HW_IRQ_MX_BASE), MIENGSET); - } else { - mask |= __this_cpu_read(cached_irq_mask); - __this_cpu_write(cached_irq_mask, mask); - xtensa_set_sr(mask, intenable); + XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) { + unsigned int ext_irq = xtensa_get_ext_irq_no(d->hwirq); + + if (ext_irq >= HW_IRQ_MX_BASE) { + set_er(1u << (ext_irq - HW_IRQ_MX_BASE), MIENGSET); + return; + } } + mask |= __this_cpu_read(cached_irq_mask); + __this_cpu_write(cached_irq_mask, mask); + xtensa_set_sr(mask, intenable); } static void xtensa_mx_irq_enable(struct irq_data *d) @@ -113,7 +119,11 @@ static void xtensa_mx_irq_ack(struct irq_data *d) static int xtensa_mx_irq_retrigger(struct irq_data *d) { - xtensa_set_sr(1 << d->hwirq, intset); + unsigned int mask = 1u << d->hwirq; + + if (WARN_ON(mask & ~XCHAL_INTTYPE_MASK_SOFTWARE)) + return 0; + xtensa_set_sr(mask, intset); return 1; } diff --git a/drivers/irqchip/irq-xtensa-pic.c b/drivers/irqchip/irq-xtensa-pic.c index c200234dd2c9..ab12328be5ee 100644 --- a/drivers/irqchip/irq-xtensa-pic.c +++ b/drivers/irqchip/irq-xtensa-pic.c @@ -70,7 +70,11 @@ static void xtensa_irq_ack(struct irq_data *d) static int xtensa_irq_retrigger(struct irq_data *d) { - xtensa_set_sr(1 << d->hwirq, intset); + unsigned int mask = 1u << d->hwirq; + + if (WARN_ON(mask & ~XCHAL_INTTYPE_MASK_SOFTWARE)) + return 0; + xtensa_set_sr(mask, intset); return 1; } diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index ab0b63a4d045..e1de8b1a1001 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -633,7 +633,7 @@ gigaset_tty_ioctl(struct tty_struct *tty, struct file *file, flush_send_queue(cs); break; } - /* Pass through */ + /* fall through */ default: /* pass through to underlying serial device */ diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 81dd465afcf4..71a8312592d6 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -656,7 +656,7 @@ hfcpci_fill_fifo(struct BCState *bcs) schedule_event(bcs, B_ACKPENDING); } - dev_kfree_skb_any(bcs->tx_skb); + dev_consume_skb_any(bcs->tx_skb); bcs->tx_skb = skb_dequeue(&bcs->squeue); /* fetch next data */ } test_and_clear_bit(BC_FLG_BUSY, &bcs->Flag); diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index dc1cded716c1..43700fc19a31 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -3642,7 +3642,7 @@ isdn_tty_edit_at(const char *p, int count, modem_info *info) break; } else m->mdmcmdl = 0; - /* Fall through, check for 'A' */ + /* Fall through - check for 'A' */ case 0: if (c == 'A') { m->mdmcmd[m->mdmcmdl] = c; diff --git a/drivers/isdn/i4l/isdn_v110.c b/drivers/isdn/i4l/isdn_v110.c index 2a5f6668756c..d11fe76f138f 100644 --- a/drivers/isdn/i4l/isdn_v110.c +++ b/drivers/isdn/i4l/isdn_v110.c @@ -354,7 +354,7 @@ EncodeMatrix(unsigned char *buf, int len, unsigned char *m, int mlen) printk(KERN_WARNING "isdn_v110 (EncodeMatrix): buffer full!\n"); return line; } - /* else: fall through */ + /* fall through */ case 128: m[line] = 128; /* leftmost -> set byte to 1000000 */ mbit = 64; /* current bit in the matrix line */ diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index 211ed6cffd10..578978711887 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -170,8 +170,8 @@ dev_expire_timer(struct timer_list *t) spin_lock_irqsave(&timer->dev->lock, flags); if (timer->id >= 0) list_move_tail(&timer->list, &timer->dev->expired); - spin_unlock_irqrestore(&timer->dev->lock, flags); wake_up_interruptible(&timer->dev->wait); + spin_unlock_irqrestore(&timer->dev->lock, flags); } static int diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 47d4e0d30bf0..dd538e6b2748 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -932,7 +932,7 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio) if (IS_ERR(bip)) return PTR_ERR(bip); - tag_len = io->cc->on_disk_tag_size * bio_sectors(bio); + tag_len = io->cc->on_disk_tag_size * (bio_sectors(bio) >> io->cc->sector_shift); bip->bip_iter.bi_size = tag_len; bip->bip_iter.bi_sector = io->cc->start + io->sector; diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 4eb5f8c56535..a20531e5f3b4 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -131,7 +131,7 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) static void rq_completed(struct mapped_device *md) { /* nudge anyone waiting on suspend queue */ - if (unlikely(waitqueue_active(&md->wait))) + if (unlikely(wq_has_sleeper(&md->wait))) wake_up(&md->wait); /* diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ca8af21bf644..e83b63608262 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -257,6 +257,7 @@ struct pool { spinlock_t lock; struct bio_list deferred_flush_bios; + struct bio_list deferred_flush_completions; struct list_head prepared_mappings; struct list_head prepared_discards; struct list_head prepared_discards_pt2; @@ -956,6 +957,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) mempool_free(m, &m->tc->pool->mapping_pool); } +static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio) +{ + struct pool *pool = tc->pool; + unsigned long flags; + + /* + * If the bio has the REQ_FUA flag set we must commit the metadata + * before signaling its completion. + */ + if (!bio_triggers_commit(tc, bio)) { + bio_endio(bio); + return; + } + + /* + * Complete bio with an error if earlier I/O caused changes to the + * metadata that can't be committed, e.g, due to I/O errors on the + * metadata device. + */ + if (dm_thin_aborted_changes(tc->td)) { + bio_io_error(bio); + return; + } + + /* + * Batch together any bios that trigger commits and then issue a + * single commit for them in process_deferred_bios(). + */ + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_flush_completions, bio); + spin_unlock_irqrestore(&pool->lock, flags); +} + static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; @@ -988,7 +1022,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ if (bio) { inc_remap_and_issue_cell(tc, m->cell, m->data_block); - bio_endio(bio); + complete_overwrite_bio(tc, bio); } else { inc_all_io_entry(tc->pool, m->cell->holder); remap_and_issue(tc, m->cell->holder, m->data_block); @@ -2317,7 +2351,7 @@ static void process_deferred_bios(struct pool *pool) { unsigned long flags; struct bio *bio; - struct bio_list bios; + struct bio_list bios, bio_completions; struct thin_c *tc; tc = get_first_thin(pool); @@ -2328,26 +2362,36 @@ static void process_deferred_bios(struct pool *pool) } /* - * If there are any deferred flush bios, we must commit - * the metadata before issuing them. + * If there are any deferred flush bios, we must commit the metadata + * before issuing them or signaling their completion. */ bio_list_init(&bios); + bio_list_init(&bio_completions); + spin_lock_irqsave(&pool->lock, flags); bio_list_merge(&bios, &pool->deferred_flush_bios); bio_list_init(&pool->deferred_flush_bios); + + bio_list_merge(&bio_completions, &pool->deferred_flush_completions); + bio_list_init(&pool->deferred_flush_completions); spin_unlock_irqrestore(&pool->lock, flags); - if (bio_list_empty(&bios) && + if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) && !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool))) return; if (commit(pool)) { + bio_list_merge(&bios, &bio_completions); + while ((bio = bio_list_pop(&bios))) bio_io_error(bio); return; } pool->last_commit_jiffies = jiffies; + while ((bio = bio_list_pop(&bio_completions))) + bio_endio(bio); + while ((bio = bio_list_pop(&bios))) generic_make_request(bio); } @@ -2954,6 +2998,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); spin_lock_init(&pool->lock); bio_list_init(&pool->deferred_flush_bios); + bio_list_init(&pool->deferred_flush_completions); INIT_LIST_HEAD(&pool->prepared_mappings); INIT_LIST_HEAD(&pool->prepared_discards); INIT_LIST_HEAD(&pool->prepared_discards_pt2); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2b53c3841b53..515e6af9bed2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -699,7 +699,7 @@ static void end_io_acct(struct dm_io *io) true, duration, &io->stats_aux); /* nudge anyone waiting on suspend queue */ - if (unlikely(waitqueue_active(&md->wait))) + if (unlikely(wq_has_sleeper(&md->wait))) wake_up(&md->wait); } @@ -1336,7 +1336,11 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, return r; } - bio_trim(clone, sector - clone->bi_iter.bi_sector, len); + bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); + clone->bi_iter.bi_size = to_bytes(len); + + if (bio_integrity(bio)) + bio_integrity_trim(clone); return 0; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 1d54109071cc..fa47249fa3e4 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1863,6 +1863,20 @@ static void end_sync_read(struct bio *bio) reschedule_retry(r1_bio); } +static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio) +{ + sector_t sync_blocks = 0; + sector_t s = r1_bio->sector; + long sectors_to_go = r1_bio->sectors; + + /* make sure these bits don't get cleared. */ + do { + md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); + s += sync_blocks; + sectors_to_go -= sync_blocks; + } while (sectors_to_go > 0); +} + static void end_sync_write(struct bio *bio) { int uptodate = !bio->bi_status; @@ -1874,15 +1888,7 @@ static void end_sync_write(struct bio *bio) struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev; if (!uptodate) { - sector_t sync_blocks = 0; - sector_t s = r1_bio->sector; - long sectors_to_go = r1_bio->sectors; - /* make sure these bits doesn't get cleared. */ - do { - md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); - s += sync_blocks; - sectors_to_go -= sync_blocks; - } while (sectors_to_go > 0); + abort_sync_write(mddev, r1_bio); set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, &rdev->flags)) set_bit(MD_RECOVERY_NEEDED, & @@ -2172,8 +2178,10 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) (i == r1_bio->read_disk || !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) continue; - if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) + if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) { + abort_sync_write(mddev, r1_bio); continue; + } bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (test_bit(FailFast, &conf->mirrors[i].rdev->flags)) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index ec3a5ef7fee0..cbbe6b6535be 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1935,12 +1935,14 @@ out: } static struct stripe_head * -r5c_recovery_alloc_stripe(struct r5conf *conf, - sector_t stripe_sect) +r5c_recovery_alloc_stripe( + struct r5conf *conf, + sector_t stripe_sect, + int noblock) { struct stripe_head *sh; - sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0); + sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0); if (!sh) return NULL; /* no more stripe available */ @@ -2150,7 +2152,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, stripe_sect); if (!sh) { - sh = r5c_recovery_alloc_stripe(conf, stripe_sect); + sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1); /* * cannot get stripe from raid5_get_active_stripe * try replay some stripes @@ -2159,20 +2161,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, r5c_recovery_replay_stripes( cached_stripe_list, ctx); sh = r5c_recovery_alloc_stripe( - conf, stripe_sect); + conf, stripe_sect, 1); } if (!sh) { + int new_size = conf->min_nr_stripes * 2; pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n", mdname(mddev), - conf->min_nr_stripes * 2); - raid5_set_cache_size(mddev, - conf->min_nr_stripes * 2); - sh = r5c_recovery_alloc_stripe(conf, - stripe_sect); + new_size); + ret = raid5_set_cache_size(mddev, new_size); + if (conf->min_nr_stripes <= new_size / 2) { + pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n", + mdname(mddev), + ret, + new_size, + conf->min_nr_stripes, + conf->max_nr_stripes); + return -ENOMEM; + } + sh = r5c_recovery_alloc_stripe( + conf, stripe_sect, 0); } if (!sh) { pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n", - mdname(mddev)); + mdname(mddev)); return -ENOMEM; } list_add_tail(&sh->lru, cached_stripe_list); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4990f0319f6c..cecea901ab8c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6369,6 +6369,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page) int raid5_set_cache_size(struct mddev *mddev, int size) { + int result = 0; struct r5conf *conf = mddev->private; if (size <= 16 || size > 32768) @@ -6385,11 +6386,14 @@ raid5_set_cache_size(struct mddev *mddev, int size) mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) - if (!grow_one_stripe(conf, GFP_KERNEL)) + if (!grow_one_stripe(conf, GFP_KERNEL)) { + conf->min_nr_stripes = conf->max_nr_stripes; + result = -ENOMEM; break; + } mutex_unlock(&conf->cache_size_mutex); - return 0; + return result; } EXPORT_SYMBOL(raid5_set_cache_size); diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index f461460a2aeb..76f9909cf396 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1419,7 +1419,7 @@ config MFD_TPS65217 config MFD_TPS68470 bool "TI TPS68470 Power Management / LED chips" - depends on ACPI && I2C=y + depends on ACPI && PCI && I2C=y select MFD_CORE select REGMAP_I2C select I2C_DESIGNWARE_PLATFORM diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 1fc8ea0f519b..ca4c9cc218a2 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -401,8 +401,11 @@ static void mei_io_list_flush_cl(struct list_head *head, struct mei_cl_cb *cb, *next; list_for_each_entry_safe(cb, next, head, list) { - if (cl == cb->cl) + if (cl == cb->cl) { list_del_init(&cb->list); + if (cb->fop_type == MEI_FOP_READ) + mei_io_cb_free(cb); + } } } diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 23739a60517f..bb1ee9834a02 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -139,6 +139,8 @@ #define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ #define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */ +#define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index e89497f858ae..3ab946ad3257 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -105,6 +105,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, + /* required last entry */ {0, } }; diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index 2bfa3a903bf9..744757f541be 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -47,7 +47,8 @@ * @dc: Virtio device control * @vpdev: VOP device which is the parent for this virtio device * @vr: Buffer for accessing the VRING - * @used: Buffer for used + * @used_virt: Virtual address of used ring + * @used: DMA address of used ring * @used_size: Size of the used buffer * @reset_done: Track whether VOP reset is complete * @virtio_cookie: Cookie returned upon requesting a interrupt @@ -61,6 +62,7 @@ struct _vop_vdev { struct mic_device_ctrl __iomem *dc; struct vop_device *vpdev; void __iomem *vr[VOP_MAX_VRINGS]; + void *used_virt[VOP_MAX_VRINGS]; dma_addr_t used[VOP_MAX_VRINGS]; int used_size[VOP_MAX_VRINGS]; struct completion reset_done; @@ -260,12 +262,12 @@ static bool vop_notify(struct virtqueue *vq) static void vop_del_vq(struct virtqueue *vq, int n) { struct _vop_vdev *vdev = to_vopvdev(vq->vdev); - struct vring *vr = (struct vring *)(vq + 1); struct vop_device *vpdev = vdev->vpdev; dma_unmap_single(&vpdev->dev, vdev->used[n], vdev->used_size[n], DMA_BIDIRECTIONAL); - free_pages((unsigned long)vr->used, get_order(vdev->used_size[n])); + free_pages((unsigned long)vdev->used_virt[n], + get_order(vdev->used_size[n])); vring_del_virtqueue(vq); vpdev->hw_ops->iounmap(vpdev, vdev->vr[n]); vdev->vr[n] = NULL; @@ -283,6 +285,26 @@ static void vop_del_vqs(struct virtio_device *dev) vop_del_vq(vq, idx++); } +static struct virtqueue *vop_new_virtqueue(unsigned int index, + unsigned int num, + struct virtio_device *vdev, + bool context, + void *pages, + bool (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name, + void *used) +{ + bool weak_barriers = false; + struct vring vring; + + vring_init(&vring, num, pages, MIC_VIRTIO_RING_ALIGN); + vring.used = used; + + return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, + notify, callback, name); +} + /* * This routine will assign vring's allocated in host/io memory. Code in * virtio_ring.c however continues to access this io memory as if it were local @@ -302,7 +324,6 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, struct _mic_vring_info __iomem *info; void *used; int vr_size, _vr_size, err, magic; - struct vring *vr; u8 type = ioread8(&vdev->desc->type); if (index >= ioread8(&vdev->desc->num_vq)) @@ -322,17 +343,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, return ERR_PTR(-ENOMEM); vdev->vr[index] = va; memset_io(va, 0x0, _vr_size); - vq = vring_new_virtqueue( - index, - le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN, - dev, - false, - ctx, - (void __force *)va, vop_notify, callback, name); - if (!vq) { - err = -ENOMEM; - goto unmap; - } + info = va + _vr_size; magic = ioread32(&info->magic); @@ -341,18 +352,27 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, goto unmap; } - /* Allocate and reassign used ring now */ vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * le16_to_cpu(config.num)); used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(vdev->used_size[index])); + vdev->used_virt[index] = used; if (!used) { err = -ENOMEM; dev_err(_vop_dev(vdev), "%s %d err %d\n", __func__, __LINE__, err); - goto del_vq; + goto unmap; + } + + vq = vop_new_virtqueue(index, le16_to_cpu(config.num), dev, ctx, + (void __force *)va, vop_notify, callback, + name, used); + if (!vq) { + err = -ENOMEM; + goto free_used; } + vdev->used[index] = dma_map_single(&vpdev->dev, used, vdev->used_size[index], DMA_BIDIRECTIONAL); @@ -360,26 +380,17 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, err = -ENOMEM; dev_err(_vop_dev(vdev), "%s %d err %d\n", __func__, __LINE__, err); - goto free_used; + goto del_vq; } writeq(vdev->used[index], &vqconfig->used_address); - /* - * To reassign the used ring here we are directly accessing - * struct vring_virtqueue which is a private data structure - * in virtio_ring.c. At the minimum, a BUILD_BUG_ON() in - * vring_new_virtqueue() would ensure that - * (&vq->vring == (struct vring *) (&vq->vq + 1)); - */ - vr = (struct vring *)(vq + 1); - vr->used = used; vq->priv = vdev; return vq; +del_vq: + vring_del_virtqueue(vq); free_used: free_pages((unsigned long)used, get_order(vdev->used_size[index])); -del_vq: - vring_del_virtqueue(vq); unmap: vpdev->hw_ops->iounmap(vpdev, vdev->vr[index]); return ERR_PTR(err); @@ -581,6 +592,8 @@ static int _vop_remove_device(struct mic_device_desc __iomem *d, int ret = -1; if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) { + struct device *dev = get_device(&vdev->vdev.dev); + dev_dbg(&vpdev->dev, "%s %d config_change %d type %d vdev %p\n", __func__, __LINE__, @@ -592,7 +605,7 @@ static int _vop_remove_device(struct mic_device_desc __iomem *d, iowrite8(-1, &dc->h2c_vdev_db); if (status & VIRTIO_CONFIG_S_DRIVER_OK) wait_for_completion(&vdev->reset_done); - put_device(&vdev->vdev.dev); + put_device(dev); iowrite8(1, &dc->guest_ack); dev_dbg(&vpdev->dev, "%s %d guest_ack %d\n", __func__, __LINE__, ioread8(&dc->guest_ack)); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index aef1185f383d..14f3fdb8c6bb 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2112,7 +2112,7 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq) if (waiting) wake_up(&mq->wait); else - kblockd_schedule_work(&mq->complete_work); + queue_work(mq->card->complete_wq, &mq->complete_work); return; } @@ -2924,6 +2924,13 @@ static int mmc_blk_probe(struct mmc_card *card) mmc_fixup_device(card, mmc_blk_fixups); + card->complete_wq = alloc_workqueue("mmc_complete", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (unlikely(!card->complete_wq)) { + pr_err("Failed to create mmc completion workqueue"); + return -ENOMEM; + } + md = mmc_blk_alloc(card); if (IS_ERR(md)) return PTR_ERR(md); @@ -2987,6 +2994,7 @@ static void mmc_blk_remove(struct mmc_card *card) pm_runtime_put_noidle(&card->dev); mmc_blk_remove_req(md); dev_set_drvdata(&card->dev, NULL); + destroy_workqueue(card->complete_wq); } static int _mmc_blk_suspend(struct mmc_card *card) diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c index 50293529d6de..c9e7aa50bb0a 100644 --- a/drivers/mmc/host/bcm2835.c +++ b/drivers/mmc/host/bcm2835.c @@ -1431,6 +1431,8 @@ static int bcm2835_probe(struct platform_device *pdev) err: dev_dbg(dev, "%s -> err %d\n", __func__, ret); + if (host->dma_chan_rxtx) + dma_release_channel(host->dma_chan_rxtx); mmc_free_host(mmc); return ret; diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index f19ec60bcbdc..2eba507790e4 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -1338,7 +1338,8 @@ static int meson_mmc_probe(struct platform_device *pdev) host->regs + SD_EMMC_IRQ_EN); ret = request_threaded_irq(host->irq, meson_mmc_irq, - meson_mmc_irq_thread, IRQF_SHARED, NULL, host); + meson_mmc_irq_thread, IRQF_SHARED, + dev_name(&pdev->dev), host); if (ret) goto err_init_clk; diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 8afeaf81ae66..833ef0590af8 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -846,7 +846,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz) if (timing == MMC_TIMING_MMC_HS400 && host->dev_comp->hs400_tune) - sdr_set_field(host->base + PAD_CMD_TUNE, + sdr_set_field(host->base + tune_reg, MSDC_PAD_TUNE_CMDRRDLY, host->hs400_cmd_int_delay); dev_dbg(host->dev, "sclk: %d, timing: %d\n", host->mmc->actual_clock, diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index 279e326e397e..70fadc976795 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -1399,13 +1399,37 @@ static int sunxi_mmc_probe(struct platform_device *pdev) mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED | MMC_CAP_ERASE | MMC_CAP_SDIO_IRQ; - if (host->cfg->clk_delays || host->use_new_timings) + /* + * Some H5 devices do not have signal traces precise enough to + * use HS DDR mode for their eMMC chips. + * + * We still enable HS DDR modes for all the other controller + * variants that support them. + */ + if ((host->cfg->clk_delays || host->use_new_timings) && + !of_device_is_compatible(pdev->dev.of_node, + "allwinner,sun50i-h5-emmc")) mmc->caps |= MMC_CAP_1_8V_DDR | MMC_CAP_3_3V_DDR; ret = mmc_of_parse(mmc); if (ret) goto error_free_dma; + /* + * If we don't support delay chains in the SoC, we can't use any + * of the higher speed modes. Mask them out in case the device + * tree specifies the properties for them, which gets added to + * the caps by mmc_of_parse() above. + */ + if (!(host->cfg->clk_delays || host->use_new_timings)) { + mmc->caps &= ~(MMC_CAP_3_3V_DDR | MMC_CAP_1_8V_DDR | + MMC_CAP_1_2V_DDR | MMC_CAP_UHS); + mmc->caps2 &= ~MMC_CAP2_HS200; + } + + /* TODO: This driver doesn't support HS400 mode yet */ + mmc->caps2 &= ~MMC_CAP2_HS400; + ret = sunxi_mmc_init_host(host); if (ret) goto error_free_dma; diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 60104e1079c5..37f174ccbcec 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -480,6 +480,10 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent, /* let's register it anyway to preserve ordering */ slave->offset = 0; slave->mtd.size = 0; + + /* Initialize ->erasesize to make add_mtd_device() happy. */ + slave->mtd.erasesize = parent->erasesize; + printk(KERN_ERR"mtd: partition \"%s\" is out of reach -- disabled\n", part->name); goto out_register; @@ -632,7 +636,6 @@ err_remove_part: mutex_unlock(&mtd_partitions_mutex); free_partition(new); - pr_info("%s:%i\n", __func__, __LINE__); return ret; } diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c index bd4cfac6b5aa..a4768df5083f 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c @@ -155,9 +155,10 @@ int gpmi_init(struct gpmi_nand_data *this) /* * Reset BCH here, too. We got failures otherwise :( - * See later BCH reset for explanation of MX23 handling + * See later BCH reset for explanation of MX23 and MX28 handling */ - ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); + ret = gpmi_reset_block(r->bch_regs, + GPMI_IS_MX23(this) || GPMI_IS_MX28(this)); if (ret) goto err_out; @@ -263,12 +264,10 @@ int bch_set_geometry(struct gpmi_nand_data *this) /* * Due to erratum #2847 of the MX23, the BCH cannot be soft reset on this * chip, otherwise it will lock up. So we skip resetting BCH on the MX23. - * On the other hand, the MX28 needs the reset, because one case has been - * seen where the BCH produced ECC errors constantly after 10000 - * consecutive reboots. The latter case has not been seen on the MX23 - * yet, still we don't know if it could happen there as well. + * and MX28. */ - ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); + ret = gpmi_reset_block(r->bch_regs, + GPMI_IS_MX23(this) || GPMI_IS_MX28(this)); if (ret) goto err_out; diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index cca4b24d2ffa..839494ac457c 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -410,6 +410,7 @@ static int nand_check_wp(struct nand_chip *chip) /** * nand_fill_oob - [INTERN] Transfer client buffer to oob + * @chip: NAND chip object * @oob: oob data buffer * @len: oob data write length * @ops: oob ops structure diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c index 1b722fe9213c..19a2b563acdf 100644 --- a/drivers/mtd/nand/raw/nand_bbt.c +++ b/drivers/mtd/nand/raw/nand_bbt.c @@ -158,7 +158,7 @@ static u32 add_marker_len(struct nand_bbt_descr *td) /** * read_bbt - [GENERIC] Read the bad block table starting from page - * @chip: NAND chip object + * @this: NAND chip object * @buf: temporary buffer * @page: the starting page * @num: the number of bbt descriptors to read diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 479c2f2cf17f..fa87ae28cdfe 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -304,24 +304,30 @@ static int spinand_write_to_cache_op(struct spinand_device *spinand, struct nand_device *nand = spinand_to_nand(spinand); struct mtd_info *mtd = nanddev_to_mtd(nand); struct nand_page_io_req adjreq = *req; - unsigned int nbytes = 0; - void *buf = NULL; + void *buf = spinand->databuf; + unsigned int nbytes; u16 column = 0; int ret; - memset(spinand->databuf, 0xff, - nanddev_page_size(nand) + - nanddev_per_page_oobsize(nand)); + /* + * Looks like PROGRAM LOAD (AKA write cache) does not necessarily reset + * the cache content to 0xFF (depends on vendor implementation), so we + * must fill the page cache entirely even if we only want to program + * the data portion of the page, otherwise we might corrupt the BBM or + * user data previously programmed in OOB area. + */ + nbytes = nanddev_page_size(nand) + nanddev_per_page_oobsize(nand); + memset(spinand->databuf, 0xff, nbytes); + adjreq.dataoffs = 0; + adjreq.datalen = nanddev_page_size(nand); + adjreq.databuf.out = spinand->databuf; + adjreq.ooblen = nanddev_per_page_oobsize(nand); + adjreq.ooboffs = 0; + adjreq.oobbuf.out = spinand->oobbuf; - if (req->datalen) { + if (req->datalen) memcpy(spinand->databuf + req->dataoffs, req->databuf.out, req->datalen); - adjreq.dataoffs = 0; - adjreq.datalen = nanddev_page_size(nand); - adjreq.databuf.out = spinand->databuf; - nbytes = adjreq.datalen; - buf = spinand->databuf; - } if (req->ooblen) { if (req->mode == MTD_OPS_AUTO_OOB) @@ -332,14 +338,6 @@ static int spinand_write_to_cache_op(struct spinand_device *spinand, else memcpy(spinand->oobbuf + req->ooboffs, req->oobbuf.out, req->ooblen); - - adjreq.ooblen = nanddev_per_page_oobsize(nand); - adjreq.ooboffs = 0; - nbytes += nanddev_per_page_oobsize(nand); - if (!buf) { - buf = spinand->oobbuf; - column = nanddev_page_size(nand); - } } spinand_cache_op_adjust_colum(spinand, &adjreq, &column); @@ -370,8 +368,8 @@ static int spinand_write_to_cache_op(struct spinand_device *spinand, /* * We need to use the RANDOM LOAD CACHE operation if there's - * more than one iteration, because the LOAD operation resets - * the cache to 0xff. + * more than one iteration, because the LOAD operation might + * reset the cache to 0xff. */ if (nbytes) { column = op.addr.val; @@ -1018,11 +1016,11 @@ static int spinand_init(struct spinand_device *spinand) for (i = 0; i < nand->memorg.ntargets; i++) { ret = spinand_select_target(spinand, i); if (ret) - goto err_free_bufs; + goto err_manuf_cleanup; ret = spinand_lock_block(spinand, BL_ALL_UNLOCKED); if (ret) - goto err_free_bufs; + goto err_manuf_cleanup; } ret = nanddev_init(nand, &spinand_ops, THIS_MODULE); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index edb1c023a753..6210757772ed 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -145,13 +145,16 @@ config MACVTAP To compile this driver as a module, choose M here: the module will be called macvtap. +config IPVLAN_L3S + depends on NETFILTER + depends on IPVLAN + def_bool y + select NET_L3_MASTER_DEV config IPVLAN tristate "IP-VLAN support" depends on INET depends on IPV6 || !IPV6 - depends on NETFILTER - select NET_L3_MASTER_DEV ---help--- This allows one to create virtual devices off of a main interface and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) @@ -197,9 +200,9 @@ config VXLAN config GENEVE tristate "Generic Network Virtualization Encapsulation" - depends on INET && NET_UDP_TUNNEL + depends on INET depends on IPV6 || !IPV6 - select NET_IP_TUNNEL + select NET_UDP_TUNNEL select GRO_CELLS ---help--- This allows one to create geneve virtual interfaces that provide diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index f90bb723985f..b3c63d2f16aa 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -301,7 +301,7 @@ static int __init cops_probe1(struct net_device *dev, int ioaddr) dev->irq = cops_irq(ioaddr, board); if (dev->irq) break; - /* No IRQ found on this port, fallthrough */ + /* fall through - Once no IRQ found on this port. */ case 1: retval = -EINVAL; goto err_out; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 4d5d01cb8141..da1fc17295d9 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1375,6 +1375,7 @@ static int bond_option_slaves_set(struct bonding *bond, sscanf(newval->string, "%16s", command); /* IFNAMSIZ*/ ifname = command + 1; if ((strlen(command) <= 1) || + (command[0] != '+' && command[0] != '-') || !dev_valid_name(ifname)) goto err_no_cmd; @@ -1398,6 +1399,7 @@ static int bond_option_slaves_set(struct bonding *bond, break; default: + /* should not run here. */ goto err_no_cmd; } diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c index d28a1398c091..7608bc3e00df 100644 --- a/drivers/net/caif/caif_spi.c +++ b/drivers/net/caif/caif_spi.c @@ -73,35 +73,37 @@ MODULE_PARM_DESC(spi_down_tail_align, "SPI downlink tail alignment."); #define LOW_WATER_MARK 100 #define HIGH_WATER_MARK (LOW_WATER_MARK*5) -#ifdef CONFIG_UML +#ifndef CONFIG_HAS_DMA /* * We sometimes use UML for debugging, but it cannot handle * dma_alloc_coherent so we have to wrap it. */ -static inline void *dma_alloc(dma_addr_t *daddr) +static inline void *dma_alloc(struct cfspi *cfspi, dma_addr_t *daddr) { return kmalloc(SPI_DMA_BUF_LEN, GFP_KERNEL); } -static inline void dma_free(void *cpu_addr, dma_addr_t handle) +static inline void dma_free(struct cfspi *cfspi, void *cpu_addr, + dma_addr_t handle) { kfree(cpu_addr); } #else -static inline void *dma_alloc(dma_addr_t *daddr) +static inline void *dma_alloc(struct cfspi *cfspi, dma_addr_t *daddr) { - return dma_alloc_coherent(NULL, SPI_DMA_BUF_LEN, daddr, + return dma_alloc_coherent(&cfspi->pdev->dev, SPI_DMA_BUF_LEN, daddr, GFP_KERNEL); } -static inline void dma_free(void *cpu_addr, dma_addr_t handle) +static inline void dma_free(struct cfspi *cfspi, void *cpu_addr, + dma_addr_t handle) { - dma_free_coherent(NULL, SPI_DMA_BUF_LEN, cpu_addr, handle); + dma_free_coherent(&cfspi->pdev->dev, SPI_DMA_BUF_LEN, cpu_addr, handle); } -#endif /* CONFIG_UML */ +#endif /* CONFIG_HAS_DMA */ #ifdef CONFIG_DEBUG_FS @@ -610,13 +612,13 @@ static int cfspi_init(struct net_device *dev) } /* Allocate DMA buffers. */ - cfspi->xfer.va_tx[0] = dma_alloc(&cfspi->xfer.pa_tx[0]); + cfspi->xfer.va_tx[0] = dma_alloc(cfspi, &cfspi->xfer.pa_tx[0]); if (!cfspi->xfer.va_tx[0]) { res = -ENODEV; goto err_dma_alloc_tx_0; } - cfspi->xfer.va_rx = dma_alloc(&cfspi->xfer.pa_rx); + cfspi->xfer.va_rx = dma_alloc(cfspi, &cfspi->xfer.pa_rx); if (!cfspi->xfer.va_rx) { res = -ENODEV; @@ -665,9 +667,9 @@ static int cfspi_init(struct net_device *dev) return 0; err_create_wq: - dma_free(cfspi->xfer.va_rx, cfspi->xfer.pa_rx); + dma_free(cfspi, cfspi->xfer.va_rx, cfspi->xfer.pa_rx); err_dma_alloc_rx: - dma_free(cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]); + dma_free(cfspi, cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]); err_dma_alloc_tx_0: return res; } @@ -683,8 +685,8 @@ static void cfspi_uninit(struct net_device *dev) cfspi->ndev = NULL; /* Free DMA buffers. */ - dma_free(cfspi->xfer.va_rx, cfspi->xfer.pa_rx); - dma_free(cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]); + dma_free(cfspi, cfspi->xfer.va_rx, cfspi->xfer.pa_rx); + dma_free(cfspi, cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]); set_bit(SPI_TERMINATE, &cfspi->state); wake_up_interruptible(&cfspi->wait); destroy_workqueue(cfspi->wq); diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index 90f514252987..d9c56a779c08 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -511,9 +511,6 @@ static void b53_srab_prepare_irq(struct platform_device *pdev) /* Clear all pending interrupts */ writel(0xffffffff, priv->regs + B53_SRAB_INTR); - if (dev->pdata && dev->pdata->chip_id != BCM58XX_DEVICE_ID) - return; - for (i = 0; i < B53_N_PORTS; i++) { port = &priv->port_intrs[i]; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 361fbde76654..98696a88fa1c 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -690,7 +690,7 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) * port, the other ones have already been disabled during * bcm_sf2_sw_setup */ - for (port = 0; port < DSA_MAX_PORTS; port++) { + for (port = 0; port < ds->num_ports; port++) { if (dsa_is_user_port(ds, port) || dsa_is_cpu_port(ds, port)) bcm_sf2_port_disable(ds, port, NULL); } @@ -894,12 +894,44 @@ static const struct b53_io_ops bcm_sf2_io_ops = { .write64 = bcm_sf2_core_write64, }; +static void bcm_sf2_sw_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data) +{ + int cnt = b53_get_sset_count(ds, port, stringset); + + b53_get_strings(ds, port, stringset, data); + bcm_sf2_cfp_get_strings(ds, port, stringset, + data + cnt * ETH_GSTRING_LEN); +} + +static void bcm_sf2_sw_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + int cnt = b53_get_sset_count(ds, port, ETH_SS_STATS); + + b53_get_ethtool_stats(ds, port, data); + bcm_sf2_cfp_get_ethtool_stats(ds, port, data + cnt); +} + +static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds, int port, + int sset) +{ + int cnt = b53_get_sset_count(ds, port, sset); + + if (cnt < 0) + return cnt; + + cnt += bcm_sf2_cfp_get_sset_count(ds, port, sset); + + return cnt; +} + static const struct dsa_switch_ops bcm_sf2_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = bcm_sf2_sw_setup, - .get_strings = b53_get_strings, - .get_ethtool_stats = b53_get_ethtool_stats, - .get_sset_count = b53_get_sset_count, + .get_strings = bcm_sf2_sw_get_strings, + .get_ethtool_stats = bcm_sf2_sw_get_ethtool_stats, + .get_sset_count = bcm_sf2_sw_get_sset_count, .get_ethtool_phy_stats = b53_get_ethtool_phy_stats, .get_phy_flags = bcm_sf2_sw_get_phy_flags, .phylink_validate = bcm_sf2_sw_validate, @@ -1062,7 +1094,6 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, priv); spin_lock_init(&priv->indir_lock); - mutex_init(&priv->stats_mutex); mutex_init(&priv->cfp.lock); INIT_LIST_HEAD(&priv->cfp.rules_list); diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index faaef320ec48..eb3655bea467 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -87,9 +87,6 @@ struct bcm_sf2_priv { /* Backing b53_device */ struct b53_device *dev; - /* Mutex protecting access to the MIB counters */ - struct mutex stats_mutex; - struct bcm_sf2_hw_params hw_params; struct bcm_sf2_port_status port_sts[DSA_MAX_PORTS]; @@ -216,5 +213,10 @@ int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port, int bcm_sf2_cfp_rst(struct bcm_sf2_priv *priv); void bcm_sf2_cfp_exit(struct dsa_switch *ds); int bcm_sf2_cfp_resume(struct dsa_switch *ds); +void bcm_sf2_cfp_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data); +void bcm_sf2_cfp_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data); +int bcm_sf2_cfp_get_sset_count(struct dsa_switch *ds, int port, int sset); #endif /* __BCM_SF2_H */ diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 6d8059dc77b7..0b9ca4bdf47e 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -213,6 +213,7 @@ static inline unsigned int bcm_sf2_cfp_rule_size(struct bcm_sf2_priv *priv) static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv, unsigned int rule_index, + int src_port, unsigned int port_num, unsigned int queue_num, bool fwd_map_change) @@ -230,6 +231,10 @@ static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv, else reg = 0; + /* Enable looping back to the original port */ + if (src_port == port_num) + reg |= LOOP_BK_EN; + core_writel(priv, reg, CORE_ACT_POL_DATA0); /* Set classification ID that needs to be put in Broadcom tag */ @@ -443,7 +448,7 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, } /* Insert into Action and policer RAMs now */ - ret = bcm_sf2_cfp_act_pol_set(priv, rule_index, port_num, + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index, port, port_num, queue_num, true); if (ret) goto out_err_flow_rule; @@ -733,7 +738,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, } /* Insert into Action and policer RAMs now */ - ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port_num, + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port, port_num, queue_num, false); if (ret) goto out_err_flow_rule; @@ -795,7 +800,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, /* Insert into Action and policer RAMs now, set chain ID to * the one we are chained to */ - ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port_num, + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port, port_num, queue_num, true); if (ret) goto out_err_flow_rule; @@ -1201,3 +1206,91 @@ int bcm_sf2_cfp_resume(struct dsa_switch *ds) return ret; } + +static const struct bcm_sf2_cfp_stat { + unsigned int offset; + unsigned int ram_loc; + const char *name; +} bcm_sf2_cfp_stats[] = { + { + .offset = CORE_STAT_GREEN_CNTR, + .ram_loc = GREEN_STAT_RAM, + .name = "Green" + }, + { + .offset = CORE_STAT_YELLOW_CNTR, + .ram_loc = YELLOW_STAT_RAM, + .name = "Yellow" + }, + { + .offset = CORE_STAT_RED_CNTR, + .ram_loc = RED_STAT_RAM, + .name = "Red" + }, +}; + +void bcm_sf2_cfp_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data) +{ + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + unsigned int s = ARRAY_SIZE(bcm_sf2_cfp_stats); + char buf[ETH_GSTRING_LEN]; + unsigned int i, j, iter; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 1; i < priv->num_cfp_rules; i++) { + for (j = 0; j < s; j++) { + snprintf(buf, sizeof(buf), + "CFP%03d_%sCntr", + i, bcm_sf2_cfp_stats[j].name); + iter = (i - 1) * s + j; + strlcpy(data + iter * ETH_GSTRING_LEN, + buf, ETH_GSTRING_LEN); + } + } +} + +void bcm_sf2_cfp_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + unsigned int s = ARRAY_SIZE(bcm_sf2_cfp_stats); + const struct bcm_sf2_cfp_stat *stat; + unsigned int i, j, iter; + struct cfp_rule *rule; + int ret; + + mutex_lock(&priv->cfp.lock); + for (i = 1; i < priv->num_cfp_rules; i++) { + rule = bcm_sf2_cfp_rule_find(priv, port, i); + if (!rule) + continue; + + for (j = 0; j < s; j++) { + stat = &bcm_sf2_cfp_stats[j]; + + bcm_sf2_cfp_rule_addr_set(priv, i); + ret = bcm_sf2_cfp_op(priv, stat->ram_loc | OP_SEL_READ); + if (ret) + continue; + + iter = (i - 1) * s + j; + data[iter] = core_readl(priv, stat->offset); + } + + } + mutex_unlock(&priv->cfp.lock); +} + +int bcm_sf2_cfp_get_sset_count(struct dsa_switch *ds, int port, int sset) +{ + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + + if (sset != ETH_SS_STATS) + return 0; + + /* 3 counters per CFP rules */ + return (priv->num_cfp_rules - 1) * ARRAY_SIZE(bcm_sf2_cfp_stats); +} diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 0a1e530d52b7..67f056206f37 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -400,6 +400,10 @@ enum bcm_sf2_reg_offs { #define CORE_RATE_METER6 0x281e0 #define CIR_REF_CNT_MASK 0x7ffff +#define CORE_STAT_GREEN_CNTR 0x28200 +#define CORE_STAT_YELLOW_CNTR 0x28210 +#define CORE_STAT_RED_CNTR 0x28220 + #define CORE_CFP_CTL_REG 0x28400 #define CFP_EN_MAP_MASK 0x1ff diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 89ed059bb576..674d77e1b029 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -397,6 +397,7 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port, struct ksz_port *p = &dev->ports[port]; u8 data; int member = -1; + int forward = dev->member; ksz_pread8(dev, port, P_STP_CTRL, &data); data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE); @@ -464,10 +465,10 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port, } /* When topology has changed the function ksz_update_port_member - * should be called to modify port forwarding behavior. However - * as the offload_fwd_mark indication cannot be reported here - * the switch forwarding function is not enabled. + * should be called to modify port forwarding behavior. */ + if (forward != dev->member) + ksz_update_port_member(dev, port); } static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 669a7f13fdce..32e7af5caa69 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -261,6 +261,7 @@ static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip) unsigned int sub_irq; unsigned int n; u16 reg; + u16 ctl1; int err; mutex_lock(&chip->reg_lock); @@ -270,13 +271,28 @@ static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip) if (err) goto out; - for (n = 0; n < chip->g1_irq.nirqs; ++n) { - if (reg & (1 << n)) { - sub_irq = irq_find_mapping(chip->g1_irq.domain, n); - handle_nested_irq(sub_irq); - ++nhandled; + do { + for (n = 0; n < chip->g1_irq.nirqs; ++n) { + if (reg & (1 << n)) { + sub_irq = irq_find_mapping(chip->g1_irq.domain, + n); + handle_nested_irq(sub_irq); + ++nhandled; + } } - } + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &ctl1); + if (err) + goto unlock; + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, ®); +unlock: + mutex_unlock(&chip->reg_lock); + if (err) + goto out; + ctl1 &= GENMASK(chip->g1_irq.nirqs, 0); + } while (reg & ctl1); + out: return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE); } @@ -647,8 +663,10 @@ static void mv88e6390_phylink_validate(struct mv88e6xxx_chip *chip, int port, unsigned long *mask, struct phylink_link_state *state) { - if (port >= 9) + if (port >= 9) { phylink_set(mask, 2500baseX_Full); + phylink_set(mask, 2500baseT_Full); + } /* No ethtool bits for 200Mbps */ phylink_set(mask, 1000baseT_Full); diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c index 5200e4bdce93..ea243840ee0f 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c @@ -314,6 +314,7 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) { struct mv88e6xxx_chip *chip = dev_id; struct mv88e6xxx_atu_entry entry; + int spid; int err; u16 val; @@ -336,6 +337,8 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) if (err) goto out; + spid = entry.state; + if (val & MV88E6XXX_G1_ATU_OP_AGE_OUT_VIOLATION) { dev_err_ratelimited(chip->dev, "ATU age out violation for %pM\n", @@ -344,23 +347,23 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) if (val & MV88E6XXX_G1_ATU_OP_MEMBER_VIOLATION) { dev_err_ratelimited(chip->dev, - "ATU member violation for %pM portvec %x\n", - entry.mac, entry.portvec); - chip->ports[entry.portvec].atu_member_violation++; + "ATU member violation for %pM portvec %x spid %d\n", + entry.mac, entry.portvec, spid); + chip->ports[spid].atu_member_violation++; } if (val & MV88E6XXX_G1_ATU_OP_MISS_VIOLATION) { dev_err_ratelimited(chip->dev, - "ATU miss violation for %pM portvec %x\n", - entry.mac, entry.portvec); - chip->ports[entry.portvec].atu_miss_violation++; + "ATU miss violation for %pM portvec %x spid %d\n", + entry.mac, entry.portvec, spid); + chip->ports[spid].atu_miss_violation++; } if (val & MV88E6XXX_G1_ATU_OP_FULL_VIOLATION) { dev_err_ratelimited(chip->dev, - "ATU full violation for %pM portvec %x\n", - entry.mac, entry.portvec); - chip->ports[entry.portvec].atu_full_violation++; + "ATU full violation for %pM portvec %x spid %d\n", + entry.mac, entry.portvec, spid); + chip->ports[spid].atu_full_violation++; } mutex_unlock(&chip->reg_lock); diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index b648e3f95c01..808abb6b3671 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -1177,7 +1177,7 @@ static irqreturn_t corkscrew_interrupt(int irq, void *dev_id) if (inl(ioaddr + DownListPtr) == isa_virt_to_bus(&lp->tx_ring[entry])) break; /* It still hasn't been processed. */ if (lp->tx_skbuff[entry]) { - dev_kfree_skb_irq(lp->tx_skbuff[entry]); + dev_consume_skb_irq(lp->tx_skbuff[entry]); lp->tx_skbuff[entry] = NULL; } dirty_tx++; @@ -1192,7 +1192,7 @@ static irqreturn_t corkscrew_interrupt(int irq, void *dev_id) #ifdef VORTEX_BUS_MASTER if (status & DMADone) { outw(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */ - dev_kfree_skb_irq(lp->tx_skb); /* Release the transferred buffer */ + dev_consume_skb_irq(lp->tx_skb); /* Release the transferred buffer */ netif_wake_queue(dev); } #endif diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 40f421dbdf57..147051404194 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -2307,7 +2307,7 @@ _vortex_interrupt(int irq, struct net_device *dev) dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE); pkts_compl++; bytes_compl += vp->tx_skb->len; - dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */ + dev_consume_skb_irq(vp->tx_skb); /* Release the transferred buffer */ if (ioread16(ioaddr + TxFree) > 1536) { /* * AKPM: FIXME: I don't think we need this. If the queue was stopped due to @@ -2449,7 +2449,7 @@ _boomerang_interrupt(int irq, struct net_device *dev) #endif pkts_compl++; bytes_compl += skb->len; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); vp->tx_skbuff[entry] = NULL; } else { pr_debug("boomerang_interrupt: no skb!\n"); diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 097467f44b0d..816540e6beac 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -1390,7 +1390,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) } } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } np->tx_done_q[np->tx_done].status = 0; np->tx_done = (np->tx_done + 1) % DONE_Q_SIZE; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a70bb1bb90e7..a6eacf2099c3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2663,11 +2663,6 @@ static int ena_restore_device(struct ena_adapter *adapter) goto err_device_destroy; } - clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); - /* Make sure we don't have a race with AENQ Links state handler */ - if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) - netif_carrier_on(adapter->netdev); - rc = ena_enable_msix_and_set_admin_interrupts(adapter, adapter->num_queues); if (rc) { @@ -2684,6 +2679,11 @@ static int ena_restore_device(struct ena_adapter *adapter) } set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); + + clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); + if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) + netif_carrier_on(adapter->netdev); + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); dev_err(&pdev->dev, "Device reset completed successfully, Driver info: %s\n", diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index dc8b6173d8d8..63870072cbbd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -45,7 +45,7 @@ #define DRV_MODULE_VER_MAJOR 2 #define DRV_MODULE_VER_MINOR 0 -#define DRV_MODULE_VER_SUBMINOR 2 +#define DRV_MODULE_VER_SUBMINOR 3 #define DRV_MODULE_NAME "ena" #ifndef DRV_MODULE_VERSION diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index e833d1b3fe18..e5073aeea06a 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1167,7 +1167,7 @@ static int au1000_probe(struct platform_device *pdev) /* Allocate the data buffers * Snooping works fine with eth on all au1xxx */ - aup->vaddr = (u32)dma_alloc_attrs(NULL, MAX_BUF_SIZE * + aup->vaddr = (u32)dma_alloc_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), &aup->dma_addr, 0, DMA_ATTR_NON_CONSISTENT); @@ -1349,7 +1349,7 @@ err_remap3: err_remap2: iounmap(aup->mac); err_remap1: - dma_free_attrs(NULL, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), + dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), (void *)aup->vaddr, aup->dma_addr, DMA_ATTR_NON_CONSISTENT); err_vaddr: @@ -1383,7 +1383,7 @@ static int au1000_remove(struct platform_device *pdev) if (aup->tx_db_inuse[i]) au1000_ReleaseDB(aup, aup->tx_db_inuse[i]); - dma_free_attrs(NULL, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), + dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), (void *)aup->vaddr, aup->dma_addr, DMA_ATTR_NON_CONSISTENT); diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c index b56d84c7df46..f90b454b1642 100644 --- a/drivers/net/ethernet/amd/lance.c +++ b/drivers/net/ethernet/amd/lance.c @@ -1084,7 +1084,7 @@ static irqreturn_t lance_interrupt(int irq, void *dev_id) /* We must free the original skb if it's not a data-only copy in the bounce buffer. */ if (lp->tx_skbuff[entry]) { - dev_kfree_skb_irq(lp->tx_skbuff[entry]); + dev_consume_skb_irq(lp->tx_skbuff[entry]); lp->tx_skbuff[entry] = NULL; } dirty_tx++; diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index 8931ce6bab7b..87ff5d6d1b22 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -1028,7 +1028,7 @@ static void ni65_xmit_intr(struct net_device *dev,int csr0) #ifdef XMT_VIA_SKB if(p->tmd_skb[p->tmdlast]) { - dev_kfree_skb_irq(p->tmd_skb[p->tmdlast]); + dev_consume_skb_irq(p->tmd_skb[p->tmdlast]); p->tmd_skb[p->tmdlast] = NULL; } #endif diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c index 68b9ee489489..4d9819d2894d 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -764,7 +764,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) dev->stats.tx_bytes += mp->tx_bufs[i]->len; ++dev->stats.tx_packets; } - dev_kfree_skb_irq(mp->tx_bufs[i]); + dev_consume_skb_irq(mp->tx_bufs[i]); --mp->tx_active; if (++i >= N_TX_RING) i = 0; diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 4406325fdd9f..ff3d68532f5f 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -148,7 +148,7 @@ static void arc_emac_tx_clean(struct net_device *ndev) dma_unmap_len(tx_buff, len), DMA_TO_DEVICE); /* return the sk_buff to system */ - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); txbd->data = 0; txbd->info = 0; diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 3164aad29bcf..9dfe6a9431e6 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1259,7 +1259,7 @@ static bool atl1e_clean_tx_irq(struct atl1e_adapter *adapter) } if (tx_buffer->skb) { - dev_kfree_skb_irq(tx_buffer->skb); + dev_consume_skb_irq(tx_buffer->skb); tx_buffer->skb = NULL; } diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 63edc5706c09..9e07b469066a 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2088,7 +2088,7 @@ static int atl1_intr_tx(struct atl1_adapter *adapter) } if (buffer_info->skb) { - dev_kfree_skb_irq(buffer_info->skb); + dev_consume_skb_irq(buffer_info->skb); buffer_info->skb = NULL; } diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f9521d0274b7..28c9b0bdf2f6 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -520,7 +520,6 @@ static void bcm_sysport_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcm_sysport_priv *priv = netdev_priv(dev); - u32 reg; wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; wol->wolopts = priv->wolopts; @@ -528,11 +527,7 @@ static void bcm_sysport_get_wol(struct net_device *dev, if (!(priv->wolopts & WAKE_MAGICSECURE)) return; - /* Return the programmed SecureOn password */ - reg = umac_readl(priv, UMAC_PSW_MS); - put_unaligned_be16(reg, &wol->sopass[0]); - reg = umac_readl(priv, UMAC_PSW_LS); - put_unaligned_be32(reg, &wol->sopass[2]); + memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass)); } static int bcm_sysport_set_wol(struct net_device *dev, @@ -548,13 +543,8 @@ static int bcm_sysport_set_wol(struct net_device *dev, if (wol->wolopts & ~supported) return -EINVAL; - /* Program the SecureOn password */ - if (wol->wolopts & WAKE_MAGICSECURE) { - umac_writel(priv, get_unaligned_be16(&wol->sopass[0]), - UMAC_PSW_MS); - umac_writel(priv, get_unaligned_be32(&wol->sopass[2]), - UMAC_PSW_LS); - } + if (wol->wolopts & WAKE_MAGICSECURE) + memcpy(priv->sopass, wol->sopass, sizeof(priv->sopass)); /* Flag the device and relevant IRQ as wakeup capable */ if (wol->wolopts) { @@ -2649,13 +2639,18 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) unsigned int index, i = 0; u32 reg; - /* Password has already been programmed */ reg = umac_readl(priv, UMAC_MPD_CTRL); if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) reg |= MPD_EN; reg &= ~PSW_EN; - if (priv->wolopts & WAKE_MAGICSECURE) + if (priv->wolopts & WAKE_MAGICSECURE) { + /* Program the SecureOn password */ + umac_writel(priv, get_unaligned_be16(&priv->sopass[0]), + UMAC_PSW_MS); + umac_writel(priv, get_unaligned_be32(&priv->sopass[2]), + UMAC_PSW_LS); reg |= PSW_EN; + } umac_writel(priv, reg, UMAC_MPD_CTRL); if (priv->wolopts & WAKE_FILTER) { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 0887e6356649..0b192fea9c5d 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -12,6 +12,7 @@ #define __BCM_SYSPORT_H #include <linux/bitmap.h> +#include <linux/ethtool.h> #include <linux/if_vlan.h> #include <linux/net_dim.h> @@ -778,6 +779,7 @@ struct bcm_sysport_priv { unsigned int crc_fwd:1; u16 rev; u32 wolopts; + u8 sopass[SOPASS_MAX]; unsigned int wol_irq_disabled:1; /* MIB related fields */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 8e0a317b31f7..a9bdc21873d3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -1654,13 +1654,9 @@ static int bnx2x_vf_mbx_macvlan_list(struct bnx2x *bp, { int i, j; struct bnx2x_vf_mac_vlan_filters *fl = NULL; - size_t fsz; - fsz = tlv->n_mac_vlan_filters * - sizeof(struct bnx2x_vf_mac_vlan_filter) + - sizeof(struct bnx2x_vf_mac_vlan_filters); - - fl = kzalloc(fsz, GFP_KERNEL); + fl = kzalloc(struct_size(fl, filters, tlv->n_mac_vlan_filters), + GFP_KERNEL); if (!fl) return -ENOMEM; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1c2987c3d708..92d73453a318 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4973,12 +4973,18 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; u32 map_idx = ring->map_idx; + unsigned int vector; + vector = bp->irq_tbl[map_idx].vector; + disable_irq_nosync(vector); rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx); - if (rc) + if (rc) { + enable_irq(vector); goto err_out; + } bnxt_set_db(bp, &cpr->cp_db, type, map_idx, ring->fw_ring_id); bnxt_db_nq(bp, &cpr->cp_db, cpr->cp_raw_cons); + enable_irq(vector); bp->grp_info[i].cp_fw_ring_id = ring->fw_ring_id; if (!i) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index a6abfa4fb168..e1feb97bcd81 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -37,8 +37,6 @@ static const struct bnxt_dl_nvm_param nvm_params[] = { NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7}, {BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, NVM_OFF_DIS_GRE_VER_CHECK, BNXT_NVM_SHARED_CFG, 1}, - - {DEVLINK_PARAM_GENERIC_ID_WOL, NVM_OFF_WOL, BNXT_NVM_PORT_CFG, 1}, }; static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, @@ -72,8 +70,7 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, bytesize = roundup(nvm_param.num_bits, BITS_PER_BYTE) / BITS_PER_BYTE; switch (bytesize) { case 1: - if (nvm_param.num_bits == 1 && - nvm_param.id != DEVLINK_PARAM_GENERIC_ID_WOL) + if (nvm_param.num_bits == 1) buf = &val->vbool; else buf = &val->vu8; @@ -167,17 +164,6 @@ static int bnxt_dl_msix_validate(struct devlink *dl, u32 id, return 0; } -static int bnxt_dl_wol_validate(struct devlink *dl, u32 id, - union devlink_param_value val, - struct netlink_ext_ack *extack) -{ - if (val.vu8 && val.vu8 != DEVLINK_PARAM_WAKE_MAGIC) { - NL_SET_ERR_MSG_MOD(extack, "WOL type is not supported"); - return -EINVAL; - } - return 0; -} - static const struct devlink_param bnxt_dl_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT), @@ -203,9 +189,6 @@ static const struct devlink_param bnxt_dl_params[] = { }; static const struct devlink_param bnxt_dl_port_params[] = { - DEVLINK_PARAM_GENERIC(WOL, BIT(DEVLINK_PARAM_CMODE_PERMANENT), - bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set, - bnxt_dl_wol_validate), }; int bnxt_dl_register(struct bnxt *bp) @@ -258,6 +241,9 @@ int bnxt_dl_register(struct bnxt *bp) netdev_err(bp->dev, "devlink_port_params_register failed"); goto err_dl_port_unreg; } + + devlink_params_publish(dl); + return 0; err_dl_port_unreg: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index da065ca84cc7..5b6b2c7d97cf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -35,7 +35,6 @@ static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl) #define NVM_OFF_MSIX_VEC_PER_PF_MAX 108 #define NVM_OFF_MSIX_VEC_PER_PF_MIN 114 -#define NVM_OFF_WOL 152 #define NVM_OFF_IGNORE_ARI 164 #define NVM_OFF_DIS_GRE_VER_CHECK 171 #define NVM_OFF_ENABLE_SRIOV 401 diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 5db9f4158e62..134ae2862efa 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -1288,7 +1288,7 @@ static void sbdma_tx_process(struct sbmac_softc *sc, struct sbmacdma *d, * for transmits, we just free buffers. */ - dev_kfree_skb_irq(sb); + dev_consume_skb_irq(sb); /* * .. and advance to the next buffer. diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 2b2882615e8b..f2915f2fe21a 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1734,7 +1734,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) if (!nskb) return -ENOMEM; - dev_kfree_skb_any(*skb); + dev_consume_skb_any(*skb); *skb = nskb; } @@ -3673,9 +3673,9 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb, /* Store packet information (to free when Tx completed) */ lp->skb = skb; lp->skb_length = skb->len; - lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len, - DMA_TO_DEVICE); - if (dma_mapping_error(NULL, lp->skb_physaddr)) { + lp->skb_physaddr = dma_map_single(&lp->pdev->dev, skb->data, + skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&lp->pdev->dev, lp->skb_physaddr)) { dev_kfree_skb_any(skb); dev->stats.tx_dropped++; netdev_err(dev, "%s: DMA mapping error\n", __func__); @@ -3763,9 +3763,9 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id) dev->stats.tx_errors++; if (lp->skb) { - dev_kfree_skb_irq(lp->skb); + dev_consume_skb_irq(lp->skb); lp->skb = NULL; - dma_unmap_single(NULL, lp->skb_physaddr, + dma_unmap_single(&lp->pdev->dev, lp->skb_physaddr, lp->skb_length, DMA_TO_DEVICE); dev->stats.tx_packets++; dev->stats.tx_bytes += lp->skb_length; @@ -3943,6 +3943,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,np4-macb", .data = &np4_config }, { .compatible = "cdns,pc302-gem", .data = &pc302gem_config }, { .compatible = "cdns,gem", .data = &pc302gem_config }, + { .compatible = "cdns,sam9x60-macb", .data = &at91sam9260_config }, { .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config }, { .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config }, { .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config }, diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index 5f03199a3acf..05f4a3b21e29 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -54,7 +54,6 @@ config CAVIUM_PTP tristate "Cavium PTP coprocessor as PTP clock" depends on 64BIT && PCI imply PTP_1588_CLOCK - default y ---help--- This driver adds support for the Precision Time Protocol Clocks and Timestamping coprocessor (PTP) found on Cavium processors. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 568715a13b5c..b7b0eb104430 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -617,6 +617,7 @@ enum { /* adapter flags */ FW_OFLD_CONN = (1 << 9), ROOT_NO_RELAXED_ORDERING = (1 << 10), SHUTTING_DOWN = (1 << 11), + SGE_DBQ_TIMER = (1 << 12), }; enum { @@ -756,6 +757,8 @@ struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ #ifdef CONFIG_CHELSIO_T4_DCB u8 dcb_prio; /* DCB Priority bound to queue */ #endif + u8 dbqt; /* SGE Doorbell Queue Timer in use */ + unsigned int dbqtimerix; /* SGE Doorbell Queue Timer Index */ unsigned long tso; /* # of TSO requests */ unsigned long tx_cso; /* # of Tx checksum offloads */ unsigned long vlan_ins; /* # of Tx VLAN insertions */ @@ -816,6 +819,8 @@ struct sge { u16 nqs_per_uld; /* # of Rx queues per ULD */ u16 timer_val[SGE_NTIMERS]; u8 counter_val[SGE_NCOUNTERS]; + u16 dbqtimer_tick; + u16 dbqtimer_val[SGE_NDBQTIMERS]; u32 fl_pg_order; /* large page allocation size */ u32 stat_len; /* length of status page at ring end */ u32 pktshift; /* padding between CPL & packet data */ @@ -1402,7 +1407,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, rspq_flush_handler_t flush_handler, int cong); int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *netdevq, - unsigned int iqid); + unsigned int iqid, u8 dbqt); int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int cmplqid); @@ -1415,6 +1420,8 @@ irqreturn_t t4_sge_intr_msix(int irq, void *cookie); int t4_sge_init(struct adapter *adap); void t4_sge_start(struct adapter *adap); void t4_sge_stop(struct adapter *adap); +int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *q, + int maxreclaim); void cxgb4_set_ethtool_ops(struct net_device *netdev); int cxgb4_write_rss(const struct port_info *pi, const u16 *queues); enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb); @@ -1821,6 +1828,8 @@ int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type); +int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers, + u16 *dbqtimers); void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl); int t4_update_port_info(struct port_info *pi); int t4_get_link_params(struct port_info *pi, unsigned int *link_okp, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 796043544fc3..65b8dc706c1d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -932,11 +932,190 @@ static int get_adaptive_rx_setting(struct net_device *dev) return q->rspq.adaptive_rx; } -static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) +/* Return the current global Adapter SGE Doorbell Queue Timer Tick for all + * Ethernet TX Queues. + */ +static int get_dbqtimer_tick(struct net_device *dev) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + + if (!(adap->flags & SGE_DBQ_TIMER)) + return 0; + + return adap->sge.dbqtimer_tick; +} + +/* Return the SGE Doorbell Queue Timer Value for the Ethernet TX Queues + * associated with a Network Device. + */ +static int get_dbqtimer(struct net_device *dev) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sge_eth_txq *txq; + + txq = &adap->sge.ethtxq[pi->first_qset]; + + if (!(adap->flags & SGE_DBQ_TIMER)) + return 0; + + /* all of the TX Queues use the same Timer Index */ + return adap->sge.dbqtimer_val[txq->dbqtimerix]; +} + +/* Set the global Adapter SGE Doorbell Queue Timer Tick for all Ethernet TX + * Queues. This is the fundamental "Tick" that sets the scale of values which + * can be used. Individual Ethernet TX Queues index into a relatively small + * array of Tick Multipliers. Changing the base Tick will thus change all of + * the resulting Timer Values associated with those multipliers for all + * Ethernet TX Queues. + */ +static int set_dbqtimer_tick(struct net_device *dev, int usecs) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sge *s = &adap->sge; + u32 param, val; + int ret; + + if (!(adap->flags & SGE_DBQ_TIMER)) + return 0; + + /* return early if it's the same Timer Tick we're already using */ + if (s->dbqtimer_tick == usecs) + return 0; + + /* attempt to set the new Timer Tick value */ + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK)); + val = usecs; + ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, ¶m, &val); + if (ret) + return ret; + s->dbqtimer_tick = usecs; + + /* if successful, reread resulting dependent Timer values */ + ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(s->dbqtimer_val), + s->dbqtimer_val); + return ret; +} + +/* Set the SGE Doorbell Queue Timer Value for the Ethernet TX Queues + * associated with a Network Device. There is a relatively small array of + * possible Timer Values so we need to pick the closest value available. + */ +static int set_dbqtimer(struct net_device *dev, int usecs) +{ + int qix, timerix, min_timerix, delta, min_delta; + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sge *s = &adap->sge; + struct sge_eth_txq *txq; + u32 param, val; + int ret; + + if (!(adap->flags & SGE_DBQ_TIMER)) + return 0; + + /* Find the SGE Doorbell Timer Value that's closest to the requested + * value. + */ + min_delta = INT_MAX; + min_timerix = 0; + for (timerix = 0; timerix < ARRAY_SIZE(s->dbqtimer_val); timerix++) { + delta = s->dbqtimer_val[timerix] - usecs; + if (delta < 0) + delta = -delta; + if (delta < min_delta) { + min_delta = delta; + min_timerix = timerix; + } + } + + /* Return early if it's the same Timer Index we're already using. + * We use the same Timer Index for all of the TX Queues for an + * interface so it's only necessary to check the first one. + */ + txq = &s->ethtxq[pi->first_qset]; + if (txq->dbqtimerix == min_timerix) + return 0; + + for (qix = 0; qix < pi->nqsets; qix++, txq++) { + if (adap->flags & FULL_INIT_DONE) { + param = + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX) | + FW_PARAMS_PARAM_YZ_V(txq->q.cntxt_id)); + val = min_timerix; + ret = t4_set_params(adap, adap->mbox, adap->pf, 0, + 1, ¶m, &val); + if (ret) + return ret; + } + txq->dbqtimerix = min_timerix; + } + return 0; +} + +/* Set the global Adapter SGE Doorbell Queue Timer Tick for all Ethernet TX + * Queues and the Timer Value for the Ethernet TX Queues associated with a + * Network Device. Since changing the global Tick changes all of the + * available Timer Values, we need to do this first before selecting the + * resulting closest Timer Value. Moreover, since the Tick is global, + * changing it affects the Timer Values for all Network Devices on the + * adapter. So, before changing the Tick, we grab all of the current Timer + * Values for other Network Devices on this Adapter and then attempt to select + * new Timer Values which are close to the old values ... + */ +static int set_dbqtimer_tickval(struct net_device *dev, + int tick_usecs, int timer_usecs) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + int timer[MAX_NPORTS]; + unsigned int port; + int ret; + + /* Grab the other adapter Network Interface current timers and fill in + * the new one for this Network Interface. + */ + for_each_port(adap, port) + if (port == pi->port_id) + timer[port] = timer_usecs; + else + timer[port] = get_dbqtimer(adap->port[port]); + + /* Change the global Tick first ... */ + ret = set_dbqtimer_tick(dev, tick_usecs); + if (ret) + return ret; + + /* ... and then set all of the Network Interface Timer Values ... */ + for_each_port(adap, port) { + ret = set_dbqtimer(adap->port[port], timer[port]); + if (ret) + return ret; + } + + return 0; +} + +static int set_coalesce(struct net_device *dev, + struct ethtool_coalesce *coalesce) { - set_adaptive_rx_setting(dev, c->use_adaptive_rx_coalesce); - return set_rx_intr_params(dev, c->rx_coalesce_usecs, - c->rx_max_coalesced_frames); + int ret; + + set_adaptive_rx_setting(dev, coalesce->use_adaptive_rx_coalesce); + + ret = set_rx_intr_params(dev, coalesce->rx_coalesce_usecs, + coalesce->rx_max_coalesced_frames); + if (ret) + return ret; + + return set_dbqtimer_tickval(dev, + coalesce->tx_coalesce_usecs_irq, + coalesce->tx_coalesce_usecs); } static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) @@ -949,6 +1128,8 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN_F) ? adap->sge.counter_val[rq->pktcnt_idx] : 0; c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev); + c->tx_coalesce_usecs_irq = get_dbqtimer_tick(dev); + c->tx_coalesce_usecs = get_dbqtimer(dev); return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index adf75d16e6d3..bcbac247a73d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -575,7 +575,7 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, struct sge_eth_txq *eq; eq = container_of(txq, struct sge_eth_txq, q); - netif_tx_wake_queue(eq->txq); + t4_sge_eth_txq_egress_update(q->adap, eq, -1); } else { struct sge_uld_txq *oq; @@ -933,10 +933,13 @@ static int setup_sge_queues(struct adapter *adap) q->rspq.idx = j; memset(&q->stats, 0, sizeof(q->stats)); } - for (j = 0; j < pi->nqsets; j++, t++) { + + q = &s->ethrxq[pi->first_qset]; + for (j = 0; j < pi->nqsets; j++, t++, q++) { err = t4_sge_alloc_eth_txq(adap, t, dev, netdev_get_tx_queue(dev, j), - s->fw_evtq.cntxt_id); + q->rspq.cntxt_id, + !!(adap->flags & SGE_DBQ_TIMER)); if (err) goto freeout; } @@ -958,7 +961,7 @@ static int setup_sge_queues(struct adapter *adap) if (!is_t4(adap->params.chip)) { err = t4_sge_alloc_eth_txq(adap, &s->ptptxq, adap->port[0], netdev_get_tx_queue(adap->port[0], 0) - , s->fw_evtq.cntxt_id); + , s->fw_evtq.cntxt_id, false); if (err) goto freeout; } @@ -4325,6 +4328,24 @@ static int adap_init0(struct adapter *adap) if (ret < 0) goto bye; + /* Grab the SGE Doorbell Queue Timer values. If successful, that + * indicates that the Firmware and Hardware support this. + */ + params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK)); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, + 1, params, val); + + if (!ret) { + adap->sge.dbqtimer_tick = val[0]; + ret = t4_read_sge_dbqtimers(adap, + ARRAY_SIZE(adap->sge.dbqtimer_val), + adap->sge.dbqtimer_val); + } + + if (!ret) + adap->flags |= SGE_DBQ_TIMER; + if (is_bypass_device(adap->pdev->device)) adap->params.bypass = 1; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index fc0bc6458e84..f18493fb32aa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -80,9 +80,10 @@ * Max number of Tx descriptors we clean up at a time. Should be modest as * freeing skbs isn't cheap and it happens while holding locks. We just need * to free packets faster than they arrive, we eventually catch up and keep - * the amortized cost reasonable. Must be >= 2 * TXQ_STOP_THRES. + * the amortized cost reasonable. Must be >= 2 * TXQ_STOP_THRES. It should + * also match the CIDX Flush Threshold. */ -#define MAX_TX_RECLAIM 16 +#define MAX_TX_RECLAIM 32 /* * Max number of Rx buffers we replenish at a time. Again keep this modest, @@ -401,31 +402,52 @@ static inline int reclaimable(const struct sge_txq *q) } /** - * cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors + * reclaim_completed_tx - reclaims completed TX Descriptors * @adap: the adapter * @q: the Tx queue to reclaim completed descriptors from + * @maxreclaim: the maximum number of TX Descriptors to reclaim or -1 * @unmap: whether the buffers should be unmapped for DMA * - * Reclaims Tx descriptors that the SGE has indicated it has processed, - * and frees the associated buffers if possible. Called with the Tx - * queue locked. + * Reclaims Tx Descriptors that the SGE has indicated it has processed, + * and frees the associated buffers if possible. If @max == -1, then + * we'll use a defaiult maximum. Called with the TX Queue locked. */ -inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, - bool unmap) +static inline int reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, + int maxreclaim, bool unmap) { - int avail = reclaimable(q); + int reclaim = reclaimable(q); - if (avail) { + if (reclaim) { /* * Limit the amount of clean up work we do at a time to keep * the Tx lock hold time O(1). */ - if (avail > MAX_TX_RECLAIM) - avail = MAX_TX_RECLAIM; + if (maxreclaim < 0) + maxreclaim = MAX_TX_RECLAIM; + if (reclaim > maxreclaim) + reclaim = maxreclaim; - free_tx_desc(adap, q, avail, unmap); - q->in_use -= avail; + free_tx_desc(adap, q, reclaim, unmap); + q->in_use -= reclaim; } + + return reclaim; +} + +/** + * cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors + * @adap: the adapter + * @q: the Tx queue to reclaim completed descriptors from + * @unmap: whether the buffers should be unmapped for DMA + * + * Reclaims Tx descriptors that the SGE has indicated it has processed, + * and frees the associated buffers if possible. Called with the Tx + * queue locked. + */ +void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, + bool unmap) +{ + (void)reclaim_completed_tx(adap, q, -1, unmap); } EXPORT_SYMBOL(cxgb4_reclaim_completed_tx); @@ -1288,6 +1310,44 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb, } /** + * t4_sge_eth_txq_egress_update - handle Ethernet TX Queue update + * @adap: the adapter + * @eq: the Ethernet TX Queue + * @maxreclaim: the maximum number of TX Descriptors to reclaim or -1 + * + * We're typically called here to update the state of an Ethernet TX + * Queue with respect to the hardware's progress in consuming the TX + * Work Requests that we've put on that Egress Queue. This happens + * when we get Egress Queue Update messages and also prophylactically + * in regular timer-based Ethernet TX Queue maintenance. + */ +int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq, + int maxreclaim) +{ + struct sge_txq *q = &eq->q; + unsigned int reclaimed; + + if (!q->in_use || !__netif_tx_trylock(eq->txq)) + return 0; + + /* Reclaim pending completed TX Descriptors. */ + reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true); + + /* If the TX Queue is currently stopped and there's now more than half + * the queue available, restart it. Otherwise bail out since the rest + * of what we want do here is with the possibility of shipping any + * currently buffered Coalesced TX Work Request. + */ + if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) { + netif_tx_wake_queue(eq->txq); + eq->q.restarts++; + } + + __netif_tx_unlock(eq->txq); + return reclaimed; +} + +/** * cxgb4_eth_xmit - add a packet to an Ethernet Tx queue * @skb: the packet * @dev: the egress net device @@ -1357,7 +1417,7 @@ out_free: dev_kfree_skb_any(skb); } skb_tx_timestamp(skb); - cxgb4_reclaim_completed_tx(adap, &q->q, true); + reclaim_completed_tx(adap, &q->q, -1, true); cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; #ifdef CONFIG_CHELSIO_T4_FCOE @@ -1400,8 +1460,25 @@ out_free: dev_kfree_skb_any(skb); wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2)); if (unlikely(credits < ETHTXQ_STOP_THRES)) { + /* After we're done injecting the Work Request for this + * packet, we'll be below our "stop threshold" so stop the TX + * Queue now and schedule a request for an SGE Egress Queue + * Update message. The queue will get started later on when + * the firmware processes this Work Request and sends us an + * Egress Queue Status Update message indicating that space + * has opened up. + */ eth_txq_stop(q); - wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + + /* If we're using the SGE Doorbell Queue Timer facility, we + * don't need to ask the Firmware to send us Egress Queue CIDX + * Updates: the Hardware will do this automatically. And + * since we send the Ingress Queue CIDX Updates to the + * corresponding Ethernet Response Queue, we'll get them very + * quickly. + */ + if (!q->dbqt) + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } wr = (void *)&q->q.desc[q->q.pidx]; @@ -1671,7 +1748,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, /* Take this opportunity to reclaim any TX Descriptors whose DMA * transfers have completed. */ - cxgb4_reclaim_completed_tx(adapter, &txq->q, true); + reclaim_completed_tx(adapter, &txq->q, -1, true); /* Calculate the number of flits and TX Descriptors we're going to * need along with how many TX Descriptors will be left over after @@ -1715,7 +1792,16 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, * has opened up. */ eth_txq_stop(txq); - wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + + /* If we're using the SGE Doorbell Queue Timer facility, we + * don't need to ask the Firmware to send us Egress Queue CIDX + * Updates: the Hardware will do this automatically. And + * since we send the Ingress Queue CIDX Updates to the + * corresponding Ethernet Response Queue, we'll get them very + * quickly. + */ + if (!txq->dbqt) + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } /* Start filling in our Work Request. Note that we do _not_ handle @@ -2794,6 +2880,74 @@ static int t4_tx_hststamp(struct adapter *adapter, struct sk_buff *skb, } /** + * t4_tx_completion_handler - handle CPL_SGE_EGR_UPDATE messages + * @rspq: Ethernet RX Response Queue associated with Ethernet TX Queue + * @rsp: Response Entry pointer into Response Queue + * @gl: Gather List pointer + * + * For adapters which support the SGE Doorbell Queue Timer facility, + * we configure the Ethernet TX Queues to send CIDX Updates to the + * Associated Ethernet RX Response Queue with CPL_SGE_EGR_UPDATE + * messages. This adds a small load to PCIe Link RX bandwidth and, + * potentially, higher CPU Interrupt load, but allows us to respond + * much more quickly to the CIDX Updates. This is important for + * Upper Layer Software which isn't willing to have a large amount + * of TX Data outstanding before receiving DMA Completions. + */ +static void t4_tx_completion_handler(struct sge_rspq *rspq, + const __be64 *rsp, + const struct pkt_gl *gl) +{ + u8 opcode = ((const struct rss_header *)rsp)->opcode; + struct port_info *pi = netdev_priv(rspq->netdev); + struct adapter *adapter = rspq->adap; + struct sge *s = &adapter->sge; + struct sge_eth_txq *txq; + + /* skip RSS header */ + rsp++; + + /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG. + */ + if (unlikely(opcode == CPL_FW4_MSG && + ((const struct cpl_fw4_msg *)rsp)->type == + FW_TYPE_RSSCPL)) { + rsp++; + opcode = ((const struct rss_header *)rsp)->opcode; + rsp++; + } + + if (unlikely(opcode != CPL_SGE_EGR_UPDATE)) { + pr_info("%s: unexpected FW4/CPL %#x on Rx queue\n", + __func__, opcode); + return; + } + + txq = &s->ethtxq[pi->first_qset + rspq->idx]; + + /* We've got the Hardware Consumer Index Update in the Egress Update + * message. If we're using the SGE Doorbell Queue Timer mechanism, + * these Egress Update messages will be our sole CIDX Updates we get + * since we don't want to chew up PCIe bandwidth for both Ingress + * Messages and Status Page writes. However, The code which manages + * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value + * stored in the Status Page at the end of the TX Queue. It's easiest + * to simply copy the CIDX Update value from the Egress Update message + * to the Status Page. Also note that no Endian issues need to be + * considered here since both are Big Endian and we're just copying + * bytes consistently ... + */ + if (txq->dbqt) { + struct cpl_sge_egr_update *egr; + + egr = (struct cpl_sge_egr_update *)rsp; + WRITE_ONCE(txq->q.stat->cidx, egr->cidx); + } + + t4_sge_eth_txq_egress_update(adapter, txq, -1); +} + +/** * t4_ethrx_handler - process an ingress ethernet packet * @q: the response queue that received the packet * @rsp: the response queue descriptor holding the RX_PKT message @@ -2816,6 +2970,15 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, struct port_info *pi; int ret = 0; + /* If we're looking at TX Queue CIDX Update, handle that separately + * and return. + */ + if (unlikely((*(u8 *)rsp == CPL_FW4_MSG) || + (*(u8 *)rsp == CPL_SGE_EGR_UPDATE))) { + t4_tx_completion_handler(q, rsp, si); + return 0; + } + if (unlikely(*(u8 *)rsp == cpl_trace_pkt)) return handle_trace_pkt(q->adap, si); @@ -3289,10 +3452,10 @@ done: static void sge_tx_timer_cb(struct timer_list *t) { - unsigned long m; - unsigned int i, budget; struct adapter *adap = from_timer(adap, t, sge.tx_timer); struct sge *s = &adap->sge; + unsigned long m, period; + unsigned int i, budget; for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) for (m = s->txq_maperr[i]; m; m &= m - 1) { @@ -3320,29 +3483,29 @@ static void sge_tx_timer_cb(struct timer_list *t) budget = MAX_TIMER_TX_RECLAIM; i = s->ethtxq_rover; do { - struct sge_eth_txq *q = &s->ethtxq[i]; - - if (q->q.in_use && - time_after_eq(jiffies, q->txq->trans_start + HZ / 100) && - __netif_tx_trylock(q->txq)) { - int avail = reclaimable(&q->q); - - if (avail) { - if (avail > budget) - avail = budget; - - free_tx_desc(adap, &q->q, avail, true); - q->q.in_use -= avail; - budget -= avail; - } - __netif_tx_unlock(q->txq); - } + budget -= t4_sge_eth_txq_egress_update(adap, &s->ethtxq[i], + budget); + if (!budget) + break; if (++i >= s->ethqsets) i = 0; - } while (budget && i != s->ethtxq_rover); + } while (i != s->ethtxq_rover); s->ethtxq_rover = i; - mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2)); + + if (budget == 0) { + /* If we found too many reclaimable packets schedule a timer + * in the near future to continue where we left off. + */ + period = 2; + } else { + /* We reclaimed all reclaimable TX Descriptors, so reschedule + * at the normal period. + */ + period = TX_QCHECK_PERIOD; + } + + mod_timer(&s->tx_timer, jiffies + period); } /** @@ -3421,7 +3584,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, : FW_IQ_IQTYPE_OFLD)); if (fl) { - enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip); + unsigned int chip_ver = + CHELSIO_CHIP_VERSION(adap->params.chip); /* Allocate the ring for the hardware free list (with space * for its status page) along with the associated software @@ -3459,10 +3623,10 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, * the smaller 64-byte value there). */ c.fl0dcaen_to_fl0cidxfthresh = - htons(FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ? + htons(FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5 ? FETCHBURSTMIN_128B_X : - FETCHBURSTMIN_64B_X) | - FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ? + FETCHBURSTMIN_64B_T6_X) | + FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ? FETCHBURSTMAX_512B_X : FETCHBURSTMAX_256B_X)); c.fl0size = htons(flsz); @@ -3584,14 +3748,24 @@ static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) adap->sge.egr_map[id - adap->sge.egr_start] = q; } +/** + * t4_sge_alloc_eth_txq - allocate an Ethernet TX Queue + * @adap: the adapter + * @txq: the SGE Ethernet TX Queue to initialize + * @dev: the Linux Network Device + * @netdevq: the corresponding Linux TX Queue + * @iqid: the Ingress Queue to which to deliver CIDX Update messages + * @dbqt: whether this TX Queue will use the SGE Doorbell Queue Timers + */ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *netdevq, - unsigned int iqid) + unsigned int iqid, u8 dbqt) { - int ret, nentries; - struct fw_eq_eth_cmd c; - struct sge *s = &adap->sge; + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip); struct port_info *pi = netdev_priv(dev); + struct sge *s = &adap->sge; + struct fw_eq_eth_cmd c; + int ret, nentries; /* Add status entries */ nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc); @@ -3610,19 +3784,47 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, FW_EQ_ETH_CMD_VFN_V(0)); c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F | FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c)); - c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F | - FW_EQ_ETH_CMD_VIID_V(pi->viid)); + + /* For TX Ethernet Queues using the SGE Doorbell Queue Timer + * mechanism, we use Ingress Queue messages for Hardware Consumer + * Index Updates on the TX Queue. Otherwise we have the Hardware + * write the CIDX Updates into the Status Page at the end of the + * TX Queue. + */ + c.autoequiqe_to_viid = htonl((dbqt + ? FW_EQ_ETH_CMD_AUTOEQUIQE_F + : FW_EQ_ETH_CMD_AUTOEQUEQE_F) | + FW_EQ_ETH_CMD_VIID_V(pi->viid)); + c.fetchszm_to_iqid = - htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(dbqt + ? HOSTFCMODE_INGRESS_QUEUE_X + : HOSTFCMODE_STATUS_PAGE_X) | FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid)); + + /* Note that the CIDX Flush Threshold should match MAX_TX_RECLAIM. */ c.dcaen_to_eqsize = - htonl(FW_EQ_ETH_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + htonl(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5 + ? FETCHBURSTMIN_64B_X + : FETCHBURSTMIN_64B_T6_X) | FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | FW_EQ_ETH_CMD_EQSIZE_V(nentries)); + c.eqaddr = cpu_to_be64(txq->q.phys_addr); + /* If we're using the SGE Doorbell Queue Timer mechanism, pass in the + * currently configured Timer Index. THis can be changed later via an + * ethtool -C tx-usecs {Timer Val} command. Note that the SGE + * Doorbell Queue mode is currently automatically enabled in the + * Firmware by setting either AUTOEQUEQE or AUTOEQUIQE ... + */ + if (dbqt) + c.timeren_timerix = + cpu_to_be32(FW_EQ_ETH_CMD_TIMEREN_F | + FW_EQ_ETH_CMD_TIMERIX_V(txq->dbqtimerix)); + ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c); if (ret) { kfree(txq->q.sdesc); @@ -3639,6 +3841,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, txq->txq = netdevq; txq->tso = txq->tx_cso = txq->vlan_ins = 0; txq->mapping_err = 0; + txq->dbqt = dbqt; + return 0; } @@ -3646,10 +3850,11 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int cmplqid) { - int ret, nentries; - struct fw_eq_ctrl_cmd c; - struct sge *s = &adap->sge; + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip); struct port_info *pi = netdev_priv(dev); + struct sge *s = &adap->sge; + struct fw_eq_ctrl_cmd c; + int ret, nentries; /* Add status entries */ nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc); @@ -3673,7 +3878,9 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) | FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid)); c.dcaen_to_eqsize = - htonl(FW_EQ_CTRL_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + htonl(FW_EQ_CTRL_CMD_FBMIN_V(chip_ver <= CHELSIO_T5 + ? FETCHBURSTMIN_64B_X + : FETCHBURSTMIN_64B_T6_X) | FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | FW_EQ_CTRL_CMD_EQSIZE_V(nentries)); @@ -3713,6 +3920,7 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int uld_type) { + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip); int ret, nentries; struct fw_eq_ofld_cmd c; struct sge *s = &adap->sge; @@ -3743,7 +3951,9 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq, FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) | FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid)); c.dcaen_to_eqsize = - htonl(FW_EQ_OFLD_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + htonl(FW_EQ_OFLD_CMD_FBMIN_V(chip_ver <= CHELSIO_T5 + ? FETCHBURSTMIN_64B_X + : FETCHBURSTMIN_64B_T6_X) | FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | FW_EQ_OFLD_CMD_EQSIZE_V(nentries)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index c5e5466ee38b..27af347be4af 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -6713,6 +6713,47 @@ int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type) } /** + * t4_read_sge_dbqtimers - reag SGE Doorbell Queue Timer values + * @adap - the adapter + * @ndbqtimers: size of the provided SGE Doorbell Queue Timer table + * @dbqtimers: SGE Doorbell Queue Timer table + * + * Reads the SGE Doorbell Queue Timer values into the provided table. + * Returns 0 on success (Firmware and Hardware support this feature), + * an error on failure. + */ +int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers, + u16 *dbqtimers) +{ + int ret, dbqtimerix; + + ret = 0; + dbqtimerix = 0; + while (dbqtimerix < ndbqtimers) { + int nparams, param; + u32 params[7], vals[7]; + + nparams = ndbqtimers - dbqtimerix; + if (nparams > ARRAY_SIZE(params)) + nparams = ARRAY_SIZE(params); + + for (param = 0; param < nparams; param++) + params[param] = + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMER) | + FW_PARAMS_PARAM_Y_V(dbqtimerix + param)); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, + nparams, params, vals); + if (ret) + break; + + for (param = 0; param < nparams; param++) + dbqtimers[dbqtimerix++] = vals[param]; + } + return ret; +} + +/** * t4_fw_hello - establish communication with FW * @adap: the adapter * @mbox: mailbox to use for the FW command diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 361d5032c288..002fc62ea726 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -91,6 +91,7 @@ enum { SGE_CTXT_SIZE = 24, /* size of SGE context */ SGE_NTIMERS = 6, /* # of interrupt holdoff timer values */ SGE_NCOUNTERS = 4, /* # of interrupt packet counter values */ + SGE_NDBQTIMERS = 8, /* # of Doorbell Queue Timer values */ SGE_MAX_IQ_SIZE = 65520, SGE_TIMER_RSTRT_CNTR = 6, /* restart RX packet threshold counter */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h index f6558cbfc54e..eb1aa82149db 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h @@ -71,12 +71,18 @@ #define FETCHBURSTMIN_64B_X 2 #define FETCHBURSTMIN_128B_X 3 +/* T6 and later use a single-bit encoding for FetchBurstMin */ +#define FETCHBURSTMIN_64B_T6_X 0 +#define FETCHBURSTMIN_128B_T6_X 1 + #define FETCHBURSTMAX_256B_X 2 #define FETCHBURSTMAX_512B_X 3 +#define HOSTFCMODE_INGRESS_QUEUE_X 1 #define HOSTFCMODE_STATUS_PAGE_X 2 #define CIDXFLUSHTHRESH_32_X 5 +#define CIDXFLUSHTHRESH_128_X 7 #define UPDATEDELIVERY_INTERRUPT_X 1 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 1d9b3e1e5f94..631f1663f4e0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1254,6 +1254,8 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21, FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24, FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27, + FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29, + FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A, }; /* @@ -1322,6 +1324,7 @@ enum fw_params_param_dmaq { FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11, FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12, FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13, + FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX = 0x15, FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20, }; @@ -1751,8 +1754,8 @@ struct fw_eq_eth_cmd { __be32 fetchszm_to_iqid; __be32 dcaen_to_eqsize; __be64 eqaddr; - __be32 viid_pkd; - __be32 r8_lo; + __be32 autoequiqe_to_viid; + __be32 timeren_timerix; __be64 r9; }; @@ -1847,6 +1850,10 @@ struct fw_eq_eth_cmd { #define FW_EQ_ETH_CMD_EQSIZE_S 0 #define FW_EQ_ETH_CMD_EQSIZE_V(x) ((x) << FW_EQ_ETH_CMD_EQSIZE_S) +#define FW_EQ_ETH_CMD_AUTOEQUIQE_S 31 +#define FW_EQ_ETH_CMD_AUTOEQUIQE_V(x) ((x) << FW_EQ_ETH_CMD_AUTOEQUIQE_S) +#define FW_EQ_ETH_CMD_AUTOEQUIQE_F FW_EQ_ETH_CMD_AUTOEQUIQE_V(1U) + #define FW_EQ_ETH_CMD_AUTOEQUEQE_S 30 #define FW_EQ_ETH_CMD_AUTOEQUEQE_V(x) ((x) << FW_EQ_ETH_CMD_AUTOEQUEQE_S) #define FW_EQ_ETH_CMD_AUTOEQUEQE_F FW_EQ_ETH_CMD_AUTOEQUEQE_V(1U) @@ -1854,6 +1861,19 @@ struct fw_eq_eth_cmd { #define FW_EQ_ETH_CMD_VIID_S 16 #define FW_EQ_ETH_CMD_VIID_V(x) ((x) << FW_EQ_ETH_CMD_VIID_S) +#define FW_EQ_ETH_CMD_TIMEREN_S 3 +#define FW_EQ_ETH_CMD_TIMEREN_M 0x1 +#define FW_EQ_ETH_CMD_TIMEREN_V(x) ((x) << FW_EQ_ETH_CMD_TIMEREN_S) +#define FW_EQ_ETH_CMD_TIMEREN_G(x) \ + (((x) >> FW_EQ_ETH_CMD_TIMEREN_S) & FW_EQ_ETH_CMD_TIMEREN_M) +#define FW_EQ_ETH_CMD_TIMEREN_F FW_EQ_ETH_CMD_TIMEREN_V(1U) + +#define FW_EQ_ETH_CMD_TIMERIX_S 0 +#define FW_EQ_ETH_CMD_TIMERIX_M 0x7 +#define FW_EQ_ETH_CMD_TIMERIX_V(x) ((x) << FW_EQ_ETH_CMD_TIMERIX_S) +#define FW_EQ_ETH_CMD_TIMERIX_G(x) \ + (((x) >> FW_EQ_ETH_CMD_TIMERIX_S) & FW_EQ_ETH_CMD_TIMERIX_M) + struct fw_eq_ctrl_cmd { __be32 op_to_vfn; __be32 alloc_to_len16; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 84be3313fede..3300b69a42b3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1408,7 +1408,7 @@ static void fw_caps_to_lmm(enum fw_port_type port_type, case FW_PORT_TYPE_CR4_QSFP: SET_LMM(FIBRE); FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full); - FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full); + FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full); FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full); FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full); @@ -1419,6 +1419,13 @@ static void fw_caps_to_lmm(enum fw_port_type port_type, break; } + if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) { + FW_CAPS_TO_LMM(FEC_RS, FEC_RS); + FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER); + } else { + SET_LMM(FEC_NONE); + } + FW_CAPS_TO_LMM(ANEG, Autoneg); FW_CAPS_TO_LMM(802_3_PAUSE, Pause); FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 1d534f0baa69..11d2ba0a2bf5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2268,7 +2268,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, cmd.iqaddr = cpu_to_be64(rspq->phys_addr); if (fl) { - enum chip_type chip = + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); /* * Allocate the ring for the hardware free list (with space @@ -2319,10 +2319,10 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, */ cmd.fl0dcaen_to_fl0cidxfthresh = cpu_to_be16( - FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ? - FETCHBURSTMIN_128B_X : - FETCHBURSTMIN_64B_X) | - FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ? + FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5 + ? FETCHBURSTMIN_128B_X + : FETCHBURSTMIN_64B_T6_X) | + FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ? FETCHBURSTMAX_512B_X : FETCHBURSTMAX_256B_X)); cmd.fl0size = cpu_to_be16(flsz); @@ -2411,10 +2411,11 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *devq, unsigned int iqid) { + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); + struct port_info *pi = netdev_priv(dev); + struct fw_eq_eth_cmd cmd, rpl; struct sge *s = &adapter->sge; int ret, nentries; - struct fw_eq_eth_cmd cmd, rpl; - struct port_info *pi = netdev_priv(dev); /* * Calculate the size of the hardware TX Queue (including the Status @@ -2448,17 +2449,19 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq, cmd.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_ALLOC_F | FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(cmd)); - cmd.viid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F | - FW_EQ_ETH_CMD_VIID_V(pi->viid)); + cmd.autoequiqe_to_viid = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F | + FW_EQ_ETH_CMD_VIID_V(pi->viid)); cmd.fetchszm_to_iqid = cpu_to_be32(FW_EQ_ETH_CMD_HOSTFCMODE_V(SGE_HOSTFCMODE_STPG) | FW_EQ_ETH_CMD_PCIECHN_V(pi->port_id) | FW_EQ_ETH_CMD_IQID_V(iqid)); cmd.dcaen_to_eqsize = - cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(SGE_FETCHBURSTMIN_64B) | - FW_EQ_ETH_CMD_FBMAX_V(SGE_FETCHBURSTMAX_512B) | + cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5 + ? FETCHBURSTMIN_64B_X + : FETCHBURSTMIN_64B_T6_X) | + FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | FW_EQ_ETH_CMD_CIDXFTHRESH_V( - SGE_CIDXFLUSHTHRESH_32) | + CIDXFLUSHTHRESH_32_X) | FW_EQ_ETH_CMD_EQSIZE_V(nentries)); cmd.eqaddr = cpu_to_be64(txq->q.phys_addr); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 60641e202534..9a7f70db20c7 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1434,7 +1434,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, * csum is correct or is zero. */ if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && - tcp_udp_csum_ok && ipv4_csum_ok && outer_csum_ok) { + tcp_udp_csum_ok && outer_csum_ok && + (ipv4_csum_ok || ipv6)) { skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = encap; } diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index 13430f75496c..f1a2da15dd0a 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -585,7 +585,7 @@ static void de_tx (struct de_private *de) netif_dbg(de, tx_done, de->dev, "tx done, slot %d\n", tx_tail); } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } next: diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index d8d423f22c4f..cfcdfee718b0 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -843,9 +843,9 @@ rio_free_tx (struct net_device *dev, int irq) desc_to_dma(&np->tx_ring[entry]), skb->len, PCI_DMA_TODEVICE); if (irq) - dev_kfree_skb_irq (skb); + dev_consume_skb_irq(skb); else - dev_kfree_skb (skb); + dev_kfree_skb(skb); np->tx_skbuff[entry] = NULL; entry = (entry + 1) % TX_RING_SIZE; diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 1a27176381fb..4a37a69764ce 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -1193,7 +1193,6 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) int handled = 0; int i; - do { int intr_status = ioread16(ioaddr + IntrStatus); iowrite16(intr_status, ioaddr + IntrStatus); @@ -1286,7 +1285,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) dma_unmap_single(&np->pci_dev->dev, le32_to_cpu(np->tx_ring[entry].frag[0].addr), skb->len, DMA_TO_DEVICE); - dev_kfree_skb_irq (np->tx_skbuff[entry]); + dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; np->tx_ring[entry].frag[0].addr = 0; np->tx_ring[entry].frag[0].length = 0; @@ -1305,7 +1304,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) dma_unmap_single(&np->pci_dev->dev, le32_to_cpu(np->tx_ring[entry].frag[0].addr), skb->len, DMA_TO_DEVICE); - dev_kfree_skb_irq (np->tx_skbuff[entry]); + dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; np->tx_ring[entry].frag[0].addr = 0; np->tx_ring[entry].frag[0].length = 0; diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index ae55da60ed0e..c24fd56a2c71 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -1531,7 +1531,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) /* Free the original skb. */ pci_unmap_single(np->pci_dev, np->cur_tx->buffer, np->cur_tx->skbuff->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(np->cur_tx->skbuff); + dev_consume_skb_irq(np->cur_tx->skbuff); np->cur_tx->skbuff = NULL; --np->really_tx_count; if (np->cur_tx->control & TXLD) { diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 62497119c85f..bdee441bc3b7 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -501,7 +501,7 @@ static int dpaa_get_ts_info(struct net_device *net_dev, struct device_node *mac_node = dev->of_node; struct device_node *fman_node = NULL, *ptp_node = NULL; struct platform_device *ptp_dev = NULL; - struct qoriq_ptp *ptp = NULL; + struct ptp_qoriq *ptp = NULL; info->phc_index = -1; diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index f9dd26fbfc83..8429f5c1d810 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -17,3 +17,15 @@ config FSL_ENETC_VF virtual function (VF) devices enabled by the ENETC PF driver. If compiled as module (M), the module name is fsl-enetc-vf. + +config FSL_ENETC_PTP_CLOCK + tristate "ENETC PTP clock driver" + depends on PTP_1588_CLOCK_QORIQ && (FSL_ENETC || FSL_ENETC_VF) + default y + help + This driver adds support for using the ENETC 1588 timer + as a PTP clock. This clock is only useful if your PTP + programs are getting hardware time stamps on the PTP Ethernet + packets using the SO_TIMESTAMPING API. + + If compiled as module (M), the module name is fsl-enetc-ptp. diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile index 9529b01872ca..697660294dbc 100644 --- a/drivers/net/ethernet/freescale/enetc/Makefile +++ b/drivers/net/ethernet/freescale/enetc/Makefile @@ -13,3 +13,6 @@ fsl-enetc-vf-$(CONFIG_FSL_ENETC_VF) += enetc.o enetc_cbdr.o \ enetc_ethtool.o fsl-enetc-vf-objs := enetc_vf.o $(fsl-enetc-vf-y) endif + +obj-$(CONFIG_FSL_ENETC_PTP_CLOCK) += fsl-enetc-ptp.o +fsl-enetc-ptp-$(CONFIG_FSL_ENETC_PTP_CLOCK) += enetc_ptp.o diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index efa0b1a5ef4f..df8eb8882d92 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -4,8 +4,9 @@ #include <linux/bitops.h> /* ENETC device IDs */ -#define ENETC_DEV_ID_PF 0xe100 -#define ENETC_DEV_ID_VF 0xef00 +#define ENETC_DEV_ID_PF 0xe100 +#define ENETC_DEV_ID_VF 0xef00 +#define ENETC_DEV_ID_PTP 0xee02 /* ENETC register block BAR */ #define ENETC_BAR_REGS 0 diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c new file mode 100644 index 000000000000..dc2f58a7c9e5 --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2019 NXP */ + +#include <linux/module.h> +#include <linux/of.h> +#include <linux/fsl/ptp_qoriq.h> + +#include "enetc.h" + +static struct ptp_clock_info enetc_ptp_caps = { + .owner = THIS_MODULE, + .name = "ENETC PTP clock", + .max_adj = 512000, + .n_alarm = 0, + .n_ext_ts = 2, + .n_per_out = 0, + .n_pins = 0, + .pps = 1, + .adjfine = ptp_qoriq_adjfine, + .adjtime = ptp_qoriq_adjtime, + .gettime64 = ptp_qoriq_gettime, + .settime64 = ptp_qoriq_settime, + .enable = ptp_qoriq_enable, +}; + +static int enetc_ptp_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct ptp_qoriq *ptp_qoriq; + void __iomem *base; + int err, len, n; + + if (pdev->dev.of_node && !of_device_is_available(pdev->dev.of_node)) { + dev_info(&pdev->dev, "device is disabled, skipping\n"); + return -ENODEV; + } + + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, "device enable failed\n"); + return err; + } + + /* set up for high or low dma */ + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, + "DMA configuration failed: 0x%x\n", err); + goto err_dma; + } + } + + err = pci_request_mem_regions(pdev, KBUILD_MODNAME); + if (err) { + dev_err(&pdev->dev, "pci_request_regions failed err=%d\n", err); + goto err_pci_mem_reg; + } + + pci_set_master(pdev); + + ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL); + if (!ptp_qoriq) { + err = -ENOMEM; + goto err_alloc_ptp; + } + + len = pci_resource_len(pdev, ENETC_BAR_REGS); + + base = ioremap(pci_resource_start(pdev, ENETC_BAR_REGS), len); + if (!base) { + err = -ENXIO; + dev_err(&pdev->dev, "ioremap() failed\n"); + goto err_ioremap; + } + + /* Allocate 1 interrupt */ + n = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX); + if (n != 1) { + err = -EPERM; + goto err_irq; + } + + ptp_qoriq->irq = pci_irq_vector(pdev, 0); + + err = request_irq(ptp_qoriq->irq, ptp_qoriq_isr, 0, DRIVER, ptp_qoriq); + if (err) { + dev_err(&pdev->dev, "request_irq() failed!\n"); + goto err_irq; + } + + ptp_qoriq->dev = &pdev->dev; + + err = ptp_qoriq_init(ptp_qoriq, base, enetc_ptp_caps); + if (err) + goto err_no_clock; + + pci_set_drvdata(pdev, ptp_qoriq); + + return 0; + +err_no_clock: + free_irq(ptp_qoriq->irq, ptp_qoriq); +err_irq: + iounmap(base); +err_ioremap: + kfree(ptp_qoriq); +err_alloc_ptp: + pci_release_mem_regions(pdev); +err_pci_mem_reg: +err_dma: + pci_disable_device(pdev); + + return err; +} + +static void enetc_ptp_remove(struct pci_dev *pdev) +{ + struct ptp_qoriq *ptp_qoriq = pci_get_drvdata(pdev); + + ptp_qoriq_free(ptp_qoriq); + kfree(ptp_qoriq); + + pci_release_mem_regions(pdev); + pci_disable_device(pdev); +} + +static const struct pci_device_id enetc_ptp_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PTP) }, + { 0, } /* End of table. */ +}; +MODULE_DEVICE_TABLE(pci, enetc_ptp_id_table); + +static struct pci_driver enetc_ptp_driver = { + .name = KBUILD_MODNAME, + .id_table = enetc_ptp_id_table, + .probe = enetc_ptp_probe, + .remove = enetc_ptp_remove, +}; +module_pci_driver(enetc_ptp_driver); + +MODULE_DESCRIPTION("ENETC PTP clock driver"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 2370dc204202..697c2427f2b7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2098,6 +2098,7 @@ static int fec_enet_get_regs_len(struct net_device *ndev) #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \ defined(CONFIG_ARM64) || defined(CONFIG_COMPILE_TEST) +static __u32 fec_enet_register_version = 2; static u32 fec_enet_register_offset[] = { FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0, FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL, @@ -2128,6 +2129,7 @@ static u32 fec_enet_register_offset[] = { IEEE_R_FDXFC, IEEE_R_OCTETS_OK }; #else +static __u32 fec_enet_register_version = 1; static u32 fec_enet_register_offset[] = { FEC_ECNTRL, FEC_IEVENT, FEC_IMASK, FEC_IVEC, FEC_R_DES_ACTIVE_0, FEC_R_DES_ACTIVE_1, FEC_R_DES_ACTIVE_2, FEC_X_DES_ACTIVE_0, @@ -2149,6 +2151,8 @@ static void fec_enet_get_regs(struct net_device *ndev, u32 *buf = (u32 *)regbuf; u32 i, off; + regs->version = fec_enet_register_version; + memset(buf, 0, regs->len); for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index b90bab72efdb..c1968b3ecec8 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -369,7 +369,7 @@ static irqreturn_t mpc52xx_fec_tx_interrupt(int irq, void *dev_id) dma_unmap_single(dev->dev.parent, bd->skb_pa, skb->len, DMA_TO_DEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } spin_unlock(&priv->lock); diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 241325c35cb4..27ed995f439a 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1492,7 +1492,7 @@ static int gfar_get_ts_info(struct net_device *dev, struct gfar_private *priv = netdev_priv(dev); struct platform_device *ptp_dev; struct device_node *ptp_node; - struct qoriq_ptp *ptp = NULL; + struct ptp_qoriq *ptp = NULL; info->phc_index = -1; diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index c3d539e209ed..eb3e65e8868f 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1879,6 +1879,8 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) u16 i, j; u8 __iomem *bd; + netdev_reset_queue(ugeth->ndev); + ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 3b9e74be5fbd..b8155f5e71b4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -3081,6 +3081,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) dsaf_dev = dev_get_drvdata(&pdev->dev); if (!dsaf_dev) { dev_err(&pdev->dev, "dsaf_dev is NULL\n"); + put_device(&pdev->dev); return -ENODEV; } @@ -3088,6 +3089,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n", dsaf_dev->ae_dev.name); + put_device(&pdev->dev); return -ENODEV; } diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index 2f7ae118217f..1274ad24d6af 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -1194,7 +1194,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) dma_unmap_single(dev->dev.parent, tx_cmd->dma_addr, skb->len, DMA_TO_DEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); tx_cmd->cmd.command = 0; /* Mark free */ break; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 6fd15a734324..5a0419421511 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1603,14 +1603,12 @@ static int fm10k_alloc_q_vector(struct fm10k_intfc *interface, { struct fm10k_q_vector *q_vector; struct fm10k_ring *ring; - int ring_count, size; + int ring_count; ring_count = txr_count + rxr_count; - size = sizeof(struct fm10k_q_vector) + - (sizeof(struct fm10k_ring) * ring_count); /* allocate q_vector and rings */ - q_vector = kzalloc(size, GFP_KERNEL); + q_vector = kzalloc(struct_size(q_vector, ring, ring_count), GFP_KERNEL); if (!q_vector) return -ENOMEM; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 6d812e96572d..69b230c53fed 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1189,15 +1189,15 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, { struct igb_q_vector *q_vector; struct igb_ring *ring; - int ring_count, size; + int ring_count; + size_t size; /* igb only supports 1 Tx and/or 1 Rx queue per vector */ if (txr_count > 1 || rxr_count > 1) return -ENOMEM; ring_count = txr_count + rxr_count; - size = sizeof(struct igb_q_vector) + - (sizeof(struct igb_ring) * ring_count); + size = struct_size(q_vector, ring, ring_count); /* allocate q_vector and rings */ q_vector = adapter->q_vector[v_idx]; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 11c75433fe22..87a11879bf2d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2958,22 +2958,21 @@ static int igc_alloc_q_vector(struct igc_adapter *adapter, { struct igc_q_vector *q_vector; struct igc_ring *ring; - int ring_count, size; + int ring_count; /* igc only supports 1 Tx and/or 1 Rx queue per vector */ if (txr_count > 1 || rxr_count > 1) return -ENOMEM; ring_count = txr_count + rxr_count; - size = sizeof(struct igc_q_vector) + - (sizeof(struct igc_ring) * ring_count); /* allocate q_vector and rings */ q_vector = adapter->q_vector[v_idx]; if (!q_vector) - q_vector = kzalloc(size, GFP_KERNEL); + q_vector = kzalloc(struct_size(q_vector, ring, ring_count), + GFP_KERNEL); else - memset(q_vector, 0, size); + memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); if (!q_vector) return -ENOMEM; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 62e6499e4146..cc3196ae5aea 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -836,12 +836,10 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, struct ixgbe_ring *ring; int node = NUMA_NO_NODE; int cpu = -1; - int ring_count, size; + int ring_count; u8 tcs = adapter->hw_tcs; ring_count = txr_count + rxr_count + xdp_count; - size = sizeof(struct ixgbe_q_vector) + - (sizeof(struct ixgbe_ring) * ring_count); /* customize cpu for Flow Director mapping */ if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) { @@ -855,9 +853,11 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, } /* allocate q_vector and rings */ - q_vector = kzalloc_node(size, GFP_KERNEL, node); + q_vector = kzalloc_node(struct_size(q_vector, ring, ring_count), + GFP_KERNEL, node); if (!q_vector) - q_vector = kzalloc(size, GFP_KERNEL); + q_vector = kzalloc(struct_size(q_vector, ring, ring_count), + GFP_KERNEL); if (!q_vector) return -ENOMEM; diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 32ac9045cdae..f9bb890733b5 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -112,10 +112,12 @@ struct ltq_etop_priv { static int ltq_etop_alloc_skb(struct ltq_etop_chan *ch) { + struct ltq_etop_priv *priv = netdev_priv(ch->netdev); + ch->skb[ch->dma.desc] = netdev_alloc_skb(ch->netdev, MAX_DMA_DATA_LEN); if (!ch->skb[ch->dma.desc]) return -ENOMEM; - ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(NULL, + ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(&priv->pdev->dev, ch->skb[ch->dma.desc]->data, MAX_DMA_DATA_LEN, DMA_FROM_DEVICE); ch->dma.desc_base[ch->dma.desc].addr = @@ -487,7 +489,7 @@ ltq_etop_tx(struct sk_buff *skb, struct net_device *dev) netif_trans_update(dev); spin_lock_irqsave(&priv->lock, flags); - desc->addr = ((unsigned int) dma_map_single(NULL, skb->data, len, + desc->addr = ((unsigned int) dma_map_single(&priv->pdev->dev, skb->data, len, DMA_TO_DEVICE)) - byte_offset; wmb(); desc->ctl = LTQ_DMA_OWN | LTQ_DMA_SOP | LTQ_DMA_EOP | diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 398328f10743..96e3f0669032 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -402,8 +402,8 @@ #define MVPP2_GMAC_STATUS0_GMII_SPEED BIT(1) #define MVPP2_GMAC_STATUS0_MII_SPEED BIT(2) #define MVPP2_GMAC_STATUS0_FULL_DUPLEX BIT(3) -#define MVPP2_GMAC_STATUS0_RX_PAUSE BIT(6) -#define MVPP2_GMAC_STATUS0_TX_PAUSE BIT(7) +#define MVPP2_GMAC_STATUS0_RX_PAUSE BIT(4) +#define MVPP2_GMAC_STATUS0_TX_PAUSE BIT(5) #define MVPP2_GMAC_STATUS0_AN_COMPLETE BIT(11) #define MVPP2_GMAC_PORT_FIFO_CFG_1_REG 0x1c #define MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS 6 diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 5c0c26fd8363..191d9ce85b7e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -965,6 +965,11 @@ mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask) } /* Port configuration routines */ +static bool mvpp2_is_xlg(phy_interface_t interface) +{ + return interface == PHY_INTERFACE_MODE_10GKR || + interface == PHY_INTERFACE_MODE_XAUI; +} static void mvpp22_gop_init_rgmii(struct mvpp2_port *port) { @@ -1101,7 +1106,7 @@ static void mvpp22_gop_unmask_irq(struct mvpp2_port *port) if (port->gop_id == 0) { /* Enable the XLG/GIG irqs for this port */ val = readl(port->base + MVPP22_XLG_EXT_INT_MASK); - if (port->phy_interface == PHY_INTERFACE_MODE_10GKR) + if (mvpp2_is_xlg(port->phy_interface)) val |= MVPP22_XLG_EXT_INT_MASK_XLG; else val |= MVPP22_XLG_EXT_INT_MASK_GIG; @@ -1181,9 +1186,7 @@ static void mvpp2_port_enable(struct mvpp2_port *port) u32 val; /* Only GOP port 0 has an XLG MAC */ - if (port->gop_id == 0 && - (port->phy_interface == PHY_INTERFACE_MODE_XAUI || - port->phy_interface == PHY_INTERFACE_MODE_10GKR)) { + if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) { val = readl(port->base + MVPP22_XLG_CTRL0_REG); val |= MVPP22_XLG_CTRL0_PORT_EN | MVPP22_XLG_CTRL0_MAC_RESET_DIS; @@ -1202,9 +1205,7 @@ static void mvpp2_port_disable(struct mvpp2_port *port) u32 val; /* Only GOP port 0 has an XLG MAC */ - if (port->gop_id == 0 && - (port->phy_interface == PHY_INTERFACE_MODE_XAUI || - port->phy_interface == PHY_INTERFACE_MODE_10GKR)) { + if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) { val = readl(port->base + MVPP22_XLG_CTRL0_REG); val &= ~MVPP22_XLG_CTRL0_PORT_EN; writel(val, port->base + MVPP22_XLG_CTRL0_REG); @@ -1377,13 +1378,9 @@ static void mvpp2_port_reset(struct mvpp2_port *port) for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) mvpp2_read_count(port, &mvpp2_ethtool_regs[i]); - val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) & - ~MVPP2_GMAC_PORT_RESET_MASK; + val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) | + MVPP2_GMAC_PORT_RESET_MASK; writel(val, port->base + MVPP2_GMAC_CTRL_2_REG); - - while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) & - MVPP2_GMAC_PORT_RESET_MASK) - continue; } /* Change maximum receive size of the port */ @@ -2459,8 +2456,7 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id) mvpp22_gop_mask_irq(port); - if (port->gop_id == 0 && - port->phy_interface == PHY_INTERFACE_MODE_10GKR) { + if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) { val = readl(port->base + MVPP22_XLG_INT_STAT); if (val & MVPP22_XLG_INT_STAT_LINK) { event = true; @@ -3150,8 +3146,7 @@ static void mvpp22_mode_reconfigure(struct mvpp2_port *port) ctrl3 = readl(port->base + MVPP22_XLG_CTRL3_REG); ctrl3 &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK; - if (port->phy_interface == PHY_INTERFACE_MODE_XAUI || - port->phy_interface == PHY_INTERFACE_MODE_10GKR) + if (mvpp2_is_xlg(port->phy_interface)) ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_10G; else ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC; @@ -3159,9 +3154,7 @@ static void mvpp22_mode_reconfigure(struct mvpp2_port *port) writel(ctrl3, port->base + MVPP22_XLG_CTRL3_REG); } - if (port->gop_id == 0 && - (port->phy_interface == PHY_INTERFACE_MODE_XAUI || - port->phy_interface == PHY_INTERFACE_MODE_10GKR)) + if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) mvpp2_xlg_max_rx_size_set(port); else mvpp2_gmac_max_rx_size_set(port); @@ -4501,17 +4494,12 @@ static int mvpp2_phylink_mac_link_state(struct net_device *dev, static void mvpp2_mac_an_restart(struct net_device *dev) { struct mvpp2_port *port = netdev_priv(dev); - u32 val; + u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); - if (port->phy_interface != PHY_INTERFACE_MODE_SGMII) - return; - - val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); - /* The RESTART_AN bit is cleared by the h/w after restarting the AN - * process. - */ - val |= MVPP2_GMAC_IN_BAND_RESTART_AN | MVPP2_GMAC_IN_BAND_AUTONEG; - writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN, + port->base + MVPP2_GMAC_AUTONEG_CONFIG); + writel(val & ~MVPP2_GMAC_IN_BAND_RESTART_AN, + port->base + MVPP2_GMAC_AUTONEG_CONFIG); } static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode, @@ -4524,8 +4512,13 @@ static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode, if (state->pause & MLO_PAUSE_TX) ctrl0 |= MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN; + else + ctrl0 &= ~MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN; + if (state->pause & MLO_PAUSE_RX) ctrl0 |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN; + else + ctrl0 &= ~MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN; ctrl4 &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC; ctrl4 |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC | @@ -4538,83 +4531,130 @@ static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode, static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, const struct phylink_link_state *state) { - u32 an, ctrl0, ctrl2, ctrl4; - - an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); - ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG); - ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG); - ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG); - - /* Force link down */ - an &= ~MVPP2_GMAC_FORCE_LINK_PASS; - an |= MVPP2_GMAC_FORCE_LINK_DOWN; - writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + u32 old_an, an; + u32 old_ctrl0, ctrl0; + u32 old_ctrl2, ctrl2; + u32 old_ctrl4, ctrl4; - /* Set the GMAC in a reset state */ - ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK; - writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); + old_an = an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + old_ctrl0 = ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG); + old_ctrl2 = ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG); + old_ctrl4 = ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG); an &= ~(MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED | MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FC_ADV_EN | MVPP2_GMAC_FC_ADV_ASM_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG | MVPP2_GMAC_CONFIG_FULL_DUPLEX | MVPP2_GMAC_AN_DUPLEX_EN | - MVPP2_GMAC_FORCE_LINK_DOWN); + MVPP2_GMAC_IN_BAND_AUTONEG | MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS); ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK; - ctrl2 &= ~(MVPP2_GMAC_PORT_RESET_MASK | MVPP2_GMAC_PCS_ENABLE_MASK); + ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PORT_RESET_MASK | + MVPP2_GMAC_PCS_ENABLE_MASK); + ctrl4 &= ~(MVPP22_CTRL4_RX_FC_EN | MVPP22_CTRL4_TX_FC_EN); + /* Configure port type */ if (phy_interface_mode_is_8023z(state->interface)) { - /* 1000BaseX and 2500BaseX ports cannot negotiate speed nor can - * they negotiate duplex: they are always operating with a fixed - * speed of 1000/2500Mbps in full duplex, so force 1000/2500 - * speed and full duplex here. - */ - ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK; - an |= MVPP2_GMAC_CONFIG_GMII_SPEED | - MVPP2_GMAC_CONFIG_FULL_DUPLEX; - } else if (!phy_interface_mode_is_rgmii(state->interface)) { - an |= MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG; + ctrl2 |= MVPP2_GMAC_PCS_ENABLE_MASK; + ctrl4 &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL; + ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS | + MVPP22_CTRL4_DP_CLK_SEL | + MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; + } else if (state->interface == PHY_INTERFACE_MODE_SGMII) { + ctrl2 |= MVPP2_GMAC_PCS_ENABLE_MASK | MVPP2_GMAC_INBAND_AN_MASK; + ctrl4 &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL; + ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS | + MVPP22_CTRL4_DP_CLK_SEL | + MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; + } else if (phy_interface_mode_is_rgmii(state->interface)) { + ctrl4 &= ~MVPP22_CTRL4_DP_CLK_SEL; + ctrl4 |= MVPP22_CTRL4_EXT_PIN_GMII_SEL | + MVPP22_CTRL4_SYNC_BYPASS_DIS | + MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; } - if (state->duplex) - an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX; + /* Configure advertisement bits */ if (phylink_test(state->advertising, Pause)) an |= MVPP2_GMAC_FC_ADV_EN; if (phylink_test(state->advertising, Asym_Pause)) an |= MVPP2_GMAC_FC_ADV_ASM_EN; - if (phy_interface_mode_is_8023z(state->interface) || - state->interface == PHY_INTERFACE_MODE_SGMII) { - an |= MVPP2_GMAC_IN_BAND_AUTONEG; - ctrl2 |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK; + /* Configure negotiation style */ + if (!phylink_autoneg_inband(mode)) { + /* Phy or fixed speed - no in-band AN */ + if (state->duplex) + an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX; - ctrl4 &= ~(MVPP22_CTRL4_EXT_PIN_GMII_SEL | - MVPP22_CTRL4_RX_FC_EN | MVPP22_CTRL4_TX_FC_EN); - ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS | - MVPP22_CTRL4_DP_CLK_SEL | - MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; + if (state->speed == SPEED_1000 || state->speed == SPEED_2500) + an |= MVPP2_GMAC_CONFIG_GMII_SPEED; + else if (state->speed == SPEED_100) + an |= MVPP2_GMAC_CONFIG_MII_SPEED; if (state->pause & MLO_PAUSE_TX) ctrl4 |= MVPP22_CTRL4_TX_FC_EN; if (state->pause & MLO_PAUSE_RX) ctrl4 |= MVPP22_CTRL4_RX_FC_EN; - } else if (phy_interface_mode_is_rgmii(state->interface)) { - an |= MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS; + } else if (state->interface == PHY_INTERFACE_MODE_SGMII) { + /* SGMII in-band mode receives the speed and duplex from + * the PHY. Flow control information is not received. */ + an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | MVPP2_GMAC_FORCE_LINK_PASS); + an |= MVPP2_GMAC_IN_BAND_AUTONEG | + MVPP2_GMAC_AN_SPEED_EN | + MVPP2_GMAC_AN_DUPLEX_EN; - if (state->speed == SPEED_1000) - an |= MVPP2_GMAC_CONFIG_GMII_SPEED; - else if (state->speed == SPEED_100) - an |= MVPP2_GMAC_CONFIG_MII_SPEED; + if (state->pause & MLO_PAUSE_TX) + ctrl4 |= MVPP22_CTRL4_TX_FC_EN; + if (state->pause & MLO_PAUSE_RX) + ctrl4 |= MVPP22_CTRL4_RX_FC_EN; + } else if (phy_interface_mode_is_8023z(state->interface)) { + /* 1000BaseX and 2500BaseX ports cannot negotiate speed nor can + * they negotiate duplex: they are always operating with a fixed + * speed of 1000/2500Mbps in full duplex, so force 1000/2500 + * speed and full duplex here. + */ + ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK; + an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | MVPP2_GMAC_FORCE_LINK_PASS); + an |= MVPP2_GMAC_IN_BAND_AUTONEG | + MVPP2_GMAC_CONFIG_GMII_SPEED | + MVPP2_GMAC_CONFIG_FULL_DUPLEX; - ctrl4 &= ~MVPP22_CTRL4_DP_CLK_SEL; - ctrl4 |= MVPP22_CTRL4_EXT_PIN_GMII_SEL | - MVPP22_CTRL4_SYNC_BYPASS_DIS | - MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; + if (state->pause & MLO_PAUSE_AN && state->an_enabled) { + an |= MVPP2_GMAC_FLOW_CTRL_AUTONEG; + } else { + if (state->pause & MLO_PAUSE_TX) + ctrl4 |= MVPP22_CTRL4_TX_FC_EN; + if (state->pause & MLO_PAUSE_RX) + ctrl4 |= MVPP22_CTRL4_RX_FC_EN; + } } - writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG); - writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); - writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG); - writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + if ((old_ctrl0 ^ ctrl0) & MVPP2_GMAC_PORT_TYPE_MASK || + (old_ctrl2 ^ ctrl2) & MVPP2_GMAC_INBAND_AN_MASK || + (old_an ^ an) & MVPP2_GMAC_IN_BAND_AUTONEG) { + /* Force link down */ + old_an &= ~MVPP2_GMAC_FORCE_LINK_PASS; + old_an |= MVPP2_GMAC_FORCE_LINK_DOWN; + writel(old_an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + + /* Set the GMAC in a reset state - do this in a way that + * ensures we clear it below. + */ + old_ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK; + writel(old_ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); + } + + if (old_ctrl0 != ctrl0) + writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG); + if (old_ctrl2 != ctrl2) + writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); + if (old_ctrl4 != ctrl4) + writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG); + if (old_an != an) + writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + + if (old_ctrl2 & MVPP2_GMAC_PORT_RESET_MASK) { + while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) & + MVPP2_GMAC_PORT_RESET_MASK) + continue; + } } static void mvpp2_mac_config(struct net_device *dev, unsigned int mode, @@ -4624,7 +4664,7 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode, bool change_interface = port->phy_interface != state->interface; /* Check for invalid configuration */ - if (state->interface == PHY_INTERFACE_MODE_10GKR && port->gop_id != 0) { + if (mvpp2_is_xlg(state->interface) && port->gop_id != 0) { netdev_err(dev, "Invalid mode on %s\n", dev->name); return; } @@ -4644,7 +4684,7 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode, } /* mac (re)configuration */ - if (state->interface == PHY_INTERFACE_MODE_10GKR) + if (mvpp2_is_xlg(state->interface)) mvpp2_xlg_config(port, mode, state); else if (phy_interface_mode_is_rgmii(state->interface) || phy_interface_mode_is_8023z(state->interface) || @@ -4666,12 +4706,10 @@ static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode, struct mvpp2_port *port = netdev_priv(dev); u32 val; - if (!phylink_autoneg_inband(mode) && - interface != PHY_INTERFACE_MODE_10GKR) { + if (!phylink_autoneg_inband(mode) && !mvpp2_is_xlg(interface)) { val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); val &= ~MVPP2_GMAC_FORCE_LINK_DOWN; - if (phy_interface_mode_is_rgmii(interface)) - val |= MVPP2_GMAC_FORCE_LINK_PASS; + val |= MVPP2_GMAC_FORCE_LINK_PASS; writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); } @@ -4688,8 +4726,7 @@ static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode, struct mvpp2_port *port = netdev_priv(dev); u32 val; - if (!phylink_autoneg_inband(mode) && - interface != PHY_INTERFACE_MODE_10GKR) { + if (!phylink_autoneg_inband(mode) && !mvpp2_is_xlg(interface)) { val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); val &= ~MVPP2_GMAC_FORCE_LINK_PASS; val |= MVPP2_GMAC_FORCE_LINK_DOWN; diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index f8a6d6e3cb7a..35f2142aac5e 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -201,6 +201,7 @@ struct tx_desc { }; struct pxa168_eth_private { + struct platform_device *pdev; int port_num; /* User Ethernet port number */ int phy_addr; int phy_speed; @@ -331,7 +332,7 @@ static void rxq_refill(struct net_device *dev) used_rx_desc = pep->rx_used_desc_q; p_used_rx_desc = &pep->p_rx_desc_area[used_rx_desc]; size = skb_end_pointer(skb) - skb->data; - p_used_rx_desc->buf_ptr = dma_map_single(NULL, + p_used_rx_desc->buf_ptr = dma_map_single(&pep->pdev->dev, skb->data, size, DMA_FROM_DEVICE); @@ -743,7 +744,7 @@ static int txq_reclaim(struct net_device *dev, int force) netdev_err(dev, "Error in TX\n"); dev->stats.tx_errors++; } - dma_unmap_single(NULL, addr, count, DMA_TO_DEVICE); + dma_unmap_single(&pep->pdev->dev, addr, count, DMA_TO_DEVICE); if (skb) dev_kfree_skb_irq(skb); released++; @@ -805,7 +806,7 @@ static int rxq_process(struct net_device *dev, int budget) if (rx_next_curr_desc == rx_used_desc) pep->rx_resource_err = 1; pep->rx_desc_count--; - dma_unmap_single(NULL, rx_desc->buf_ptr, + dma_unmap_single(&pep->pdev->dev, rx_desc->buf_ptr, rx_desc->buf_size, DMA_FROM_DEVICE); received_packets++; @@ -1274,7 +1275,8 @@ pxa168_eth_start_xmit(struct sk_buff *skb, struct net_device *dev) length = skb->len; pep->tx_skb[tx_index] = skb; desc->byte_cnt = length; - desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); + desc->buf_ptr = dma_map_single(&pep->pdev->dev, skb->data, length, + DMA_TO_DEVICE); skb_tx_timestamp(skb); @@ -1528,6 +1530,7 @@ static int pxa168_eth_probe(struct platform_device *pdev) if (err) goto err_free_mdio; + pep->pdev = pdev; SET_NETDEV_DEV(dev, &pdev->dev); pxa168_init_hw(pep); err = register_netdev(dev); diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 04fd1f135011..654ac534b10e 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -152,8 +152,10 @@ static void skge_get_regs(struct net_device *dev, struct ethtool_regs *regs, memset(p, 0, regs->len); memcpy_fromio(p, io, B3_RAM_ADDR); - memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, - regs->len - B3_RI_WTO_R1); + if (regs->len > B3_RI_WTO_R1) { + memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, + regs->len - B3_RI_WTO_R1); + } } /* Wake on Lan only supported on Yukon chips with rev 1 or above */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 9a0881cb7f51..6c01314e87b0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -617,6 +617,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, } #endif +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + /* We reach this function only after checking that any of * the (IPv4 | IPv6) bits are set in cqe->status. */ @@ -624,9 +626,20 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, netdev_features_t dev_features) { __wsum hw_checksum = 0; + void *hdr; + + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to skip + * checksum complete. + */ + if (short_frame(skb->len)) + return -EINVAL; - void *hdr = (u8 *)va + sizeof(struct ethhdr); - + hdr = (u8 *)va + sizeof(struct ethhdr); hw_checksum = csum_unfold((__force __sum16)cqe->checksum); if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && @@ -819,6 +832,11 @@ xdp_drop_no_cnt: skb_record_rx_queue(skb, cq_ring); if (likely(dev->features & NETIF_F_RXCSUM)) { + /* TODO: For IP non TCP/UDP packets when csum complete is + * not an option (not supported or any other reason) we can + * actually check cqe IPOK status bit and report + * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE + */ if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index bdb8dd161923..1f6e16d5ea6b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3981,6 +3981,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) goto err_params_unregister; + devlink_params_publish(devlink); pci_save_state(pdev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 6bb2a860b15b..82d636baaa4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o @@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o # # Core extra # -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3e0fa8a8077b..be48c6440251 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,6 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_QUERY_HOST_PARAMS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -627,6 +628,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); default: return "unknown command opcode"; } } @@ -1583,6 +1585,24 @@ no_trig: spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); } +void mlx5_cmd_flush(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + while (down_trylock(&cmd->sem)) + mlx5_cmd_trigger_completions(dev); + + while (down_trylock(&cmd->pages_sem)) + mlx5_cmd_trigger_completions(dev); + + /* Unlock cmdif */ + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} + static int status_to_err(u8 status) { return status ? -1 : 0; /* TBD more meaningful codes */ @@ -1711,12 +1731,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } EXPORT_SYMBOL(mlx5_cmd_exec); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context) +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) +{ + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_waitqueue_head(&ctx->wait); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call mlx5_cleanup_async_ctx(). + */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + atomic_dec(&ctx->num_inflight); + wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) +{ + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx = work->ctx; + + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) { - return cmd_exec(dev, in, in_size, out, out_size, callback, context, - false); + int ret; + + work->ctx = ctx; + work->user_callback = callback; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); + + return ret; } EXPORT_SYMBOL(mlx5_cmd_exec_cb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c new file mode 100644 index 000000000000..4746f2d28fb6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "ecpf.h" + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; +} + +static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_peer_pf_init(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_enable_hca(dev); + if (err) + mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n", + err); + + return err; +} + +static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_disable_hca(dev); + if (err) { + mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n", + err); + return; + } + + err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages); + if (err) + mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n", + err); +} + +int mlx5_ec_init(struct mlx5_core_dev *dev) +{ + int err = 0; + + if (!mlx5_core_is_ecpf(dev)) + return 0; + + /* ECPF shall enable HCA for peer PF in the same way a PF + * does this for its VFs. + */ + err = mlx5_peer_pf_init(dev); + if (err) + return err; + + return 0; +} + +void mlx5_ec_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return; + + mlx5_peer_pf_cleanup(dev); +} + +static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; + + MLX5_SET(query_host_params_in, in, opcode, + MLX5_CMD_OP_QUERY_HOST_PARAMS); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ + u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; + int err; + + err = mlx5_query_host_params_context(dev, out, sizeof(out)); + if (err) + return err; + + *num_vf = MLX5_GET(query_host_params_out, out, + host_params_context.host_num_of_vfs); + mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h new file mode 100644 index 000000000000..346372df218f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_ECPF_H__ +#define __MLX5_ECPF_H__ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5_ECPU_BIT_NUM = 23, +}; + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); +int mlx5_ec_init(struct mlx5_core_dev *dev); +void mlx5_ec_cleanup(struct mlx5_core_dev *dev); +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline bool +mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } +static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} +static inline int +mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ return -EOPNOTSUPP; } + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ECPF_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 66e510b16243..e9acfa9aa069 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -655,6 +655,7 @@ struct mlx5e_channel_stats { enum { MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, + MLX5E_STATE_XDP_TX_ENABLED, }; struct mlx5e_rqt { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 4a37713023be..122927f3a600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -63,66 +63,168 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_50GBASE_KR2] = 50000, }; -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) +static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = 100, + [MLX5E_1000BASE_X_SGMII] = 1000, + [MLX5E_5GBASE_R] = 5000, + [MLX5E_10GBASE_XFI_XAUI_1] = 10000, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, + [MLX5E_400GAUI_8] = 400000, +}; + +static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, + const u32 **arr, u32 *size) +{ + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : + ARRAY_SIZE(mlx5e_link_speed); + *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; +} + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5e_port_eth_proto *eproto) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + if (!eproto) + return -EINVAL; + + if (ext != MLX5_CAP_PCAM_FEATURE(dev, ptys_extended_ethernet)) + return -EOPNOTSUPP; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); + if (err) + return err; + + eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); + return 0; +} + +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} + +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, bool ext) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap, + &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); + MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); + if (ext) + MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); +} + +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper) { unsigned long temp = eth_proto_oper; + const u32 *table; u32 speed = 0; + u32 max_size; int i; - i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER); - if (i < MLX5E_LINK_MODES_NUMBER) - speed = mlx5e_link_speed[i]; - + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + i = find_first_bit(&temp, max_size); + if (i < max_size) + speed = table[i]; return speed; } int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { - u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; - u32 eth_proto_oper; + struct mlx5e_port_eth_proto eproto; + bool ext; int err; - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) - return err; + goto out; - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - *speed = mlx5e_port_ptys2speed(eth_proto_oper); + *speed = mlx5e_port_ptys2speed(mdev, eproto.oper); if (!(*speed)) err = -EINVAL; +out: return err; } int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { + struct mlx5e_port_eth_proto eproto; u32 max_speed = 0; - u32 proto_cap; + const u32 *table; + u32 max_size; + bool ext; int err; int i; - err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) return err; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) - if (proto_cap & MLX5E_PROT_MASK(i)) - max_speed = max(max_speed, mlx5e_link_speed[i]); + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) + if (eproto.cap & MLX5E_PROT_MASK(i)) + max_speed = max(max_speed, table[i]); *speed = max_speed; return 0; } -u32 mlx5e_port_speed2linkmodes(u32 speed) +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed) { u32 link_modes = 0; + const u32 *table; + u32 max_size; int i; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (mlx5e_link_speed[i] == speed) + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) { + if (table[i] == speed) link_modes |= MLX5E_PROT_MASK(i); } - return link_modes; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index cd2160b8c9bf..70f536ec51c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -36,10 +36,22 @@ #include <linux/mlx5/driver.h> #include "en.h" -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); +struct mlx5e_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5e_port_eth_proto *eproto); +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, bool ext); +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); -u32 mlx5e_port_speed2linkmodes(u32 speed); +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 9e71f4d41b82..bdcc5e79328d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -256,6 +256,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, e->m_neigh.family = n->ops->family; memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); e->out_dev = out_dev; + e->route_dev = route_dev; /* It's important to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the @@ -369,6 +370,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, e->m_neigh.family = n->ops->family; memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); e->out_dev = out_dev; + e->route_dev = route_dev; /* It's importent to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the @@ -602,16 +604,18 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, void *headers_c, - void *headers_v) + void *headers_v, u8 *match_level) { int tunnel_type; int err = 0; tunnel_type = mlx5e_tc_tun_get_type(filter_dev); if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + *match_level = MLX5_MATCH_L4; err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, headers_c, headers_v); } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + *match_level = MLX5_MATCH_L3; err = mlx5e_tc_tun_parse_gretap(priv, spec, f, headers_c, headers_v); } else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 706ce7bf15e7..b63f15de899d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -39,6 +39,6 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, void *headers_c, - void *headers_v); + void *headers_v, u8 *match_level); #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 3740177eed09..03b2a9f9c589 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -365,7 +365,8 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, int sq_num; int i; - if (unlikely(!test_bit(MLX5E_STATE_OPENED, &priv->state))) + /* this flag is sufficient, no need to test internal sq state */ + if (unlikely(!mlx5e_xdp_tx_is_enabled(priv))) return -ENETDOWN; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) @@ -378,9 +379,6 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, sq = &priv->channels.c[sq_num]->xdpsq; - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return -ENETDOWN; - for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; struct mlx5e_xdp_info xdpi; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 3a67cb3cd179..ee27a7c8cd87 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -50,6 +50,23 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); +static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + +static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + /* let other device's napi(s) see our new state */ + synchronize_rcu(); +} + +static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { if (sq->doorbell_cseg) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3bbccead2f63..3cd73254d020 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -354,9 +354,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, new_channels.params = priv->channels.params; new_channels.params.num_channels = count; - if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -372,6 +369,10 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, if (arfs_enabled) mlx5e_arfs_disable(priv); + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); + /* Switch to new channels, set new parameters and close old ones */ mlx5e_switch_priv_channels(priv, &new_channels, NULL); @@ -695,13 +696,14 @@ static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, struct ethtool_link_ksettings *link_ksettings) { + struct mlx5e_priv *priv = netdev_priv(netdev); u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; if (!netif_carrier_ok(netdev)) goto out; - speed = mlx5e_port_ptys2speed(eth_proto_oper); + speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper); if (!speed) { speed = SPEED_UNKNOWN; goto out; @@ -885,7 +887,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, const struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; - u32 eth_proto_cap, eth_proto_admin; + struct mlx5e_port_eth_proto eproto; bool an_changes = false; u8 an_disable_admin; u8 an_disable_cap; @@ -899,16 +901,16 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : - mlx5e_port_speed2linkmodes(speed); + mlx5e_port_speed2linkmodes(mdev, speed); - err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); + err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto); if (err) { - netdev_err(priv->netdev, "%s: query port eth proto cap failed: %d\n", + netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); goto out; } - link_modes = link_modes & eth_proto_cap; + link_modes = link_modes & eproto.cap; if (!link_modes) { netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", __func__); @@ -916,24 +918,17 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, goto out; } - err = mlx5_query_port_proto_admin(mdev, ð_proto_admin, MLX5_PTYS_EN); - if (err) { - netdev_err(priv->netdev, "%s: query port eth proto admin failed: %d\n", - __func__, err); - goto out; - } - - mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status, - &an_disable_cap, &an_disable_admin); + mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap, + &an_disable_admin); an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE; an_changes = ((!an_disable && an_disable_admin) || (an_disable && !an_disable_admin)); - if (!an_changes && link_modes == eth_proto_admin) + if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, false); mlx5_toggle_port_link(mdev); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d81ebba7c04e..83510ca0bcd8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2859,6 +2859,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_build_tx2sq_maps(priv); mlx5e_activate_channels(&priv->channels); + mlx5e_xdp_tx_enable(priv); netif_tx_start_all_queues(priv->netdev); if (mlx5e_is_vport_rep(priv)) @@ -2880,6 +2881,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) */ netif_tx_stop_all_queues(priv->netdev); netif_tx_disable(priv->netdev); + mlx5e_xdp_tx_disable(priv); mlx5e_deactivate_channels(&priv->channels); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 0b1988b330f3..287d48e5b073 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -153,7 +153,7 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_uplink_rep_update_hw_counters(priv); else mlx5e_vf_rep_update_hw_counters(priv); @@ -579,6 +579,10 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { ether_addr_copy(e->h_dest, ha); ether_addr_copy(eth->h_dest, ha); + /* Update the encap source mac, in case that we delete + * the flows when encap source mac changed. + */ + ether_addr_copy(eth->h_source, e->route_dev->dev_addr); mlx5e_tc_encap_flows_add(priv, e); } @@ -1079,7 +1083,8 @@ static int mlx5e_vf_rep_open(struct net_device *dev) if (!mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_UP)) + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_UP)) netif_carrier_on(dev); unlock: @@ -1097,7 +1102,8 @@ static int mlx5e_vf_rep_close(struct net_device *dev) mutex_lock(&priv->state_lock); mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN); + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); ret = mlx5e_close_locked(dev); mutex_unlock(&priv->state_lock); return ret; @@ -1115,7 +1121,7 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, if (ret) return ret; - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) ret = snprintf(buf, len, "p%d", pf_num); else ret = snprintf(buf, len, "pf%dvf%d", pf_num, rep->vport - 1); @@ -1202,7 +1208,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) return false; rep = rpriv->rep; - return (rep->vport == FDB_UPLINK_VPORT); + return (rep->vport == MLX5_VPORT_UPLINK); } static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) @@ -1340,7 +1346,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->sw_mtu = netdev->mtu; /* SQ */ - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; else params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; @@ -1367,7 +1373,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_core_dev *mdev = priv->mdev; - if (rep->vport == FDB_UPLINK_VPORT) { + if (rep->vport == MLX5_VPORT_UPLINK) { SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev); netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; /* we want a persistent mac for the uplink rep */ @@ -1397,7 +1403,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; - if (rep->vport != FDB_UPLINK_VPORT) + if (rep->vport != MLX5_VPORT_UPLINK) netdev->features |= NETIF_F_VLAN_CHALLENGED; netdev->features |= netdev->hw_features; @@ -1550,7 +1556,7 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) return err; } - if (rpriv->rep->vport == FDB_UPLINK_VPORT) { + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { uplink_priv = &rpriv->uplink_priv; /* init shared tc flow table */ @@ -1586,7 +1592,7 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); - if (rpriv->rep->vport == FDB_UPLINK_VPORT) { + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { /* clean indirect TC block notifications */ unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb); mlx5e_rep_indr_clean_block_privs(rpriv); @@ -1705,7 +1711,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rpriv->rep = rep; nch = mlx5e_get_max_num_channels(dev); - profile = (rep->vport == FDB_UPLINK_VPORT) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", @@ -1718,7 +1724,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rep->rep_if[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); - if (rep->vport == FDB_UPLINK_VPORT) { + if (rep->vport == MLX5_VPORT_UPLINK) { err = mlx5e_create_mdev_resources(dev); if (err) goto err_destroy_netdev; @@ -1754,7 +1760,7 @@ err_detach_netdev: mlx5e_detach_netdev(netdev_priv(netdev)); err_destroy_mdev_resources: - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_destroy_mdev_resources(dev); err_destroy_netdev: @@ -1774,7 +1780,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) unregister_netdev(netdev); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); kfree(ppriv); /* mlx5e_rep_priv */ @@ -1792,25 +1798,18 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; + struct mlx5_eswitch_rep_if rep_if = {}; - for (vport = 0; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep_if rep_if = {}; + rep_if.load = mlx5e_vport_rep_load; + rep_if.unload = mlx5e_vport_rep_unload; + rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH); - } + mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - for (vport = total_vfs - 1; vport >= 0; vport--) - mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH); + mlx5_eswitch_unregister_vport_reps(esw, REP_ETH); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index edd722824697..36eafc877e6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -148,6 +148,7 @@ struct mlx5e_encap_entry { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; + struct net_device *route_dev; int tunnel_type; int tunnel_hlen; int reformat_type; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 85c5dd7fc2c7..05892fc4e03f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -127,6 +127,7 @@ struct mlx5e_tc_flow_parse_attr { struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; + int max_mod_hdr_actions; void *mod_hdr_actions; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; @@ -1301,7 +1302,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, - struct net_device *filter_dev) + struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1313,7 +1314,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, int err; err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, - headers_c, headers_v); + headers_c, headers_v, match_level); if (err) { NL_SET_ERR_MSG_MOD(extack, "failed to parse tunnel attributes"); @@ -1413,7 +1414,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, struct net_device *filter_dev, - u8 *match_level) + u8 *match_level, u8 *tunnel_match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1464,7 +1465,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, switch (match.key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f, filter_dev)) + if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; break; default: @@ -1767,15 +1768,15 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; + u8 match_level, tunnel_match_level = MLX5_MATCH_NONE; struct mlx5_eswitch_rep *rep; - u8 match_level; int err; - err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level); + err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level); if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; - if (rep->vport != FDB_UPLINK_VPORT && + if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && esw->offloads.inline_mode < match_level)) { NL_SET_ERR_MSG_MOD(extack, @@ -1787,10 +1788,12 @@ static int parse_cls_flower(struct mlx5e_priv *priv, } } - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { flow->esw_attr->match_level = match_level; - else + flow->esw_attr->tunnel_match_level = tunnel_match_level; + } else { flow->nic_attr->match_level = match_level; + } return err; } @@ -1880,9 +1883,9 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 2, udp.dest, 0), }; -/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, it says how many HW actions were - * actually parsed. +/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. On success, attr->num_mod_hdr_actions + * says how many HW actions were actually parsed. */ static int offload_pedit_fields(struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -1905,9 +1908,11 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, add_vals = &hdrs[1].vals; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions; - max_actions = parse_attr->num_mod_hdr_actions; - nactions = 0; + action = parse_attr->mod_hdr_actions + + parse_attr->num_mod_hdr_actions * action_size; + + max_actions = parse_attr->max_mod_hdr_actions; + nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { f = &fields[i]; @@ -2020,7 +2025,7 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, if (!parse_attr->mod_hdr_actions) return -ENOMEM; - parse_attr->num_mod_hdr_actions = max_actions; + parse_attr->max_mod_hdr_actions = max_actions; return 0; } @@ -2069,9 +2074,11 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, int err; u8 cmd; - err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); - if (err) - goto out_err; + if (!parse_attr->mod_hdr_actions) { + err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); + if (err) + goto out_err; + } err = offload_pedit_fields(hdrs, parse_attr, extack); if (err < 0) @@ -2129,6 +2136,7 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, static bool modify_header_match_supported(struct mlx5_flow_spec *spec, struct flow_action *flow_action, + u32 actions, struct netlink_ext_ack *extack) { const struct flow_action_entry *act; @@ -2138,7 +2146,11 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, u16 ethertype; int i; - headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); + else + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); /* for non-IP we only re-write MACs, so we're okay */ @@ -2191,7 +2203,7 @@ static bool actions_match_supported(struct mlx5e_priv *priv, if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) return modify_header_match_supported(&parse_attr->spec, - flow_action, + flow_action, actions, extack); return true; @@ -2681,7 +2693,7 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) { struct mlx5_esw_flow_attr *attr = flow->esw_attr; - bool is_rep_ingress = attr->in_rep->vport != FDB_UPLINK_VPORT && + bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && flow->flags & MLX5E_TC_FLOW_INGRESS; bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); @@ -2804,7 +2816,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, * original flow and packets redirected from uplink use the * peer mdev. */ - if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT) + if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK) in_mdev = peer_priv->mdev; else in_mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 189211295e48..c1334a8ac8f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -387,8 +387,14 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < num_wqebbs)) { +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5_wqe_eth_seg cur_eth = wqe->eth; +#endif mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); mlx5e_sq_fetch_wqe(sq, &wqe, &pi); +#ifdef CONFIG_MLX5_EN_IPSEC + wqe->eth = cur_eth; +#endif } /* fill wqe */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ee04aab65a9f..bb6e5b5d9681 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -34,6 +34,7 @@ #include <linux/notifier.h> #include <linux/module.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> #include <linux/mlx5/eq.h> #include <linux/mlx5/cmd.h> #ifdef CONFIG_RFS_ACCEL @@ -114,11 +115,11 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *cq = NULL; - spin_lock(&table->lock); + rcu_read_lock(); cq = radix_tree_lookup(&table->tree, cqn); if (likely(cq)) mlx5_cq_hold(cq); - spin_unlock(&table->lock); + rcu_read_unlock(); return cq; } @@ -371,9 +372,9 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) struct mlx5_cq_table *table = &eq->cq_table; int err; - spin_lock_irq(&table->lock); + spin_lock(&table->lock); err = radix_tree_insert(&table->tree, cq->cqn, cq); - spin_unlock_irq(&table->lock); + spin_unlock(&table->lock); return err; } @@ -383,9 +384,9 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *tmp; - spin_lock_irq(&table->lock); + spin_lock(&table->lock); tmp = radix_tree_delete(&table->tree, cq->cqn); - spin_unlock_irq(&table->lock); + spin_unlock(&table->lock); if (!tmp) { mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); @@ -530,6 +531,9 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); + if (mlx5_core_is_ecpf_esw_manager(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + return async_event_mask; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5b492b67f4e1..e18af31336e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -39,8 +39,7 @@ #include "lib/eq.h" #include "eswitch.h" #include "fs_core.h" - -#define UPLINK_VPORT 0xFFFF +#include "ecpf.h" enum { MLX5_ACTION_NONE = 0, @@ -52,7 +51,7 @@ enum { struct vport_addr { struct l2addr_node node; u8 action; - u32 vport; + u16 vport; struct mlx5_flow_handle *flow_rule; bool mpfs; /* UC MAC was added to MPFs */ /* A flag indicating that mac was added due to mc promiscuous vport */ @@ -70,6 +69,28 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) +/* The vport getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. + */ +#define mlx5_esw_for_all_vports(esw, i, vport) \ + for ((i) = MLX5_VPORT_PF; \ + (vport) = &(esw)->vports[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (vport) = &(esw)->vports[i], \ + (i) <= (nvfs); (i)++) + +static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, + u16 vport_num) +{ + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + + WARN_ON(vport_num > esw->total_vports - 1); + return &esw->vports[idx]; +} + static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -115,7 +136,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; @@ -152,7 +173,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, /* E-Switch FDB */ static struct mlx5_flow_handle * -__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) { int match_header = (is_zero_ether_addr(mac_c) ? 0 : @@ -188,7 +209,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, misc_parameters); mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK); MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); } @@ -215,7 +236,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, } static struct mlx5_flow_handle * -esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport) { u8 mac_c[ETH_ALEN]; @@ -224,7 +245,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -237,7 +258,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -377,19 +398,19 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err; - /* Skip mlx5_mpfs_add_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_add_mac for eswitch_managers, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport) + if (esw->manager_vport == vport) goto fdb_add; err = mlx5_mpfs_add_mac(esw->dev, mac); if (err) { esw_warn(esw->dev, - "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n", + "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n", mac, vport, err); return err; } @@ -409,13 +430,13 @@ fdb_add: static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err = 0; - /* Skip mlx5_mpfs_del_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_del_mac for eswitch managerss, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport || !vaddr->mpfs) + if (!vaddr->mpfs || esw->manager_vport == vport) goto fdb_del; err = mlx5_mpfs_del_mac(esw->dev, mac); @@ -438,17 +459,18 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, struct esw_mc_addr *esw_mc) { u8 *mac = vaddr->node.addr; - u32 vport_idx = 0; + struct mlx5_vport *vport; + u16 i, vport_num; - for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { - struct mlx5_vport *vport = &esw->vports[vport_idx]; + mlx5_esw_for_all_vports(esw, i, vport) { struct hlist_head *vport_hash = vport->mc_list; struct vport_addr *iter_vaddr = l2addr_hash_find(vport_hash, mac, struct vport_addr); + vport_num = vport->vport; if (IS_ERR_OR_NULL(vport->allmulti_rule) || - vaddr->vport == vport_idx) + vaddr->vport == vport_num) continue; switch (vaddr->action) { case MLX5_ACTION_ADD: @@ -460,14 +482,14 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, if (!iter_vaddr) { esw_warn(esw->dev, "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n", - mac, vport_idx); + mac, vport_num); continue; } - iter_vaddr->vport = vport_idx; + iter_vaddr->vport = vport_num; iter_vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, - vport_idx); + vport_num); iter_vaddr->mc_promisc = true; break; case MLX5_ACTION_DEL: @@ -485,7 +507,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -499,7 +521,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) return -ENOMEM; esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ - esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); + esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK); /* Add this multicast mac to all the mc promiscuous vports */ update_allmulti_vports(esw, vaddr, esw_mc); @@ -525,7 +547,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -564,9 +586,9 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) /* Apply vport UC/MC list to HW l2 table and FDB table */ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; vport_addr_action vport_addr_add; vport_addr_action vport_addr_del; @@ -599,9 +621,9 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, /* Sync vport UC/MC list from vport context */ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; u8 (*mac_list)[ETH_ALEN]; struct l2addr_node *node; @@ -686,9 +708,9 @@ out: /* Sync vport UC/MC list from vport context * Must be called after esw_update_vport_addr_list */ -static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct l2addr_node *node; struct vport_addr *addr; struct hlist_head *hash; @@ -721,11 +743,11 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) } /* Apply vport rx mode to HW FDB table */ -static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, bool promisc, bool mc_promisc) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; - struct mlx5_vport *vport = &esw->vports[vport_num]; if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) goto promisc; @@ -736,7 +758,7 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, if (!allmulti_addr->uplink_rule) allmulti_addr->uplink_rule = esw_fdb_set_vport_allmulti_rule(esw, - UPLINK_VPORT); + MLX5_VPORT_UPLINK); allmulti_addr->refcnt++; } else if (vport->allmulti_rule) { mlx5_del_flow_rules(vport->allmulti_rule); @@ -764,9 +786,9 @@ promisc: } /* Sync vport rx mode from vport context */ -static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int promisc_all = 0; int promisc_uc = 0; int promisc_mc = 0; @@ -1343,8 +1365,8 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw) static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, u32 initial_max_rate, u32 initial_bw_share) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_vport *vport = &esw->vports[vport_num]; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; int err = 0; @@ -1383,7 +1405,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int err = 0; if (!vport->qos.enabled) @@ -1402,8 +1424,8 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, u32 max_rate, u32 bw_share) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_vport *vport = &esw->vports[vport_num]; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; u32 bitmask = 0; @@ -1459,15 +1481,22 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, { int vport_num = vport->vport; - if (!vport_num) + if (esw->manager_vport == vport_num) return; mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, vport->info.link_state); - mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac); - mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + + /* Host PF has its own mac/guid. */ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, (vport->info.vlan || vport->info.qos)); @@ -1513,10 +1542,10 @@ static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport) mlx5_fc_destroy(dev, vport->egress.drop_counter); } -static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, +static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int enable_events) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + u16 vport_num = vport->vport; mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); @@ -1539,8 +1568,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, vport->enabled_events = enable_events; vport->enabled = true; - /* only PF is trusted by default */ - if (!vport_num) + /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well + * in smartNIC as it's a vport group manager. + */ + if (esw->manager_vport == vport_num || + (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; esw_vport_change_handle_locked(vport); @@ -1550,9 +1582,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, mutex_unlock(&esw->state_lock); } -static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +static void esw_disable_vport(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + u16 vport_num = vport->vport; if (!vport->enabled) return; @@ -1573,10 +1606,10 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) esw_vport_change_handle_locked(vport); vport->enabled_events = 0; esw_vport_disable_qos(esw, vport_num); - if (vport_num && esw->mode == SRIOV_LEGACY) { + if (esw->mode == SRIOV_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); @@ -1595,7 +1628,7 @@ static int eswitch_vport_event(struct notifier_block *nb, u16 vport_num; vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); - vport = &esw->vports[vport_num]; + vport = mlx5_eswitch_get_vport(esw, vport_num); if (vport->enabled) queue_work(esw->work_queue, &vport->vport_change_handler); @@ -1607,6 +1640,8 @@ static int eswitch_vport_event(struct notifier_block *nb, int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { + int vf_nvports = 0, total_nvports = 0; + struct mlx5_vport *vport; int err; int i, enabled_events; @@ -1624,6 +1659,18 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + if (mode == SRIOV_OFFLOADS) { + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); + if (err) + return err; + total_nvports = esw->total_vports; + } else { + vf_nvports = nvfs; + total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); + } + } + esw->mode = mode; mlx5_lag_update(esw->dev); @@ -1633,7 +1680,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, nvfs + 1); + err = esw_offloads_init(esw, vf_nvports, total_nvports); } if (err) @@ -1648,8 +1695,20 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) * 2. FDB/Eswitch is programmed by user space tools */ enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; - for (i = 0; i <= nvfs; i++) - esw_enable_vport(esw, i, enabled_events); + + /* Enable PF vport */ + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_enable_vport(esw, vport, enabled_events); + + /* Enable ECPF vports */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + esw_enable_vport(esw, vport, enabled_events); + } + + /* Enable VF vports */ + mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) + esw_enable_vport(esw, vport, enabled_events); if (mode == SRIOV_LEGACY) { MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); @@ -1674,8 +1733,8 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + struct mlx5_vport *vport; int old_mode; - int nvports; int i; if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) @@ -1685,13 +1744,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw->enabled_vports, esw->mode); mc_promisc = &esw->mc_promisc; - nvports = esw->enabled_vports; if (esw->mode == SRIOV_LEGACY) mlx5_eq_notifier_unregister(esw->dev, &esw->nb); - for (i = 0; i < esw->total_vports; i++) - esw_disable_vport(esw, i); + mlx5_esw_for_all_vports(esw, i, vport) + esw_disable_vport(esw, vport); if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rules(mc_promisc->uplink_rule); @@ -1701,7 +1759,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (esw->mode == SRIOV_LEGACY) esw_destroy_legacy_fdb_table(esw); else if (esw->mode == SRIOV_OFFLOADS) - esw_offloads_cleanup(esw, nvports); + esw_offloads_cleanup(esw); old_mode = esw->mode; esw->mode = SRIOV_NONE; @@ -1718,8 +1776,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; - int vport_num; - int err; + struct mlx5_vport *vport; + int err, i; if (!MLX5_VPORT_MANAGER(dev)) return 0; @@ -1735,6 +1793,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) return -ENOMEM; esw->dev = dev; + esw->manager_vport = mlx5_eswitch_manager_vport(dev); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { @@ -1749,6 +1808,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + esw->total_vports = total_vports; + err = esw_offloads_init_reps(esw); if (err) goto abort; @@ -1757,17 +1818,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr_tbl); mutex_init(&esw->state_lock); - for (vport_num = 0; vport_num < total_vports; vport_num++) { - struct mlx5_vport *vport = &esw->vports[vport_num]; - - vport->vport = vport_num; + mlx5_esw_for_all_vports(esw, i, vport) { + vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); } - esw->total_vports = total_vports; esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; @@ -1866,7 +1924,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, err = mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport, link_state); + vport, 1, link_state); if (err) { mlx5_core_warn(esw->dev, "Failed to set vport %d link state, err = %d", @@ -2009,8 +2067,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) u32 max_guarantee = 0; int i; - for (i = 0; i < esw->total_vports; i++) { - evport = &esw->vports[i]; + mlx5_esw_for_all_vports(esw, i, evport) { if (!evport->enabled || evport->info.min_rate < max_guarantee) continue; max_guarantee = evport->info.min_rate; @@ -2029,8 +2086,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int err; int i; - for (i = 0; i < esw->total_vports; i++) { - evport = &esw->vports[i]; + mlx5_esw_for_all_vports(esw, i, evport) { if (!evport->enabled) continue; vport_min_rate = evport->info.min_rate; @@ -2045,7 +2101,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) if (bw_share == evport->qos.bw_share) continue; - err = esw_vport_qos_config(esw, i, vport_max_rate, + err = esw_vport_qos_config(esw, evport->vport, vport_max_rate, bw_share); if (!err) evport->qos.bw_share = bw_share; @@ -2128,7 +2184,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) return 0; - err = mlx5_query_vport_down_stats(dev, vport_idx, + err = mlx5_query_vport_down_stats(dev, vport_idx, 1, &rx_discard_vport_down, &tx_discard_vport_down); if (err) @@ -2165,8 +2221,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, other_vport, 1); memset(out, 0, outlen); err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9c89eea9b2c3..af5581a57e56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -38,6 +38,7 @@ #include <net/devlink.h> #include <linux/mlx5/device.h> #include <linux/mlx5/eswitch.h> +#include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "lib/mpfs.h" @@ -49,8 +50,6 @@ #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define FDB_UPLINK_VPORT 0xffff - #define MLX5_MIN_BW_SHARE 1 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ @@ -183,6 +182,16 @@ struct esw_mc_addr { /* SRIOV only */ u32 refcnt; }; +struct mlx5_host_work { + struct work_struct work; + struct mlx5_eswitch *esw; +}; + +struct mlx5_host_info { + struct mlx5_nb nb; + u16 num_vfs; +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -206,10 +215,13 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; int mode; int nvports; + u16 manager_vport; + struct mlx5_host_info host_info; }; -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); @@ -312,6 +324,7 @@ struct mlx5_esw_flow_attr { } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; u8 match_level; + u8 tunnel_match_level; struct mlx5_fc *counter; u32 chain; u16 prio; @@ -364,6 +377,53 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + +static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) +{ + /* Uplink always locate at the last element of the array.*/ + return esw->total_vports - 1; +} + +static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw) +{ + return esw->total_vports - 2; +} + +static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, + u16 vport_num) +{ + if (vport_num == MLX5_VPORT_ECPF) { + if (!mlx5_ecpf_vport_exists(esw->dev)) + esw_warn(esw->dev, "ECPF vport doesn't exist!\n"); + return mlx5_eswitch_ecpf_idx(esw); + } + + if (vport_num == MLX5_VPORT_UPLINK) + return mlx5_eswitch_uplink_idx(esw); + + return vport_num; +} + +static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, + int index) +{ + if (index == mlx5_eswitch_ecpf_idx(esw) && + mlx5_ecpf_vport_exists(esw->dev)) + return MLX5_VPORT_ECPF; + + if (index == mlx5_eswitch_uplink_idx(esw)) + return MLX5_VPORT_UPLINK; + + return index; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 53065b6ae593..f2260391be5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -40,15 +40,59 @@ #include "en.h" #include "fs_core.h" #include "lib/devcom.h" +#include "ecpf.h" +#include "lib/eq.h" enum { FDB_FAST_PATH = 0, FDB_SLOW_PATH }; +/* There are two match-all miss flows, one for unicast dst mac and + * one for multicast. + */ +#define MLX5_ESW_MISS_FLOWS (2) + #define fdb_prio_table(esw, chain, prio, level) \ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] +#define UPLINK_REP_INDEX 0 + +/* The rep getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. + */ +#define mlx5_esw_for_all_reps(esw, i, rep) \ + for ((i) = MLX5_VPORT_PF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \ + for ((i) = (nvfs); \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) >= MLX5_VPORT_FIRST_VF; (i)--) + +#define mlx5_esw_for_each_vf_vport(esw, vport, nvfs) \ + for ((vport) = MLX5_VPORT_FIRST_VF; \ + (vport) <= (nvfs); (vport)++) + +#define mlx5_esw_for_each_vf_vport_reverse(esw, vport, nvfs) \ + for ((vport) = (nvfs); \ + (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) + +static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, + u16 vport_num) +{ + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + + WARN_ON(idx > esw->total_vports - 1); + return &esw->offloads.vport_reps[idx]; +} + static struct mlx5_flow_table * esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); static void @@ -160,14 +204,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_eswitch_owner_vhca_id); - if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - else - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) - spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + if (attr->tunnel_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + if (attr->match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + } else if (attr->match_level != MLX5_MATCH_NONE) { + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; @@ -318,7 +363,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { rep = &esw->offloads.vport_reps[vf_vport]; - if (!rep->rep_if[REP_ETH].valid) + if (rep->rep_if[REP_ETH].state != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -359,15 +404,15 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, in_rep = attr->in_rep; out_rep = attr->dests[0].rep; - if (push && in_rep->vport == FDB_UPLINK_VPORT) + if (push && in_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; - if (pop && out_rep->vport == FDB_UPLINK_VPORT) + if (pop && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ if (!push && !pop && fwd) - if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* protects against (1) setting rules with different vlans to push and @@ -409,7 +454,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) { + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; attr->vlan_handled = true; } @@ -469,7 +514,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; return 0; @@ -516,7 +561,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); - MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + /* source vport is the esw manager */ + MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); @@ -561,7 +607,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, source_eswitch_owner_vhca_id); dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest->vport.num = 0; + dest->vport.num = peer_dev->priv.eswitch->manager_vport; dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } @@ -595,14 +641,35 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - for (i = 1; i < nvports; i++) { + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_pf_flow_err; + } + flows[MLX5_VPORT_PF] = flow; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ecpf_flow_err; + } + flows[mlx5_eswitch_ecpf_idx(esw)] = flow; + } + + mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) { MLX5_SET(fte_match_set_misc, misc, source_port, i); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { err = PTR_ERR(flow); - esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); - goto add_flow_err; + goto add_vf_flow_err; } flows[i] = flow; } @@ -612,9 +679,18 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, kvfree(spec); return 0; -add_flow_err: - for (i--; i > 0; i--) +add_vf_flow_err: + nvports = --i; + mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports) mlx5_del_flow_rules(flows[i]); + + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); +add_ecpf_flow_err: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); +add_pf_flow_err: + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); kvfree(flows); alloc_flows_err: kvfree(spec); @@ -628,9 +704,15 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) flows = esw->fdb_table.offloads.peer_miss_rules; - for (i = 1; i < esw->total_vports; i++) + mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[i]); + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); + kvfree(flows); } @@ -660,7 +742,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_c[0] = 0x01; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = 0; + dest.vport.num = esw->manager_vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, @@ -904,8 +986,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.offloads.fdb_left[i] = ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; - table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2 + - esw->total_vports; + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + + MLX5_ESW_MISS_FLOWS + esw->total_vports; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. @@ -999,7 +1081,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) dmac[0] = 0x01; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + MLX5_ESW_MISS_FLOWS); g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR(g)) { @@ -1048,7 +1131,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) esw_destroy_offloads_fast_fdb_tables(esw); } -static int esw_create_offloads_table(struct mlx5_eswitch *esw) +static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; @@ -1062,7 +1145,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_attr.max_fte = dev->priv.sriov.num_vfs + 2; + ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -1082,16 +1165,15 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(offloads->ft_offloads); } -static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; - struct mlx5_priv *priv = &esw->dev->priv; u32 *flow_group_in; void *match_criteria, *misc; int err = 0; - int nvports = priv->sriov.num_vfs + 2; + nvports = nvports + MLX5_ESW_MISS_FLOWS; flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1168,7 +1250,8 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != SRIOV_LEGACY && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't set offloads mode, SRIOV legacy not enabled"); return -EINVAL; @@ -1206,9 +1289,8 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vfs = MLX5_TOTAL_VPORTS(esw->dev); struct mlx5_core_dev *dev = esw->dev; - struct mlx5_esw_offload *offloads; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN]; + u8 hw_id[ETH_ALEN], rep_type; int vport; esw->offloads.vport_reps = kcalloc(total_vfs, @@ -1217,75 +1299,203 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - offloads = &esw->offloads; mlx5_query_nic_vport_mac_address(dev, 0, hw_id); - for (vport = 0; vport < total_vfs; vport++) { - rep = &offloads->vport_reps[vport]; - - rep->vport = vport; + mlx5_esw_for_all_reps(esw, vport, rep) { + rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport); ether_addr_copy(rep->hw_id, hw_id); - } - offloads->vport_reps[0].vport = FDB_UPLINK_VPORT; + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + rep->rep_if[rep_type].state = REP_UNREGISTERED; + } return 0; } -static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (rep->rep_if[rep_type].state != REP_LOADED) + return; + + rep->rep_if[rep_type].unload(rep); + rep->rep_if[rep_type].state = REP_REGISTERED; +} + +static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int vport; - for (vport = nvports - 1; vport >= 0; vport--) { - rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } - rep->rep_if[rep_type].unload(rep); + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); } + + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + __esw_offloads_unload_rep(esw, rep, rep_type); } -static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) +static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int i; + + mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = NUM_REP_TYPES; + + while (rep_type-- > 0) + __unload_reps_vf_vport(esw, nvports, rep_type); +} + +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + __unload_reps_vf_vport(esw, nvports, rep_type); + + /* Special vports must be the last to unload. */ + __unload_reps_special_vport(esw, rep_type); +} + +static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = NUM_REP_TYPES; while (rep_type-- > 0) - esw_offloads_unload_reps_type(esw, nvports, rep_type); + __unload_reps_all_vport(esw, nvports, rep_type); +} + +static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + int err = 0; + + if (rep->rep_if[rep_type].state != REP_REGISTERED) + return 0; + + err = rep->rep_if[rep_type].load(esw->dev, rep); + if (err) + return err; + + rep->rep_if[rep_type].state = REP_LOADED; + + return 0; } -static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int vport; int err; - for (vport = 0; vport < nvports; vport++) { - rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + return err; - err = rep->rep_if[rep_type].load(esw->dev, rep); + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) - goto err_reps; + goto err_pf; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_ecpf; + } + + return 0; + +err_ecpf: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + +err_pf: + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + __esw_offloads_unload_rep(esw, rep, rep_type); + return err; +} + +static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int err, i; + + mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) { + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_vf; } return 0; +err_vf: + __unload_reps_vf_vport(esw, --i, rep_type); + return err; +} + +static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = 0; + int err; + + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + err = __load_reps_vf_vport(esw, nvports, rep_type); + if (err) + goto err_reps; + } + + return err; + err_reps: - esw_offloads_unload_reps_type(esw, vport, rep_type); + while (rep_type-- > 0) + __unload_reps_vf_vport(esw, nvports, rep_type); + return err; +} + +static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + int err; + + /* Special vports must be loaded first. */ + err = __load_reps_special_vport(esw, rep_type); + if (err) + return err; + + err = __load_reps_vf_vport(esw, nvports, rep_type); + if (err) + goto err_vfs; + + return 0; + +err_vfs: + __unload_reps_special_vport(esw, rep_type); return err; } -static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = esw_offloads_load_reps_type(esw, nvports, rep_type); + err = __load_reps_all_vport(esw, nvports, rep_type); if (err) goto err_reps; } @@ -1294,7 +1504,7 @@ static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - esw_offloads_unload_reps_type(esw, nvports, rep_type); + __unload_reps_all_vport(esw, nvports, rep_type); return err; } @@ -1397,7 +1607,7 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); } -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) { int err; @@ -1407,24 +1617,16 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) return err; - err = esw_create_offloads_table(esw); + err = esw_create_offloads_table(esw, nvports); if (err) goto create_ft_err; - err = esw_create_vport_rx_group(esw); + err = esw_create_vport_rx_group(esw, nvports); if (err) goto create_fg_err; - err = esw_offloads_load_reps(esw, nvports); - if (err) - goto err_reps; - - esw_offloads_devcom_init(esw); return 0; -err_reps: - esw_destroy_vport_rx_group(esw); - create_fg_err: esw_destroy_offloads_table(esw); @@ -1434,6 +1636,95 @@ create_ft_err: return err; } +static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) +{ + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_fdb_tables(esw); +} + +static void esw_host_params_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + int err, num_vf = 0; + + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + + err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); + if (err || num_vf == esw->host_info.num_vfs) + goto out; + + /* Number of VFs can only change from "0 to x" or "x to 0". */ + if (esw->host_info.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + } else { + err = esw_offloads_load_vf_reps(esw, num_vf); + + if (err) + goto out; + } + + esw->host_info.num_vfs = num_vf; + +out: + kfree(host_work); +} + +static int esw_host_params_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_host_work *host_work; + struct mlx5_host_info *host_info; + struct mlx5_eswitch *esw; + + host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); + if (!host_work) + return NOTIFY_DONE; + + host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); + esw = container_of(host_info, struct mlx5_eswitch, host_info); + + host_work->esw = esw; + + INIT_WORK(&host_work->work, esw_host_params_event_handler); + queue_work(esw->work_queue, &host_work->work); + + return NOTIFY_OK; +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports) +{ + int err; + + mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); + + err = esw_offloads_steering_init(esw, total_nvports); + if (err) + return err; + + err = esw_offloads_load_all_reps(esw, vf_nvports); + if (err) + goto err_reps; + + esw_offloads_devcom_init(esw); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, + HOST_PARAMS_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); + esw->host_info.num_vfs = vf_nvports; + } + + return 0; + +err_reps: + esw_offloads_steering_cleanup(esw); + return err; +} + static int esw_offloads_stop(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { @@ -1453,13 +1744,21 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, return err; } -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +void esw_offloads_cleanup(struct mlx5_eswitch *esw) { + u16 num_vfs; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); + flush_workqueue(esw->work_queue); + num_vfs = esw->host_info.num_vfs; + } else { + num_vfs = esw->dev->priv.sriov.num_vfs; + } + esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_reps(esw, nvports); - esw_destroy_vport_rx_group(esw); - esw_destroy_offloads_table(esw); - esw_destroy_offloads_fdb_tables(esw); + esw_offloads_unload_all_reps(esw, num_vfs); + esw_offloads_steering_cleanup(esw); } static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) @@ -1548,7 +1847,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == SRIOV_NONE) + if (dev->priv.eswitch->mode == SRIOV_NONE && + !mlx5_core_is_ecpf_esw_manager(dev)) return -EOPNOTSUPP; return 0; @@ -1760,47 +2060,45 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) return 0; } -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *__rep_if, - u8 rep_type) +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep_if *__rep_if, + u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep *rep; + int i; - rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type]; - - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; + mlx5_esw_for_all_reps(esw, i, rep) { + rep_if = &rep->rep_if[rep_type]; + rep_if->load = __rep_if->load; + rep_if->unload = __rep_if->unload; + rep_if->get_proto_dev = __rep_if->get_proto_dev; + rep_if->priv = __rep_if->priv; - rep_if->valid = true; + rep_if->state = REP_REGISTERED; + } } -EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); +EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, u8 rep_type) +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; + u16 max_vf = mlx5_core_max_vfs(esw->dev); struct mlx5_eswitch_rep *rep; + int i; - rep = &offloads->vport_reps[vport_index]; - - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) - rep->rep_if[rep_type].unload(rep); + if (esw->mode == SRIOV_OFFLOADS) + __unload_reps_all_vport(esw, max_vf, rep_type); - rep->rep_if[rep_type].valid = false; + mlx5_esw_for_all_reps(esw, i, rep) + rep->rep_if[rep_type].state = REP_UNREGISTERED; } -EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { -#define UPLINK_REP_INDEX 0 - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - rep = &offloads->vport_reps[UPLINK_REP_INDEX]; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); return rep->rep_if[rep_type].priv; } @@ -1808,15 +2106,11 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, int vport, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - if (vport == FDB_UPLINK_VPORT) - vport = UPLINK_REP_INDEX; - - rep = &offloads->vport_reps[vport]; + rep = mlx5_eswitch_get_rep(esw, vport); - if (rep->rep_if[rep_type].valid && + if (rep->rep_if[rep_type].state == REP_LOADED && rep->rep_if[rep_type].get_proto_dev) return rep->rep_if[rep_type].get_proto_dev(rep); return NULL; @@ -1825,13 +2119,13 @@ EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) { - return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type); + return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type); } EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, int vport) { - return &esw->offloads.vport_reps[vport]; + return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index fbc42b7252a9..5d5864e8df3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -103,6 +103,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: + return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: @@ -211,11 +213,10 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data enum port_module_event_status_type module_status; enum port_module_event_error_type error_type; struct mlx5_eqe_port_module *module_event_eqe; - const char *status_str, *error_str; + const char *status_str; u8 module_num; module_event_eqe = &eqe->data.port_module; - module_num = module_event_eqe->module; module_status = module_event_eqe->module_status & PORT_MODULE_EVENT_MODULE_STATUS_MASK; error_type = module_event_eqe->error_type & @@ -223,25 +224,27 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data if (module_status < MLX5_MODULE_STATUS_NUM) events->pme_stats.status_counters[module_status]++; - status_str = mlx5_pme_status_to_string(module_status); - if (module_status == MLX5_MODULE_STATUS_ERROR) { + if (module_status == MLX5_MODULE_STATUS_ERROR) if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) events->pme_stats.error_counters[error_type]++; - error_str = mlx5_pme_error_to_string(error_type); - } if (!printk_ratelimit()) return NOTIFY_OK; - if (module_status == MLX5_MODULE_STATUS_ERROR) + module_num = module_event_eqe->module; + status_str = mlx5_pme_status_to_string(module_status); + if (module_status == MLX5_MODULE_STATUS_ERROR) { + const char *error_str = mlx5_pme_error_to_string(error_type); + mlx5_core_err(events->dev, "Port module event[error]: module %u, %s, %s\n", module_num, status_str, error_str); - else + } else { mlx5_core_info(events->dev, "Port module event: module %u, %s\n", module_num, status_str); + } return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index b354f3ee94c5..f2cfa012315e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -32,6 +32,7 @@ #include <linux/mutex.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> #include <linux/mlx5/eswitch.h> #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 196c07383082..cb9fa3430c53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -103,7 +103,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mlx5_core_err(dev, "start\n"); if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; - mlx5_cmd_trigger_completions(dev); + mlx5_cmd_flush(dev); } mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 2d223385dc81..04c5aca7f8c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -343,6 +343,11 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) roce_lag = !mlx5_sriov_is_enabled(dev0) && !mlx5_sriov_is_enabled(dev1); +#ifdef CONFIG_MLX5_ESWITCH + roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE && + dev1->priv.eswitch->mode == SRIOV_NONE; +#endif + if (roce_lag) mlx5_lag_remove_ib_devices(ldev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c deleted file mode 100644 index 3a3b0005fd2b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/cmd.h> -#include "mlx5_core.h" - -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port) -{ - int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); - int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); - int err = -ENOMEM; - void *data; - void *resp; - u32 *out; - u32 *in; - - in = kzalloc(inlen, GFP_KERNEL); - out = kzalloc(outlen, GFP_KERNEL); - if (!in || !out) - goto out; - - MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); - MLX5_SET(mad_ifc_in, in, op_mod, opmod); - MLX5_SET(mad_ifc_in, in, port, port); - - data = MLX5_ADDR_OF(mad_ifc_in, in, mad); - memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); - if (err) - goto out; - - resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); - memcpy(outb, resp, - MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); - -out: - kfree(out); - kfree(in); - return err; -} -EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index be81b319b0dc..40d591c8e76c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -65,6 +65,7 @@ #include "lib/vxlan.h" #include "lib/devcom.h" #include "diag/fw_tracer.h" +#include "ecpf.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -459,6 +460,50 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) return err; } +static int handle_hca_cap_odp(struct mlx5_core_dev *dev) +{ + void *set_hca_cap; + void *set_ctx; + int set_sz; + int err; + + if (!MLX5_CAP_GEN(dev, pg)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + + if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))) + return 0; + + set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP], + MLX5_ST_SZ_BYTES(odp_cap)); + + /* set ODP SRQ support for RC/UD and XRC transports */ + MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive, + MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive)); + + MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive, + MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive)); + + MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive, + MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive)); + + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP); + + kfree(set_ctx); + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; @@ -567,6 +612,8 @@ int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); } @@ -577,6 +624,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -693,6 +742,11 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_clr_master; } + if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) + mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); + dev->iseg_base = pci_resource_start(dev->pdev, 0); dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); if (!dev->iseg) { @@ -849,6 +903,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, struct pci_dev *pdev = dev->pdev; int err; + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", @@ -926,6 +981,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } + err = handle_hca_cap_odp(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_odp failed\n"); + goto reclaim_boot_pages; + } + err = mlx5_satisfy_startup_pages(dev, 0); if (err) { dev_err(&pdev->dev, "failed to allocate init pages\n"); @@ -1014,6 +1075,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_sriov; } + err = mlx5_ec_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init embedded CPU\n"); + goto err_ec; + } + if (mlx5_device_registered(dev)) { mlx5_attach_device(dev); } else { @@ -1031,6 +1098,9 @@ out: return 0; err_reg_dev: + mlx5_ec_cleanup(dev); + +err_ec: mlx5_sriov_detach(dev); err_sriov: @@ -1105,6 +1175,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (mlx5_device_registered(dev)) mlx5_detach_device(dev); + mlx5_ec_cleanup(dev); mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); mlx5_accel_ipsec_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 5300b0b6d836..9529cf9623e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -121,11 +121,12 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 modify_bitmask); int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, struct ptp_system_timestamp *sts); void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); +void mlx5_cmd_flush(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 0670165afd5f..ea744d8466ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context) + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; @@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(mkc, mkc, mkey_7_0, key); if (callback) - return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); @@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen) { - return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, NULL, 0, NULL, NULL); } EXPORT_SYMBOL(mlx5_core_create_mkey); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index a83b517b0714..41025387ff2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -48,6 +48,7 @@ enum { struct mlx5_pages_req { struct mlx5_core_dev *dev; u16 func_id; + u8 ec_function; s32 npages; struct work_struct work; }; @@ -143,6 +144,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, MLX5_SET(query_pages_in, in, op_mod, boot ? MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); + MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev)); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -253,7 +255,8 @@ err_mapping: return err; } -static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) +static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, + bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -262,6 +265,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -270,7 +274,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) } static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, - int notify_fail) + int notify_fail, bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); @@ -305,6 +309,7 @@ retry: MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) { @@ -316,8 +321,11 @@ retry: dev->priv.fw_pages += npages; if (func_id) dev->priv.vfs_pages += npages; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages += npages; - mlx5_core_dbg(dev, "err %d\n", err); + mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n", + npages, ec_function, func_id, err); kvfree(in); return 0; @@ -328,7 +336,7 @@ out_4k: out_free: kvfree(in); if (notify_fail) - page_notify_fail(dev, func_id); + page_notify_fail(dev, func_id, ec_function); return err; } @@ -364,7 +372,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, } static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, - int *nclaimed) + int *nclaimed, bool ec_function) { int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -385,6 +393,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); @@ -410,6 +419,8 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, dev->priv.fw_pages -= num_claimed; if (func_id) dev->priv.vfs_pages -= num_claimed; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages -= num_claimed; out_free: kvfree(out); @@ -423,9 +434,10 @@ static void pages_work_handler(struct work_struct *work) int err = 0; if (req->npages < 0) - err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL); + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL, + req->ec_function); else if (req->npages > 0) - err = give_pages(dev, req->func_id, req->npages, 1); + err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function); if (err) mlx5_core_warn(dev, "%s fail %d\n", @@ -434,6 +446,10 @@ static void pages_work_handler(struct work_struct *work) kfree(req); } +enum { + EC_FUNCTION_MASK = 0x8000, +}; + static int req_pages_handler(struct notifier_block *nb, unsigned long type, void *data) { @@ -441,6 +457,7 @@ static int req_pages_handler(struct notifier_block *nb, struct mlx5_core_dev *dev; struct mlx5_priv *priv; struct mlx5_eqe *eqe; + bool ec_function; u16 func_id; s32 npages; @@ -450,6 +467,7 @@ static int req_pages_handler(struct notifier_block *nb, func_id = be16_to_cpu(eqe->data.req_pages.func_id); npages = be32_to_cpu(eqe->data.req_pages.num_pages); + ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK; mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", func_id, npages); req = kzalloc(sizeof(*req), GFP_ATOMIC); @@ -461,6 +479,7 @@ static int req_pages_handler(struct notifier_block *nb, req->dev = dev; req->func_id = func_id; req->npages = npages; + req->ec_function = ec_function; INIT_WORK(&req->work, pages_work_handler); queue_work(dev->priv.pg_wq, &req->work); return NOTIFY_OK; @@ -479,7 +498,7 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", npages, boot ? "boot" : "init", func_id); - return give_pages(dev, func_id, npages, 0); + return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev)); } enum { @@ -513,7 +532,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) fwp = rb_entry(p, struct fw_page, rb_node); err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), - &nclaimed); + &nclaimed, mlx5_core_is_ecpf(dev)); if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", @@ -535,6 +554,9 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.vfs_pages, "VFs FW pages counter is %d after reclaiming all pages\n", dev->priv.vfs_pages); + WARN(dev->priv.peer_pf_pages, + "Peer PF FW pages counter is %d after reclaiming all pages\n", + dev->priv.peer_pf_pages); return 0; } @@ -567,10 +589,10 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) flush_workqueue(dev->priv.pg_wq); } -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) { unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - int prev_vfs_pages = dev->priv.vfs_pages; + int prev_pages = *pages; /* In case of internal error we will free the pages manually later */ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -578,16 +600,16 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) return 0; } - mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_pages, dev->priv.name); - while (dev->priv.vfs_pages) { + while (*pages) { if (time_after(jiffies, end)) { - mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages); return -ETIMEDOUT; } - if (dev->priv.vfs_pages < prev_vfs_pages) { + if (*pages < prev_pages) { end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - prev_vfs_pages = dev->priv.vfs_pages; + prev_pages = *pages; } msleep(50); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 2b82f35f4c35..b81542820528 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -30,10 +30,7 @@ * SOFTWARE. */ -#include <linux/module.h> -#include <linux/mlx5/driver.h> #include <linux/mlx5/port.h> -#include <linux/mlx5/cmd.h> #include "mlx5_core.h" int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, @@ -157,44 +154,6 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) sizeof(out), MLX5_REG_MLCR, 0, 1); } -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - else - *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap); - -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - else - *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); - int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port) { @@ -211,23 +170,6 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); -int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, - u32 *proto_oper, u8 local_port) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, - local_port); - if (err) - return err; - - *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - - return 0; -} -EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper); - int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, u8 local_port) { @@ -245,35 +187,6 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); -int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; - u8 an_disable_admin; - u8 an_disable_cap; - u8 an_status; - - mlx5_query_port_autoneg(dev, proto_mask, &an_status, - &an_disable_cap, &an_disable_admin); - if (!an_disable_cap && an_disable) - return -EPERM; - - memset(in, 0, sizeof(in)); - - MLX5_SET(ptys_reg, in, local_port, 1); - MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); - MLX5_SET(ptys_reg, in, proto_mask, proto_mask); - if (proto_mask == MLX5_PTYS_EN) - MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); - else - MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin); - - return mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_PTYS, 0, 1); -} -EXPORT_SYMBOL_GPL(mlx5_set_port_ptys); - /* This function should be used after setting a port register only */ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) { @@ -606,25 +519,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) } EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); -void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, - u8 *an_status, - u8 *an_disable_cap, u8 *an_disable_admin) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - - *an_status = 0; - *an_disable_cap = 0; - *an_disable_admin = 0; - - if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1)) - return; - - *an_status = MLX5_GET(ptys_reg, out, an_status); - *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); - *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); -} -EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg); - int mlx5_max_tc(struct mlx5_core_dev *mdev) { u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 6e178030d8fb..7b23fa8d2d60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -147,7 +147,7 @@ out: if (MLX5_ESWITCH_MANAGER(dev)) mlx5_eswitch_disable_sriov(dev->priv.eswitch); - if (mlx5_wait_for_vf_pages(dev)) + if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 9b150ce9d315..ef95feca9961 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -64,7 +64,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) } int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport, u8 state) + u16 vport, u8 other_vport, u8 state) { u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0}; u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0}; @@ -73,8 +73,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, MLX5_CMD_OP_MODIFY_VPORT_STATE); MLX5_SET(modify_vport_state_in, in, op_mod, opmod); MLX5_SET(modify_vport_state_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_vport_state_in, in, other_vport, 1); + MLX5_SET(modify_vport_state_in, in, other_vport, other_vport); MLX5_SET(modify_vport_state_in, in, admin_state, state); return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); @@ -255,7 +254,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int *list_size) @@ -373,7 +372,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, u16 vlans[], int *size) { @@ -526,7 +525,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, - u32 vport, u64 node_guid) + u16 vport, u64 node_guid) { int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); void *nic_vport_context; @@ -827,7 +826,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, - u32 vport, + u16 vport, int *promisc_uc, int *promisc_mc, int *promisc_all) @@ -1057,7 +1056,7 @@ free: EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, - u64 *rx_discard_vport_down, + u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0}; @@ -1068,8 +1067,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, MLX5_CMD_OP_QUERY_VNIC_ENV); MLX5_SET(query_vnic_env_in, in, op_mod, 0); MLX5_SET(query_vnic_env_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vnic_env_in, in, other_vport, 1); + MLX5_SET(query_vnic_env_in, in, other_vport, other_vport); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index bbf45f10c208..a01d15546e37 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o mlxsw_core-objs := core.o core_acl_flex_keys.o \ - core_acl_flex_actions.o + core_acl_flex_actions.o core_env.o mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 4f6fa515394e..b505d3858235 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1062,6 +1062,9 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_driver_init; } + if (mlxsw_driver->params_register && !reload) + devlink_params_publish(devlink); + return 0; err_driver_init: @@ -1131,6 +1134,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, return; } + if (mlxsw_core->driver->params_unregister && !reload) + devlink_params_unpublish(devlink); if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 4e114f35ee0d..cd0c6aa0dff9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -344,6 +344,7 @@ struct mlxsw_bus_info { struct mlxsw_fw_rev fw_rev; u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN]; u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; + u8 low_frequency; }; struct mlxsw_hwmon; @@ -394,4 +395,9 @@ static inline void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) #endif +enum mlxsw_devlink_param_id { + MLXSW_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, +}; + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c new file mode 100644 index 000000000000..160d6cd164f4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/err.h> + +#include "core.h" +#include "core_env.h" +#include "item.h" +#include "reg.h" + +static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, + bool *qsfp) +{ + char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; + char mcia_pl[MLXSW_REG_MCIA_LEN]; + u8 ident; + int err; + + mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1, + MLXSW_REG_MCIA_I2C_ADDR_LOW); + err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp); + ident = eeprom_tmp[0]; + switch (ident) { + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: + *qsfp = false; + break; + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD: + *qsfp = true; + break; + default: + return -EINVAL; + } + + return 0; +} + +int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, + int off, int *temp) +{ + char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; + union { + u8 buf[MLXSW_REG_MCIA_TH_ITEM_SIZE]; + u16 temp; + } temp_thresh; + char mcia_pl[MLXSW_REG_MCIA_LEN] = {0}; + char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; + u16 module_temp; + bool qsfp; + int err; + + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(core, MLXSW_REG(mtbr), mtbr_pl); + if (err) + return err; + + /* Don't read temperature thresholds for module with no valid info. */ + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &module_temp, NULL); + switch (module_temp) { + case MLXSW_REG_MTBR_BAD_SENS_INFO: /* fall-through */ + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: + *temp = 0; + return 0; + default: + /* Do not consider thresholds for zero temperature. */ + if (!MLXSW_REG_MTMP_TEMP_TO_MC(module_temp)) { + *temp = 0; + return 0; + } + break; + } + + /* Read Free Side Device Temperature Thresholds from page 03h + * (MSB at lower byte address). + * Bytes: + * 128-129 - Temp High Alarm (SFP_TEMP_HIGH_ALARM); + * 130-131 - Temp Low Alarm (SFP_TEMP_LOW_ALARM); + * 132-133 - Temp High Warning (SFP_TEMP_HIGH_WARN); + * 134-135 - Temp Low Warning (SFP_TEMP_LOW_WARN); + */ + + /* Validate module identifier value. */ + err = mlxsw_env_validate_cable_ident(core, module, &qsfp); + if (err) + return err; + + if (qsfp) + mlxsw_reg_mcia_pack(mcia_pl, module, 0, + MLXSW_REG_MCIA_TH_PAGE_NUM, + MLXSW_REG_MCIA_TH_PAGE_OFF + off, + MLXSW_REG_MCIA_TH_ITEM_SIZE, + MLXSW_REG_MCIA_I2C_ADDR_LOW); + else + mlxsw_reg_mcia_pack(mcia_pl, module, 0, + MLXSW_REG_MCIA_PAGE0_LO, + off, MLXSW_REG_MCIA_TH_ITEM_SIZE, + MLXSW_REG_MCIA_I2C_ADDR_HIGH); + + err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + + mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp); + memcpy(temp_thresh.buf, eeprom_tmp, MLXSW_REG_MCIA_TH_ITEM_SIZE); + *temp = temp_thresh.temp * 1000; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h new file mode 100644 index 000000000000..6dbdf63f3ee1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_CORE_ENV_H +#define _MLXSW_CORE_ENV_H + +int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, + int off, int *temp); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index e04e8162aa14..6956bbebe2f1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -7,8 +7,10 @@ #include <linux/sysfs.h> #include <linux/hwmon.h> #include <linux/err.h> +#include <linux/sfp.h> #include "core.h" +#include "core_env.h" #define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127 #define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \ @@ -30,6 +32,7 @@ struct mlxsw_hwmon { struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1]; struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; unsigned int attrs_count; + u8 sensor_count; }; static ssize_t mlxsw_hwmon_temp_show(struct device *dev, @@ -121,6 +124,27 @@ static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev, return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl)); } +static ssize_t mlxsw_hwmon_fan_fault_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char fore_pl[MLXSW_REG_FORE_LEN]; + bool fault; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(fore), fore_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); + return err; + } + mlxsw_reg_fore_unpack(fore_pl, mlwsw_hwmon_attr->type_index, &fault); + + return sprintf(buf, "%u\n", fault); +} + static ssize_t mlxsw_hwmon_pwm_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -167,12 +191,160 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, return len; } +static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; + u16 temp; + u8 module; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); + if (err) { + dev_err(dev, "Failed to query module temperature sensor\n"); + return err; + } + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + /* Update status and temperature cache. */ + switch (temp) { + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: + temp = 0; + break; + case MLXSW_REG_MTBR_BAD_SENS_INFO: + /* Untrusted cable is connected. Reading temperature from its + * sensor is faulty. + */ + temp = 0; + break; + default: + temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + break; + } + + return sprintf(buf, "%u\n", temp); +} + +static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; + u8 module, fault; + u16 temp; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); + if (err) { + dev_err(dev, "Failed to query module temperature sensor\n"); + return err; + } + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + + /* Update status and temperature cache. */ + switch (temp) { + case MLXSW_REG_MTBR_BAD_SENS_INFO: + /* Untrusted cable is connected. Reading temperature from its + * sensor is faulty. + */ + fault = 1; + break; + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: + default: + fault = 0; + break; + } + + return sprintf(buf, "%u\n", fault); +} + +static ssize_t +mlxsw_hwmon_module_temp_critical_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + int temp; + u8 module; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, + SFP_TEMP_HIGH_WARN, &temp); + if (err) { + dev_err(dev, "Failed to query module temperature thresholds\n"); + return err; + } + + return sprintf(buf, "%u\n", temp); +} + +static ssize_t +mlxsw_hwmon_module_temp_emergency_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + u8 module; + int temp; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, + SFP_TEMP_HIGH_ALARM, &temp); + if (err) { + dev_err(dev, "Failed to query module temperature thresholds\n"); + return err; + } + + return sprintf(buf, "%u\n", temp); +} + +static ssize_t +mlxsw_hwmon_module_temp_label_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + + return sprintf(buf, "front panel %03u\n", + mlwsw_hwmon_attr->type_index); +} + enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP, MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, MLXSW_HWMON_ATTR_TYPE_TEMP_RST, MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + MLXSW_HWMON_ATTR_TYPE_FAN_FAULT, MLXSW_HWMON_ATTR_TYPE_PWM, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, }; static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, @@ -209,6 +381,12 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "fan%u_input", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_FAN_FAULT: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_fault_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "fan%u_fault", num + 1); + break; case MLXSW_HWMON_ATTR_TYPE_PWM: mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show; mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store; @@ -216,6 +394,40 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "pwm%u", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_module_temp_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_fault_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_fault", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_critical_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_crit", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_emergency_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_emergency", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_label_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_label", num + 1); + break; default: WARN_ON(1); } @@ -233,7 +445,6 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) { char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0}; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - u8 sensor_count; int i; int err; @@ -242,8 +453,8 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n"); return err; } - sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); - for (i = 0; i < sensor_count; i++) { + mlxsw_hwmon->sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); + for (i = 0; i < mlxsw_hwmon->sensor_count; i++) { mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); @@ -280,10 +491,14 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); num = 0; for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) { - if (tacho_active & BIT(type_index)) + if (tacho_active & BIT(type_index)) { mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + type_index, num); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_FAN_FAULT, type_index, num++); + } } num = 0; for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) { @@ -295,6 +510,53 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) return 0; } +static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + unsigned int module_count = mlxsw_core_max_ports(mlxsw_hwmon->core); + char pmlp_pl[MLXSW_REG_PMLP_LEN] = {0}; + int i, index; + u8 width; + int err; + + /* Add extra attributes for module temperature. Sensor index is + * assigned to sensor_count value, while all indexed before + * sensor_count are already utilized by the sensors connected through + * mtmp register by mlxsw_hwmon_temp_init(). + */ + index = mlxsw_hwmon->sensor_count; + for (i = 1; i < module_count; i++) { + mlxsw_reg_pmlp_pack(pmlp_pl, i); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(pmlp), + pmlp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to read module index %d\n", + i); + return err; + } + width = mlxsw_reg_pmlp_width_get(pmlp_pl); + if (!width) + continue; + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, + index, index); + index++; + } + + return 0; +} + int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info, struct mlxsw_hwmon **p_hwmon) @@ -317,6 +579,10 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, if (err) goto err_fans_init; + err = mlxsw_hwmon_module_init(mlxsw_hwmon); + if (err) + goto err_temp_module_init; + mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; @@ -333,6 +599,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, return 0; err_hwmon_register: +err_temp_module_init: err_fans_init: err_temp_init: kfree(mlxsw_hwmon); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 61f897b40f82..821fef2e2230 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -13,7 +13,12 @@ #include "core.h" #define MLXSW_THERMAL_POLL_INT 1000 /* ms */ -#define MLXSW_THERMAL_MAX_TEMP 110000 /* 110C */ +#define MLXSW_THERMAL_SLOW_POLL_INT 20000 /* ms */ +#define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ +#define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ +#define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ +#define MLXSW_THERMAL_ASIC_TEMP_CRIT 110000 /* 110C */ +#define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MAX_DUTY 255 /* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values @@ -26,9 +31,15 @@ #define MLXSW_THERMAL_SPEED_MAX (MLXSW_THERMAL_MAX_STATE * 2) #define MLXSW_THERMAL_SPEED_MIN_LEVEL 2 /* 20% */ +/* External cooling devices, allowed for binding to mlxsw thermal zones. */ +static char * const mlxsw_thermal_external_allowed_cdev[] = { + "mlxreg_fan", +}; + struct mlxsw_thermal_trip { int type; int temp; + int hyst; int min_state; int max_state; }; @@ -36,32 +47,29 @@ struct mlxsw_thermal_trip { static const struct mlxsw_thermal_trip default_thermal_trips[] = { { /* In range - 0-40% PWM */ .type = THERMAL_TRIP_ACTIVE, - .temp = 75000, + .temp = MLXSW_THERMAL_ASIC_TEMP_NORM, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = 0, .max_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, }, - { /* High - 40-100% PWM */ - .type = THERMAL_TRIP_ACTIVE, - .temp = 80000, - .min_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, - .max_state = MLXSW_THERMAL_MAX_STATE, - }, { - /* Very high - 100% PWM */ + /* In range - 40-100% PWM */ .type = THERMAL_TRIP_ACTIVE, - .temp = 85000, - .min_state = MLXSW_THERMAL_MAX_STATE, + .temp = MLXSW_THERMAL_ASIC_TEMP_HIGH, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, + .min_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, .max_state = MLXSW_THERMAL_MAX_STATE, }, { /* Warning */ .type = THERMAL_TRIP_HOT, - .temp = 105000, + .temp = MLXSW_THERMAL_ASIC_TEMP_HOT, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = MLXSW_THERMAL_MAX_STATE, .max_state = MLXSW_THERMAL_MAX_STATE, }, { /* Critical - soft poweroff */ .type = THERMAL_TRIP_CRITICAL, - .temp = MLXSW_THERMAL_MAX_TEMP, + .temp = MLXSW_THERMAL_ASIC_TEMP_CRIT, .min_state = MLXSW_THERMAL_MAX_STATE, .max_state = MLXSW_THERMAL_MAX_STATE, } @@ -76,6 +84,7 @@ struct mlxsw_thermal { struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; struct thermal_zone_device *tzdev; + int polling_delay; struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX]; u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1]; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; @@ -103,6 +112,13 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, if (thermal->cdevs[i] == cdev) return i; + /* Allow mlxsw thermal zone binding to an external cooling device */ + for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) { + if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i], + sizeof(cdev->type))) + return 0; + } + return -ENODEV; } @@ -172,7 +188,7 @@ static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev, mutex_lock(&tzdev->lock); if (mode == THERMAL_DEVICE_ENABLED) - tzdev->polling_delay = MLXSW_THERMAL_POLL_INT; + tzdev->polling_delay = thermal->polling_delay; else tzdev->polling_delay = 0; @@ -237,13 +253,31 @@ static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev, struct mlxsw_thermal *thermal = tzdev->devdata; if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS || - temp > MLXSW_THERMAL_MAX_TEMP) + temp > MLXSW_THERMAL_ASIC_TEMP_CRIT) return -EINVAL; thermal->trips[trip].temp = temp; return 0; } +static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev, + int trip, int *p_hyst) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + *p_hyst = thermal->trips[trip].hyst; + return 0; +} + +static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, + int trip, int hyst) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + thermal->trips[trip].hyst = hyst; + return 0; +} + static struct thermal_zone_device_ops mlxsw_thermal_ops = { .bind = mlxsw_thermal_bind, .unbind = mlxsw_thermal_unbind, @@ -253,6 +287,8 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = { .get_trip_type = mlxsw_thermal_get_trip_type, .get_trip_temp = mlxsw_thermal_get_trip_temp, .set_trip_temp = mlxsw_thermal_set_trip_temp, + .get_trip_hyst = mlxsw_thermal_get_trip_hyst, + .set_trip_hyst = mlxsw_thermal_set_trip_hyst, }; static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, @@ -407,8 +443,9 @@ int mlxsw_thermal_init(struct mlxsw_core *core, if (pwm_active & BIT(i)) { struct thermal_cooling_device *cdev; - cdev = thermal_cooling_device_register("Fan", thermal, - &mlxsw_cooling_ops); + cdev = thermal_cooling_device_register("mlxsw_fan", + thermal, + &mlxsw_cooling_ops); if (IS_ERR(cdev)) { err = PTR_ERR(cdev); dev_err(dev, "Failed to register cooling device\n"); @@ -423,13 +460,17 @@ int mlxsw_thermal_init(struct mlxsw_core *core, thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL, i); + thermal->polling_delay = bus_info->low_frequency ? + MLXSW_THERMAL_SLOW_POLL_INT : + MLXSW_THERMAL_POLL_INT; + thermal->tzdev = thermal_zone_device_register("mlxsw", MLXSW_THERMAL_NUM_TRIPS, MLXSW_THERMAL_TRIP_MASK, thermal, &mlxsw_thermal_ops, NULL, 0, - MLXSW_THERMAL_POLL_INT); + thermal->polling_delay); if (IS_ERR(thermal->tzdev)) { err = PTR_ERR(thermal->tzdev); dev_err(dev, "Failed to register thermal zone\n"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 798bd5aca384..a87ca6b6580d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -503,6 +503,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, mlxsw_i2c->bus_info.device_kind = id->name; mlxsw_i2c->bus_info.device_name = client->name; mlxsw_i2c->bus_info.dev = &client->dev; + mlxsw_i2c->bus_info.low_frequency = true; mlxsw_i2c->dev = &client->dev; err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info, diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5f8066ab7d40..cbd0193ec3f6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2199,6 +2199,14 @@ MLXSW_ITEM32(reg, pagt, size, 0x00, 0, 8); */ MLXSW_ITEM32(reg, pagt, acl_group_id, 0x08, 0, 16); +/* reg_pagt_multi + * Multi-ACL + * 0 - This ACL is the last ACL in the multi-ACL + * 1 - This ACL is part of a multi-ACL + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pagt, multi, 0x30, 31, 1, 0x04, 0x00, false); + /* reg_pagt_acl_id * ACL identifier * Access: RW @@ -2212,12 +2220,13 @@ static inline void mlxsw_reg_pagt_pack(char *payload, u16 acl_group_id) } static inline void mlxsw_reg_pagt_acl_id_pack(char *payload, int index, - u16 acl_id) + u16 acl_id, bool multi) { u8 size = mlxsw_reg_pagt_size_get(payload); if (index >= size) mlxsw_reg_pagt_size_set(payload, index + 1); + mlxsw_reg_pagt_multi_set(payload, index, multi); mlxsw_reg_pagt_acl_id_set(payload, index, acl_id); } @@ -7866,6 +7875,35 @@ static inline void mlxsw_reg_mfsl_unpack(char *payload, u8 tacho, *p_tach_max = mlxsw_reg_mfsl_tach_max_get(payload); } +/* FORE - Fan Out of Range Event Register + * -------------------------------------- + * This register reports the status of the controlled fans compared to the + * range defined by the MFSL register. + */ +#define MLXSW_REG_FORE_ID 0x9007 +#define MLXSW_REG_FORE_LEN 0x0C + +MLXSW_REG_DEFINE(fore, MLXSW_REG_FORE_ID, MLXSW_REG_FORE_LEN); + +/* fan_under_limit + * Fan speed is below the low limit defined in MFSL register. Each bit relates + * to a single tachometer and indicates the specific tachometer reading is + * below the threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, fore, fan_under_limit, 0x00, 16, 10); + +static inline void mlxsw_reg_fore_unpack(char *payload, u8 tacho, + bool *fault) +{ + u16 limit; + + if (fault) { + limit = mlxsw_reg_fore_fan_under_limit_get(payload); + *fault = limit & BIT(tacho); + } +} + /* MTCAP - Management Temperature Capabilities * ------------------------------------------- * This register exposes the capabilities of the device and @@ -7992,6 +8030,80 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } +/* MTBR - Management Temperature Bulk Register + * ------------------------------------------- + * This register is used for bulk temperature reading. + */ +#define MLXSW_REG_MTBR_ID 0x900F +#define MLXSW_REG_MTBR_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_MTBR_REC_LEN 0x04 /* record length */ +#define MLXSW_REG_MTBR_REC_MAX_COUNT 47 /* firmware limitation */ +#define MLXSW_REG_MTBR_LEN (MLXSW_REG_MTBR_BASE_LEN + \ + MLXSW_REG_MTBR_REC_LEN * \ + MLXSW_REG_MTBR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN); + +/* reg_mtbr_base_sensor_index + * Base sensors index to access (0 - ASIC sensor, 1-63 - ambient sensors, + * 64-127 are mapped to the SFP+/QSFP modules sequentially). + * Access: Index + */ +MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7); + +/* reg_mtbr_num_rec + * Request: Number of records to read + * Response: Number of records read + * See above description for more details. + * Range 1..255 + * Access: RW + */ +MLXSW_ITEM32(reg, mtbr, num_rec, 0x04, 0, 8); + +/* reg_mtbr_rec_max_temp + * The highest measured temperature from the sensor. + * When the bit mte is cleared, the field max_temperature is reserved. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16, + 16, MLXSW_REG_MTBR_REC_LEN, 0x00, false); + +/* reg_mtbr_rec_temp + * Temperature reading from the sensor. Reading is in 0..125 Celsius + * degrees units. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16, + MLXSW_REG_MTBR_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index, + u8 num_rec) +{ + MLXSW_REG_ZERO(mtbr, payload); + mlxsw_reg_mtbr_base_sensor_index_set(payload, base_sensor_index); + mlxsw_reg_mtbr_num_rec_set(payload, num_rec); +} + +/* Error codes from temperatute reading */ +enum mlxsw_reg_mtbr_temp_status { + MLXSW_REG_MTBR_NO_CONN = 0x8000, + MLXSW_REG_MTBR_NO_TEMP_SENS = 0x8001, + MLXSW_REG_MTBR_INDEX_NA = 0x8002, + MLXSW_REG_MTBR_BAD_SENS_INFO = 0x8003, +}; + +/* Base index for reading modules temperature */ +#define MLXSW_REG_MTBR_BASE_MODULE_INDEX 64 + +static inline void mlxsw_reg_mtbr_temp_unpack(char *payload, int rec_ind, + u16 *p_temp, u16 *p_max_temp) +{ + if (p_temp) + *p_temp = mlxsw_reg_mtbr_rec_temp_get(payload, rec_ind); + if (p_max_temp) + *p_max_temp = mlxsw_reg_mtbr_rec_max_temp_get(payload, rec_ind); +} + /* MCIA - Management Cable Info Access * ----------------------------------- * MCIA register is used to access the SFP+ and QSFP connector's EPROM. @@ -8046,13 +8158,41 @@ MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16); */ MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16); -#define MLXSW_SP_REG_MCIA_EEPROM_SIZE 48 +#define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH 256 +#define MLXSW_REG_MCIA_EEPROM_SIZE 48 +#define MLXSW_REG_MCIA_I2C_ADDR_LOW 0x50 +#define MLXSW_REG_MCIA_I2C_ADDR_HIGH 0x51 +#define MLXSW_REG_MCIA_PAGE0_LO_OFF 0xa0 +#define MLXSW_REG_MCIA_TH_ITEM_SIZE 2 +#define MLXSW_REG_MCIA_TH_PAGE_NUM 3 +#define MLXSW_REG_MCIA_PAGE0_LO 0 +#define MLXSW_REG_MCIA_TH_PAGE_OFF 0x80 + +enum mlxsw_reg_mcia_eeprom_module_info_rev_id { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_UNSPC = 0x00, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8436 = 0x01, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636 = 0x03, +}; + +enum mlxsw_reg_mcia_eeprom_module_info_id { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP = 0x03, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP = 0x0C, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD = 0x18, +}; + +enum mlxsw_reg_mcia_eeprom_module_info { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE, +}; /* reg_mcia_eeprom * Bytes to read/write. * Access: RW */ -MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_SP_REG_MCIA_EEPROM_SIZE); +MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE); static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock, u8 page_number, u16 device_addr, @@ -9740,8 +9880,10 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mfsc), MLXSW_REG(mfsm), MLXSW_REG(mfsl), + MLXSW_REG(fore), MLXSW_REG(mtcap), MLXSW_REG(mtmp), + MLXSW_REG(mtbr), MLXSW_REG(mcia), MLXSW_REG(mpat), MLXSW_REG(mpar), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8dd808b7f931..abac923a8d04 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2723,23 +2723,23 @@ static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port, unsigned int *p_read_size) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE]; + char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; char mcia_pl[MLXSW_REG_MCIA_LEN]; u16 i2c_addr; int status; int err; - size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE); + size = min_t(u16, size, MLXSW_REG_MCIA_EEPROM_SIZE); - if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH && - offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH) + if (offset < MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH && + offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) /* Cross pages read, read until offset 256 in low page */ - size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset; + size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset; - i2c_addr = MLXSW_SP_I2C_ADDR_LOW; - if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) { - i2c_addr = MLXSW_SP_I2C_ADDR_HIGH; - offset -= MLXSW_SP_EEPROM_PAGE_LENGTH; + i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW; + if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) { + i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH; + offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH; } mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module, @@ -2760,55 +2760,37 @@ static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } -enum mlxsw_sp_eeprom_module_info_rev_id { - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_UNSPC = 0x00, - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8436 = 0x01, - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636 = 0x03, -}; - -enum mlxsw_sp_eeprom_module_info_id { - MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP = 0x03, - MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP = 0x0C, - MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, - MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, -}; - -enum mlxsw_sp_eeprom_module_info { - MLXSW_SP_EEPROM_MODULE_INFO_ID, - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID, - MLXSW_SP_EEPROM_MODULE_INFO_SIZE, -}; - static int mlxsw_sp_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); - u8 module_info[MLXSW_SP_EEPROM_MODULE_INFO_SIZE]; + u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE]; + u16 offset = MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE; u8 module_rev_id, module_id; unsigned int read_size; int err; - err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0, - MLXSW_SP_EEPROM_MODULE_INFO_SIZE, + err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0, offset, module_info, &read_size); if (err) return err; - if (read_size < MLXSW_SP_EEPROM_MODULE_INFO_SIZE) + if (read_size < offset) return -EIO; - module_rev_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_REV_ID]; - module_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_ID]; + module_rev_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID]; + module_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID]; switch (module_id) { - case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; break; - case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS: - case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28: - if (module_id == MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 || - module_rev_id >= MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636) { + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: + if (module_id == MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 || + module_rev_id >= + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636) { modinfo->type = ETH_MODULE_SFF_8636; modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; } else { @@ -2816,7 +2798,7 @@ static int mlxsw_sp_get_module_info(struct net_device *netdev, modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; } break; - case MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; @@ -4413,6 +4395,71 @@ static void mlxsw_sp_params_unregister(struct mlxsw_core *mlxsw_core) ARRAY_SIZE(mlxsw_sp_devlink_params)); } +static int +mlxsw_sp_params_acl_region_rehash_intrvl_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + ctx->val.vu32 = mlxsw_sp_acl_region_rehash_intrvl_get(mlxsw_sp); + return 0; +} + +static int +mlxsw_sp_params_acl_region_rehash_intrvl_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + return mlxsw_sp_acl_region_rehash_intrvl_set(mlxsw_sp, ctx->val.vu32); +} + +static const struct devlink_param mlxsw_sp2_devlink_params[] = { + DEVLINK_PARAM_DRIVER(MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, + "acl_region_rehash_interval", + DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlxsw_sp_params_acl_region_rehash_intrvl_get, + mlxsw_sp_params_acl_region_rehash_intrvl_set, + NULL), +}; + +static int mlxsw_sp2_params_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + union devlink_param_value value; + int err; + + err = mlxsw_sp_params_register(mlxsw_core); + if (err) + return err; + + err = devlink_params_register(devlink, mlxsw_sp2_devlink_params, + ARRAY_SIZE(mlxsw_sp2_devlink_params)); + if (err) + goto err_devlink_params_register; + + value.vu32 = 0; + devlink_param_driverinit_value_set(devlink, + MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, + value); + return 0; + +err_devlink_params_register: + mlxsw_sp_params_unregister(mlxsw_core); + return err; +} + +static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) +{ + devlink_params_unregister(priv_to_devlink(mlxsw_core), + mlxsw_sp2_devlink_params, + ARRAY_SIZE(mlxsw_sp2_devlink_params)); + mlxsw_sp_params_unregister(mlxsw_core); +} + static struct mlxsw_driver mlxsw_sp1_driver = { .kind = mlxsw_sp1_driver_name, .priv_size = sizeof(struct mlxsw_sp), @@ -4461,8 +4508,8 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, - .params_register = mlxsw_sp_params_register, - .params_unregister = mlxsw_sp_params_unregister, + .params_register = mlxsw_sp2_params_register, + .params_unregister = mlxsw_sp2_params_unregister, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, @@ -4706,9 +4753,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); if (err) goto err_col_port_add; - err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id); - if (err) - goto err_col_port_enable; mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, mlxsw_sp_port->local_port); @@ -4722,8 +4766,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, return 0; -err_col_port_enable: - mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); err_col_port_add: if (!lag->ref_count) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); @@ -4742,7 +4784,6 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); WARN_ON(lag->ref_count == 0); - mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); /* Any VLANs configured on the port are no longer valid */ @@ -4787,21 +4828,56 @@ static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); } -static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port, - bool lag_tx_enabled) +static int +mlxsw_sp_port_lag_col_dist_enable(struct mlxsw_sp_port *mlxsw_sp_port) { - if (lag_tx_enabled) - return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, - mlxsw_sp_port->lag_id); - else - return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, - mlxsw_sp_port->lag_id); + int err; + + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + if (err) + goto err_dist_port_add; + + return 0; + +err_dist_port_add: + mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; +} + +static int +mlxsw_sp_port_lag_col_dist_disable(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + goto err_col_port_disable; + + return 0; + +err_col_port_disable: + mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; } static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, struct netdev_lag_lower_state_info *info) { - return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); + if (info->tx_enabled) + return mlxsw_sp_port_lag_col_dist_enable(mlxsw_sp_port); + else + return mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); } static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -5024,8 +5100,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, err = mlxsw_sp_port_lag_join(mlxsw_sp_port, upper_dev); } else { - mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, - false); + mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4fe0996c7cdd..ceebc91f4f1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -690,6 +690,8 @@ struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); +u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val); /* spectrum_acl_tcam.c */ struct mlxsw_sp_acl_tcam; @@ -704,10 +706,13 @@ struct mlxsw_sp_acl_tcam_ops { size_t region_priv_size; int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *tcam_priv, - struct mlxsw_sp_acl_tcam_region *region); + struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv); void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv); int (*region_associate)(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region); + void * (*region_rehash_hints_get)(void *region_priv); + void (*region_rehash_hints_put)(void *hints_priv); size_t chunk_priv_size; void (*chunk_init)(void *region_priv, void *chunk_priv, unsigned int priority); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c index 6e444525713f..3a636f753607 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c @@ -112,7 +112,8 @@ mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *tcam_priv, - struct mlxsw_sp_acl_tcam_region *_region) + struct mlxsw_sp_acl_tcam_region *_region, + void *hints_priv) { struct mlxsw_sp1_acl_tcam_region *region = region_priv; int err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c index d380b3403960..6c66a0f1b79e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c @@ -139,7 +139,8 @@ static void mlxsw_sp2_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv) static int mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *tcam_priv, - struct mlxsw_sp_acl_tcam_region *_region) + struct mlxsw_sp_acl_tcam_region *_region, + void *hints_priv) { struct mlxsw_sp2_acl_tcam_region *region = region_priv; struct mlxsw_sp2_acl_tcam *tcam = tcam_priv; @@ -147,7 +148,8 @@ mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, region->region = _region; return mlxsw_sp_acl_atcam_region_init(mlxsw_sp, &tcam->atcam, - ®ion->aregion, _region, + ®ion->aregion, + _region, hints_priv, &mlxsw_sp2_acl_ctcam_region_ops); } @@ -166,6 +168,18 @@ mlxsw_sp2_acl_tcam_region_associate(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_acl_atcam_region_associate(mlxsw_sp, region->id); } +static void *mlxsw_sp2_acl_tcam_region_rehash_hints_get(void *region_priv) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; + + return mlxsw_sp_acl_atcam_rehash_hints_get(®ion->aregion); +} + +static void mlxsw_sp2_acl_tcam_region_rehash_hints_put(void *hints_priv) +{ + mlxsw_sp_acl_atcam_rehash_hints_put(hints_priv); +} + static void mlxsw_sp2_acl_tcam_chunk_init(void *region_priv, void *chunk_priv, unsigned int priority) { @@ -243,6 +257,8 @@ const struct mlxsw_sp_acl_tcam_ops mlxsw_sp2_acl_tcam_ops = { .region_init = mlxsw_sp2_acl_tcam_region_init, .region_fini = mlxsw_sp2_acl_tcam_region_fini, .region_associate = mlxsw_sp2_acl_tcam_region_associate, + .region_rehash_hints_get = mlxsw_sp2_acl_tcam_region_rehash_hints_get, + .region_rehash_hints_put = mlxsw_sp2_acl_tcam_region_rehash_hints_put, .chunk_priv_size = sizeof(struct mlxsw_sp2_acl_tcam_chunk), .chunk_init = mlxsw_sp2_acl_tcam_chunk_init, .chunk_fini = mlxsw_sp2_acl_tcam_chunk_fini, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 2649d128a57c..a146a44634e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -640,7 +640,7 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_sp_acl_ruleset_ref_inc(ruleset); - rule = kzalloc(sizeof(*rule) + ops->rule_priv_size(mlxsw_sp), + rule = kzalloc(sizeof(*rule) + ops->rule_priv_size, GFP_KERNEL); if (!rule) { err = -ENOMEM; @@ -912,3 +912,19 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_afk_destroy(acl->afk); kfree(acl); } + +u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(mlxsw_sp, + &acl->tcam); +} + +int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(mlxsw_sp, + &acl->tcam, val); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c index a74a390901ac..ded4cf658680 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c @@ -318,6 +318,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam, struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv, const struct mlxsw_sp_acl_ctcam_region_ops *ops) { int err; @@ -334,7 +335,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, err = aregion->ops->init(aregion); if (err) goto err_ops_init; - err = mlxsw_sp_acl_erp_region_init(aregion); + err = mlxsw_sp_acl_erp_region_init(aregion, hints_priv); if (err) goto err_erp_region_init; err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &aregion->cregion, @@ -634,3 +635,14 @@ void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_acl_erps_fini(mlxsw_sp, atcam); } + +void * +mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion) +{ + return mlxsw_sp_acl_erp_rehash_hints_get(aregion); +} + +void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv) +{ + mlxsw_sp_acl_erp_rehash_hints_put(hints_priv); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index 2941967e1cc5..e935c36638d9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -1200,6 +1200,32 @@ mlxsw_sp_acl_erp_delta_fill(const struct mlxsw_sp_acl_erp_key *parent_key, return 0; } +static bool mlxsw_sp_acl_erp_delta_check(void *priv, const void *parent_obj, + const void *obj) +{ + const struct mlxsw_sp_acl_erp_key *parent_key = parent_obj; + const struct mlxsw_sp_acl_erp_key *key = obj; + u16 delta_start; + u8 delta_mask; + int err; + + err = mlxsw_sp_acl_erp_delta_fill(parent_key, key, + &delta_start, &delta_mask); + return err ? false : true; +} + +static int mlxsw_sp_acl_erp_hints_obj_cmp(const void *obj1, const void *obj2) +{ + const struct mlxsw_sp_acl_erp_key *key1 = obj1; + const struct mlxsw_sp_acl_erp_key *key2 = obj2; + + /* For hints purposes, two objects are considered equal + * in case the masks are the same. Does not matter what + * the "ctcam" value is. + */ + return memcmp(key1->mask, key2->mask, sizeof(key1->mask)); +} + static void *mlxsw_sp_acl_erp_delta_create(void *priv, void *parent_obj, void *obj) { @@ -1254,12 +1280,17 @@ static void mlxsw_sp_acl_erp_delta_destroy(void *priv, void *delta_priv) kfree(delta); } -static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj) +static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj, + unsigned int root_id) { struct mlxsw_sp_acl_atcam_region *aregion = priv; struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; struct mlxsw_sp_acl_erp_key *key = obj; + if (!key->ctcam && + root_id != OBJAGG_OBJ_ROOT_ID_INVALID && + root_id >= MLXSW_SP_ACL_ERP_MAX_PER_REGION) + return ERR_PTR(-ENOBUFS); return erp_table->ops->erp_create(erp_table, key); } @@ -1273,6 +1304,8 @@ static void mlxsw_sp_acl_erp_root_destroy(void *priv, void *root_priv) static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = { .obj_size = sizeof(struct mlxsw_sp_acl_erp_key), + .delta_check = mlxsw_sp_acl_erp_delta_check, + .hints_obj_cmp = mlxsw_sp_acl_erp_hints_obj_cmp, .delta_create = mlxsw_sp_acl_erp_delta_create, .delta_destroy = mlxsw_sp_acl_erp_delta_destroy, .root_create = mlxsw_sp_acl_erp_root_create, @@ -1280,7 +1313,8 @@ static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = { }; static struct mlxsw_sp_acl_erp_table * -mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion) +mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion, + struct objagg_hints *hints) { struct mlxsw_sp_acl_erp_table *erp_table; int err; @@ -1290,7 +1324,7 @@ mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion) return ERR_PTR(-ENOMEM); erp_table->objagg = objagg_create(&mlxsw_sp_acl_erp_objagg_ops, - aregion); + hints, aregion); if (IS_ERR(erp_table->objagg)) { err = PTR_ERR(erp_table->objagg); goto err_objagg_create; @@ -1337,12 +1371,88 @@ mlxsw_sp_acl_erp_region_param_init(struct mlxsw_sp_acl_atcam_region *aregion) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl); } -int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion) +static int +mlxsw_sp_acl_erp_hints_check(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct objagg_hints *hints, bool *p_rehash_needed) +{ + struct objagg *objagg = aregion->erp_table->objagg; + const struct objagg_stats *ostats; + const struct objagg_stats *hstats; + int err; + + *p_rehash_needed = false; + + ostats = objagg_stats_get(objagg); + if (IS_ERR(ostats)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP stats\n"); + return PTR_ERR(ostats); + } + + hstats = objagg_hints_stats_get(hints); + if (IS_ERR(hstats)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP hints stats\n"); + err = PTR_ERR(hstats); + goto err_hints_stats_get; + } + + /* Very basic criterion for now. */ + if (hstats->root_count < ostats->root_count) + *p_rehash_needed = true; + + err = 0; + + objagg_stats_put(hstats); +err_hints_stats_get: + objagg_stats_put(ostats); + return err; +} + +void * +mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion) +{ + struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp; + struct objagg_hints *hints; + bool rehash_needed; + int err; + + hints = objagg_hints_get(aregion->erp_table->objagg, + OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to create ERP hints\n"); + return ERR_CAST(hints); + } + err = mlxsw_sp_acl_erp_hints_check(mlxsw_sp, aregion, hints, + &rehash_needed); + if (err) + goto errout; + + if (!rehash_needed) { + err = -EAGAIN; + goto errout; + } + return hints; + +errout: + objagg_hints_put(hints); + return ERR_PTR(err); +} + +void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv) +{ + struct objagg_hints *hints = hints_priv; + + objagg_hints_put(hints); +} + +int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion, + void *hints_priv) { struct mlxsw_sp_acl_erp_table *erp_table; + struct objagg_hints *hints = hints_priv; int err; - erp_table = mlxsw_sp_acl_erp_table_create(aregion); + erp_table = mlxsw_sp_acl_erp_table_create(aregion, hints); if (IS_ERR(erp_table)) return PTR_ERR(erp_table); aregion->erp_table = erp_table; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 11456e1f236f..7e225a86e3a8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -8,6 +8,7 @@ #include <linux/list.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> +#include <trace/events/mlxsw.h> #include "reg.h" #include "core.h" @@ -23,6 +24,9 @@ size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp) return ops->priv_size; } +#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT 5000 /* ms */ +#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN 3000 /* ms */ + int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam) { @@ -33,6 +37,10 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, size_t alloc_size; int err; + tcam->vregion_rehash_intrvl = + MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT; + INIT_LIST_HEAD(&tcam->vregion_list); + max_tcam_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_REGIONS); max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS); @@ -153,9 +161,9 @@ struct mlxsw_sp_acl_tcam_pattern { struct mlxsw_sp_acl_tcam_group { struct mlxsw_sp_acl_tcam *tcam; u16 id; - struct list_head region_list; + struct list_head vregion_list; unsigned int region_count; - struct rhashtable chunk_ht; + struct rhashtable vchunk_ht; struct mlxsw_sp_acl_tcam_group_ops *ops; const struct mlxsw_sp_acl_tcam_pattern *patterns; unsigned int patterns_count; @@ -163,40 +171,77 @@ struct mlxsw_sp_acl_tcam_group { struct mlxsw_afk_element_usage tmplt_elusage; }; -struct mlxsw_sp_acl_tcam_chunk { - struct list_head list; /* Member of a TCAM region */ - struct rhash_head ht_node; /* Member of a chunk HT */ - unsigned int priority; /* Priority within the region and group */ +struct mlxsw_sp_acl_tcam_vregion { + struct mlxsw_sp_acl_tcam_region *region; + struct mlxsw_sp_acl_tcam_region *region2; /* Used during migration */ + struct list_head list; /* Member of a TCAM group */ + struct list_head tlist; /* Member of a TCAM */ + struct list_head vchunk_list; /* List of vchunks under this vregion */ struct mlxsw_sp_acl_tcam_group *group; + struct mlxsw_afk_key_info *key_info; + struct mlxsw_sp_acl_tcam *tcam; + struct delayed_work rehash_dw; + struct mlxsw_sp *mlxsw_sp; + bool failed_rollback; /* Indicates failed rollback during migration */ +}; + +struct mlxsw_sp_acl_tcam_vchunk; + +struct mlxsw_sp_acl_tcam_chunk { + struct mlxsw_sp_acl_tcam_vchunk *vchunk; struct mlxsw_sp_acl_tcam_region *region; - unsigned int ref_count; unsigned long priv[0]; /* priv has to be always the last item */ }; +struct mlxsw_sp_acl_tcam_vchunk { + struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_chunk *chunk2; /* Used during migration */ + struct list_head list; /* Member of a TCAM vregion */ + struct rhash_head ht_node; /* Member of a chunk HT */ + struct list_head ventry_list; + unsigned int priority; /* Priority within the vregion and group */ + struct mlxsw_sp_acl_tcam_group *group; + struct mlxsw_sp_acl_tcam_vregion *vregion; + unsigned int ref_count; +}; + struct mlxsw_sp_acl_tcam_entry { + struct mlxsw_sp_acl_tcam_ventry *ventry; struct mlxsw_sp_acl_tcam_chunk *chunk; unsigned long priv[0]; /* priv has to be always the last item */ }; -static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = { +struct mlxsw_sp_acl_tcam_ventry { + struct mlxsw_sp_acl_tcam_entry *entry; + struct list_head list; /* Member of a TCAM vchunk */ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + struct mlxsw_sp_acl_rule_info *rulei; +}; + +static const struct rhashtable_params mlxsw_sp_acl_tcam_vchunk_ht_params = { .key_len = sizeof(unsigned int), - .key_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, priority), - .head_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, ht_node), + .key_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, priority), + .head_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, ht_node), .automatic_shrinking = true, }; static int mlxsw_sp_acl_tcam_group_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_group *group) { - struct mlxsw_sp_acl_tcam_region *region; + struct mlxsw_sp_acl_tcam_vregion *vregion; char pagt_pl[MLXSW_REG_PAGT_LEN]; int acl_index = 0; mlxsw_reg_pagt_pack(pagt_pl, group->id); - list_for_each_entry(region, &group->region_list, list) - mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, region->id); + list_for_each_entry(vregion, &group->vregion_list, list) { + if (vregion->region2) + mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, + vregion->region2->id, true); + mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, + vregion->region->id, false); + } mlxsw_reg_pagt_size_set(pagt_pl, acl_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pagt), pagt_pl); } @@ -219,13 +264,13 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp, memcpy(&group->tmplt_elusage, tmplt_elusage, sizeof(group->tmplt_elusage)); } - INIT_LIST_HEAD(&group->region_list); + INIT_LIST_HEAD(&group->vregion_list); err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id); if (err) return err; - err = rhashtable_init(&group->chunk_ht, - &mlxsw_sp_acl_tcam_chunk_ht_params); + err = rhashtable_init(&group->vchunk_ht, + &mlxsw_sp_acl_tcam_vchunk_ht_params); if (err) goto err_rhashtable_init; @@ -241,9 +286,9 @@ static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam *tcam = group->tcam; - rhashtable_destroy(&group->chunk_ht); + rhashtable_destroy(&group->vchunk_ht); mlxsw_sp_acl_tcam_group_id_put(tcam, group->id); - WARN_ON(!list_empty(&group->region_list)); + WARN_ON(!list_empty(&group->vregion_list)); } static int @@ -283,140 +328,153 @@ mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group) } static unsigned int -mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_vregion_prio(struct mlxsw_sp_acl_tcam_vregion *vregion) { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; - if (list_empty(®ion->chunk_list)) + if (list_empty(&vregion->vchunk_list)) return 0; - /* As a priority of a region, return priority of the first chunk */ - chunk = list_first_entry(®ion->chunk_list, typeof(*chunk), list); - return chunk->priority; + /* As a priority of a vregion, return priority of the first vchunk */ + vchunk = list_first_entry(&vregion->vchunk_list, + typeof(*vchunk), list); + return vchunk->priority; } static unsigned int -mlxsw_sp_acl_tcam_region_max_prio(struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_vregion_max_prio(struct mlxsw_sp_acl_tcam_vregion *vregion) { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; - if (list_empty(®ion->chunk_list)) + if (list_empty(&vregion->vchunk_list)) return 0; - chunk = list_last_entry(®ion->chunk_list, typeof(*chunk), list); - return chunk->priority; + vchunk = list_last_entry(&vregion->vchunk_list, + typeof(*vchunk), list); + return vchunk->priority; } -static void -mlxsw_sp_acl_tcam_group_list_add(struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_sp_acl_tcam_region *region) +static int +mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) { - struct mlxsw_sp_acl_tcam_region *region2; - struct list_head *pos; + struct mlxsw_sp_acl_tcam_group *group = region->vregion->group; + int err; + + if (group->region_count == group->tcam->max_group_size) + return -ENOBUFS; + + err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + if (err) + return err; - /* Position the region inside the list according to priority */ - list_for_each(pos, &group->region_list) { - region2 = list_entry(pos, typeof(*region2), list); - if (mlxsw_sp_acl_tcam_region_prio(region2) > - mlxsw_sp_acl_tcam_region_prio(region)) - break; - } - list_add_tail(®ion->list, pos); group->region_count++; + return 0; } static void -mlxsw_sp_acl_tcam_group_list_del(struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_group_region_detach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) { + struct mlxsw_sp_acl_tcam_group *group = region->vregion->group; + group->region_count--; - list_del(®ion->list); + mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); } static int -mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_group_vregion_attach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_acl_tcam_vregion *vregion) { + struct mlxsw_sp_acl_tcam_vregion *vregion2; + struct list_head *pos; int err; - if (group->region_count == group->tcam->max_group_size) - return -ENOBUFS; - - mlxsw_sp_acl_tcam_group_list_add(group, region); + /* Position the vregion inside the list according to priority */ + list_for_each(pos, &group->vregion_list) { + vregion2 = list_entry(pos, typeof(*vregion2), list); + if (mlxsw_sp_acl_tcam_vregion_prio(vregion2) > + mlxsw_sp_acl_tcam_vregion_prio(vregion)) + break; + } + list_add_tail(&vregion->list, pos); + vregion->group = group; - err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, vregion->region); if (err) - goto err_group_update; - region->group = group; + goto err_region_attach; return 0; -err_group_update: - mlxsw_sp_acl_tcam_group_list_del(group, region); - mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); +err_region_attach: + list_del(&vregion->list); return err; } static void -mlxsw_sp_acl_tcam_group_region_detach(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_group_vregion_detach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) { - struct mlxsw_sp_acl_tcam_group *group = region->group; - - mlxsw_sp_acl_tcam_group_list_del(group, region); - mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + list_del(&vregion->list); + if (vregion->region2) + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, + vregion->region2); + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, vregion->region); } -static struct mlxsw_sp_acl_tcam_region * -mlxsw_sp_acl_tcam_group_region_find(struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage, - bool *p_need_split) +static struct mlxsw_sp_acl_tcam_vregion * +mlxsw_sp_acl_tcam_group_vregion_find(struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage, + bool *p_need_split) { - struct mlxsw_sp_acl_tcam_region *region, *region2; + struct mlxsw_sp_acl_tcam_vregion *vregion, *vregion2; struct list_head *pos; bool issubset; - list_for_each(pos, &group->region_list) { - region = list_entry(pos, typeof(*region), list); + list_for_each(pos, &group->vregion_list) { + vregion = list_entry(pos, typeof(*vregion), list); /* First, check if the requested priority does not rather belong - * under some of the next regions. + * under some of the next vregions. */ - if (pos->next != &group->region_list) { /* not last */ - region2 = list_entry(pos->next, typeof(*region2), list); - if (priority >= mlxsw_sp_acl_tcam_region_prio(region2)) + if (pos->next != &group->vregion_list) { /* not last */ + vregion2 = list_entry(pos->next, typeof(*vregion2), + list); + if (priority >= + mlxsw_sp_acl_tcam_vregion_prio(vregion2)) continue; } - issubset = mlxsw_afk_key_info_subset(region->key_info, elusage); + issubset = mlxsw_afk_key_info_subset(vregion->key_info, + elusage); /* If requested element usage would not fit and the priority - * is lower than the currently inspected region we cannot - * use this region, so return NULL to indicate new region has + * is lower than the currently inspected vregion we cannot + * use this region, so return NULL to indicate new vregion has * to be created. */ if (!issubset && - priority < mlxsw_sp_acl_tcam_region_prio(region)) + priority < mlxsw_sp_acl_tcam_vregion_prio(vregion)) return NULL; /* If requested element usage would not fit and the priority - * is higher than the currently inspected region we cannot - * use this region. There is still some hope that the next - * region would be the fit. So let it be processed and + * is higher than the currently inspected vregion we cannot + * use this vregion. There is still some hope that the next + * vregion would be the fit. So let it be processed and * eventually break at the check right above this. */ if (!issubset && - priority > mlxsw_sp_acl_tcam_region_max_prio(region)) + priority > mlxsw_sp_acl_tcam_vregion_max_prio(vregion)) continue; - /* Indicate if the region needs to be split in order to add + /* Indicate if the vregion needs to be split in order to add * the requested priority. Split is needed when requested - * element usage won't fit into the found region. + * element usage won't fit into the found vregion. */ *p_need_split = !issubset; - return region; + return vregion; } - return NULL; /* New region has to be created. */ + return NULL; /* New vregion has to be created. */ } static void @@ -511,24 +569,19 @@ mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_acl_tcam_region * mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam, - struct mlxsw_afk_element_usage *elusage) + struct mlxsw_sp_acl_tcam_vregion *vregion, + void *hints_priv) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); struct mlxsw_sp_acl_tcam_region *region; int err; region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL); if (!region) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(®ion->chunk_list); region->mlxsw_sp = mlxsw_sp; - - region->key_info = mlxsw_afk_key_info_get(afk, elusage); - if (IS_ERR(region->key_info)) { - err = PTR_ERR(region->key_info); - goto err_key_info_get; - } + region->vregion = vregion; + region->key_info = vregion->key_info; err = mlxsw_sp_acl_tcam_region_id_get(tcam, ®ion->id); if (err) @@ -547,7 +600,8 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_tcam_region_enable; - err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, region); + err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, + region, hints_priv); if (err) goto err_tcam_region_init; @@ -561,8 +615,6 @@ err_tcam_region_alloc: err_tcam_region_associate: mlxsw_sp_acl_tcam_region_id_put(tcam, region->id); err_region_id_get: - mlxsw_afk_key_info_put(region->key_info); -err_key_info_get: kfree(region); return ERR_PTR(err); } @@ -576,217 +628,372 @@ mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp, ops->region_fini(mlxsw_sp, region->priv); mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region); mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region); - mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id); - mlxsw_afk_key_info_put(region->key_info); + mlxsw_sp_acl_tcam_region_id_put(region->vregion->group->tcam, + region->id); kfree(region); } +static void +mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + unsigned long interval = vregion->tcam->vregion_rehash_intrvl; + + if (!interval) + return; + mlxsw_core_schedule_dw(&vregion->rehash_dw, + msecs_to_jiffies(interval)); +} + static int -mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage, - struct mlxsw_sp_acl_tcam_chunk *chunk) +mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion); + +static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) { - struct mlxsw_sp_acl_tcam_region *region; - bool region_created = false; + struct mlxsw_sp_acl_tcam_vregion *vregion = + container_of(work, struct mlxsw_sp_acl_tcam_vregion, + rehash_dw.work); + + /* TODO: Take rtnl lock here as the rest of the code counts on it + * now. Later, this should be replaced by per-vregion lock. + */ + rtnl_lock(); + mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion); + rtnl_unlock(); + mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); +} + +static struct mlxsw_sp_acl_tcam_vregion * +mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + struct mlxsw_afk_element_usage *elusage) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); + struct mlxsw_sp_acl_tcam_vregion *vregion; + int err; + + vregion = kzalloc(sizeof(*vregion), GFP_KERNEL); + if (!vregion) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&vregion->vchunk_list); + vregion->tcam = tcam; + vregion->mlxsw_sp = mlxsw_sp; + + vregion->key_info = mlxsw_afk_key_info_get(afk, elusage); + if (IS_ERR(vregion->key_info)) { + err = PTR_ERR(vregion->key_info); + goto err_key_info_get; + } + + vregion->region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, tcam, + vregion, NULL); + if (IS_ERR(vregion->region)) { + err = PTR_ERR(vregion->region); + goto err_region_create; + } + + list_add_tail(&vregion->tlist, &tcam->vregion_list); + + if (ops->region_rehash_hints_get) { + /* Create the delayed work for vregion periodic rehash */ + INIT_DELAYED_WORK(&vregion->rehash_dw, + mlxsw_sp_acl_tcam_vregion_rehash_work); + mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); + } + + return vregion; + +err_region_create: + mlxsw_afk_key_info_put(vregion->key_info); +err_key_info_get: + kfree(vregion); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + if (ops->region_rehash_hints_get) + cancel_delayed_work_sync(&vregion->rehash_dw); + list_del(&vregion->tlist); + if (vregion->region2) + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region2); + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region); + mlxsw_afk_key_info_put(vregion->key_info); + kfree(vregion); +} + +u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + u32 vregion_rehash_intrvl; + + if (WARN_ON(!ops->region_rehash_hints_get)) + return 0; + vregion_rehash_intrvl = tcam->vregion_rehash_intrvl; + return vregion_rehash_intrvl; +} + +int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + u32 val) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_sp_acl_tcam_vregion *vregion; + + if (val < MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN && val) + return -EINVAL; + if (WARN_ON(!ops->region_rehash_hints_get)) + return -EOPNOTSUPP; + tcam->vregion_rehash_intrvl = val; + rtnl_lock(); + list_for_each_entry(vregion, &tcam->vregion_list, tlist) { + if (val) + mlxsw_core_schedule_dw(&vregion->rehash_dw, 0); + else + cancel_delayed_work_sync(&vregion->rehash_dw); + } + rtnl_unlock(); + return 0; +} + +static int +mlxsw_sp_acl_tcam_vchunk_assoc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion; + bool vregion_created = false; bool need_split; int err; - region = mlxsw_sp_acl_tcam_group_region_find(group, priority, elusage, - &need_split); - if (region && need_split) { - /* According to priority, the chunk should belong to an - * existing region. However, this chunk needs elements - * that region does not contain. We need to split the existing - * region into two and create a new region for this chunk + vregion = mlxsw_sp_acl_tcam_group_vregion_find(group, priority, elusage, + &need_split); + if (vregion && need_split) { + /* According to priority, the vchunk should belong to an + * existing vregion. However, this vchunk needs elements + * that vregion does not contain. We need to split the existing + * vregion into two and create a new vregion for this vchunk * in between. This is not supported now. */ return -EOPNOTSUPP; } - if (!region) { - struct mlxsw_afk_element_usage region_elusage; + if (!vregion) { + struct mlxsw_afk_element_usage vregion_elusage; mlxsw_sp_acl_tcam_group_use_patterns(group, elusage, - ®ion_elusage); - region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, group->tcam, - ®ion_elusage); - if (IS_ERR(region)) - return PTR_ERR(region); - region_created = true; + &vregion_elusage); + vregion = mlxsw_sp_acl_tcam_vregion_create(mlxsw_sp, + group->tcam, + &vregion_elusage); + if (IS_ERR(vregion)) + return PTR_ERR(vregion); + vregion_created = true; } - chunk->region = region; - list_add_tail(&chunk->list, ®ion->chunk_list); + vchunk->vregion = vregion; + list_add_tail(&vchunk->list, &vregion->vchunk_list); - if (!region_created) + if (!vregion_created) return 0; - err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, group, region); + err = mlxsw_sp_acl_tcam_group_vregion_attach(mlxsw_sp, group, vregion); if (err) - goto err_group_region_attach; + goto err_group_vregion_attach; return 0; -err_group_region_attach: - mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region); +err_group_vregion_attach: + mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion); return err; } static void -mlxsw_sp_acl_tcam_chunk_deassoc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_chunk *chunk) +mlxsw_sp_acl_tcam_vchunk_deassoc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) { - struct mlxsw_sp_acl_tcam_region *region = chunk->region; + struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion; - list_del(&chunk->list); - if (list_empty(®ion->chunk_list)) { - mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, region); - mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region); + list_del(&vchunk->list); + if (list_empty(&vregion->vchunk_list)) { + mlxsw_sp_acl_tcam_group_vregion_detach(mlxsw_sp, vregion); + mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion); } } static struct mlxsw_sp_acl_tcam_chunk * mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage) + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_region *region) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_chunk *chunk; + + chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL); + if (!chunk) + return ERR_PTR(-ENOMEM); + chunk->vchunk = vchunk; + chunk->region = region; + + ops->chunk_init(region->priv, chunk->priv, vchunk->priority); + return chunk; +} + +static void +mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_chunk *chunk) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + ops->chunk_fini(chunk->priv); + kfree(chunk); +} + +static struct mlxsw_sp_acl_tcam_vchunk * +mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; int err; if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO) return ERR_PTR(-EINVAL); - chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL); - if (!chunk) + vchunk = kzalloc(sizeof(*vchunk), GFP_KERNEL); + if (!vchunk) return ERR_PTR(-ENOMEM); - chunk->priority = priority; - chunk->group = group; - chunk->ref_count = 1; + INIT_LIST_HEAD(&vchunk->ventry_list); + vchunk->priority = priority; + vchunk->group = group; + vchunk->ref_count = 1; - err = mlxsw_sp_acl_tcam_chunk_assoc(mlxsw_sp, group, priority, - elusage, chunk); + err = mlxsw_sp_acl_tcam_vchunk_assoc(mlxsw_sp, group, priority, + elusage, vchunk); if (err) - goto err_chunk_assoc; + goto err_vchunk_assoc; - ops->chunk_init(chunk->region->priv, chunk->priv, priority); - - err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node, - mlxsw_sp_acl_tcam_chunk_ht_params); + err = rhashtable_insert_fast(&group->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); if (err) goto err_rhashtable_insert; - return chunk; + vchunk->chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, + vchunk->vregion->region); + if (IS_ERR(vchunk->chunk)) { + err = PTR_ERR(vchunk->chunk); + goto err_chunk_create; + } + + return vchunk; +err_chunk_create: + rhashtable_remove_fast(&group->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); err_rhashtable_insert: - ops->chunk_fini(chunk->priv); - mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk); -err_chunk_assoc: - kfree(chunk); + mlxsw_sp_acl_tcam_vchunk_deassoc(mlxsw_sp, vchunk); +err_vchunk_assoc: + kfree(vchunk); return ERR_PTR(err); } static void -mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_chunk *chunk) +mlxsw_sp_acl_tcam_vchunk_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) { - const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_group *group = chunk->group; - - rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node, - mlxsw_sp_acl_tcam_chunk_ht_params); - ops->chunk_fini(chunk->priv); - mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk); - kfree(chunk); + struct mlxsw_sp_acl_tcam_group *group = vchunk->group; + + if (vchunk->chunk2) + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk); + rhashtable_remove_fast(&group->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); + mlxsw_sp_acl_tcam_vchunk_deassoc(mlxsw_sp, vchunk); + kfree(vchunk); } -static struct mlxsw_sp_acl_tcam_chunk * -mlxsw_sp_acl_tcam_chunk_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage) +static struct mlxsw_sp_acl_tcam_vchunk * +mlxsw_sp_acl_tcam_vchunk_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; - chunk = rhashtable_lookup_fast(&group->chunk_ht, &priority, - mlxsw_sp_acl_tcam_chunk_ht_params); - if (chunk) { - if (WARN_ON(!mlxsw_afk_key_info_subset(chunk->region->key_info, + vchunk = rhashtable_lookup_fast(&group->vchunk_ht, &priority, + mlxsw_sp_acl_tcam_vchunk_ht_params); + if (vchunk) { + if (WARN_ON(!mlxsw_afk_key_info_subset(vchunk->vregion->key_info, elusage))) return ERR_PTR(-EINVAL); - chunk->ref_count++; - return chunk; + vchunk->ref_count++; + return vchunk; } - return mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, group, - priority, elusage); + return mlxsw_sp_acl_tcam_vchunk_create(mlxsw_sp, group, + priority, elusage); } -static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_chunk *chunk) +static void +mlxsw_sp_acl_tcam_vchunk_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) { - if (--chunk->ref_count) + if (--vchunk->ref_count) return; - mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk); + mlxsw_sp_acl_tcam_vchunk_destroy(mlxsw_sp, vchunk); } -static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp) -{ - const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - - return ops->entry_priv_size; -} - -static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_sp_acl_tcam_entry *entry, - struct mlxsw_sp_acl_rule_info *rulei) +static struct mlxsw_sp_acl_tcam_entry * +mlxsw_sp_acl_tcam_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_tcam_chunk *chunk) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk; - struct mlxsw_sp_acl_tcam_region *region; + struct mlxsw_sp_acl_tcam_entry *entry; int err; - chunk = mlxsw_sp_acl_tcam_chunk_get(mlxsw_sp, group, rulei->priority, - &rulei->values.elusage); - if (IS_ERR(chunk)) - return PTR_ERR(chunk); - - region = chunk->region; + entry = kzalloc(sizeof(*entry) + ops->entry_priv_size, GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + entry->ventry = ventry; + entry->chunk = chunk; - err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv, - entry->priv, rulei); + err = ops->entry_add(mlxsw_sp, chunk->region->priv, chunk->priv, + entry->priv, ventry->rulei); if (err) goto err_entry_add; - entry->chunk = chunk; - return 0; + return entry; err_entry_add: - mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); - return err; + kfree(entry); + return ERR_PTR(err); } -static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_entry *entry) +static void mlxsw_sp_acl_tcam_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_entry *entry) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->region; - ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv); - mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); + ops->entry_del(mlxsw_sp, entry->chunk->region->priv, + entry->chunk->priv, entry->priv); + kfree(entry); } static int mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, struct mlxsw_sp_acl_tcam_entry *entry, struct mlxsw_sp_acl_rule_info *rulei) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->region; return ops->entry_action_replace(mlxsw_sp, region->priv, entry->priv, rulei); @@ -798,13 +1005,249 @@ mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, bool *activity) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->region; - return ops->entry_activity_get(mlxsw_sp, region->priv, + return ops->entry_activity_get(mlxsw_sp, entry->chunk->region->priv, entry->priv, activity); } +static int mlxsw_sp_acl_tcam_ventry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + int err; + + vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, group, rulei->priority, + &rulei->values.elusage); + if (IS_ERR(vchunk)) + return PTR_ERR(vchunk); + + ventry->vchunk = vchunk; + ventry->rulei = rulei; + ventry->entry = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, + vchunk->chunk); + if (IS_ERR(ventry->entry)) { + err = PTR_ERR(ventry->entry); + goto err_entry_create; + } + + list_add_tail(&ventry->list, &vchunk->ventry_list); + + return 0; + +err_entry_create: + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); + return err; +} + +static void mlxsw_sp_acl_tcam_ventry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; + + list_del(&ventry->list); + mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry); + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); +} + +static int +mlxsw_sp_acl_tcam_ventry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; + + return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp, + vchunk->vregion->region, + ventry->entry, rulei); +} + +static int +mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + bool *activity) +{ + return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, + ventry->entry, activity); +} + +static int +mlxsw_sp_acl_tcam_ventry_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_tcam_chunk *chunk2) +{ + struct mlxsw_sp_acl_tcam_entry *entry2; + + entry2 = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, chunk2); + if (IS_ERR(entry2)) + return PTR_ERR(entry2); + mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry); + ventry->entry = entry2; + return 0; +} + +static int +mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_region *region, + bool this_is_rollback) +{ + struct mlxsw_sp_acl_tcam_ventry *ventry; + struct mlxsw_sp_acl_tcam_chunk *chunk2; + int err; + int err2; + + chunk2 = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region); + if (IS_ERR(chunk2)) { + if (this_is_rollback) + vchunk->vregion->failed_rollback = true; + return PTR_ERR(chunk2); + } + vchunk->chunk2 = chunk2; + list_for_each_entry(ventry, &vchunk->ventry_list, list) { + err = mlxsw_sp_acl_tcam_ventry_migrate(mlxsw_sp, ventry, + vchunk->chunk2); + if (err) { + if (this_is_rollback) { + vchunk->vregion->failed_rollback = true; + return err; + } + goto rollback; + } + } + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk); + vchunk->chunk = chunk2; + vchunk->chunk2 = NULL; + return 0; + +rollback: + /* Migrate the entries back to the original chunk. If some entry + * migration fails, there's no good way how to proceed. Set the + * vregion with "failed_rollback" flag. + */ + list_for_each_entry_continue_reverse(ventry, &vchunk->ventry_list, + list) { + err2 = mlxsw_sp_acl_tcam_ventry_migrate(mlxsw_sp, ventry, + vchunk->chunk); + if (err2) { + vchunk->vregion->failed_rollback = true; + goto err_rollback; + } + } + + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); + vchunk->chunk2 = NULL; + +err_rollback: + return err; +} + +static int +mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + int err; + + list_for_each_entry(vchunk, &vregion->vchunk_list, list) { + err = mlxsw_sp_acl_tcam_vchunk_migrate_one(mlxsw_sp, vchunk, + vregion->region2, + false); + if (err) + goto rollback; + } + return 0; + +rollback: + list_for_each_entry_continue_reverse(vchunk, &vregion->vchunk_list, + list) { + mlxsw_sp_acl_tcam_vchunk_migrate_one(mlxsw_sp, vchunk, + vregion->region, true); + } + return err; +} + +static int +mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + void *hints_priv) +{ + struct mlxsw_sp_acl_tcam_region *region2, *unused_region; + int err; + + trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion); + + region2 = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, vregion->tcam, + vregion, hints_priv); + if (IS_ERR(region2)) + return PTR_ERR(region2); + + vregion->region2 = region2; + err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, region2); + if (err) + goto err_group_region_attach; + + err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion); + if (!vregion->failed_rollback) { + if (!err) { + /* In case of successful migration, region2 is used and + * the original is unused. + */ + unused_region = vregion->region; + vregion->region = vregion->region2; + } else { + /* In case of failure during migration, the original + * region is still used. + */ + unused_region = vregion->region2; + } + vregion->region2 = NULL; + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, unused_region); + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, unused_region); + } + return err; + +err_group_region_attach: + vregion->region2 = NULL; + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region2); + return err; +} + +static int +mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + void *hints_priv; + int err; + + trace_mlxsw_sp_acl_tcam_vregion_rehash(mlxsw_sp, vregion); + if (vregion->failed_rollback) + return -EBUSY; + + hints_priv = ops->region_rehash_hints_get(vregion->region->priv); + if (IS_ERR(hints_priv)) { + err = PTR_ERR(hints_priv); + if (err != -EAGAIN) + dev_err(mlxsw_sp->bus_info->dev, "Failed get rehash hints\n"); + return err; + } + + err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, hints_priv); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + if (vregion->failed_rollback) { + trace_mlxsw_sp_acl_tcam_vregion_rehash_dis(mlxsw_sp, + vregion); + dev_err(mlxsw_sp->bus_info->dev, "Failed to rollback during vregion migration fail\n"); + } + } + + ops->region_rehash_hints_put(hints_priv); + return err; +} + static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_SRC_SYS_PORT, MLXSW_AFK_ELEMENT_DMAC_32_47, @@ -859,7 +1302,7 @@ struct mlxsw_sp_acl_tcam_flower_ruleset { }; struct mlxsw_sp_acl_tcam_flower_rule { - struct mlxsw_sp_acl_tcam_entry entry; + struct mlxsw_sp_acl_tcam_ventry ventry; }; static int @@ -917,12 +1360,6 @@ mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv) return mlxsw_sp_acl_tcam_group_id(&ruleset->group); } -static size_t mlxsw_sp_acl_tcam_flower_rule_priv_size(struct mlxsw_sp *mlxsw_sp) -{ - return sizeof(struct mlxsw_sp_acl_tcam_flower_rule) + - mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp); -} - static int mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -931,8 +1368,8 @@ mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_add(mlxsw_sp, &ruleset->group, - &rule->entry, rulei); + return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->group, + &rule->ventry, rulei); } static void @@ -940,7 +1377,7 @@ mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) { struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; - mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry); + mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry); } static int @@ -957,8 +1394,8 @@ mlxsw_sp_acl_tcam_flower_rule_activity_get(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry, - activity); + return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry, + activity); } static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { @@ -968,7 +1405,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind, .ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind, .ruleset_group_id = mlxsw_sp_acl_tcam_flower_ruleset_group_id, - .rule_priv_size = mlxsw_sp_acl_tcam_flower_rule_priv_size, + .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, .rule_action_replace = mlxsw_sp_acl_tcam_flower_rule_action_replace, @@ -976,12 +1413,12 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { }; struct mlxsw_sp_acl_tcam_mr_ruleset { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; struct mlxsw_sp_acl_tcam_group group; }; struct mlxsw_sp_acl_tcam_mr_rule { - struct mlxsw_sp_acl_tcam_entry entry; + struct mlxsw_sp_acl_tcam_ventry ventry; }; static int @@ -1006,10 +1443,11 @@ mlxsw_sp_acl_tcam_mr_ruleset_add(struct mlxsw_sp *mlxsw_sp, * specific ACL Group ID which must exist in HW before multicast router * is initialized. */ - ruleset->chunk = mlxsw_sp_acl_tcam_chunk_get(mlxsw_sp, &ruleset->group, - 1, tmplt_elusage); - if (IS_ERR(ruleset->chunk)) { - err = PTR_ERR(ruleset->chunk); + ruleset->vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, + &ruleset->group, 1, + tmplt_elusage); + if (IS_ERR(ruleset->vchunk)) { + err = PTR_ERR(ruleset->vchunk); goto err_chunk_get; } @@ -1025,7 +1463,7 @@ mlxsw_sp_acl_tcam_mr_ruleset_del(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv) { struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; - mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, ruleset->chunk); + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, ruleset->vchunk); mlxsw_sp_acl_tcam_group_del(mlxsw_sp, &ruleset->group); } @@ -1054,12 +1492,6 @@ mlxsw_sp_acl_tcam_mr_ruleset_group_id(void *ruleset_priv) return mlxsw_sp_acl_tcam_group_id(&ruleset->group); } -static size_t mlxsw_sp_acl_tcam_mr_rule_priv_size(struct mlxsw_sp *mlxsw_sp) -{ - return sizeof(struct mlxsw_sp_acl_tcam_mr_rule) + - mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp); -} - static int mlxsw_sp_acl_tcam_mr_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -1068,8 +1500,8 @@ mlxsw_sp_acl_tcam_mr_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_add(mlxsw_sp, &ruleset->group, - &rule->entry, rulei); + return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->group, + &rule->ventry, rulei); } static void @@ -1077,7 +1509,7 @@ mlxsw_sp_acl_tcam_mr_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) { struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; - mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry); + mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry); } static int @@ -1087,8 +1519,8 @@ mlxsw_sp_acl_tcam_mr_rule_action_replace(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp, &rule->entry, - rulei); + return mlxsw_sp_acl_tcam_ventry_action_replace(mlxsw_sp, &rule->ventry, + rulei); } static int @@ -1097,8 +1529,8 @@ mlxsw_sp_acl_tcam_mr_rule_activity_get(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry, - activity); + return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry, + activity); } static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_mr_ops = { @@ -1108,7 +1540,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_mr_ops = { .ruleset_bind = mlxsw_sp_acl_tcam_mr_ruleset_bind, .ruleset_unbind = mlxsw_sp_acl_tcam_mr_ruleset_unbind, .ruleset_group_id = mlxsw_sp_acl_tcam_mr_ruleset_group_id, - .rule_priv_size = mlxsw_sp_acl_tcam_mr_rule_priv_size, + .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_mr_rule), .rule_add = mlxsw_sp_acl_tcam_mr_rule_add, .rule_del = mlxsw_sp_acl_tcam_mr_rule_del, .rule_action_replace = mlxsw_sp_acl_tcam_mr_rule_action_replace, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 0858d5b06353..96bd42a9fbc3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -17,6 +17,8 @@ struct mlxsw_sp_acl_tcam { unsigned long *used_groups; /* bit array */ unsigned int max_groups; unsigned int max_group_size; + struct list_head vregion_list; + u32 vregion_rehash_intrvl; /* ms */ unsigned long priv[0]; /* priv has to be always the last item */ }; @@ -26,6 +28,11 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam); void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam); +u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam); +int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + u32 val); int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u32 *priority, bool fillup_priority); @@ -43,7 +50,7 @@ struct mlxsw_sp_acl_profile_ops { struct mlxsw_sp_port *mlxsw_sp_port, bool ingress); u16 (*ruleset_group_id)(void *ruleset_priv); - size_t (*rule_priv_size)(struct mlxsw_sp *mlxsw_sp); + size_t rule_priv_size; int (*rule_add)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, struct mlxsw_sp_acl_rule_info *rulei); @@ -67,11 +74,10 @@ mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp, (MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN * BITS_PER_BYTE) struct mlxsw_sp_acl_tcam_group; +struct mlxsw_sp_acl_tcam_vregion; struct mlxsw_sp_acl_tcam_region { - struct list_head list; /* Member of a TCAM group */ - struct list_head chunk_list; /* List of chunks under this region */ - struct mlxsw_sp_acl_tcam_group *group; + struct mlxsw_sp_acl_tcam_vregion *vregion; enum mlxsw_reg_ptar_key_type key_type; u16 id; /* ACL ID and region ID - they are same */ char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN]; @@ -207,6 +213,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam, struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv, const struct mlxsw_sp_acl_ctcam_region_ops *ops); void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion); void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion, @@ -230,6 +237,9 @@ int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); +void * +mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion); +void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv); struct mlxsw_sp_acl_erp_delta; @@ -260,7 +270,11 @@ void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_erp_mask *erp_mask, struct mlxsw_sp_acl_atcam_entry *aentry); -int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion); +void * +mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion); +void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv); +int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion, + void *hints_priv); void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion); int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 9af9f5c1b25c..15f804453cd6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -102,8 +102,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return err; } break; - case FLOW_ACTION_VLAN_PUSH: - case FLOW_ACTION_VLAN_POP: { + case FLOW_ACTION_VLAN_MANGLE: { u16 proto = be16_to_cpu(act->vlan.proto); u8 prio = act->vlan.prio; u16 vid = act->vlan.vid; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6754061d9b72..52fed8c7bf1e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3814,13 +3814,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; struct fib_nh *fib_nh; - size_t alloc_size; int i; int err; - alloc_size = sizeof(*nh_grp) + - fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop); - nh_grp = kzalloc(alloc_size, GFP_KERNEL); + nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs), + GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); nh_grp->priv = fi; @@ -5066,13 +5064,11 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; struct mlxsw_sp_nexthop *nh; - size_t alloc_size; int i = 0; int err; - alloc_size = sizeof(*nh_grp) + - fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop); - nh_grp = kzalloc(alloc_size, GFP_KERNEL); + nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6), + GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&nh_grp->fib_list); @@ -6146,7 +6142,7 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) mlxsw_reg_ritr_rif_pack(ritr_pl, rif); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); - if (WARN_ON_ONCE(err)) + if (err) return err; mlxsw_reg_ritr_enable_set(ritr_pl, false); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 95e37de3e48f..1f492b7dbea8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -431,30 +431,10 @@ static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan) mlxsw_sp_bridge_vlan_destroy(bridge_vlan); } -static void mlxsw_sp_port_bridge_flags_get(struct mlxsw_sp_bridge *bridge, - struct net_device *dev, - unsigned long *brport_flags) -{ - struct mlxsw_sp_bridge_port *bridge_port; - - bridge_port = mlxsw_sp_bridge_port_find(bridge, dev); - if (WARN_ON(!bridge_port)) - return; - - memcpy(brport_flags, &bridge_port->flags, sizeof(*brport_flags)); -} - static int mlxsw_sp_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - mlxsw_sp_port_bridge_flags_get(mlxsw_sp->bridge, attr->orig_dev, - &attr->u.brport_flags); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD; diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c index b881f5d4a7f9..6006d47707cb 100644 --- a/drivers/net/ethernet/micrel/ks8695net.c +++ b/drivers/net/ethernet/micrel/ks8695net.c @@ -391,7 +391,7 @@ ks8695_tx_irq(int irq, void *dev_id) ksp->tx_buffers[buff_n].dma_ptr, ksp->tx_buffers[buff_n].length, DMA_TO_DEVICE); - dev_kfree_skb_irq(ksp->tx_buffers[buff_n].skb); + dev_consume_skb_irq(ksp->tx_buffers[buff_n].skb); ksp->tx_buffers[buff_n].skb = NULL; ksp->tx_ring_used--; } diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index b34055ac476f..e1651756bf9d 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -81,11 +81,13 @@ static void moxart_mac_free_memory(struct net_device *ndev) priv->rx_buf_size, DMA_FROM_DEVICE); if (priv->tx_desc_base) - dma_free_coherent(NULL, TX_REG_DESC_SIZE * TX_DESC_NUM, + dma_free_coherent(&priv->pdev->dev, + TX_REG_DESC_SIZE * TX_DESC_NUM, priv->tx_desc_base, priv->tx_base); if (priv->rx_desc_base) - dma_free_coherent(NULL, RX_REG_DESC_SIZE * RX_DESC_NUM, + dma_free_coherent(&priv->pdev->dev, + RX_REG_DESC_SIZE * RX_DESC_NUM, priv->rx_desc_base, priv->rx_base); kfree(priv->tx_buf_base); @@ -298,7 +300,7 @@ static void moxart_tx_finished(struct net_device *ndev) ndev->stats.tx_packets++; ndev->stats.tx_bytes += priv->tx_skb[tx_tail]->len; - dev_kfree_skb_irq(priv->tx_skb[tx_tail]); + dev_consume_skb_irq(priv->tx_skb[tx_tail]); priv->tx_skb[tx_tail] = NULL; tx_tail = TX_NEXT(tx_tail); @@ -476,6 +478,7 @@ static int moxart_mac_probe(struct platform_device *pdev) priv = netdev_priv(ndev); priv->ndev = ndev; + priv->pdev = pdev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ndev->base_addr = res->start; @@ -491,7 +494,7 @@ static int moxart_mac_probe(struct platform_device *pdev) priv->tx_buf_size = TX_BUF_SIZE; priv->rx_buf_size = RX_BUF_SIZE; - priv->tx_desc_base = dma_alloc_coherent(NULL, TX_REG_DESC_SIZE * + priv->tx_desc_base = dma_alloc_coherent(&pdev->dev, TX_REG_DESC_SIZE * TX_DESC_NUM, &priv->tx_base, GFP_DMA | GFP_KERNEL); if (!priv->tx_desc_base) { @@ -499,7 +502,7 @@ static int moxart_mac_probe(struct platform_device *pdev) goto init_fail; } - priv->rx_desc_base = dma_alloc_coherent(NULL, RX_REG_DESC_SIZE * + priv->rx_desc_base = dma_alloc_coherent(&pdev->dev, RX_REG_DESC_SIZE * RX_DESC_NUM, &priv->rx_base, GFP_DMA | GFP_KERNEL); if (!priv->rx_desc_base) { diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h index bee608b547d1..bf4c3029cd0c 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.h +++ b/drivers/net/ethernet/moxa/moxart_ether.h @@ -292,6 +292,7 @@ #define LINK_STATUS 0x4 struct moxart_mac_priv_t { + struct platform_device *pdev; void __iomem *base; unsigned int reg_maccr; unsigned int reg_imr; diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 19ce0e605096..e0340f778d8f 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1408,7 +1408,7 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index) if (skb) { ss->stats.tx_bytes += skb->len; ss->stats.tx_packets++; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); if (len) pci_unmap_single(pdev, dma_unmap_addr(&tx->info[idx], diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index b9a1a9f999ea..1a2634cbbb69 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -2173,7 +2173,7 @@ static void netdev_tx_done(struct net_device *dev) np->tx_skbuff[entry]->len, PCI_DMA_TODEVICE); /* Free the original skb. */ - dev_kfree_skb_irq(np->tx_skbuff[entry]); + dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; } if (netif_queue_stopped(dev) && diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 958fced4dacf..000009e18ff8 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1003,7 +1003,7 @@ static void do_tx_done(struct net_device *ndev) addr, len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); atomic_dec(&dev->nr_tx_skbs); } else pci_unmap_page(dev->pci_dev, diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index c805dcbebd02..aaec00912ea0 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -328,7 +328,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) } /* We must free the original skb */ - dev_kfree_skb_irq(lp->tx_skb[entry]); + dev_consume_skb_irq(lp->tx_skb[entry]); lp->tx_skb[entry] = NULL; /* and unmap DMA buffer */ dma_unmap_single(lp->device, lp->tx_laddr[entry], lp->tx_len[entry], DMA_TO_DEVICE); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 82be90075695..feda9644289d 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -3055,7 +3055,7 @@ static void tx_intr_handler(struct fifo_info *fifo_data) /* Updating the statistics block */ swstats->mem_freed += skb->truesize; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); get_info.offset++; if (get_info.offset == get_info.fifo_len + 1) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 0da7393b2ef3..b877acec5cde 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -114,7 +114,7 @@ static inline void VXGE_COMPLETE_VPATH_TX(struct vxge_fifo *fifo) /* free SKBs */ for (temp = completed; temp != skb_ptr; temp++) - dev_kfree_skb_irq(*temp); + dev_consume_skb_irq(*temp); } while (more); } diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 583e97c99e68..eeda4ed98333 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -345,7 +345,7 @@ static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask) } static int -nfp_fl_set_eth(const struct flow_action_entry *act, int idx, u32 off, +nfp_fl_set_eth(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_eth *set_eth) { u32 exact, mask; @@ -376,7 +376,7 @@ struct ipv4_ttl_word { }; static int -nfp_fl_set_ip4(const struct flow_action_entry *act, int idx, u32 off, +nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_ip4_addrs *set_ip_addr, struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos) { @@ -505,7 +505,7 @@ nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, } static int -nfp_fl_set_ip6(const struct flow_action_entry *act, int idx, u32 off, +nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_ipv6_addr *ip_dst, struct nfp_fl_set_ipv6_addr *ip_src, struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl) @@ -541,7 +541,7 @@ nfp_fl_set_ip6(const struct flow_action_entry *act, int idx, u32 off, } static int -nfp_fl_set_tport(const struct flow_action_entry *act, int idx, u32 off, +nfp_fl_set_tport(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_tport *set_tport, int opcode) { u32 exact, mask; @@ -598,8 +598,8 @@ nfp_fl_pedit(const struct flow_action_entry *act, struct nfp_fl_set_eth set_eth; size_t act_size = 0; u8 ip_proto = 0; - int idx, err; u32 offset; + int err; memset(&set_ip6_tc_hl_fl, 0, sizeof(set_ip6_tc_hl_fl)); memset(&set_ip_ttl_tos, 0, sizeof(set_ip_ttl_tos)); @@ -614,22 +614,22 @@ nfp_fl_pedit(const struct flow_action_entry *act, switch (htype) { case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - err = nfp_fl_set_eth(act, idx, offset, &set_eth); + err = nfp_fl_set_eth(act, offset, &set_eth); break; case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: - err = nfp_fl_set_ip4(act, idx, offset, &set_ip_addr, + err = nfp_fl_set_ip4(act, offset, &set_ip_addr, &set_ip_ttl_tos); break; case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: - err = nfp_fl_set_ip6(act, idx, offset, &set_ip6_dst, + err = nfp_fl_set_ip6(act, offset, &set_ip6_dst, &set_ip6_src, &set_ip6_tc_hl_fl); break; case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: - err = nfp_fl_set_tport(act, idx, offset, &set_tport, + err = nfp_fl_set_tport(act, offset, &set_tport, NFP_FL_ACTION_OPCODE_SET_TCP); break; case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: - err = nfp_fl_set_tport(act, idx, offset, &set_tport, + err = nfp_fl_set_tport(act, offset, &set_tport, NFP_FL_ACTION_OPCODE_SET_UDP); break; default: diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 56b22ea32474..cf9e1118ee8f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -45,11 +45,9 @@ nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports) { struct nfp_flower_cmsg_mac_repr *msg; struct sk_buff *skb; - unsigned int size; - size = sizeof(*msg) + num_ports * sizeof(msg->ports[0]); - skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR, - GFP_KERNEL); + skb = nfp_flower_cmsg_alloc(app, struct_size(msg, ports, num_ports), + NFP_FLOWER_CMSG_TYPE_MAC_REPR, GFP_KERNEL); if (!skb) return NULL; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index fe1469d201af..450d7296fd57 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -187,7 +187,6 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) return -EOPNOTSUPP; - flow_rule_match_enc_ports(rule, &enc_ports); if (enc_ports.mask->dst != cpu_to_be16(~0)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index dddbb0575be9..080a301f379e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -178,7 +178,7 @@ static const struct nfp_devlink_versions_simple { } nfp_devlink_versions_hwinfo[] = { { DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, "assembly.partno", }, { DEVLINK_INFO_VERSION_GENERIC_BOARD_REV, "assembly.revision", }, - { "board.vendor", /* fab */ "assembly.vendor", }, + { DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE, "assembly.vendor", }, { "board.model", /* code name */ "assembly.model", }, }; @@ -258,18 +258,33 @@ nfp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack) { struct nfp_pf *pf = devlink_priv(devlink); + const char *sn, *vendor, *part; struct nfp_nsp *nsp; char *buf = NULL; - const char *sn; int err; err = devlink_info_driver_name_put(req, "nfp"); if (err) return err; + vendor = nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor"); + part = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); sn = nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial"); - if (sn) { - err = devlink_info_serial_number_put(req, sn); + if (vendor && part && sn) { + char *buf; + + buf = kmalloc(strlen(vendor) + strlen(part) + strlen(sn) + 1, + GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = '\0'; + strcat(buf, vendor); + strcat(buf, part); + strcat(buf, sn); + + err = devlink_info_serial_number_put(req, buf); + kfree(buf); if (err) return err; } diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index c662c6f5bee3..67bf02b0763a 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -630,7 +630,7 @@ static int w90p910_ether_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!(w90p910_send_frame(dev, skb->data, skb->len))) { ether->skb = skb; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); return 0; } return -EAGAIN; diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index c9529c29a0a7..eee883a2aa8d 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -1337,7 +1337,7 @@ static irqreturn_t hamachi_interrupt(int irq, void *dev_instance) leXX_to_cpu(hmp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); hmp->tx_skbuff[entry] = NULL; } hmp->tx_ring[entry].status_n_length = 0; diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 54224d1822e3..6f8d6584f809 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -925,7 +925,7 @@ static irqreturn_t yellowfin_interrupt(int irq, void *dev_instance) /* Free the original skb. */ pci_unmap_single(yp->pci_dev, le32_to_cpu(yp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); yp->tx_skbuff[entry] = NULL; } if (yp->tx_full && @@ -983,7 +983,7 @@ static irqreturn_t yellowfin_interrupt(int irq, void *dev_instance) pci_unmap_single(yp->pci_dev, yp->tx_ring[entry<<1].addr, skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); yp->tx_skbuff[entry] = 0; /* Mark status as empty. */ yp->tx_status[entry].tx_errs = 0; diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 3b0955d34716..2d21c94f19c2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -53,7 +53,7 @@ extern const struct qed_common_ops qed_common_ops_pass; #define QED_MAJOR_VERSION 8 -#define QED_MINOR_VERSION 33 +#define QED_MINOR_VERSION 37 #define QED_REVISION_VERSION 0 #define QED_ENGINEERING_VERSION 20 diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 35c9f484eb9f..e61d1d905415 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2135,12 +2135,12 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks) struct qed_eth_pf_params *p_params = &p_hwfn->pf_params.eth_pf_params; - if (!p_params->num_vf_cons) - p_params->num_vf_cons = - ETH_PF_PARAMS_VF_CONS_DEFAULT; - qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH, - p_params->num_cons, - p_params->num_vf_cons); + if (!p_params->num_vf_cons) + p_params->num_vf_cons = + ETH_PF_PARAMS_VF_CONS_DEFAULT; + qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH, + p_params->num_cons, + p_params->num_vf_cons); p_hwfn->p_cxt_mngr->arfs_count = p_params->num_arfs_filters; break; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 417121e74ee9..37edaa847512 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12796,6 +12796,7 @@ struct public_drv_mb { #define FW_MB_PARAM_GET_PF_RDMA_BOTH 0x3 /* get MFW feature support response */ +#define FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ 0x00000001 #define FW_MB_PARAM_FEATURE_SUPPORT_EEE 0x00000002 #define FW_MB_PARAM_FEATURE_SUPPORT_VLINK 0x00010000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index e68ca83ae915..58be1c4c6668 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2216,7 +2216,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, u16 num_queues = 0; /* Since the feature controls only queue-zones, - * make sure we have the contexts [rx, tx, xdp] to + * make sure we have the contexts [rx, xdp, tcs] to * match. */ for_each_hwfn(cdev, i) { @@ -2226,7 +2226,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, u16 cids; cids = hwfn->pf_params.eth_pf_params.num_cons; - num_queues += min_t(u16, l2_queues, cids / 3); + cids /= (2 + info->num_tc); + num_queues += min_t(u16, l2_queues, cids); } /* queues might theoretically be >256, but interrupts' @@ -2870,7 +2871,8 @@ static int qed_get_coalesce(struct qed_dev *cdev, u16 *coal, void *handle) p_hwfn = p_cid->p_owner; rc = qed_get_queue_coalesce(p_hwfn, coal, handle); if (rc) - DP_NOTICE(p_hwfn, "Unable to read queue coalescing\n"); + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "Unable to read queue coalescing\n"); return rc; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index b47352643fb5..f164d4acebcb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -281,6 +281,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME) dev_info->wol_support = true; + dev_info->smart_an = qed_mcp_is_smart_an_supported(p_hwfn); + dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id; } else { qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index bb8541847aa5..cc27fd60d689 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -3654,6 +3654,12 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock, } } +bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn) +{ + return !!(p_hwfn->mcp_info->capabilities & + FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ); +} + int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 mcp_resp; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 6e1d72a669ae..2799e6741765 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -1148,6 +1148,16 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock, struct qed_resc_unlock_params *p_unlock, enum qed_resc_lock resource, bool b_is_permanent); + +/** + * @brief - Return whether management firmware support smart AN + * + * @param p_hwfn + * + * @return bool - true if feature is supported. + */ +bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn); + /** * @brief Learn of supported MFW features; To be done during early init * diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 4179c9013fc6..96ab77ae6af5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -382,6 +382,7 @@ void qed_consq_setup(struct qed_hwfn *p_hwfn); * @param p_hwfn */ void qed_consq_free(struct qed_hwfn *p_hwfn); +int qed_spq_pend_post(struct qed_hwfn *p_hwfn); /** * @file diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 888274fa208b..5a495fda9e9d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -604,6 +604,9 @@ int qed_sp_pf_update_stag(struct qed_hwfn *p_hwfn) p_ent->ramrod.pf_update.update_mf_vlan_flag = true; p_ent->ramrod.pf_update.mf_vlan = cpu_to_le16(p_hwfn->hw_info.ovlan); + if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits)) + p_ent->ramrod.pf_update.mf_vlan |= + cpu_to_le16(((u16)p_hwfn->ufp_info.tc << 13)); return qed_spq_post(p_hwfn, p_ent, NULL); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 3e0f7c46bb1b..79b311b86f66 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -397,6 +397,11 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie) qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(p_chain)); + /* Attempt to post pending requests */ + spin_lock_bh(&p_hwfn->p_spq->lock); + rc = qed_spq_pend_post(p_hwfn); + spin_unlock_bh(&p_hwfn->p_spq->lock); + return rc; } @@ -767,7 +772,7 @@ static int qed_spq_post_list(struct qed_hwfn *p_hwfn, return 0; } -static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) +int qed_spq_pend_post(struct qed_hwfn *p_hwfn) { struct qed_spq *p_spq = p_hwfn->p_spq; struct qed_spq_entry *p_ent = NULL; @@ -927,7 +932,6 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent = NULL; struct qed_spq_entry *tmp; struct qed_spq_entry *found = NULL; - int rc; if (!p_hwfn) return -EINVAL; @@ -985,12 +989,7 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, */ qed_spq_return_entry(p_hwfn, found); - /* Attempt to post pending requests */ - spin_lock_bh(&p_spq->lock); - rc = qed_spq_pend_post(p_hwfn); - spin_unlock_bh(&p_spq->lock); - - return rc; + return 0; } int qed_consq_alloc(struct qed_hwfn *p_hwfn) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 843416404aeb..63a78162cfaf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -56,7 +56,7 @@ #include <net/tc_act/tc_gact.h> #define QEDE_MAJOR_VERSION 8 -#define QEDE_MINOR_VERSION 33 +#define QEDE_MINOR_VERSION 37 #define QEDE_REVISION_VERSION 0 #define QEDE_ENGINEERING_VERSION 20 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \ @@ -497,6 +497,9 @@ struct qede_reload_args { /* Datapath functions definition */ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev); +u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback); netdev_features_t qede_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 16331c6c6fa7..c6238083e898 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -186,11 +186,13 @@ static const struct { enum { QEDE_PRI_FLAG_CMT, + QEDE_PRI_FLAG_SMART_AN_SUPPORT, /* MFW supports SmartAN */ QEDE_PRI_FLAG_LEN, }; static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = { "Coupled-Function", + "SmartAN capable", }; enum qede_ethtool_tests { @@ -404,8 +406,15 @@ static int qede_get_sset_count(struct net_device *dev, int stringset) static u32 qede_get_priv_flags(struct net_device *dev) { struct qede_dev *edev = netdev_priv(dev); + u32 flags = 0; - return (!!(edev->dev_info.common.num_hwfns > 1)) << QEDE_PRI_FLAG_CMT; + if (edev->dev_info.common.num_hwfns > 1) + flags |= BIT(QEDE_PRI_FLAG_CMT); + + if (edev->dev_info.common.smart_an) + flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT); + + return flags; } struct qede_link_mode_mapping { diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index bdf816fe5a16..31b046e24565 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1695,6 +1695,19 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; } +u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback) +{ + struct qede_dev *edev = netdev_priv(dev); + int total_txq; + + total_txq = QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc; + + return QEDE_TSS_COUNT(edev) ? + fallback(dev, skb, NULL) % total_txq : 0; +} + /* 8B udp header + 8B base tunnel header + 32B option length */ #define QEDE_MAX_TUN_HDR_LEN 48 diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 6b4d96635238..02a97c659e29 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -621,6 +621,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -656,6 +657,7 @@ static const struct net_device_ops qede_netdev_vf_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -674,6 +676,7 @@ static const struct net_device_ops qede_netdev_vf_xdp_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c index 5edbd532127d..a6886cc5654c 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c @@ -249,8 +249,8 @@ static void ql_update_stats(struct ql_adapter *qdev) spin_lock(&qdev->stats_lock); if (ql_sem_spinlock(qdev, qdev->xg_sem_mask)) { - netif_err(qdev, drv, qdev->ndev, - "Couldn't get xgmac sem.\n"); + netif_err(qdev, drv, qdev->ndev, + "Couldn't get xgmac sem.\n"); goto quit; } /* diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 059ba9429e51..096515c27263 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -1159,10 +1159,10 @@ static void ql_update_lbq(struct ql_adapter *qdev, struct rx_ring *rx_ring) map = lbq_desc->p.pg_chunk.map + lbq_desc->p.pg_chunk.offset; - dma_unmap_addr_set(lbq_desc, mapaddr, map); + dma_unmap_addr_set(lbq_desc, mapaddr, map); dma_unmap_len_set(lbq_desc, maplen, rx_ring->lbq_buf_size); - *lbq_desc->addr = cpu_to_le64(map); + *lbq_desc->addr = cpu_to_le64(map); pci_dma_sync_single_for_device(qdev->pdev, map, rx_ring->lbq_buf_size, diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 8d790313ee3d..20d2400ad300 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1204,7 +1204,7 @@ void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q) if (tpbuf->skb) { pkts_compl++; bytes_compl += tpbuf->skb->len; - dev_kfree_skb_irq(tpbuf->skb); + dev_consume_skb_irq(tpbuf->skb); tpbuf->skb = NULL; } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 1dd72137fd53..10bab1f7150a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1288,11 +1288,13 @@ static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr) static void rtl_ack_events(struct rtl8169_private *tp, u16 bits) { RTL_W16(tp, IntrStatus, bits); + mmiowb(); } static void rtl_irq_disable(struct rtl8169_private *tp) { RTL_W16(tp, IntrMask, 0); + mmiowb(); } #define RTL_EVENT_NAPI_RX (RxOK | RxErr) @@ -6192,7 +6194,6 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, struct device *d = tp_to_dev(tp); dma_addr_t mapping; u32 opts[2], len; - bool stop_queue; int frags; if (unlikely(!rtl_tx_slots_avail(tp, skb_shinfo(skb)->nr_frags))) { @@ -6234,6 +6235,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, txd->opts2 = cpu_to_le32(opts[1]); + netdev_sent_queue(dev, skb->len); + skb_tx_timestamp(skb); /* Force memory writes to complete before releasing descriptor */ @@ -6246,14 +6249,16 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, tp->cur_tx += frags + 1; - stop_queue = !rtl_tx_slots_avail(tp, MAX_SKB_FRAGS); - if (unlikely(stop_queue)) - netif_stop_queue(dev); + RTL_W8(tp, TxPoll, NPQ); - if (__netdev_sent_queue(dev, skb->len, skb->xmit_more)) - RTL_W8(tp, TxPoll, NPQ); + mmiowb(); - if (unlikely(stop_queue)) { + if (!rtl_tx_slots_avail(tp, MAX_SKB_FRAGS)) { + /* Avoid wrongly optimistic queue wake-up: rtl_tx thread must + * not miss a ring update when it notices a stopped queue. + */ + smp_wmb(); + netif_stop_queue(dev); /* Sync with rtl_tx: * - publish queue status and cur_tx ring index (write barrier) * - refresh dirty_tx ring index (read barrier). @@ -6592,7 +6597,9 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete_done(napi, work_done); + rtl_irq_enable(tp); + mmiowb(); } return work_done; diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 748fb12260a6..2b2e1c4f0dc3 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -109,8 +109,6 @@ struct rocker_world_ops { int (*port_attr_bridge_flags_set)(struct rocker_port *rocker_port, unsigned long brport_flags, struct switchdev_trans *trans); - int (*port_attr_bridge_flags_get)(const struct rocker_port *rocker_port, - unsigned long *p_brport_flags); int (*port_attr_bridge_flags_support_get)(const struct rocker_port * rocker_port, unsigned long * diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 66f72f8c46e5..5ce8d5aba603 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1583,17 +1583,6 @@ rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port, } static int -rocker_world_port_attr_bridge_flags_get(const struct rocker_port *rocker_port, - unsigned long *p_brport_flags) -{ - struct rocker_world_ops *wops = rocker_port->rocker->wops; - - if (!wops->port_attr_bridge_flags_get) - return -EOPNOTSUPP; - return wops->port_attr_bridge_flags_get(rocker_port, p_brport_flags); -} - -static int rocker_world_port_attr_bridge_flags_support_get(const struct rocker_port * rocker_port, unsigned long * @@ -2061,10 +2050,6 @@ static int rocker_port_attr_get(struct net_device *dev, int err = 0; switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - err = rocker_world_port_attr_bridge_flags_get(rocker_port, - &attr->u.brport_flags); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: err = rocker_world_port_attr_bridge_flags_support_get(rocker_port, &attr->u.brport_flags_support); diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index bea7895930f6..fa296a7c255d 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2512,16 +2512,6 @@ static int ofdpa_port_attr_bridge_flags_set(struct rocker_port *rocker_port, } static int -ofdpa_port_attr_bridge_flags_get(const struct rocker_port *rocker_port, - unsigned long *p_brport_flags) -{ - const struct ofdpa_port *ofdpa_port = rocker_port->wpriv; - - *p_brport_flags = ofdpa_port->brport_flags; - return 0; -} - -static int ofdpa_port_attr_bridge_flags_support_get(const struct rocker_port * rocker_port, unsigned long * @@ -2823,7 +2813,6 @@ struct rocker_world_ops rocker_ofdpa_ops = { .port_stop = ofdpa_port_stop, .port_attr_stp_state_set = ofdpa_port_attr_stp_state_set, .port_attr_bridge_flags_set = ofdpa_port_attr_bridge_flags_set, - .port_attr_bridge_flags_get = ofdpa_port_attr_bridge_flags_get, .port_attr_bridge_flags_support_get = ofdpa_port_attr_bridge_flags_support_get, .port_attr_bridge_ageing_time_set = ofdpa_port_attr_bridge_ageing_time_set, .port_obj_vlan_add = ofdpa_port_obj_vlan_add, diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 6062e99fa69b..e888b479c596 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -6046,6 +6046,8 @@ static const struct efx_ef10_nvram_type_info efx_ef10_nvram_types[] = { { NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS, 0, 0, "sfc_dynamic_cfg_dflt" }, { NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS, 0, 0, "sfc_exp_rom_cfg_dflt" }, { NVRAM_PARTITION_TYPE_STATUS, 0, 0, "sfc_status" }, + { NVRAM_PARTITION_TYPE_BUNDLE, 0, 0, "sfc_bundle" }, + { NVRAM_PARTITION_TYPE_BUNDLE_METADATA, 0, 0, "sfc_bundle_metadata" }, }; #define EF10_NVRAM_PARTITION_COUNT ARRAY_SIZE(efx_ef10_nvram_types) @@ -6123,7 +6125,7 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, static int efx_ef10_mtd_probe(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX); - DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT); + DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT) = { 0 }; struct efx_mcdi_mtd_partition *parts; size_t outlen, n_parts_total, i, n_parts; unsigned int type; diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 3643015a55cf..bc655ffc9e02 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -915,7 +915,7 @@ rollback: void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) { - mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100)); + mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10)); } static bool efx_default_channel_want_txqs(struct efx_channel *channel) diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index 3839eec783ea..20a5523bf9f3 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -6784,6 +6784,14 @@ * subset of the information stored in this partition. */ #define NVRAM_PARTITION_TYPE_FRU_INFORMATION 0x1d00 +/* enum: Bundle image partition */ +#define NVRAM_PARTITION_TYPE_BUNDLE 0x1e00 +/* enum: Bundle metadata partition that holds additional information related to + * a bundle update in TLV format + */ +#define NVRAM_PARTITION_TYPE_BUNDLE_METADATA 0x1e01 +/* enum: Bundle update non-volatile log output partition */ +#define NVRAM_PARTITION_TYPE_BUNDLE_LOG 0x1e02 /* enum: Start of reserved value range (firmware may use for any purpose) */ #define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00 /* enum: End of reserved value range (firmware may use for any purpose) */ diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 396ff01298cd..8702ab44d80b 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -360,8 +360,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) rc = efx_init_rx_buffers(rx_queue, atomic); if (unlikely(rc)) { /* Ensure that we don't leave the rx queue empty */ - if (rx_queue->added_count == rx_queue->removed_count) - efx_schedule_slow_fill(rx_queue); + efx_schedule_slow_fill(rx_queue); goto out; } } while ((space -= batch_size) >= batch_size); diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 22eb059086f7..06c8f282263f 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -471,7 +471,7 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, if (IS_ERR(segments)) return PTR_ERR(segments); - dev_kfree_skb_any(skb); + dev_consume_skb_any(skb); skb = segments; while (skb) { diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 3140999642ba..358e66b81926 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -666,7 +666,7 @@ static inline void ioc3_tx(struct net_device *dev) packets++; skb = ip->tx_skbs[o_entry]; bytes += skb->len; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); ip->tx_skbs[o_entry] = NULL; o_entry = (o_entry + 1) & 127; /* Next */ diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 0e1b7e960b98..f425ab528224 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -68,6 +68,8 @@ module_param(timeout, int, 0); * packets in and out, so there is place for a packet */ struct meth_private { + struct platform_device *pdev; + /* in-memory copy of MAC Control register */ u64 mac_ctrl; @@ -211,8 +213,8 @@ static void meth_check_link(struct net_device *dev) static int meth_init_tx_ring(struct meth_private *priv) { /* Init TX ring */ - priv->tx_ring = dma_alloc_coherent(NULL, TX_RING_BUFFER_SIZE, - &priv->tx_ring_dma, GFP_ATOMIC); + priv->tx_ring = dma_alloc_coherent(&priv->pdev->dev, + TX_RING_BUFFER_SIZE, &priv->tx_ring_dma, GFP_KERNEL); if (!priv->tx_ring) return -ENOMEM; @@ -236,7 +238,7 @@ static int meth_init_rx_ring(struct meth_private *priv) priv->rx_ring[i]=(rx_packet*)(priv->rx_skbs[i]->head); /* I'll need to re-sync it after each RX */ priv->rx_ring_dmas[i] = - dma_map_single(NULL, priv->rx_ring[i], + dma_map_single(&priv->pdev->dev, priv->rx_ring[i], METH_RX_BUFF_SIZE, DMA_FROM_DEVICE); mace->eth.rx_fifo = priv->rx_ring_dmas[i]; } @@ -253,7 +255,7 @@ static void meth_free_tx_ring(struct meth_private *priv) dev_kfree_skb(priv->tx_skbs[i]); priv->tx_skbs[i] = NULL; } - dma_free_coherent(NULL, TX_RING_BUFFER_SIZE, priv->tx_ring, + dma_free_coherent(&priv->pdev->dev, TX_RING_BUFFER_SIZE, priv->tx_ring, priv->tx_ring_dma); } @@ -263,7 +265,7 @@ static void meth_free_rx_ring(struct meth_private *priv) int i; for (i = 0; i < RX_RING_ENTRIES; i++) { - dma_unmap_single(NULL, priv->rx_ring_dmas[i], + dma_unmap_single(&priv->pdev->dev, priv->rx_ring_dmas[i], METH_RX_BUFF_SIZE, DMA_FROM_DEVICE); priv->rx_ring[i] = 0; priv->rx_ring_dmas[i] = 0; @@ -393,7 +395,8 @@ static void meth_rx(struct net_device* dev, unsigned long int_status) fifo_rptr = (fifo_rptr - 1) & 0x0f; } while (priv->rx_write != fifo_rptr) { - dma_unmap_single(NULL, priv->rx_ring_dmas[priv->rx_write], + dma_unmap_single(&priv->pdev->dev, + priv->rx_ring_dmas[priv->rx_write], METH_RX_BUFF_SIZE, DMA_FROM_DEVICE); status = priv->rx_ring[priv->rx_write]->status.raw; #if MFE_DEBUG @@ -454,7 +457,8 @@ static void meth_rx(struct net_device* dev, unsigned long int_status) priv->rx_ring[priv->rx_write] = (rx_packet*)skb->head; priv->rx_ring[priv->rx_write]->status.raw = 0; priv->rx_ring_dmas[priv->rx_write] = - dma_map_single(NULL, priv->rx_ring[priv->rx_write], + dma_map_single(&priv->pdev->dev, + priv->rx_ring[priv->rx_write], METH_RX_BUFF_SIZE, DMA_FROM_DEVICE); mace->eth.rx_fifo = priv->rx_ring_dmas[priv->rx_write]; ADVANCE_RX_PTR(priv->rx_write); @@ -521,7 +525,7 @@ static void meth_tx_cleanup(struct net_device* dev, unsigned long int_status) DPRINTK("RPTR points us here, but packet not done?\n"); break; } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); priv->tx_skbs[priv->tx_read] = NULL; priv->tx_ring[priv->tx_read].header.raw = 0; priv->tx_read = (priv->tx_read+1)&(TX_RING_ENTRIES-1); @@ -637,7 +641,7 @@ static void meth_tx_1page_prepare(struct meth_private *priv, } /* first page */ - catbuf = dma_map_single(NULL, buffer_data, buffer_len, + catbuf = dma_map_single(&priv->pdev->dev, buffer_data, buffer_len, DMA_TO_DEVICE); desc->data.cat_buf[0].form.start_addr = catbuf >> 3; desc->data.cat_buf[0].form.len = buffer_len - 1; @@ -663,12 +667,12 @@ static void meth_tx_2page_prepare(struct meth_private *priv, } /* first page */ - catbuf1 = dma_map_single(NULL, buffer1_data, buffer1_len, + catbuf1 = dma_map_single(&priv->pdev->dev, buffer1_data, buffer1_len, DMA_TO_DEVICE); desc->data.cat_buf[0].form.start_addr = catbuf1 >> 3; desc->data.cat_buf[0].form.len = buffer1_len - 1; /* second page */ - catbuf2 = dma_map_single(NULL, buffer2_data, buffer2_len, + catbuf2 = dma_map_single(&priv->pdev->dev, buffer2_data, buffer2_len, DMA_TO_DEVICE); desc->data.cat_buf[1].form.start_addr = catbuf2 >> 3; desc->data.cat_buf[1].form.len = buffer2_len - 1; @@ -840,6 +844,7 @@ static int meth_probe(struct platform_device *pdev) memcpy(dev->dev_addr, o2meth_eaddr, ETH_ALEN); priv = netdev_priv(dev); + priv->pdev = pdev; spin_lock_init(&priv->meth_lock); SET_NETDEV_DEV(dev, &pdev->dev); diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 808cf9816673..5b351beb78cb 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -714,7 +714,7 @@ static void sis190_tx_interrupt(struct net_device *dev, sis190_unmap_tx_skb(tp->pci_dev, skb, txd); tp->Tx_skbuff[entry] = NULL; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } if (tp->dirty_tx != dirty_tx) { diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 4bb89f74742c..6073387511f8 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1927,7 +1927,7 @@ static void sis900_finish_xmit (struct net_device *net_dev) pci_unmap_single(sis_priv->pci_dev, sis_priv->tx_ring[entry].bufptr, skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); sis_priv->tx_skbuff[entry] = NULL; sis_priv->tx_ring[entry].bufptr = 0; sis_priv->tx_ring[entry].cmdsts = 0; diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 15c62c160953..be47d864f8b9 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -1037,7 +1037,7 @@ static void epic_tx(struct net_device *dev, struct epic_private *ep) skb = ep->tx_skbuff[entry]; pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); ep->tx_skbuff[entry] = NULL; } diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 8355dfbb8ec3..b550e624500d 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1188,7 +1188,7 @@ smc911x_tx_dma_irq(void *data) DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, dev, "TX DMA irq handler\n"); BUG_ON(skb == NULL); - dma_unmap_single(NULL, tx_dmabuf, tx_dmalen, DMA_TO_DEVICE); + dma_unmap_single(lp->dev, tx_dmabuf, tx_dmalen, DMA_TO_DEVICE); netif_trans_update(dev); dev_kfree_skb_irq(skb); lp->current_tx_skb = NULL; @@ -1219,7 +1219,7 @@ smc911x_rx_dma_irq(void *data) DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, dev, "RX DMA irq handler\n"); - dma_unmap_single(NULL, rx_dmabuf, rx_dmalen, DMA_FROM_DEVICE); + dma_unmap_single(lp->dev, rx_dmabuf, rx_dmalen, DMA_FROM_DEVICE); BUG_ON(skb == NULL); lp->current_rx_skb = NULL; PRINT_PKT(skb->data, skb->len); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 20299f6f65fc..90045fffd393 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -268,7 +268,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc, int ret = -EINVAL; /* Get the status from normal w/b descriptor */ - if (likely(p->des3 & TDES3_RS1V)) { + if (likely(le32_to_cpu(p->des3) & RDES3_RDES1_VALID)) { if (likely(le32_to_cpu(p->des1) & RDES1_TIMESTAMP_AVAILABLE)) { int i = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index d1f61c25d82b..5d85742a2be0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -721,8 +721,11 @@ static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (usec * (clk / 1000000)) / 256; } @@ -731,8 +734,11 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (riwt * 256) / (clk / 1000000); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index effff171d250..f12dd59c85cf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3039,10 +3039,22 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) tx_q = &priv->tx_queue[queue]; + if (priv->tx_path_in_lpi_mode) + stmmac_disable_eee_mode(priv); + /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { - if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + /* + * There is no way to determine the number of TSO + * capable Queues. Let's use always the Queue 0 + * because if TSO is supported then at least this + * one will be capable. + */ + skb_set_queue_mapping(skb, 0); + return stmmac_tso_xmit(skb, dev); + } } if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { @@ -3057,9 +3069,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } - if (priv->tx_path_in_lpi_mode) - stmmac_disable_eee_mode(priv); - entry = tx_q->cur_tx; first_entry = entry; WARN_ON(tx_q->tx_skbuff[first_entry]); diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 7ec4eb74fe21..6fc05c106afc 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -1898,7 +1898,7 @@ static inline void cas_tx_ringN(struct cas *cp, int ring, int limit) cp->net_stats[ring].tx_packets++; cp->net_stats[ring].tx_bytes += skb->len; spin_unlock(&cp->stat_lock[ring]); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } cp->tx_old[ring] = entry; diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index 720b7ac77f3b..e9b757b03b56 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -781,7 +781,7 @@ static void bigmac_tx(struct bigmac *bp) DTX(("skb(%p) ", skb)); bp->tx_skbs[elem] = NULL; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); elem = NEXT_TX(elem); } diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index ff641cf30a4e..d007dfeba5c3 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1962,7 +1962,7 @@ static void happy_meal_tx(struct happy_meal *hp) this = &txbase[elem]; } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); dev->stats.tx_packets++; } hp->tx_old = elem; diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index dc966ddb6d81..b24c11187017 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1739,7 +1739,7 @@ static void bdx_tx_cleanup(struct bdx_priv *priv) tx_level -= db->rptr->len; /* '-' koz len is negative */ /* now should come skb pointer - free it */ - dev_kfree_skb_irq(db->rptr->addr.skb); + dev_consume_skb_irq(db->rptr->addr.skb); bdx_tx_db_inc_rptr(db); } diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 82412691ee66..27f6cf140845 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -1740,7 +1740,7 @@ static void velocity_free_tx_buf(struct velocity_info *vptr, dma_unmap_single(vptr->dev, tdinfo->skb_dma[i], le16_to_cpu(pktlen), DMA_TO_DEVICE); } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); tdinfo->skb = NULL; } diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 15bb058db392..44efffbe7970 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -630,7 +630,7 @@ static void temac_start_xmit_done(struct net_device *ndev) dma_unmap_single(ndev->dev.parent, cur_p->phys, cur_p->len, DMA_TO_DEVICE); if (cur_p->app4) - dev_kfree_skb_irq((struct sk_buff *)cur_p->app4); + dev_consume_skb_irq((struct sk_buff *)cur_p->app4); cur_p->app0 = 0; cur_p->app1 = 0; cur_p->app2 = 0; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 0789d8af7d72..ec7e7ec24ff9 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -595,7 +595,7 @@ static void axienet_start_xmit_done(struct net_device *ndev) (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), DMA_TO_DEVICE); if (cur_p->app4) - dev_kfree_skb_irq((struct sk_buff *)cur_p->app4); + dev_consume_skb_irq((struct sk_buff *)cur_p->app4); /*cur_p->phys = 0;*/ cur_p->app0 = 0; cur_p->app1 = 0; diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 639e3e99af46..b03a417d0073 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -581,7 +581,7 @@ static void xemaclite_tx_handler(struct net_device *dev) return; dev->stats.tx_bytes += lp->deferred_skb->len; - dev_kfree_skb_irq(lp->deferred_skb); + dev_consume_skb_irq(lp->deferred_skb); lp->deferred_skb = NULL; netif_trans_update(dev); /* prevent tx timeout */ netif_wake_queue(dev); diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index aee55c03def0..ed6623a9801e 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -139,7 +139,7 @@ #ifdef __ARMEB__ typedef struct sk_buff buffer_t; #define free_buffer dev_kfree_skb -#define free_buffer_irq dev_kfree_skb_irq +#define free_buffer_irq dev_consume_skb_irq #else typedef void buffer_t; #define free_buffer kfree diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c index 38ac8ef41f5f..56b7791911bf 100644 --- a/drivers/net/fddi/defxx.c +++ b/drivers/net/fddi/defxx.c @@ -3512,7 +3512,7 @@ static int dfx_xmt_done(DFX_board_t *bp) bp->descr_block_virt->xmt_data[comp].long_1, p_xmt_drv_descr->p_skb->len, DMA_TO_DEVICE); - dev_kfree_skb_irq(p_xmt_drv_descr->p_skb); + dev_consume_skb_irq(p_xmt_drv_descr->p_skb); /* * Move to start of next packet by updating completion index diff --git a/drivers/net/fddi/skfp/pcmplc.c b/drivers/net/fddi/skfp/pcmplc.c index 6ef44c480bd5..f29f5a6a45ab 100644 --- a/drivers/net/fddi/skfp/pcmplc.c +++ b/drivers/net/fddi/skfp/pcmplc.c @@ -851,6 +851,7 @@ static void pcm_fsm(struct s_smc *smc, struct s_phy *phy, int cmd) case ACTIONS(PC5_SIGNAL) : ACTIONS_DONE() ; + /* fall through */ case PC5_SIGNAL : if ((cmd != PC_SIGNAL) && (cmd != PC_TIMEOUT_LCT)) break ; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 58bbba8582b0..3377ac66a347 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1512,9 +1512,13 @@ static void geneve_link_config(struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { - struct rt6_info *rt = rt6_lookup(geneve->net, - &info->key.u.ipv6.dst, NULL, 0, - NULL, 0); + struct rt6_info *rt; + + if (!__in6_dev_get(dev)) + break; + + rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, + NULL, 0); if (rt && rt->dst.dev) ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 44de81e5f140..c589f5ae75bb 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -905,9 +905,9 @@ mcr20a_irq_clean_complete(void *context) } break; case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_SEQIRQ): - /* rx is starting */ - dev_dbg(printdev(lp), "RX is starting\n"); - mcr20a_handle_rx(lp); + /* rx is starting */ + dev_dbg(printdev(lp), "RX is starting\n"); + mcr20a_handle_rx(lp); break; case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_TXIRQ | DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile index 8a2c64dc9641..3ee95367a994 100644 --- a/drivers/net/ipvlan/Makefile +++ b/drivers/net/ipvlan/Makefile @@ -5,4 +5,5 @@ obj-$(CONFIG_IPVLAN) += ipvlan.o obj-$(CONFIG_IPVTAP) += ipvtap.o -ipvlan-objs := ipvlan_core.o ipvlan_main.o +ipvlan-objs-$(CONFIG_IPVLAN_L3S) += ipvlan_l3s.o +ipvlan-objs := ipvlan_core.o ipvlan_main.o $(ipvlan-objs-y) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index adb826f55e60..b906d2f6bd04 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -165,10 +165,9 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6); bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); void ipvlan_ht_addr_del(struct ipvl_addr *addr); -struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, - u16 proto); -unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state); +struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, + int addr_type, bool use_dest); +void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type); void ipvlan_count_rx(const struct ipvl_dev *ipvlan, unsigned int len, bool success, bool mcast); int ipvlan_link_new(struct net *src_net, struct net_device *dev, @@ -177,6 +176,36 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, void ipvlan_link_delete(struct net_device *dev, struct list_head *head); void ipvlan_link_setup(struct net_device *dev); int ipvlan_link_register(struct rtnl_link_ops *ops); +#ifdef CONFIG_IPVLAN_L3S +int ipvlan_l3s_register(struct ipvl_port *port); +void ipvlan_l3s_unregister(struct ipvl_port *port); +void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet); +int ipvlan_l3s_init(void); +void ipvlan_l3s_cleanup(void); +#else +static inline int ipvlan_l3s_register(struct ipvl_port *port) +{ + return -ENOTSUPP; +} + +static inline void ipvlan_l3s_unregister(struct ipvl_port *port) +{ +} + +static inline void ipvlan_migrate_l3s_hook(struct net *oldnet, + struct net *newnet) +{ +} + +static inline int ipvlan_l3s_init(void) +{ + return 0; +} + +static inline void ipvlan_l3s_cleanup(void) +{ +} +#endif /* CONFIG_IPVLAN_L3S */ static inline bool netif_is_ipvlan_port(const struct net_device *dev) { diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 1a8132eb2a3e..e0f5bc82b10c 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -138,7 +138,7 @@ bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) return ret; } -static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) +void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) { void *lyr3h = NULL; @@ -355,9 +355,8 @@ out: return ret; } -static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, - void *lyr3h, int addr_type, - bool use_dest) +struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, + int addr_type, bool use_dest) { struct ipvl_addr *addr = NULL; @@ -647,7 +646,9 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) case IPVLAN_MODE_L2: return ipvlan_xmit_mode_l2(skb, dev); case IPVLAN_MODE_L3: +#ifdef CONFIG_IPVLAN_L3S case IPVLAN_MODE_L3S: +#endif return ipvlan_xmit_mode_l3(skb, dev); } @@ -743,8 +744,10 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) return ipvlan_handle_mode_l2(pskb, port); case IPVLAN_MODE_L3: return ipvlan_handle_mode_l3(pskb, port); +#ifdef CONFIG_IPVLAN_L3S case IPVLAN_MODE_L3S: return RX_HANDLER_PASS; +#endif } /* Should not reach here */ @@ -753,97 +756,3 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } - -static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, - struct net_device *dev) -{ - struct ipvl_addr *addr = NULL; - struct ipvl_port *port; - void *lyr3h; - int addr_type; - - if (!dev || !netif_is_ipvlan_port(dev)) - goto out; - - port = ipvlan_port_get_rcu(dev); - if (!port || port->mode != IPVLAN_MODE_L3S) - goto out; - - lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); - if (!lyr3h) - goto out; - - addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); -out: - return addr; -} - -struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, - u16 proto) -{ - struct ipvl_addr *addr; - struct net_device *sdev; - - addr = ipvlan_skb_to_addr(skb, dev); - if (!addr) - goto out; - - sdev = addr->master->dev; - switch (proto) { - case AF_INET: - { - int err; - struct iphdr *ip4h = ip_hdr(skb); - - err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, - ip4h->tos, sdev); - if (unlikely(err)) - goto out; - break; - } -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - { - struct dst_entry *dst; - struct ipv6hdr *ip6h = ipv6_hdr(skb); - int flags = RT6_LOOKUP_F_HAS_SADDR; - struct flowi6 fl6 = { - .flowi6_iif = sdev->ifindex, - .daddr = ip6h->daddr, - .saddr = ip6h->saddr, - .flowlabel = ip6_flowinfo(ip6h), - .flowi6_mark = skb->mark, - .flowi6_proto = ip6h->nexthdr, - }; - - skb_dst_drop(skb); - dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, - skb, flags); - skb_dst_set(skb, dst); - break; - } -#endif - default: - break; - } - -out: - return skb; -} - -unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct ipvl_addr *addr; - unsigned int len; - - addr = ipvlan_skb_to_addr(skb, skb->dev); - if (!addr) - goto out; - - skb->dev = addr->master->dev; - len = skb->len + ETH_HLEN; - ipvlan_count_rx(addr->master, len, true, false); -out: - return NF_ACCEPT; -} diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c new file mode 100644 index 000000000000..d17480a911a3 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan_l3s.c @@ -0,0 +1,227 @@ +/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +#include "ipvlan.h" + +static unsigned int ipvlan_netid __read_mostly; + +struct ipvlan_netns { + unsigned int ipvl_nf_hook_refcnt; +}; + +static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, + struct net_device *dev) +{ + struct ipvl_addr *addr = NULL; + struct ipvl_port *port; + int addr_type; + void *lyr3h; + + if (!dev || !netif_is_ipvlan_port(dev)) + goto out; + + port = ipvlan_port_get_rcu(dev); + if (!port || port->mode != IPVLAN_MODE_L3S) + goto out; + + lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); +out: + return addr; +} + +static struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, + struct sk_buff *skb, u16 proto) +{ + struct ipvl_addr *addr; + struct net_device *sdev; + + addr = ipvlan_skb_to_addr(skb, dev); + if (!addr) + goto out; + + sdev = addr->master->dev; + switch (proto) { + case AF_INET: + { + struct iphdr *ip4h = ip_hdr(skb); + int err; + + err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, + ip4h->tos, sdev); + if (unlikely(err)) + goto out; + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + { + struct dst_entry *dst; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct flowi6 fl6 = { + .flowi6_iif = sdev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + skb_dst_drop(skb); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, + skb, flags); + skb_dst_set(skb, dst); + break; + } +#endif + default: + break; + } +out: + return skb; +} + +static const struct l3mdev_ops ipvl_l3mdev_ops = { + .l3mdev_l3_rcv = ipvlan_l3_rcv, +}; + +static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct ipvl_addr *addr; + unsigned int len; + + addr = ipvlan_skb_to_addr(skb, skb->dev); + if (!addr) + goto out; + + skb->dev = addr->master->dev; + len = skb->len + ETH_HLEN; + ipvlan_count_rx(addr->master, len, true, false); +out: + return NF_ACCEPT; +} + +static const struct nf_hook_ops ipvl_nfops[] = { + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +#if IS_ENABLED(CONFIG_IPV6) + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +#endif +}; + +static int ipvlan_register_nf_hook(struct net *net) +{ + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); + int err = 0; + + if (!vnet->ipvl_nf_hook_refcnt) { + err = nf_register_net_hooks(net, ipvl_nfops, + ARRAY_SIZE(ipvl_nfops)); + if (!err) + vnet->ipvl_nf_hook_refcnt = 1; + } else { + vnet->ipvl_nf_hook_refcnt++; + } + + return err; +} + +static void ipvlan_unregister_nf_hook(struct net *net) +{ + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); + + if (WARN_ON(!vnet->ipvl_nf_hook_refcnt)) + return; + + vnet->ipvl_nf_hook_refcnt--; + if (!vnet->ipvl_nf_hook_refcnt) + nf_unregister_net_hooks(net, ipvl_nfops, + ARRAY_SIZE(ipvl_nfops)); +} + +void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet) +{ + struct ipvlan_netns *old_vnet; + + ASSERT_RTNL(); + + old_vnet = net_generic(oldnet, ipvlan_netid); + if (!old_vnet->ipvl_nf_hook_refcnt) + return; + + ipvlan_register_nf_hook(newnet); + ipvlan_unregister_nf_hook(oldnet); +} + +static void ipvlan_ns_exit(struct net *net) +{ + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); + + if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) { + vnet->ipvl_nf_hook_refcnt = 0; + nf_unregister_net_hooks(net, ipvl_nfops, + ARRAY_SIZE(ipvl_nfops)); + } +} + +static struct pernet_operations ipvlan_net_ops = { + .id = &ipvlan_netid, + .size = sizeof(struct ipvlan_netns), + .exit = ipvlan_ns_exit, +}; + +int ipvlan_l3s_init(void) +{ + return register_pernet_subsys(&ipvlan_net_ops); +} + +void ipvlan_l3s_cleanup(void) +{ + unregister_pernet_subsys(&ipvlan_net_ops); +} + +int ipvlan_l3s_register(struct ipvl_port *port) +{ + struct net_device *dev = port->dev; + int ret; + + ASSERT_RTNL(); + + ret = ipvlan_register_nf_hook(read_pnet(&port->pnet)); + if (!ret) { + dev->l3mdev_ops = &ipvl_l3mdev_ops; + dev->priv_flags |= IFF_L3MDEV_RX_HANDLER; + } + + return ret; +} + +void ipvlan_l3s_unregister(struct ipvl_port *port) +{ + struct net_device *dev = port->dev; + + ASSERT_RTNL(); + + dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; + ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); + dev->l3mdev_ops = NULL; +} diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 19bdde60680c..8ec73d973079 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -9,73 +9,10 @@ #include "ipvlan.h" -static unsigned int ipvlan_netid __read_mostly; - -struct ipvlan_netns { - unsigned int ipvl_nf_hook_refcnt; -}; - -static const struct nf_hook_ops ipvl_nfops[] = { - { - .hook = ipvlan_nf_input, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = INT_MAX, - }, -#if IS_ENABLED(CONFIG_IPV6) - { - .hook = ipvlan_nf_input, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = INT_MAX, - }, -#endif -}; - -static const struct l3mdev_ops ipvl_l3mdev_ops = { - .l3mdev_l3_rcv = ipvlan_l3_rcv, -}; - -static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) -{ - ipvlan->dev->mtu = dev->mtu; -} - -static int ipvlan_register_nf_hook(struct net *net) -{ - struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); - int err = 0; - - if (!vnet->ipvl_nf_hook_refcnt) { - err = nf_register_net_hooks(net, ipvl_nfops, - ARRAY_SIZE(ipvl_nfops)); - if (!err) - vnet->ipvl_nf_hook_refcnt = 1; - } else { - vnet->ipvl_nf_hook_refcnt++; - } - - return err; -} - -static void ipvlan_unregister_nf_hook(struct net *net) -{ - struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); - - if (WARN_ON(!vnet->ipvl_nf_hook_refcnt)) - return; - - vnet->ipvl_nf_hook_refcnt--; - if (!vnet->ipvl_nf_hook_refcnt) - nf_unregister_net_hooks(net, ipvl_nfops, - ARRAY_SIZE(ipvl_nfops)); -} - static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, struct netlink_ext_ack *extack) { struct ipvl_dev *ipvlan; - struct net_device *mdev = port->dev; unsigned int flags; int err; @@ -97,17 +34,12 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, } if (nval == IPVLAN_MODE_L3S) { /* New mode is L3S */ - err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); - if (!err) { - mdev->l3mdev_ops = &ipvl_l3mdev_ops; - mdev->priv_flags |= IFF_L3MDEV_MASTER; - } else + err = ipvlan_l3s_register(port); + if (err) goto fail; } else if (port->mode == IPVLAN_MODE_L3S) { /* Old mode was L3S */ - mdev->priv_flags &= ~IFF_L3MDEV_MASTER; - ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); - mdev->l3mdev_ops = NULL; + ipvlan_l3s_unregister(port); } port->mode = nval; } @@ -166,11 +98,8 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); struct sk_buff *skb; - if (port->mode == IPVLAN_MODE_L3S) { - dev->priv_flags &= ~IFF_L3MDEV_MASTER; - ipvlan_unregister_nf_hook(dev_net(dev)); - dev->l3mdev_ops = NULL; - } + if (port->mode == IPVLAN_MODE_L3S) + ipvlan_l3s_unregister(port); netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); while ((skb = __skb_dequeue(&port->backlog)) != NULL) { @@ -446,6 +375,11 @@ static const struct header_ops ipvlan_header_ops = { .cache_update = eth_header_cache_update, }; +static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) +{ + ipvlan->dev->mtu = dev->mtu; +} + static bool netif_is_ipvlan(const struct net_device *dev) { /* both ipvlan and ipvtap devices use the same netdev_ops */ @@ -781,7 +715,6 @@ static int ipvlan_device_event(struct notifier_block *unused, case NETDEV_REGISTER: { struct net *oldnet, *newnet = dev_net(dev); - struct ipvlan_netns *old_vnet; oldnet = read_pnet(&port->pnet); if (net_eq(newnet, oldnet)) @@ -789,12 +722,7 @@ static int ipvlan_device_event(struct notifier_block *unused, write_pnet(&port->pnet, newnet); - old_vnet = net_generic(oldnet, ipvlan_netid); - if (!old_vnet->ipvl_nf_hook_refcnt) - break; - - ipvlan_register_nf_hook(newnet); - ipvlan_unregister_nf_hook(oldnet); + ipvlan_migrate_l3s_hook(oldnet, newnet); break; } case NETDEV_UNREGISTER: @@ -1068,23 +996,6 @@ static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = { }; #endif -static void ipvlan_ns_exit(struct net *net) -{ - struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); - - if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) { - vnet->ipvl_nf_hook_refcnt = 0; - nf_unregister_net_hooks(net, ipvl_nfops, - ARRAY_SIZE(ipvl_nfops)); - } -} - -static struct pernet_operations ipvlan_net_ops = { - .id = &ipvlan_netid, - .size = sizeof(struct ipvlan_netns), - .exit = ipvlan_ns_exit, -}; - static int __init ipvlan_init_module(void) { int err; @@ -1099,13 +1010,13 @@ static int __init ipvlan_init_module(void) register_inetaddr_notifier(&ipvlan_addr4_notifier_block); register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block); - err = register_pernet_subsys(&ipvlan_net_ops); + err = ipvlan_l3s_init(); if (err < 0) goto error; err = ipvlan_link_register(&ipvlan_link_ops); if (err < 0) { - unregister_pernet_subsys(&ipvlan_net_ops); + ipvlan_l3s_cleanup(); goto error; } @@ -1126,7 +1037,7 @@ error: static void __exit ipvlan_cleanup_module(void) { rtnl_link_unregister(&ipvlan_link_ops); - unregister_pernet_subsys(&ipvlan_net_ops); + ipvlan_l3s_cleanup(); unregister_netdevice_notifier(&ipvlan_notifier_block); unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); unregister_inetaddr_validator_notifier( diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c index 482004efa62d..0f772a47a18e 100644 --- a/drivers/net/phy/aquantia.c +++ b/drivers/net/phy/aquantia.c @@ -17,6 +17,7 @@ #define PHY_ID_AQR105 0x03a1b4a2 #define PHY_ID_AQR106 0x03a1b4d0 #define PHY_ID_AQR107 0x03a1b4e0 +#define PHY_ID_AQCS109 0x03a1b5c2 #define PHY_ID_AQR405 0x03a1b4b0 #define MDIO_AN_TX_VEND_STATUS1 0xc800 @@ -203,6 +204,16 @@ static struct phy_driver aqr_driver[] = { .read_status = aqr_read_status, }, { + PHY_ID_MATCH_MODEL(PHY_ID_AQCS109), + .name = "Aquantia AQCS109", + .features = PHY_10GBIT_FULL_FEATURES, + .aneg_done = genphy_c45_aneg_done, + .config_aneg = aqr_config_aneg, + .config_intr = aqr_config_intr, + .ack_interrupt = aqr_ack_interrupt, + .read_status = aqr_read_status, +}, +{ PHY_ID_MATCH_MODEL(PHY_ID_AQR405), .name = "Aquantia AQR405", .features = PHY_10GBIT_FULL_FEATURES, @@ -222,6 +233,7 @@ static struct mdio_device_id __maybe_unused aqr_tbl[] = { { PHY_ID_MATCH_MODEL(PHY_ID_AQR105) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR106) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR107) }, + { PHY_ID_MATCH_MODEL(PHY_ID_AQCS109) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR405) }, { } }; diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 25ef483bcc24..2fe2ebaf62d1 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -885,14 +885,14 @@ static void decode_txts(struct dp83640_private *dp83640, struct phy_txts *phy_txts) { struct skb_shared_hwtstamps shhwtstamps; + struct dp83640_skb_info *skb_info; struct sk_buff *skb; - u64 ns; u8 overflow; + u64 ns; /* We must already have the skb that triggered this. */ - +again: skb = skb_dequeue(&dp83640->tx_queue); - if (!skb) { pr_debug("have timestamp but tx_queue empty\n"); return; @@ -907,6 +907,11 @@ static void decode_txts(struct dp83640_private *dp83640, } return; } + skb_info = (struct dp83640_skb_info *)skb->cb; + if (time_after(jiffies, skb_info->tmo)) { + kfree_skb(skb); + goto again; + } ns = phy2txts(phy_txts); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); @@ -1459,6 +1464,7 @@ static bool dp83640_rxtstamp(struct phy_device *phydev, static void dp83640_txtstamp(struct phy_device *phydev, struct sk_buff *skb, int type) { + struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb; struct dp83640_private *dp83640 = phydev->priv; switch (dp83640->hwts_tx_en) { @@ -1471,6 +1477,7 @@ static void dp83640_txtstamp(struct phy_device *phydev, /* fall through */ case HWTSTAMP_TX_ON: skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT; skb_queue_tail(&dp83640->tx_queue, skb); break; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 90f44ba8aca7..3ccba37bd6dd 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -842,7 +842,6 @@ static int m88e1510_config_init(struct phy_device *phydev) /* SGMII-to-Copper mode initialization */ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { - /* Select page 18 */ err = marvell_set_page(phydev, 18); if (err < 0) @@ -865,21 +864,6 @@ static int m88e1510_config_init(struct phy_device *phydev) err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); if (err < 0) return err; - - /* There appears to be a bug in the 88e1512 when used in - * SGMII to copper mode, where the AN advertisement register - * clears the pause bits each time a negotiation occurs. - * This means we can never be truely sure what was advertised, - * so disable Pause support. - */ - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->advertising); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->advertising); } return m88e1318_config_init(phydev); diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 296a537cdfcb..496805c0ddfe 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -141,10 +141,9 @@ static int mv3310_hwmon_config(struct phy_device *phydev, bool enable) return ret; val = enable ? MV_V2_TEMP_CTRL_SAMPLE : MV_V2_TEMP_CTRL_DISABLE; - ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MV_V2_TEMP_CTRL, - MV_V2_TEMP_CTRL_MASK, val); - return ret < 0 ? ret : 0; + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, MV_V2_TEMP_CTRL, + MV_V2_TEMP_CTRL_MASK, val); } static void mv3310_hwmon_disable(void *data) @@ -234,8 +233,7 @@ static int mv3310_resume(struct phy_device *phydev) static int mv3310_config_init(struct phy_device *phydev) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; - int val; + int ret, val; /* Check that the PHY interface type is compatible */ if (phydev->interface != PHY_INTERFACE_MODE_SGMII && @@ -244,8 +242,8 @@ static int mv3310_config_init(struct phy_device *phydev) phydev->interface != PHY_INTERFACE_MODE_10GKR) return -ENODEV; - __set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); - __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); + __set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); + __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported); if (phydev->c45_ids.devices_in_package & MDIO_DEVS_AN) { val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); @@ -253,74 +251,13 @@ static int mv3310_config_init(struct phy_device *phydev) return val; if (val & MDIO_AN_STAT1_ABLE) - __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported); + __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->supported); } - val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT2); - if (val < 0) - return val; - - /* Ethtool does not support the WAN mode bits */ - if (val & (MDIO_PMA_STAT2_10GBSR | MDIO_PMA_STAT2_10GBLR | - MDIO_PMA_STAT2_10GBER | MDIO_PMA_STAT2_10GBLX4 | - MDIO_PMA_STAT2_10GBSW | MDIO_PMA_STAT2_10GBLW | - MDIO_PMA_STAT2_10GBEW)) - __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported); - if (val & MDIO_PMA_STAT2_10GBSR) - __set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, supported); - if (val & MDIO_PMA_STAT2_10GBLR) - __set_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, supported); - if (val & MDIO_PMA_STAT2_10GBER) - __set_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, supported); - - if (val & MDIO_PMA_STAT2_EXTABLE) { - val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE); - if (val < 0) - return val; - - if (val & (MDIO_PMA_EXTABLE_10GBT | MDIO_PMA_EXTABLE_1000BT | - MDIO_PMA_EXTABLE_100BTX | MDIO_PMA_EXTABLE_10BT)) - __set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported); - if (val & MDIO_PMA_EXTABLE_10GBLRM) - __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported); - if (val & (MDIO_PMA_EXTABLE_10GBKX4 | MDIO_PMA_EXTABLE_10GBKR | - MDIO_PMA_EXTABLE_1000BKX)) - __set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, supported); - if (val & MDIO_PMA_EXTABLE_10GBLRM) - __set_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_10GBT) - __set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_10GBKX4) - __set_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_10GBKR) - __set_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_1000BT) - __set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_1000BKX) - __set_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_100BTX) { - __set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, - supported); - __set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, - supported); - } - if (val & MDIO_PMA_EXTABLE_10BT) { - __set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, - supported); - __set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, - supported); - } - } - - linkmode_copy(phydev->supported, supported); - linkmode_and(phydev->advertising, phydev->advertising, - phydev->supported); + ret = genphy_c45_pma_read_abilities(phydev); + if (ret) + return ret; return 0; } @@ -345,7 +282,7 @@ static int mv3310_config_aneg(struct phy_device *phydev) linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); - ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, linkmode_adv_to_mii_adv_t(phydev->advertising)); @@ -355,7 +292,7 @@ static int mv3310_config_aneg(struct phy_device *phydev) changed = true; reg = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); - ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MV_AN_CTRL1000, + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MV_AN_CTRL1000, ADVERTISE_1000FULL | ADVERTISE_1000HALF, reg); if (ret < 0) return ret; @@ -369,8 +306,8 @@ static int mv3310_config_aneg(struct phy_device *phydev) else reg = 0; - ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, - MDIO_AN_10GBT_CTRL_ADV10G, reg); + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, + MDIO_AN_10GBT_CTRL_ADV10G, reg); if (ret < 0) return ret; if (ret > 0) @@ -428,16 +365,8 @@ static int mv3310_read_10gbr_status(struct phy_device *phydev) static int mv3310_read_status(struct phy_device *phydev) { - u32 mmd_mask = phydev->c45_ids.devices_in_package; int val; - /* The vendor devads do not report link status. Avoid the PHYXS - * instance as there are three, and its status depends on the MAC - * being appropriately configured for the negotiated speed. - */ - mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2) | - BIT(MDIO_MMD_PHYXS)); - phydev->speed = SPEED_UNKNOWN; phydev->duplex = DUPLEX_UNKNOWN; linkmode_zero(phydev->lp_advertising); @@ -453,7 +382,7 @@ static int mv3310_read_status(struct phy_device *phydev) if (val & MDIO_STAT1_LSTATUS) return mv3310_read_10gbr_status(phydev); - val = genphy_c45_read_link(phydev, mmd_mask); + val = genphy_c45_read_link(phydev); if (val < 0) return val; diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index c92d0fb7ec4f..7af5fa81daf6 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -47,6 +47,16 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev) /* Assume 1000base-T */ ctrl2 |= MDIO_PMA_CTRL2_1000BT; break; + case SPEED_2500: + ctrl1 |= MDIO_CTRL1_SPEED2_5G; + /* Assume 2.5Gbase-T */ + ctrl2 |= MDIO_PMA_CTRL2_2_5GBT; + break; + case SPEED_5000: + ctrl1 |= MDIO_CTRL1_SPEED5G; + /* Assume 5Gbase-T */ + ctrl2 |= MDIO_PMA_CTRL2_5GBT; + break; case SPEED_10000: ctrl1 |= MDIO_CTRL1_SPEED10G; /* Assume 10Gbase-T */ @@ -118,25 +128,40 @@ EXPORT_SYMBOL_GPL(genphy_c45_aneg_done); /** * genphy_c45_read_link - read the overall link status from the MMDs * @phydev: target phy_device struct - * @mmd_mask: MMDs to read status from * * Read the link status from the specified MMDs, and if they all indicate * that the link is up, set phydev->link to 1. If an error is encountered, * a negative errno will be returned, otherwise zero. */ -int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask) +int genphy_c45_read_link(struct phy_device *phydev) { + u32 mmd_mask = phydev->c45_ids.devices_in_package; int val, devad; bool link = true; + /* The vendor devads and C22EXT do not report link status. Avoid the + * PHYXS instance as its status may depend on the MAC being + * appropriately configured for the negotiated speed. + */ + mmd_mask &= ~(MDIO_DEVS_VEND1 | MDIO_DEVS_VEND2 | MDIO_DEVS_C22EXT | + MDIO_DEVS_PHYXS); + while (mmd_mask && link) { devad = __ffs(mmd_mask); mmd_mask &= ~BIT(devad); /* The link state is latched low so that momentary link - * drops can be detected. Do not double-read the status - * register if the link is down. + * drops can be detected. Do not double-read the status + * in polling mode to detect such short link drops. */ + if (!phy_polling_mode(phydev)) { + val = phy_read_mmd(phydev, devad, MDIO_STAT1); + if (val < 0) + return val; + else if (val & MDIO_STAT1_LSTATUS) + continue; + } + val = phy_read_mmd(phydev, devad, MDIO_STAT1); if (val < 0) return val; @@ -179,6 +204,12 @@ int genphy_c45_read_lpa(struct phy_device *phydev) if (val < 0) return val; + if (val & MDIO_AN_10GBT_STAT_LP2_5G) + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->lp_advertising); + if (val & MDIO_AN_10GBT_STAT_LP5G) + linkmode_set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + phydev->lp_advertising); if (val & MDIO_AN_10GBT_STAT_LP10G) linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, phydev->lp_advertising); @@ -209,6 +240,12 @@ int genphy_c45_read_pma(struct phy_device *phydev) case MDIO_PMA_CTRL1_SPEED1000: phydev->speed = SPEED_1000; break; + case MDIO_CTRL1_SPEED2_5G: + phydev->speed = SPEED_2500; + break; + case MDIO_CTRL1_SPEED5G: + phydev->speed = SPEED_5000; + break; case MDIO_CTRL1_SPEED10G: phydev->speed = SPEED_10000; break; @@ -256,6 +293,95 @@ int genphy_c45_read_mdix(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(genphy_c45_read_mdix); +/** + * genphy_c45_pma_read_abilities - read supported link modes from PMA + * @phydev: target phy_device struct + * + * Read the supported link modes from the PMA Status 2 (1.8) register. If bit + * 1.8.9 is set, the list of supported modes is build using the values in the + * PMA Extended Abilities (1.11) register, indicating 1000BASET an 10G related + * modes. If bit 1.11.14 is set, then the list is also extended with the modes + * in the 2.5G/5G PMA Extended register (1.21), indicating if 2.5GBASET and + * 5GBASET are supported. + */ +int genphy_c45_pma_read_abilities(struct phy_device *phydev) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT2); + if (val < 0) + return val; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + phydev->supported, + val & MDIO_PMA_STAT2_10GBSR); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + phydev->supported, + val & MDIO_PMA_STAT2_10GBLR); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, + phydev->supported, + val & MDIO_PMA_STAT2_10GBER); + + if (val & MDIO_PMA_STAT2_EXTABLE) { + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE); + if (val < 0) + return val; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBLRM); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBKX4); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBKR); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_1000BT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_1000BKX); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_100BTX); + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_100BTX); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10BT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10BT); + + if (val & MDIO_PMA_EXTABLE_NBT) { + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, + MDIO_PMA_NG_EXTABLE); + if (val < 0) + return val; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_NG_EXTABLE_2_5GBT); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_NG_EXTABLE_5GBT); + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(genphy_c45_pma_read_abilities); + /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev) @@ -266,16 +392,11 @@ EXPORT_SYMBOL_GPL(gen10g_config_aneg); int gen10g_read_status(struct phy_device *phydev) { - u32 mmd_mask = phydev->c45_ids.devices_in_package; - /* For now just lie and say it's 10G all the time */ phydev->speed = SPEED_10000; phydev->duplex = DUPLEX_FULL; - /* Avoid reading the vendor MMDs */ - mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2)); - - return genphy_c45_read_link(phydev, mmd_mask); + return genphy_c45_read_link(phydev); } EXPORT_SYMBOL_GPL(gen10g_read_status); diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 7d6aad287f84..de58a59815d5 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -4,6 +4,7 @@ */ #include <linux/export.h> #include <linux/phy.h> +#include <linux/of.h> const char *phy_speed_to_str(int speed) { @@ -338,6 +339,77 @@ size_t phy_speeds(unsigned int *speeds, size_t size, return count; } +static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) +{ + const struct phy_setting *p; + int i; + + for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) { + if (p->speed > max_speed) + linkmode_clear_bit(p->bit, phydev->supported); + else + break; + } + + return 0; +} + +int phy_set_max_speed(struct phy_device *phydev, u32 max_speed) +{ + int err; + + err = __set_phy_supported(phydev, max_speed); + if (err) + return err; + + linkmode_copy(phydev->advertising, phydev->supported); + + return 0; +} +EXPORT_SYMBOL(phy_set_max_speed); + +void of_set_phy_supported(struct phy_device *phydev) +{ + struct device_node *node = phydev->mdio.dev.of_node; + u32 max_speed; + + if (!IS_ENABLED(CONFIG_OF_MDIO)) + return; + + if (!node) + return; + + if (!of_property_read_u32(node, "max-speed", &max_speed)) + __set_phy_supported(phydev, max_speed); +} + +void of_set_phy_eee_broken(struct phy_device *phydev) +{ + struct device_node *node = phydev->mdio.dev.of_node; + u32 broken = 0; + + if (!IS_ENABLED(CONFIG_OF_MDIO)) + return; + + if (!node) + return; + + if (of_property_read_bool(node, "eee-broken-100tx")) + broken |= MDIO_EEE_100TX; + if (of_property_read_bool(node, "eee-broken-1000t")) + broken |= MDIO_EEE_1000T; + if (of_property_read_bool(node, "eee-broken-10gt")) + broken |= MDIO_EEE_10GT; + if (of_property_read_bool(node, "eee-broken-1000kx")) + broken |= MDIO_EEE_1000KX; + if (of_property_read_bool(node, "eee-broken-10gkx4")) + broken |= MDIO_EEE_10GKX4; + if (of_property_read_bool(node, "eee-broken-10gkr")) + broken |= MDIO_EEE_10GKR; + + phydev->eee_broken_modes = broken; +} + /** * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings * @phydev: The phy_device struct @@ -531,7 +603,7 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) EXPORT_SYMBOL(phy_write_mmd); /** - * __phy_modify() - Convenience function for modifying a PHY register + * __phy_modify_changed() - Convenience function for modifying a PHY register * @phydev: a pointer to a &struct phy_device * @regnum: register number * @mask: bit mask of bits to clear @@ -539,16 +611,69 @@ EXPORT_SYMBOL(phy_write_mmd); * * Unlocked helper function which allows a PHY register to be modified as * new register value = (old register value & ~mask) | set + * + * Returns negative errno, 0 if there was no change, and 1 in case of change */ -int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) +int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, + u16 set) { - int ret; + int new, ret; ret = __phy_read(phydev, regnum); if (ret < 0) return ret; - ret = __phy_write(phydev, regnum, (ret & ~mask) | set); + new = (ret & ~mask) | set; + if (new == ret) + return 0; + + ret = __phy_write(phydev, regnum, new); + + return ret < 0 ? ret : 1; +} +EXPORT_SYMBOL_GPL(__phy_modify_changed); + +/** + * phy_modify_changed - Function for modifying a PHY register + * @phydev: the phy_device struct + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +int phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_modify_changed(phydev, regnum, mask, set); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(phy_modify_changed); + +/** + * __phy_modify - Convenience function for modifying a PHY register + * @phydev: the phy_device struct + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) +{ + int ret; + + ret = __phy_modify_changed(phydev, regnum, mask, set); return ret < 0 ? ret : 0; } @@ -578,7 +703,7 @@ int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) EXPORT_SYMBOL_GPL(phy_modify); /** - * __phy_modify_mmd - Convenience function for modifying a register on MMD + * __phy_modify_mmd_changed - Function for modifying a register on MMD * @phydev: the phy_device struct * @devad: the MMD containing register to modify * @regnum: register number to modify @@ -587,17 +712,73 @@ EXPORT_SYMBOL_GPL(phy_modify); * * Unlocked helper function which allows a MMD register to be modified as * new register value = (old register value & ~mask) | set + * + * Returns negative errno, 0 if there was no change, and 1 in case of change */ -int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, - u16 mask, u16 set) +int __phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) { - int ret; + int new, ret; ret = __phy_read_mmd(phydev, devad, regnum); if (ret < 0) return ret; - ret = __phy_write_mmd(phydev, devad, regnum, (ret & ~mask) | set); + new = (ret & ~mask) | set; + if (new == ret) + return 0; + + ret = __phy_write_mmd(phydev, devad, regnum, new); + + return ret < 0 ? ret : 1; +} +EXPORT_SYMBOL_GPL(__phy_modify_mmd_changed); + +/** + * phy_modify_mmd_changed - Function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +int phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_modify_mmd_changed(phydev, devad, regnum, mask, set); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(phy_modify_mmd_changed); + +/** + * __phy_modify_mmd - Convenience function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + ret = __phy_modify_mmd_changed(phydev, devad, regnum, mask, set); return ret < 0 ? ret : 0; } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 89ead29cec68..69dc64a4dbf8 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -543,7 +543,7 @@ int phy_start_aneg(struct phy_device *phydev) if (err < 0) goto out_unlock; - if (__phy_is_started(phydev)) { + if (phy_is_started(phydev)) { if (phydev->autoneg == AUTONEG_ENABLE) { err = phy_check_link_status(phydev); } else { @@ -699,7 +699,7 @@ void phy_stop_machine(struct phy_device *phydev) cancel_delayed_work_sync(&phydev->state_queue); mutex_lock(&phydev->lock); - if (__phy_is_started(phydev)) + if (phy_is_started(phydev)) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); } @@ -752,9 +752,6 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) { struct phy_device *phydev = phy_dat; - if (!phy_is_started(phydev)) - return IRQ_NONE; /* It can't be ours. */ - if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev)) return IRQ_NONE; @@ -813,15 +810,14 @@ EXPORT_SYMBOL(phy_request_interrupt); */ void phy_stop(struct phy_device *phydev) { - mutex_lock(&phydev->lock); - - if (!__phy_is_started(phydev)) { + if (!phy_is_started(phydev)) { WARN(1, "called from state %s\n", phy_state_to_str(phydev->state)); - mutex_unlock(&phydev->lock); return; } + mutex_lock(&phydev->lock); + if (phy_interrupt_is_valid(phydev)) phy_disable_interrupts(phydev); @@ -961,8 +957,10 @@ void phy_state_machine(struct work_struct *work) * state machine would be pointless and possibly error prone when * called from phy_disconnect() synchronously. */ + mutex_lock(&phydev->lock); if (phy_polling_mode(phydev) && phy_is_started(phydev)) phy_queue_state_machine(phydev, PHY_STATE_TIME); + mutex_unlock(&phydev->lock); } /** diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 891e0178b97f..a752de2fff5e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -30,7 +30,6 @@ #include <linux/mdio.h> #include <linux/io.h> #include <linux/uaccess.h> -#include <linux/of.h> MODULE_DESCRIPTION("PHY library"); MODULE_AUTHOR("Andy Fleming"); @@ -676,13 +675,16 @@ static int get_phy_c45_devs_in_pkg(struct mii_bus *bus, int addr, int dev_addr, phy_reg = mdiobus_read(bus, addr, reg_addr); if (phy_reg < 0) return -EIO; - *devices_in_package = (phy_reg & 0xffff) << 16; + *devices_in_package = phy_reg << 16; reg_addr = MII_ADDR_C45 | dev_addr << 16 | MDIO_DEVS1; phy_reg = mdiobus_read(bus, addr, reg_addr); if (phy_reg < 0) return -EIO; - *devices_in_package |= (phy_reg & 0xffff); + *devices_in_package |= phy_reg; + + /* Bit 0 doesn't represent a device, it indicates c22 regs presence */ + *devices_in_package &= ~BIT(0); return 0; } @@ -743,13 +745,13 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id, phy_reg = mdiobus_read(bus, addr, reg_addr); if (phy_reg < 0) return -EIO; - c45_ids->device_ids[i] = (phy_reg & 0xffff) << 16; + c45_ids->device_ids[i] = phy_reg << 16; reg_addr = MII_ADDR_C45 | i << 16 | MII_PHYSID2; phy_reg = mdiobus_read(bus, addr, reg_addr); if (phy_reg < 0) return -EIO; - c45_ids->device_ids[i] |= (phy_reg & 0xffff); + c45_ids->device_ids[i] |= phy_reg; } *phy_id = 0; return 0; @@ -786,14 +788,14 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id, return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO; } - *phy_id = (phy_reg & 0xffff) << 16; + *phy_id = phy_reg << 16; /* Grab the bits from PHYIR2, and put them in the lower half */ phy_reg = mdiobus_read(bus, addr, MII_PHYSID2); if (phy_reg < 0) return -EIO; - *phy_id |= (phy_reg & 0xffff); + *phy_id |= phy_reg; return 0; } @@ -1513,7 +1515,7 @@ EXPORT_SYMBOL(phy_reset_after_clk_enable); static int genphy_config_advert(struct phy_device *phydev) { u32 advertise; - int oldadv, adv, bmsr; + int bmsr, adv; int err, changed = 0; /* Only allow advertising what this PHY supports */ @@ -1526,22 +1528,14 @@ static int genphy_config_advert(struct phy_device *phydev) phydev->advertising); /* Setup standard advertisement */ - adv = phy_read(phydev, MII_ADVERTISE); - if (adv < 0) - return adv; - - oldadv = adv; - adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | - ADVERTISE_PAUSE_ASYM); - adv |= ethtool_adv_to_mii_adv_t(advertise); - - if (adv != oldadv) { - err = phy_write(phydev, MII_ADVERTISE, adv); - - if (err < 0) - return err; + err = phy_modify_changed(phydev, MII_ADVERTISE, + ADVERTISE_ALL | ADVERTISE_100BASE4 | + ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, + ethtool_adv_to_mii_adv_t(advertise)); + if (err < 0) + return err; + if (err > 0) changed = 1; - } bmsr = phy_read(phydev, MII_BMSR); if (bmsr < 0) @@ -1555,25 +1549,20 @@ static int genphy_config_advert(struct phy_device *phydev) return changed; /* Configure gigabit if it's supported */ - adv = phy_read(phydev, MII_CTRL1000); - if (adv < 0) - return adv; - - oldadv = adv; - adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF); - + adv = 0; if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, phydev->supported) || linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, phydev->supported)) - adv |= ethtool_adv_to_mii_ctrl1000_t(advertise); + adv = ethtool_adv_to_mii_ctrl1000_t(advertise); - if (adv != oldadv) - changed = 1; - - err = phy_write(phydev, MII_CTRL1000, adv); + err = phy_modify_changed(phydev, MII_CTRL1000, + ADVERTISE_1000FULL | ADVERTISE_1000HALF, + adv); if (err < 0) return err; + if (err > 0) + changed = 1; return changed; } @@ -1588,31 +1577,16 @@ static int genphy_config_advert(struct phy_device *phydev) */ static int genphy_config_eee_advert(struct phy_device *phydev) { - int broken = phydev->eee_broken_modes; - int old_adv, adv; + int err; /* Nothing to disable */ - if (!broken) - return 0; - - /* If the following call fails, we assume that EEE is not - * supported by the phy. If we read 0, EEE is not advertised - * In both case, we don't need to continue - */ - adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV); - if (adv <= 0) + if (!phydev->eee_broken_modes) return 0; - old_adv = adv; - adv &= ~broken; - - /* Advertising remains unchanged with the broken mask */ - if (old_adv == adv) - return 0; - - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, adv); - - return 1; + err = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, + phydev->eee_broken_modes, 0); + /* If the call failed, we assume that EEE is not supported */ + return err < 0 ? 0 : err; } /** @@ -1729,10 +1703,19 @@ int genphy_update_link(struct phy_device *phydev) { int status; - /* Do a fake read */ - status = phy_read(phydev, MII_BMSR); - if (status < 0) - return status; + /* The link state is latched low so that momentary link + * drops can be detected. Do not double-read the status + * in polling mode to detect such short link drops. + */ + if (!phy_polling_mode(phydev)) { + status = phy_read(phydev, MII_BMSR); + if (status < 0) { + return status; + } else if (status & BMSR_LSTATUS) { + phydev->link = 1; + return 0; + } + } /* Read link and autonegotiation status */ status = phy_read(phydev, MII_BMSR); @@ -1965,44 +1948,6 @@ int genphy_loopback(struct phy_device *phydev, bool enable) } EXPORT_SYMBOL(genphy_loopback); -static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) -{ - switch (max_speed) { - case SPEED_10: - linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, - phydev->supported); - /* fall through */ - case SPEED_100: - linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - phydev->supported); - break; - case SPEED_1000: - break; - default: - return -ENOTSUPP; - } - - return 0; -} - -int phy_set_max_speed(struct phy_device *phydev, u32 max_speed) -{ - int err; - - err = __set_phy_supported(phydev, max_speed); - if (err) - return err; - - linkmode_copy(phydev->advertising, phydev->supported); - - return 0; -} -EXPORT_SYMBOL(phy_set_max_speed); - /** * phy_remove_link_mode - Remove a supported link mode * @phydev: phy_device structure to remove link mode from @@ -2133,48 +2078,6 @@ bool phy_validate_pause(struct phy_device *phydev, } EXPORT_SYMBOL(phy_validate_pause); -static void of_set_phy_supported(struct phy_device *phydev) -{ - struct device_node *node = phydev->mdio.dev.of_node; - u32 max_speed; - - if (!IS_ENABLED(CONFIG_OF_MDIO)) - return; - - if (!node) - return; - - if (!of_property_read_u32(node, "max-speed", &max_speed)) - __set_phy_supported(phydev, max_speed); -} - -static void of_set_phy_eee_broken(struct phy_device *phydev) -{ - struct device_node *node = phydev->mdio.dev.of_node; - u32 broken = 0; - - if (!IS_ENABLED(CONFIG_OF_MDIO)) - return; - - if (!node) - return; - - if (of_property_read_bool(node, "eee-broken-100tx")) - broken |= MDIO_EEE_100TX; - if (of_property_read_bool(node, "eee-broken-1000t")) - broken |= MDIO_EEE_1000T; - if (of_property_read_bool(node, "eee-broken-10gt")) - broken |= MDIO_EEE_10GT; - if (of_property_read_bool(node, "eee-broken-1000kx")) - broken |= MDIO_EEE_1000KX; - if (of_property_read_bool(node, "eee-broken-10gkx4")) - broken |= MDIO_EEE_10GKX4; - if (of_property_read_bool(node, "eee-broken-10gkr")) - broken |= MDIO_EEE_10GKR; - - phydev->eee_broken_modes = broken; -} - static bool phy_drv_supports_irq(struct phy_driver *phydrv) { return phydrv->config_intr && phydrv->ack_interrupt; @@ -2208,11 +2111,30 @@ static int phy_probe(struct device *dev) mutex_lock(&phydev->lock); + if (phydev->drv->probe) { + /* Deassert the reset signal */ + phy_device_reset(phydev, 0); + + err = phydev->drv->probe(phydev); + if (err) { + /* Assert the reset signal */ + phy_device_reset(phydev, 1); + goto out; + } + } + /* Start out supporting everything. Eventually, * a controller will attach, and may modify one * or both of these values */ - linkmode_copy(phydev->supported, phydrv->features); + if (phydrv->features) { + linkmode_copy(phydev->supported, phydrv->features); + } else { + err = phydrv->get_features(phydev); + if (err) + goto out; + } + of_set_phy_supported(phydev); linkmode_copy(phydev->advertising, phydev->supported); @@ -2232,20 +2154,8 @@ static int phy_probe(struct device *dev) * (e.g. hardware erratum) where the driver wants to set only one * of these bits. */ - if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features) || - test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydrv->features)) { - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported); - if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features)) - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported); - if (test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydrv->features)) - linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported); - } else { + if (!test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported) && + !test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported)) { linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, @@ -2255,17 +2165,7 @@ static int phy_probe(struct device *dev) /* Set the state to READY by default */ phydev->state = PHY_READY; - if (phydev->drv->probe) { - /* Deassert the reset signal */ - phy_device_reset(phydev, 0); - - err = phydev->drv->probe(phydev); - if (err) { - /* Assert the reset signal */ - phy_device_reset(phydev, 1); - } - } - +out: mutex_unlock(&phydev->lock); return err; @@ -2301,7 +2201,11 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) { int retval; - if (WARN_ON(!new_driver->features)) { + /* Either the features are hard coded, or dynamically + * determine. It cannot be both or neither + */ + if (WARN_ON((!new_driver->features && !new_driver->get_features) || + (new_driver->features && new_driver->get_features))) { pr_err("%s: Driver features are missing\n", new_driver->name); return -EINVAL; } diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 2e21ce42e388..59d175a5ba54 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -302,6 +302,13 @@ static void phylink_mac_config(struct phylink *pl, pl->ops->mac_config(pl->netdev, pl->link_an_mode, state); } +static void phylink_mac_config_up(struct phylink *pl, + const struct phylink_link_state *state) +{ + if (state->link) + phylink_mac_config(pl, state); +} + static void phylink_mac_an_restart(struct phylink *pl) { if (pl->link_config.an_enabled && @@ -401,12 +408,12 @@ static void phylink_resolve(struct work_struct *w) case MLO_AN_PHY: link_state = pl->phy_state; phylink_resolve_flow(pl, &link_state); - phylink_mac_config(pl, &link_state); + phylink_mac_config_up(pl, &link_state); break; case MLO_AN_FIXED: phylink_get_fixed_state(pl, &link_state); - phylink_mac_config(pl, &link_state); + phylink_mac_config_up(pl, &link_state); break; case MLO_AN_INBAND: @@ -471,6 +478,17 @@ static void phylink_run_resolve(struct phylink *pl) queue_work(system_power_efficient_wq, &pl->resolve); } +static void phylink_run_resolve_and_disable(struct phylink *pl, int bit) +{ + unsigned long state = pl->phylink_disable_state; + + set_bit(bit, &pl->phylink_disable_state); + if (state == 0) { + queue_work(system_power_efficient_wq, &pl->resolve); + flush_work(&pl->resolve); + } +} + static void phylink_fixed_poll(struct timer_list *t) { struct phylink *pl = container_of(t, struct phylink, link_poll); @@ -920,9 +938,7 @@ void phylink_stop(struct phylink *pl) if (pl->link_an_mode == MLO_AN_FIXED && !IS_ERR(pl->link_gpio)) del_timer_sync(&pl->link_poll); - set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); - queue_work(system_power_efficient_wq, &pl->resolve); - flush_work(&pl->resolve); + phylink_run_resolve_and_disable(pl, PHYLINK_DISABLE_STOPPED); } EXPORT_SYMBOL_GPL(phylink_stop); @@ -1265,6 +1281,24 @@ int phylink_get_eee_err(struct phylink *pl) EXPORT_SYMBOL_GPL(phylink_get_eee_err); /** + * phylink_init_eee() - init and check the EEE features + * @pl: a pointer to a &struct phylink returned from phylink_create() + * @clk_stop_enable: allow PHY to stop receive clock + * + * Must be called either with RTNL held or within mac_link_up() + */ +int phylink_init_eee(struct phylink *pl, bool clk_stop_enable) +{ + int ret = -EOPNOTSUPP; + + if (pl->phydev) + ret = phy_init_eee(pl->phydev, clk_stop_enable); + + return ret; +} +EXPORT_SYMBOL_GPL(phylink_init_eee); + +/** * phylink_ethtool_get_eee() - read the energy efficient ethernet parameters * @pl: a pointer to a &struct phylink returned from phylink_create() * @eee: a pointer to a &struct ethtool_eee for the read parameters @@ -1628,9 +1662,7 @@ static void phylink_sfp_link_down(void *upstream) ASSERT_RTNL(); - set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state); - queue_work(system_power_efficient_wq, &pl->resolve); - flush_work(&pl->resolve); + phylink_run_resolve_and_disable(pl, PHYLINK_DISABLE_LINK); } static void phylink_sfp_link_up(void *upstream) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index ad9db652874d..fef701bfad62 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -347,6 +347,7 @@ static int sfp_register_bus(struct sfp_bus *bus) return ret; } } + bus->socket_ops->attach(bus->sfp); if (bus->started) bus->socket_ops->start(bus->sfp); bus->netdev->sfp_bus = bus; @@ -362,6 +363,7 @@ static void sfp_unregister_bus(struct sfp_bus *bus) if (bus->registered) { if (bus->started) bus->socket_ops->stop(bus->sfp); + bus->socket_ops->detach(bus->sfp); if (bus->phydev && ops && ops->disconnect_phy) ops->disconnect_phy(bus->upstream); } diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 298ab7546929..d4635c2178d1 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -185,6 +185,7 @@ struct sfp { struct gpio_desc *gpio[GPIO_MAX]; + bool attached; unsigned int state; struct delayed_work poll; struct delayed_work timeout; @@ -1476,7 +1477,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) */ switch (sfp->sm_mod_state) { default: - if (event == SFP_E_INSERT) { + if (event == SFP_E_INSERT && sfp->attached) { sfp_module_tx_disable(sfp); sfp_sm_ins_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT); } @@ -1608,6 +1609,19 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) mutex_unlock(&sfp->sm_mutex); } +static void sfp_attach(struct sfp *sfp) +{ + sfp->attached = true; + if (sfp->state & SFP_F_PRESENT) + sfp_sm_event(sfp, SFP_E_INSERT); +} + +static void sfp_detach(struct sfp *sfp) +{ + sfp->attached = false; + sfp_sm_event(sfp, SFP_E_REMOVE); +} + static void sfp_start(struct sfp *sfp) { sfp_sm_event(sfp, SFP_E_DEV_UP); @@ -1668,6 +1682,8 @@ static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee, } static const struct sfp_socket_ops sfp_module_ops = { + .attach = sfp_attach, + .detach = sfp_detach, .start = sfp_start, .stop = sfp_stop, .module_info = sfp_module_info, @@ -1835,10 +1851,6 @@ static int sfp_probe(struct platform_device *pdev) dev_info(sfp->dev, "Host maximum power %u.%uW\n", sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10); - sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops); - if (!sfp->sfp_bus) - return -ENOMEM; - /* Get the initial state, and always signal TX disable, * since the network interface will not be up. */ @@ -1849,10 +1861,6 @@ static int sfp_probe(struct platform_device *pdev) sfp->state |= SFP_F_RATE_SELECT; sfp_set_state(sfp, sfp->state); sfp_module_tx_disable(sfp); - rtnl_lock(); - if (sfp->state & SFP_F_PRESENT) - sfp_sm_event(sfp, SFP_E_INSERT); - rtnl_unlock(); for (i = 0; i < GPIO_MAX; i++) { if (gpio_flags[i] != GPIOD_IN || !sfp->gpio[i]) @@ -1885,6 +1893,10 @@ static int sfp_probe(struct platform_device *pdev) dev_warn(sfp->dev, "No tx_disable pin: SFP modules will always be emitting.\n"); + sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops); + if (!sfp->sfp_bus) + return -ENOMEM; + return 0; } diff --git a/drivers/net/phy/sfp.h b/drivers/net/phy/sfp.h index 31b0acf337e2..64f54b0bbd8c 100644 --- a/drivers/net/phy/sfp.h +++ b/drivers/net/phy/sfp.h @@ -7,6 +7,8 @@ struct sfp; struct sfp_socket_ops { + void (*attach)(struct sfp *sfp); + void (*detach)(struct sfp *sfp); void (*start)(struct sfp *sfp); void (*stop)(struct sfp *sfp); int (*module_info)(struct sfp *sfp, struct ethtool_modinfo *modinfo); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index afd9d25d1992..958f1cf67282 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -256,17 +256,6 @@ static void __team_option_inst_mark_removed_port(struct team *team, } } -static bool __team_option_inst_tmp_find(const struct list_head *opts, - const struct team_option_inst *needle) -{ - struct team_option_inst *opt_inst; - - list_for_each_entry(opt_inst, opts, tmp_list) - if (opt_inst == needle) - return true; - return false; -} - static int __team_options_register(struct team *team, const struct team_option *option, size_t option_count) @@ -2460,7 +2449,6 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) int err = 0; int i; struct nlattr *nl_option; - LIST_HEAD(opt_inst_list); rtnl_lock(); @@ -2480,6 +2468,7 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) struct nlattr *opt_attrs[TEAM_ATTR_OPTION_MAX + 1]; struct nlattr *attr; struct nlattr *attr_data; + LIST_HEAD(opt_inst_list); enum team_option_type opt_type; int opt_port_ifindex = 0; /* != 0 for per-port options */ u32 opt_array_index = 0; @@ -2584,23 +2573,17 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) if (err) goto team_put; opt_inst->changed = true; - - /* dumb/evil user-space can send us duplicate opt, - * keep only the last one - */ - if (__team_option_inst_tmp_find(&opt_inst_list, - opt_inst)) - continue; - list_add(&opt_inst->tmp_list, &opt_inst_list); } if (!opt_found) { err = -ENOENT; goto team_put; } - } - err = team_nl_send_event_options_get(team, &opt_inst_list); + err = team_nl_send_event_options_get(team, &opt_inst_list); + if (err) + break; + } team_put: team_nl_team_put(team); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 18656c4094b3..fed298c0cb39 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -866,8 +866,6 @@ static int tun_attach(struct tun_struct *tun, struct file *file, if (rtnl_dereference(tun->xdp_prog)) sock_set_flag(&tfile->sk, SOCK_XDP); - tun_set_real_num_queues(tun); - /* device is allowed to go away first, so no need to hold extra * refcnt. */ @@ -879,6 +877,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; + tun_set_real_num_queues(tun); out: return err; } diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index 78b16eb9e58c..63aaae487995 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -361,8 +361,8 @@ static int usbpn_probe(struct usb_interface *intf, const struct usb_device_id *i else return -EINVAL; - dev = alloc_netdev(sizeof(*pnd) + sizeof(pnd->urbs[0]) * rxq_size, - ifname, NET_NAME_UNKNOWN, usbpn_setup); + dev = alloc_netdev(struct_size(pnd, urbs, rxq_size), ifname, + NET_NAME_UNKNOWN, usbpn_setup); if (!dev) return -ENOMEM; diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index f4247b275e09..63e44e746ccc 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1011,6 +1011,7 @@ static int pegasus_ioctl(struct net_device *net, struct ifreq *rq, int cmd) switch (cmd) { case SIOCDEVPRIVATE: data[0] = pegasus->phy; + /* fall through */ case SIOCDEVPRIVATE + 1: read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); res = 0; diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 3f145e4c6c08..59dbdbb5feff 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -847,6 +847,7 @@ static int rtl8150_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) switch (cmd) { case SIOCDEVPRIVATE: data[0] = dev->phy; + /* fall through */ case SIOCDEVPRIVATE + 1: read_mii_word(dev, dev->phy, (data[1] & 0x1f), &data[3]); break; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index f412ea1cef18..fbf890ebbeae 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -540,8 +540,10 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); + /* fall through */ case XDP_ABORTED: trace_xdp_exception(rq->dev, xdp_prog, act); + /* fall through */ case XDP_DROP: goto err_xdp; } @@ -661,8 +663,10 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); + /* fall through */ case XDP_ABORTED: trace_xdp_exception(rq->dev, xdp_prog, act); + /* fall through */ case XDP_DROP: goto drop; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 2a0edd4653e3..7eb38ea9ba56 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644); #define VIRTIO_XDP_TX BIT(0) #define VIRTIO_XDP_REDIR BIT(1) +#define VIRTIO_XDP_FLAG BIT(0) + /* RX packet size EWMA. The average packet size is used to determine the packet * buffer size when refilling RX rings. As the entire RX ring may be refilled * at once, the weight is chosen so that the EWMA will be insensitive to short- @@ -252,6 +254,21 @@ struct padded_vnet_hdr { char padding[4]; }; +static bool is_xdp_frame(void *ptr) +{ + return (unsigned long)ptr & VIRTIO_XDP_FLAG; +} + +static void *xdp_to_ptr(struct xdp_frame *ptr) +{ + return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); +} + +static struct xdp_frame *ptr_to_xdp(void *ptr) +{ + return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); +} + /* Converting between virtqueue no. and kernel tx/rx queue no. * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq */ @@ -462,7 +479,8 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, sg_init_one(sq->sg, xdpf->data, xdpf->len); - err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC); + err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), + GFP_ATOMIC); if (unlikely(err)) return -ENOSPC; /* Caller handle free/refcnt */ @@ -482,36 +500,47 @@ static int virtnet_xdp_xmit(struct net_device *dev, { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = vi->rq; - struct xdp_frame *xdpf_sent; struct bpf_prog *xdp_prog; struct send_queue *sq; unsigned int len; + int packets = 0; + int bytes = 0; int drops = 0; int kicks = 0; int ret, err; + void *ptr; int i; - sq = virtnet_xdp_sq(vi); - - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { - ret = -EINVAL; - drops = n; - goto out; - } - /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. */ xdp_prog = rcu_dereference(rq->xdp_prog); - if (!xdp_prog) { - ret = -ENXIO; + if (!xdp_prog) + return -ENXIO; + + sq = virtnet_xdp_sq(vi); + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { + ret = -EINVAL; drops = n; goto out; } /* Free up any pending old buffers before queueing new ones. */ - while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) - xdp_return_frame(xdpf_sent); + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(is_xdp_frame(ptr))) { + struct xdp_frame *frame = ptr_to_xdp(ptr); + + bytes += frame->len; + xdp_return_frame(frame); + } else { + struct sk_buff *skb = ptr; + + bytes += skb->len; + napi_consume_skb(skb, false); + } + packets++; + } for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; @@ -530,6 +559,8 @@ static int virtnet_xdp_xmit(struct net_device *dev, } out: u64_stats_update_begin(&sq->stats.syncp); + sq->stats.bytes += bytes; + sq->stats.packets += packets; sq->stats.xdp_tx += n; sq->stats.xdp_tx_drops += drops; sq->stats.kicks += kicks; @@ -1333,18 +1364,26 @@ static int virtnet_receive(struct receive_queue *rq, int budget, static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { - struct sk_buff *skb; unsigned int len; unsigned int packets = 0; unsigned int bytes = 0; + void *ptr; - while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) { - pr_debug("Sent skb %p\n", skb); + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(!is_xdp_frame(ptr))) { + struct sk_buff *skb = ptr; - bytes += skb->len; - packets++; + pr_debug("Sent skb %p\n", skb); + + bytes += skb->len; + napi_consume_skb(skb, in_napi); + } else { + struct xdp_frame *frame = ptr_to_xdp(ptr); - napi_consume_skb(skb, in_napi); + bytes += frame->len; + xdp_return_frame(frame); + } + packets++; } /* Avoid overhead when no packets have been processed @@ -1359,6 +1398,16 @@ static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) u64_stats_update_end(&sq->stats.syncp); } +static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) +{ + if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) + return false; + else if (q < vi->curr_queue_pairs) + return true; + else + return false; +} + static void virtnet_poll_cleantx(struct receive_queue *rq) { struct virtnet_info *vi = rq->vq->vdev->priv; @@ -1366,7 +1415,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) struct send_queue *sq = &vi->sq[index]; struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); - if (!sq->napi.weight) + if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) return; if (__netif_tx_trylock(txq)) { @@ -1443,8 +1492,16 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) { struct send_queue *sq = container_of(napi, struct send_queue, napi); struct virtnet_info *vi = sq->vq->vdev->priv; - struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq)); + unsigned int index = vq2txq(sq->vq); + struct netdev_queue *txq; + if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { + /* We don't need to enable cb for XDP */ + napi_complete_done(napi, 0); + return 0; + } + + txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); @@ -2396,6 +2453,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return -ENOMEM; } + old_prog = rtnl_dereference(vi->rq[0].xdp_prog); + if (!prog && !old_prog) + return 0; + if (prog) { prog = bpf_prog_add(prog, vi->max_queue_pairs - 1); if (IS_ERR(prog)) @@ -2403,36 +2464,62 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } /* Make sure NAPI is not using any XDP TX queues for RX. */ - if (netif_running(dev)) - for (i = 0; i < vi->max_queue_pairs; i++) + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { napi_disable(&vi->rq[i].napi); + virtnet_napi_tx_disable(&vi->sq[i].napi); + } + } + + if (!prog) { + for (i = 0; i < vi->max_queue_pairs; i++) { + rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0) + virtnet_restore_guest_offloads(vi); + } + synchronize_net(); + } - netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); err = _virtnet_set_queues(vi, curr_qp + xdp_qp); if (err) goto err; + netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); vi->xdp_queue_pairs = xdp_qp; - for (i = 0; i < vi->max_queue_pairs; i++) { - old_prog = rtnl_dereference(vi->rq[i].xdp_prog); - rcu_assign_pointer(vi->rq[i].xdp_prog, prog); - if (i == 0) { - if (!old_prog) + if (prog) { + for (i = 0; i < vi->max_queue_pairs; i++) { + rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); - if (!prog) - virtnet_restore_guest_offloads(vi); } + } + + for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev)) + if (netif_running(dev)) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } } return 0; err: - for (i = 0; i < vi->max_queue_pairs; i++) - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + if (!prog) { + virtnet_clear_guest_offloads(vi); + for (i = 0; i < vi->max_queue_pairs; i++) + rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); + } + + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } + } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); return err; @@ -2614,16 +2701,6 @@ static void free_receive_page_frags(struct virtnet_info *vi) put_page(vi->rq[i].alloc_frag.page); } -static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) -{ - if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) - return false; - else if (q < vi->curr_queue_pairs) - return true; - else - return false; -} - static void free_unused_bufs(struct virtnet_info *vi) { void *buf; @@ -2632,10 +2709,10 @@ static void free_unused_bufs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_raw_buffer_queue(vi, i)) + if (!is_xdp_frame(buf)) dev_kfree_skb(buf); else - put_page(virt_to_head_page(buf)); + xdp_return_frame(ptr_to_xdp(buf)); } } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c0cd1c022e77..33edc78e818d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2293,7 +2293,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, struct pcpu_sw_netstats *tx_stats, *rx_stats; union vxlan_addr loopback; union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip; - struct net_device *dev = skb->dev; + struct net_device *dev; int len = skb->len; tx_stats = this_cpu_ptr(src_vxlan->dev->tstats); @@ -2313,9 +2313,15 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, #endif } + rcu_read_lock(); + dev = skb->dev; + if (unlikely(!(dev->flags & IFF_UP))) { + kfree_skb(skb); + goto drop; + } + if (dst_vxlan->cfg.flags & VXLAN_F_LEARN) - vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, 0, - vni); + vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni); u64_stats_update_begin(&tx_stats->syncp); tx_stats->tx_packets++; @@ -2328,8 +2334,10 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, rx_stats->rx_bytes += len; u64_stats_update_end(&rx_stats->syncp); } else { +drop: dev->stats.rx_dropped++; } + rcu_read_unlock(); } static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index d5dc823f781e..fa78d2b14136 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -1575,7 +1575,7 @@ try: dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); dpriv->tx_skbuff[cur] = NULL; ++dpriv->tx_dirty; } else { diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 66d889d54e58..a08f04c3f644 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -482,7 +482,7 @@ static int hdlc_tx_done(struct ucc_hdlc_private *priv) memset(priv->tx_buffer + (be32_to_cpu(bd->buf) - priv->dma_tx_addr), 0, skb->len); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); priv->tx_skbuff[priv->skb_dirtytx] = NULL; priv->skb_dirtytx = diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c index d573a57bc301..459ce52a0a4d 100644 --- a/drivers/net/wan/wanxl.c +++ b/drivers/net/wan/wanxl.c @@ -565,7 +565,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev, u32 plx_phy; /* PLX PCI base address */ u32 mem_phy; /* memory PCI base addr */ u8 __iomem *mem; /* memory virtual base addr */ - int i, ports, alloc_size; + int i, ports; #ifndef MODULE pr_info_once("%s\n", version); @@ -601,8 +601,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev, default: ports = 4; } - alloc_size = sizeof(struct card) + ports * sizeof(struct port); - card = kzalloc(alloc_size, GFP_KERNEL); + card = kzalloc(struct_size(card, ports, ports), GFP_KERNEL); if (card == NULL) { pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c index 0b602951ff6b..d28b96d06919 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/net/wimax/i2400m/rx.c @@ -1260,8 +1260,8 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) goto error_msg_hdr_check; result = -EIO; num_pls = le16_to_cpu(msg_hdr->num_pls); - pl_itr = sizeof(*msg_hdr) + /* Check payload descriptor(s) */ - num_pls * sizeof(msg_hdr->pld[0]); + /* Check payload descriptor(s) */ + pl_itr = struct_size(msg_hdr, pld, num_pls); pl_itr = ALIGN(pl_itr, I2400M_PL_ALIGN); if (pl_itr > skb_len) { /* got all the payload descriptors? */ dev_err(dev, "RX: HW BUG? message too short (%u bytes) for " diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index f8eb66ef2944..73842a830645 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -210,6 +210,7 @@ retry: msleep(10); /* give the device some time */ goto retry; } + /* fall through */ case -EINVAL: /* while removing driver */ case -ENODEV: /* dev disconnect ... */ case -ENOENT: /* just ignore it */ diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 399b501f3c3c..e8891f5fc83a 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -548,7 +548,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { { .id = WCN3990_HW_1_0_DEV_VERSION, .dev_id = 0, - .bus = ATH10K_BUS_PCI, + .bus = ATH10K_BUS_SNOC, .name = "wcn3990 hw1.0", .continuous_frag_desc = true, .tx_chain_mask = 0x7, diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig index 491ca3c8b43c..83d5bceea08f 100644 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@ -1,6 +1,6 @@ config IWLWIFI tristate "Intel Wireless WiFi Next Gen AGN - Wireless-N/Advanced-N/Ultimate-N (iwlwifi) " - depends on PCI && HAS_IOMEM + depends on PCI && HAS_IOMEM && CFG80211 select FW_LOADER ---help--- Select to build the driver supporting the: @@ -47,6 +47,7 @@ if IWLWIFI config IWLWIFI_LEDS bool depends on LEDS_CLASS=y || LEDS_CLASS=IWLWIFI + depends on IWLMVM || IWLDVM select LEDS_TRIGGERS select MAC80211_LEDS default y diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c index 497e762978cc..b2cabce1d74d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c @@ -212,24 +212,24 @@ void mt76x0_get_tx_power_per_rate(struct mt76x02_dev *dev) mt76x02_add_rate_power_offset(t, delta); } -void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info) +void mt76x0_get_power_info(struct mt76x02_dev *dev, s8 *tp) { struct mt76x0_chan_map { u8 chan; u8 offset; } chan_map[] = { - { 2, 0 }, { 4, 1 }, { 6, 2 }, { 8, 3 }, - { 10, 4 }, { 12, 5 }, { 14, 6 }, { 38, 0 }, - { 44, 1 }, { 48, 2 }, { 54, 3 }, { 60, 4 }, - { 64, 5 }, { 102, 6 }, { 108, 7 }, { 112, 8 }, - { 118, 9 }, { 124, 10 }, { 128, 11 }, { 134, 12 }, - { 140, 13 }, { 151, 14 }, { 157, 15 }, { 161, 16 }, - { 167, 17 }, { 171, 18 }, { 173, 19 }, + { 2, 0 }, { 4, 2 }, { 6, 4 }, { 8, 6 }, + { 10, 8 }, { 12, 10 }, { 14, 12 }, { 38, 0 }, + { 44, 2 }, { 48, 4 }, { 54, 6 }, { 60, 8 }, + { 64, 10 }, { 102, 12 }, { 108, 14 }, { 112, 16 }, + { 118, 18 }, { 124, 20 }, { 128, 22 }, { 134, 24 }, + { 140, 26 }, { 151, 28 }, { 157, 30 }, { 161, 32 }, + { 167, 34 }, { 171, 36 }, { 175, 38 }, }; struct ieee80211_channel *chan = dev->mt76.chandef.chan; u8 offset, addr; + int i, idx = 0; u16 data; - int i; if (mt76x0_tssi_enabled(dev)) { s8 target_power; @@ -239,14 +239,14 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info) else data = mt76x02_eeprom_get(dev, MT_EE_2G_TARGET_POWER); target_power = (data & 0xff) - dev->mt76.rate_power.ofdm[7]; - info[0] = target_power + mt76x0_get_delta(dev); - info[1] = 0; + *tp = target_power + mt76x0_get_delta(dev); return; } for (i = 0; i < ARRAY_SIZE(chan_map); i++) { - if (chan_map[i].chan <= chan->hw_value) { + if (chan->hw_value <= chan_map[i].chan) { + idx = (chan->hw_value == chan_map[i].chan); offset = chan_map[i].offset; break; } @@ -258,13 +258,16 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info) addr = MT_EE_TX_POWER_DELTA_BW80 + offset; } else { switch (chan->hw_value) { + case 42: + offset = 2; + break; case 58: offset = 8; break; case 106: offset = 14; break; - case 112: + case 122: offset = 20; break; case 155: @@ -277,14 +280,9 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info) } data = mt76x02_eeprom_get(dev, addr); - - info[0] = data; - if (!info[0] || info[0] > 0x3f) - info[0] = 5; - - info[1] = data >> 8; - if (!info[1] || info[1] > 0x3f) - info[1] = 5; + *tp = data >> (8 * idx); + if (*tp < 0 || *tp > 0x3f) + *tp = 5; } static int mt76x0_check_eeprom(struct mt76x02_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h index ee9ade9f3c8b..42b259f90b6d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h @@ -26,7 +26,7 @@ struct mt76x02_dev; int mt76x0_eeprom_init(struct mt76x02_dev *dev); void mt76x0_read_rx_gain(struct mt76x02_dev *dev); void mt76x0_get_tx_power_per_rate(struct mt76x02_dev *dev); -void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info); +void mt76x0_get_power_info(struct mt76x02_dev *dev, s8 *tp); static inline s8 s6_to_s8(u32 val) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c index 5a4c6f34267e..1117cdc15b04 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c @@ -845,17 +845,17 @@ static void mt76x0_phy_tssi_calibrate(struct mt76x02_dev *dev) void mt76x0_phy_set_txpower(struct mt76x02_dev *dev) { struct mt76_rate_power *t = &dev->mt76.rate_power; - u8 info[2]; + s8 info; mt76x0_get_tx_power_per_rate(dev); - mt76x0_get_power_info(dev, info); + mt76x0_get_power_info(dev, &info); - mt76x02_add_rate_power_offset(t, info[0]); + mt76x02_add_rate_power_offset(t, info); mt76x02_limit_rate_power(t, dev->mt76.txpower_conf); dev->mt76.txpower_cur = mt76x02_get_max_rate_power(t); - mt76x02_add_rate_power_offset(t, -info[0]); + mt76x02_add_rate_power_offset(t, -info); - mt76x02_phy_set_txpower(dev, info[0], info[1]); + mt76x02_phy_set_txpower(dev, info, info); } void mt76x0_phy_calibrate(struct mt76x02_dev *dev, bool power_on) diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index bd10165d7eec..4d4b07701149 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -164,6 +164,12 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue) } sdio_claim_host(func); + /* + * To guarantee that the SDIO card is power cycled, as required to make + * the FW programming to succeed, let's do a brute force HW reset. + */ + mmc_hw_reset(card->host); + sdio_enable_func(func); sdio_release_host(func); @@ -174,20 +180,13 @@ static int wl12xx_sdio_power_off(struct wl12xx_sdio_glue *glue) { struct sdio_func *func = dev_to_sdio_func(glue->dev); struct mmc_card *card = func->card; - int error; sdio_claim_host(func); sdio_disable_func(func); sdio_release_host(func); /* Let runtime PM know the card is powered off */ - error = pm_runtime_put(&card->dev); - if (error < 0 && error != -EBUSY) { - dev_err(&card->dev, "%s failed: %i\n", __func__, error); - - return error; - } - + pm_runtime_put(&card->dev); return 0; } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 2625740bdc4a..330ddb64930f 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -655,7 +655,7 @@ static void frontend_changed(struct xenbus_device *dev, set_backend_state(be, XenbusStateClosed); if (xenbus_dev_is_online(dev)) break; - /* fall through if not online */ + /* fall through - if not online */ case XenbusStateUnknown: set_backend_state(be, XenbusStateClosed); device_unregister(&dev->dev); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 150e49723c15..6a9dd68c0f4f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1253,6 +1253,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, * effects say only one namespace is affected. */ if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { + mutex_lock(&ctrl->scan_lock); nvme_start_freeze(ctrl); nvme_wait_freeze(ctrl); } @@ -1281,8 +1282,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) */ if (effects & NVME_CMD_EFFECTS_LBCC) nvme_update_formats(ctrl); - if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) + if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { nvme_unfreeze(ctrl); + mutex_unlock(&ctrl->scan_lock); + } if (effects & NVME_CMD_EFFECTS_CCC) nvme_init_identify(ctrl); if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) @@ -3401,6 +3404,7 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_identify_ctrl(ctrl, &id)) return; + mutex_lock(&ctrl->scan_lock); nn = le32_to_cpu(id->nn); if (ctrl->vs >= NVME_VS(1, 1, 0) && !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { @@ -3409,6 +3413,7 @@ static void nvme_scan_work(struct work_struct *work) } nvme_scan_ns_sequential(ctrl, nn); out_free_id: + mutex_unlock(&ctrl->scan_lock); kfree(id); down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); @@ -3652,6 +3657,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->state = NVME_CTRL_NEW; spin_lock_init(&ctrl->lock); + mutex_init(&ctrl->scan_lock); INIT_LIST_HEAD(&ctrl->namespaces); init_rwsem(&ctrl->namespaces_rwsem); ctrl->dev = dev; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ab961bdeea89..c4a1bb41abf0 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -154,6 +154,7 @@ struct nvme_ctrl { enum nvme_ctrl_state state; bool identified; spinlock_t lock; + struct mutex scan_lock; const struct nvme_ctrl_ops *ops; struct request_queue *admin_q; struct request_queue *connect_q; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9bc585415d9b..7fee665ec45e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2557,27 +2557,18 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false); - /* - * Introduce CONNECTING state from nvme-fc/rdma transports to mark the - * initializing procedure here. - */ - if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { - dev_warn(dev->ctrl.device, - "failed to mark controller CONNECTING\n"); - goto out; - } - + mutex_lock(&dev->shutdown_lock); result = nvme_pci_enable(dev); if (result) - goto out; + goto out_unlock; result = nvme_pci_configure_admin_queue(dev); if (result) - goto out; + goto out_unlock; result = nvme_alloc_admin_tags(dev); if (result) - goto out; + goto out_unlock; /* * Limit the max command size to prevent iod->sg allocations going @@ -2585,6 +2576,17 @@ static void nvme_reset_work(struct work_struct *work) */ dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; dev->ctrl.max_segments = NVME_MAX_SEGS; + mutex_unlock(&dev->shutdown_lock); + + /* + * Introduce CONNECTING state from nvme-fc/rdma transports to mark the + * initializing procedure here. + */ + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { + dev_warn(dev->ctrl.device, + "failed to mark controller CONNECTING\n"); + goto out; + } result = nvme_init_identify(&dev->ctrl); if (result) @@ -2649,6 +2651,8 @@ static void nvme_reset_work(struct work_struct *work) nvme_start_ctrl(&dev->ctrl); return; + out_unlock: + mutex_unlock(&dev->shutdown_lock); out: nvme_remove_dead_ctrl(dev, result); } diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 52e47dac028f..80f843030e36 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -310,6 +310,9 @@ static int imx6_pcie_attach_pd(struct device *dev) imx6_pcie->pd_pcie = dev_pm_domain_attach_by_name(dev, "pcie"); if (IS_ERR(imx6_pcie->pd_pcie)) return PTR_ERR(imx6_pcie->pd_pcie); + /* Do nothing when power domain missing */ + if (!imx6_pcie->pd_pcie) + return 0; link = device_link_add(dev, imx6_pcie->pd_pcie, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME | @@ -323,13 +326,13 @@ static int imx6_pcie_attach_pd(struct device *dev) if (IS_ERR(imx6_pcie->pd_pcie_phy)) return PTR_ERR(imx6_pcie->pd_pcie_phy); - device_link_add(dev, imx6_pcie->pd_pcie_phy, + link = device_link_add(dev, imx6_pcie->pd_pcie_phy, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE); - if (IS_ERR(link)) { - dev_err(dev, "Failed to add device_link to pcie_phy pd: %ld\n", PTR_ERR(link)); - return PTR_ERR(link); + if (!link) { + dev_err(dev, "Failed to add device_link to pcie_phy pd.\n"); + return -EINVAL; } return 0; diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c index b171b6bc15c8..0c389a30ef5d 100644 --- a/drivers/pci/controller/dwc/pcie-armada8k.c +++ b/drivers/pci/controller/dwc/pcie-armada8k.c @@ -22,7 +22,6 @@ #include <linux/resource.h> #include <linux/of_pci.h> #include <linux/of_irq.h> -#include <linux/gpio/consumer.h> #include "pcie-designware.h" @@ -30,7 +29,6 @@ struct armada8k_pcie { struct dw_pcie *pci; struct clk *clk; struct clk *clk_reg; - struct gpio_desc *reset_gpio; }; #define PCIE_VENDOR_REGS_OFFSET 0x8000 @@ -139,12 +137,6 @@ static int armada8k_pcie_host_init(struct pcie_port *pp) struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct armada8k_pcie *pcie = to_armada8k_pcie(pci); - if (pcie->reset_gpio) { - /* assert and then deassert the reset signal */ - gpiod_set_value_cansleep(pcie->reset_gpio, 1); - msleep(100); - gpiod_set_value_cansleep(pcie->reset_gpio, 0); - } dw_pcie_setup_rc(pp); armada8k_pcie_establish_link(pcie); @@ -257,14 +249,6 @@ static int armada8k_pcie_probe(struct platform_device *pdev) goto fail_clkreg; } - /* Get reset gpio signal and hold asserted (logically high) */ - pcie->reset_gpio = devm_gpiod_get_optional(dev, "reset", - GPIOD_OUT_HIGH); - if (IS_ERR(pcie->reset_gpio)) { - ret = PTR_ERR(pcie->reset_gpio); - goto fail_clkreg; - } - platform_set_drvdata(pdev, pcie); ret = armada8k_add_pcie_port(pcie, pdev); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b0a413f3f7ca..e2a879e93d86 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -639,8 +639,9 @@ static void quirk_synopsys_haps(struct pci_dev *pdev) break; } } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SYNOPSYS, PCI_ANY_ID, - quirk_synopsys_haps); +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, PCI_ANY_ID, + PCI_CLASS_SERIAL_USB_XHCI, 0, + quirk_synopsys_haps); /* * Let's make the southbridge information explicit instead of having to diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 05044e323ea5..03ec7a5d9d0b 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1513,7 +1513,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, { @@ -1521,7 +1521,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, { @@ -1529,7 +1529,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, { @@ -1537,7 +1537,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), DMI_MATCH(DMI_PRODUCT_NAME, "Celes"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, {} diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig index 1817786ab6aa..a005cbccb4f7 100644 --- a/drivers/pinctrl/mediatek/Kconfig +++ b/drivers/pinctrl/mediatek/Kconfig @@ -45,12 +45,14 @@ config PINCTRL_MT2701 config PINCTRL_MT7623 bool "Mediatek MT7623 pin control with generic binding" depends on MACH_MT7623 || COMPILE_TEST + depends on OF default MACH_MT7623 select PINCTRL_MTK_MOORE config PINCTRL_MT7629 bool "Mediatek MT7629 pin control" depends on MACH_MT7629 || COMPILE_TEST + depends on OF default MACH_MT7629 select PINCTRL_MTK_MOORE @@ -92,6 +94,7 @@ config PINCTRL_MT6797 config PINCTRL_MT7622 bool "MediaTek MT7622 pin control" + depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_MOORE diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index b03481ef99a1..98905d4a79ca 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -832,8 +832,13 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, break; case MCP_TYPE_S18: + one_regmap_config = + devm_kmemdup(dev, &mcp23x17_regmap, + sizeof(struct regmap_config), GFP_KERNEL); + if (!one_regmap_config) + return -ENOMEM; mcp->regmap = devm_regmap_init(dev, &mcp23sxx_spi_regmap, mcp, - &mcp23x17_regmap); + one_regmap_config); mcp->reg_shift = 1; mcp->chip.ngpio = 16; mcp->chip.label = "mcp23s18"; diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-h6.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-h6.c index aa8b58125568..ef4268cc6227 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun50i-h6.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-h6.c @@ -588,7 +588,7 @@ static const unsigned int h6_irq_bank_map[] = { 1, 5, 6, 7 }; static const struct sunxi_pinctrl_desc h6_pinctrl_data = { .pins = h6_pins, .npins = ARRAY_SIZE(h6_pins), - .irq_banks = 3, + .irq_banks = 4, .irq_bank_map = h6_irq_bank_map, .irq_read_needs_mux = true, }; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 5d9184d18c16..0e7fa69e93df 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -698,26 +698,24 @@ static int sunxi_pmx_request(struct pinctrl_dev *pctldev, unsigned offset) { struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); unsigned short bank = offset / PINS_PER_BANK; - struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank]; - struct regulator *reg; + unsigned short bank_offset = bank - pctl->desc->pin_base / + PINS_PER_BANK; + struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank_offset]; + struct regulator *reg = s_reg->regulator; + char supply[16]; int ret; - reg = s_reg->regulator; - if (!reg) { - char supply[16]; - - snprintf(supply, sizeof(supply), "vcc-p%c", 'a' + bank); - reg = regulator_get(pctl->dev, supply); - if (IS_ERR(reg)) { - dev_err(pctl->dev, "Couldn't get bank P%c regulator\n", - 'A' + bank); - return PTR_ERR(reg); - } - - s_reg->regulator = reg; - refcount_set(&s_reg->refcount, 1); - } else { + if (reg) { refcount_inc(&s_reg->refcount); + return 0; + } + + snprintf(supply, sizeof(supply), "vcc-p%c", 'a' + bank); + reg = regulator_get(pctl->dev, supply); + if (IS_ERR(reg)) { + dev_err(pctl->dev, "Couldn't get bank P%c regulator\n", + 'A' + bank); + return PTR_ERR(reg); } ret = regulator_enable(reg); @@ -727,13 +725,13 @@ static int sunxi_pmx_request(struct pinctrl_dev *pctldev, unsigned offset) goto out; } + s_reg->regulator = reg; + refcount_set(&s_reg->refcount, 1); + return 0; out: - if (refcount_dec_and_test(&s_reg->refcount)) { - regulator_put(s_reg->regulator); - s_reg->regulator = NULL; - } + regulator_put(s_reg->regulator); return ret; } @@ -742,7 +740,9 @@ static int sunxi_pmx_free(struct pinctrl_dev *pctldev, unsigned offset) { struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); unsigned short bank = offset / PINS_PER_BANK; - struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank]; + unsigned short bank_offset = bank - pctl->desc->pin_base / + PINS_PER_BANK; + struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank_offset]; if (!refcount_dec_and_test(&s_reg->refcount)) return 0; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.h b/drivers/pinctrl/sunxi/pinctrl-sunxi.h index e340d2a24b44..034c0317c8d6 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.h +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.h @@ -136,7 +136,7 @@ struct sunxi_pinctrl { struct gpio_chip *chip; const struct sunxi_pinctrl_desc *desc; struct device *dev; - struct sunxi_pinctrl_regulator regulators[12]; + struct sunxi_pinctrl_regulator regulators[9]; struct irq_domain *domain; struct sunxi_pinctrl_function *functions; unsigned nfunctions; diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 5e2109c54c7c..b5e9db85e881 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -905,6 +905,7 @@ config TOSHIBA_WMI config ACPI_CMPC tristate "CMPC Laptop Extras" depends on ACPI && INPUT + depends on BACKLIGHT_LCD_SUPPORT depends on RFKILL || RFKILL=n select BACKLIGHT_CLASS_DEVICE help @@ -1128,6 +1129,7 @@ config INTEL_OAKTRAIL config SAMSUNG_Q10 tristate "Samsung Q10 Extras" depends on ACPI + depends on BACKLIGHT_LCD_SUPPORT select BACKLIGHT_CLASS_DEVICE ---help--- This driver provides support for backlight control on Samsung Q10 diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index aeb4a8b2e0af..7fe18636915a 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -43,7 +43,7 @@ config PTP_1588_CLOCK_DTE config PTP_1588_CLOCK_QORIQ tristate "Freescale QorIQ 1588 timer as PTP clock" - depends on GIANFAR || FSL_DPAA_ETH + depends on GIANFAR || FSL_DPAA_ETH || FSL_ENETC || FSL_ENETC_VF depends on PTP_1588_CLOCK default y help diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 43416b2e8a13..42d3654f77f0 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -22,7 +22,6 @@ #include <linux/device.h> #include <linux/hrtimer.h> -#include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/of.h> @@ -37,61 +36,61 @@ * Register access functions */ -/* Caller must hold qoriq_ptp->lock. */ -static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp) +/* Caller must hold ptp_qoriq->lock. */ +static u64 tmr_cnt_read(struct ptp_qoriq *ptp_qoriq) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u64 ns; u32 lo, hi; - lo = qoriq_read(®s->ctrl_regs->tmr_cnt_l); - hi = qoriq_read(®s->ctrl_regs->tmr_cnt_h); + lo = ptp_qoriq->read(®s->ctrl_regs->tmr_cnt_l); + hi = ptp_qoriq->read(®s->ctrl_regs->tmr_cnt_h); ns = ((u64) hi) << 32; ns |= lo; return ns; } -/* Caller must hold qoriq_ptp->lock. */ -static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns) +/* Caller must hold ptp_qoriq->lock. */ +static void tmr_cnt_write(struct ptp_qoriq *ptp_qoriq, u64 ns) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 hi = ns >> 32; u32 lo = ns & 0xffffffff; - qoriq_write(®s->ctrl_regs->tmr_cnt_l, lo); - qoriq_write(®s->ctrl_regs->tmr_cnt_h, hi); + ptp_qoriq->write(®s->ctrl_regs->tmr_cnt_l, lo); + ptp_qoriq->write(®s->ctrl_regs->tmr_cnt_h, hi); } -/* Caller must hold qoriq_ptp->lock. */ -static void set_alarm(struct qoriq_ptp *qoriq_ptp) +/* Caller must hold ptp_qoriq->lock. */ +static void set_alarm(struct ptp_qoriq *ptp_qoriq) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u64 ns; u32 lo, hi; - ns = tmr_cnt_read(qoriq_ptp) + 1500000000ULL; + ns = tmr_cnt_read(ptp_qoriq) + 1500000000ULL; ns = div_u64(ns, 1000000000UL) * 1000000000ULL; - ns -= qoriq_ptp->tclk_period; + ns -= ptp_qoriq->tclk_period; hi = ns >> 32; lo = ns & 0xffffffff; - qoriq_write(®s->alarm_regs->tmr_alarm1_l, lo); - qoriq_write(®s->alarm_regs->tmr_alarm1_h, hi); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm1_l, lo); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm1_h, hi); } -/* Caller must hold qoriq_ptp->lock. */ -static void set_fipers(struct qoriq_ptp *qoriq_ptp) +/* Caller must hold ptp_qoriq->lock. */ +static void set_fipers(struct ptp_qoriq *ptp_qoriq) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; - set_alarm(qoriq_ptp); - qoriq_write(®s->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1); - qoriq_write(®s->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2); + set_alarm(ptp_qoriq); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); } -static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, +static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; struct ptp_clock_event event; void __iomem *reg_etts_l; void __iomem *reg_etts_h; @@ -116,17 +115,17 @@ static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, event.index = index; do { - lo = qoriq_read(reg_etts_l); - hi = qoriq_read(reg_etts_h); + lo = ptp_qoriq->read(reg_etts_l); + hi = ptp_qoriq->read(reg_etts_h); if (update_event) { event.timestamp = ((u64) hi) << 32; event.timestamp |= lo; - ptp_clock_event(qoriq_ptp->clock, &event); + ptp_clock_event(ptp_qoriq->clock, &event); } - stat = qoriq_read(®s->ctrl_regs->tmr_stat); - } while (qoriq_ptp->extts_fifo_support && (stat & valid)); + stat = ptp_qoriq->read(®s->ctrl_regs->tmr_stat); + } while (ptp_qoriq->extts_fifo_support && (stat & valid)); return 0; } @@ -135,89 +134,90 @@ static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, * Interrupt service routine */ -static irqreturn_t isr(int irq, void *priv) +irqreturn_t ptp_qoriq_isr(int irq, void *priv) { - struct qoriq_ptp *qoriq_ptp = priv; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = priv; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; struct ptp_clock_event event; u64 ns; u32 ack = 0, lo, hi, mask, val, irqs; - spin_lock(&qoriq_ptp->lock); + spin_lock(&ptp_qoriq->lock); - val = qoriq_read(®s->ctrl_regs->tmr_tevent); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); + val = ptp_qoriq->read(®s->ctrl_regs->tmr_tevent); + mask = ptp_qoriq->read(®s->ctrl_regs->tmr_temask); - spin_unlock(&qoriq_ptp->lock); + spin_unlock(&ptp_qoriq->lock); irqs = val & mask; if (irqs & ETS1) { ack |= ETS1; - extts_clean_up(qoriq_ptp, 0, true); + extts_clean_up(ptp_qoriq, 0, true); } if (irqs & ETS2) { ack |= ETS2; - extts_clean_up(qoriq_ptp, 1, true); + extts_clean_up(ptp_qoriq, 1, true); } if (irqs & ALM2) { ack |= ALM2; - if (qoriq_ptp->alarm_value) { + if (ptp_qoriq->alarm_value) { event.type = PTP_CLOCK_ALARM; event.index = 0; - event.timestamp = qoriq_ptp->alarm_value; - ptp_clock_event(qoriq_ptp->clock, &event); + event.timestamp = ptp_qoriq->alarm_value; + ptp_clock_event(ptp_qoriq->clock, &event); } - if (qoriq_ptp->alarm_interval) { - ns = qoriq_ptp->alarm_value + qoriq_ptp->alarm_interval; + if (ptp_qoriq->alarm_interval) { + ns = ptp_qoriq->alarm_value + ptp_qoriq->alarm_interval; hi = ns >> 32; lo = ns & 0xffffffff; - qoriq_write(®s->alarm_regs->tmr_alarm2_l, lo); - qoriq_write(®s->alarm_regs->tmr_alarm2_h, hi); - qoriq_ptp->alarm_value = ns; + ptp_qoriq->write(®s->alarm_regs->tmr_alarm2_l, lo); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm2_h, hi); + ptp_qoriq->alarm_value = ns; } else { - spin_lock(&qoriq_ptp->lock); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); + spin_lock(&ptp_qoriq->lock); + mask = ptp_qoriq->read(®s->ctrl_regs->tmr_temask); mask &= ~ALM2EN; - qoriq_write(®s->ctrl_regs->tmr_temask, mask); - spin_unlock(&qoriq_ptp->lock); - qoriq_ptp->alarm_value = 0; - qoriq_ptp->alarm_interval = 0; + ptp_qoriq->write(®s->ctrl_regs->tmr_temask, mask); + spin_unlock(&ptp_qoriq->lock); + ptp_qoriq->alarm_value = 0; + ptp_qoriq->alarm_interval = 0; } } if (irqs & PP1) { ack |= PP1; event.type = PTP_CLOCK_PPS; - ptp_clock_event(qoriq_ptp->clock, &event); + ptp_clock_event(ptp_qoriq->clock, &event); } if (ack) { - qoriq_write(®s->ctrl_regs->tmr_tevent, ack); + ptp_qoriq->write(®s->ctrl_regs->tmr_tevent, ack); return IRQ_HANDLED; } else return IRQ_NONE; } +EXPORT_SYMBOL_GPL(ptp_qoriq_isr); /* * PTP clock operations */ -static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { u64 adj, diff; u32 tmr_add; int neg_adj = 0; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; if (scaled_ppm < 0) { neg_adj = 1; scaled_ppm = -scaled_ppm; } - tmr_add = qoriq_ptp->tmr_add; + tmr_add = ptp_qoriq->tmr_add; adj = tmr_add; /* calculate diff as adj*(scaled_ppm/65536)/1000000 @@ -229,71 +229,74 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff; - qoriq_write(®s->ctrl_regs->tmr_add, tmr_add); + ptp_qoriq->write(®s->ctrl_regs->tmr_add, tmr_add); return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_adjfine); -static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta) +int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta) { s64 now; unsigned long flags; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - now = tmr_cnt_read(qoriq_ptp); + now = tmr_cnt_read(ptp_qoriq); now += delta; - tmr_cnt_write(qoriq_ptp, now); - set_fipers(qoriq_ptp); + tmr_cnt_write(ptp_qoriq, now); + set_fipers(ptp_qoriq); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_adjtime); -static int ptp_qoriq_gettime(struct ptp_clock_info *ptp, - struct timespec64 *ts) +int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { u64 ns; unsigned long flags; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - ns = tmr_cnt_read(qoriq_ptp); + ns = tmr_cnt_read(ptp_qoriq); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); *ts = ns_to_timespec64(ns); return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_gettime); -static int ptp_qoriq_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) +int ptp_qoriq_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) { u64 ns; unsigned long flags; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); ns = timespec64_to_ns(ts); - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - tmr_cnt_write(qoriq_ptp, ns); - set_fipers(qoriq_ptp); + tmr_cnt_write(ptp_qoriq, ns); + set_fipers(ptp_qoriq); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_settime); -static int ptp_qoriq_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) +int ptp_qoriq_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) { - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; unsigned long flags; u32 bit, mask = 0; @@ -311,7 +314,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, } if (on) - extts_clean_up(qoriq_ptp, rq->extts.index, false); + extts_clean_up(ptp_qoriq, rq->extts.index, false); break; case PTP_CLK_REQ_PPS: @@ -321,21 +324,22 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, return -EOPNOTSUPP; } - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); + mask = ptp_qoriq->read(®s->ctrl_regs->tmr_temask); if (on) { mask |= bit; - qoriq_write(®s->ctrl_regs->tmr_tevent, bit); + ptp_qoriq->write(®s->ctrl_regs->tmr_tevent, bit); } else { mask &= ~bit; } - qoriq_write(®s->ctrl_regs->tmr_temask, mask); + ptp_qoriq->write(®s->ctrl_regs->tmr_temask, mask); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_enable); static const struct ptp_clock_info ptp_qoriq_caps = { .owner = THIS_MODULE, @@ -354,7 +358,7 @@ static const struct ptp_clock_info ptp_qoriq_caps = { }; /** - * qoriq_ptp_nominal_freq - calculate nominal frequency according to + * ptp_qoriq_nominal_freq - calculate nominal frequency according to * reference clock frequency * * @clk_src: reference clock frequency @@ -365,7 +369,7 @@ static const struct ptp_clock_info ptp_qoriq_caps = { * * Return the nominal frequency */ -static u32 qoriq_ptp_nominal_freq(u32 clk_src) +static u32 ptp_qoriq_nominal_freq(u32 clk_src) { u32 remainder = 0; @@ -385,9 +389,9 @@ static u32 qoriq_ptp_nominal_freq(u32 clk_src) } /** - * qoriq_ptp_auto_config - calculate a set of default configurations + * ptp_qoriq_auto_config - calculate a set of default configurations * - * @qoriq_ptp: pointer to qoriq_ptp + * @ptp_qoriq: pointer to ptp_qoriq * @node: pointer to device_node * * If below dts properties are not provided, this function will be @@ -401,7 +405,7 @@ static u32 qoriq_ptp_nominal_freq(u32 clk_src) * * Return 0 if success */ -static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, +static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq, struct device_node *node) { struct clk *clk; @@ -411,7 +415,7 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, u32 remainder = 0; u32 clk_src = 0; - qoriq_ptp->cksel = DEFAULT_CKSEL; + ptp_qoriq->cksel = DEFAULT_CKSEL; clk = of_clk_get(node, 0); if (!IS_ERR(clk)) { @@ -424,12 +428,12 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, return -EINVAL; } - nominal_freq = qoriq_ptp_nominal_freq(clk_src); + nominal_freq = ptp_qoriq_nominal_freq(clk_src); if (!nominal_freq) return -EINVAL; - qoriq_ptp->tclk_period = 1000000000UL / nominal_freq; - qoriq_ptp->tmr_prsc = DEFAULT_TMR_PRSC; + ptp_qoriq->tclk_period = 1000000000UL / nominal_freq; + ptp_qoriq->tmr_prsc = DEFAULT_TMR_PRSC; /* Calculate initial frequency compensation value for TMR_ADD register. * freq_comp = ceil(2^32 / freq_ratio) @@ -440,172 +444,193 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, if (remainder) freq_comp++; - qoriq_ptp->tmr_add = freq_comp; - qoriq_ptp->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - qoriq_ptp->tclk_period; - qoriq_ptp->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - qoriq_ptp->tclk_period; + ptp_qoriq->tmr_add = freq_comp; + ptp_qoriq->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - ptp_qoriq->tclk_period; + ptp_qoriq->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - ptp_qoriq->tclk_period; /* max_adj = 1000000000 * (freq_ratio - 1.0) - 1 * freq_ratio = reference_clock_freq / nominal_freq */ max_adj = 1000000000ULL * (clk_src - nominal_freq); max_adj = div_u64(max_adj, nominal_freq) - 1; - qoriq_ptp->caps.max_adj = max_adj; + ptp_qoriq->caps.max_adj = max_adj; return 0; } -static int qoriq_ptp_probe(struct platform_device *dev) +int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, + const struct ptp_clock_info caps) { - struct device_node *node = dev->dev.of_node; - struct qoriq_ptp *qoriq_ptp; - struct qoriq_ptp_registers *regs; + struct device_node *node = ptp_qoriq->dev->of_node; + struct ptp_qoriq_registers *regs; struct timespec64 now; - int err = -ENOMEM; - u32 tmr_ctrl; unsigned long flags; - void __iomem *base; - - qoriq_ptp = kzalloc(sizeof(*qoriq_ptp), GFP_KERNEL); - if (!qoriq_ptp) - goto no_memory; - - err = -EINVAL; + u32 tmr_ctrl; - qoriq_ptp->dev = &dev->dev; - qoriq_ptp->caps = ptp_qoriq_caps; + ptp_qoriq->base = base; + ptp_qoriq->caps = caps; - if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel)) - qoriq_ptp->cksel = DEFAULT_CKSEL; + if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel)) + ptp_qoriq->cksel = DEFAULT_CKSEL; if (of_property_read_bool(node, "fsl,extts-fifo")) - qoriq_ptp->extts_fifo_support = true; + ptp_qoriq->extts_fifo_support = true; else - qoriq_ptp->extts_fifo_support = false; + ptp_qoriq->extts_fifo_support = false; if (of_property_read_u32(node, - "fsl,tclk-period", &qoriq_ptp->tclk_period) || + "fsl,tclk-period", &ptp_qoriq->tclk_period) || of_property_read_u32(node, - "fsl,tmr-prsc", &qoriq_ptp->tmr_prsc) || + "fsl,tmr-prsc", &ptp_qoriq->tmr_prsc) || of_property_read_u32(node, - "fsl,tmr-add", &qoriq_ptp->tmr_add) || + "fsl,tmr-add", &ptp_qoriq->tmr_add) || of_property_read_u32(node, - "fsl,tmr-fiper1", &qoriq_ptp->tmr_fiper1) || + "fsl,tmr-fiper1", &ptp_qoriq->tmr_fiper1) || of_property_read_u32(node, - "fsl,tmr-fiper2", &qoriq_ptp->tmr_fiper2) || + "fsl,tmr-fiper2", &ptp_qoriq->tmr_fiper2) || of_property_read_u32(node, - "fsl,max-adj", &qoriq_ptp->caps.max_adj)) { + "fsl,max-adj", &ptp_qoriq->caps.max_adj)) { pr_warn("device tree node missing required elements, try automatic configuration\n"); - if (qoriq_ptp_auto_config(qoriq_ptp, node)) - goto no_config; + if (ptp_qoriq_auto_config(ptp_qoriq, node)) + return -ENODEV; } - err = -ENODEV; + if (of_property_read_bool(node, "little-endian")) { + ptp_qoriq->read = qoriq_read_le; + ptp_qoriq->write = qoriq_write_le; + } else { + ptp_qoriq->read = qoriq_read_be; + ptp_qoriq->write = qoriq_write_be; + } + + /* The eTSEC uses differnt memory map with DPAA/ENETC */ + if (of_device_is_compatible(node, "fsl,etsec-ptp")) { + ptp_qoriq->regs.ctrl_regs = base + ETSEC_CTRL_REGS_OFFSET; + ptp_qoriq->regs.alarm_regs = base + ETSEC_ALARM_REGS_OFFSET; + ptp_qoriq->regs.fiper_regs = base + ETSEC_FIPER_REGS_OFFSET; + ptp_qoriq->regs.etts_regs = base + ETSEC_ETTS_REGS_OFFSET; + } else { + ptp_qoriq->regs.ctrl_regs = base + CTRL_REGS_OFFSET; + ptp_qoriq->regs.alarm_regs = base + ALARM_REGS_OFFSET; + ptp_qoriq->regs.fiper_regs = base + FIPER_REGS_OFFSET; + ptp_qoriq->regs.etts_regs = base + ETTS_REGS_OFFSET; + } + + ktime_get_real_ts64(&now); + ptp_qoriq_settime(&ptp_qoriq->caps, &now); + + tmr_ctrl = + (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT | + (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT; + + spin_lock_init(&ptp_qoriq->lock); + spin_lock_irqsave(&ptp_qoriq->lock, flags); + + regs = &ptp_qoriq->regs; + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl); + ptp_qoriq->write(®s->ctrl_regs->tmr_add, ptp_qoriq->tmr_add); + ptp_qoriq->write(®s->ctrl_regs->tmr_prsc, ptp_qoriq->tmr_prsc); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); + set_alarm(ptp_qoriq); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, + tmr_ctrl|FIPERST|RTPE|TE|FRD); + + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); + + ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, ptp_qoriq->dev); + if (IS_ERR(ptp_qoriq->clock)) + return PTR_ERR(ptp_qoriq->clock); + + ptp_qoriq->phc_index = ptp_clock_index(ptp_qoriq->clock); + ptp_qoriq_create_debugfs(ptp_qoriq); + return 0; +} +EXPORT_SYMBOL_GPL(ptp_qoriq_init); + +void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq) +{ + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; + + ptp_qoriq->write(®s->ctrl_regs->tmr_temask, 0); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, 0); + + ptp_qoriq_remove_debugfs(ptp_qoriq); + ptp_clock_unregister(ptp_qoriq->clock); + iounmap(ptp_qoriq->base); + free_irq(ptp_qoriq->irq, ptp_qoriq); +} +EXPORT_SYMBOL_GPL(ptp_qoriq_free); - qoriq_ptp->irq = platform_get_irq(dev, 0); +static int ptp_qoriq_probe(struct platform_device *dev) +{ + struct ptp_qoriq *ptp_qoriq; + int err = -ENOMEM; + void __iomem *base; + + ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL); + if (!ptp_qoriq) + goto no_memory; + + ptp_qoriq->dev = &dev->dev; - if (qoriq_ptp->irq < 0) { + err = -ENODEV; + + ptp_qoriq->irq = platform_get_irq(dev, 0); + if (ptp_qoriq->irq < 0) { pr_err("irq not in device tree\n"); goto no_node; } - if (request_irq(qoriq_ptp->irq, isr, IRQF_SHARED, DRIVER, qoriq_ptp)) { + if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED, + DRIVER, ptp_qoriq)) { pr_err("request_irq failed\n"); goto no_node; } - qoriq_ptp->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (!qoriq_ptp->rsrc) { + ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0); + if (!ptp_qoriq->rsrc) { pr_err("no resource\n"); goto no_resource; } - if (request_resource(&iomem_resource, qoriq_ptp->rsrc)) { + if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) { pr_err("resource busy\n"); goto no_resource; } - spin_lock_init(&qoriq_ptp->lock); - - base = ioremap(qoriq_ptp->rsrc->start, - resource_size(qoriq_ptp->rsrc)); + base = ioremap(ptp_qoriq->rsrc->start, + resource_size(ptp_qoriq->rsrc)); if (!base) { pr_err("ioremap ptp registers failed\n"); goto no_ioremap; } - qoriq_ptp->base = base; - - if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) { - qoriq_ptp->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; - qoriq_ptp->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; - qoriq_ptp->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET; - qoriq_ptp->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET; - } else { - qoriq_ptp->regs.ctrl_regs = base + CTRL_REGS_OFFSET; - qoriq_ptp->regs.alarm_regs = base + ALARM_REGS_OFFSET; - qoriq_ptp->regs.fiper_regs = base + FIPER_REGS_OFFSET; - qoriq_ptp->regs.etts_regs = base + ETTS_REGS_OFFSET; - } - - ktime_get_real_ts64(&now); - ptp_qoriq_settime(&qoriq_ptp->caps, &now); - - tmr_ctrl = - (qoriq_ptp->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT | - (qoriq_ptp->cksel & CKSEL_MASK) << CKSEL_SHIFT; - - spin_lock_irqsave(&qoriq_ptp->lock, flags); - - regs = &qoriq_ptp->regs; - qoriq_write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl); - qoriq_write(®s->ctrl_regs->tmr_add, qoriq_ptp->tmr_add); - qoriq_write(®s->ctrl_regs->tmr_prsc, qoriq_ptp->tmr_prsc); - qoriq_write(®s->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1); - qoriq_write(®s->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2); - set_alarm(qoriq_ptp); - qoriq_write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl|FIPERST|RTPE|TE|FRD); - - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); - - qoriq_ptp->clock = ptp_clock_register(&qoriq_ptp->caps, &dev->dev); - if (IS_ERR(qoriq_ptp->clock)) { - err = PTR_ERR(qoriq_ptp->clock); + err = ptp_qoriq_init(ptp_qoriq, base, ptp_qoriq_caps); + if (err) goto no_clock; - } - qoriq_ptp->phc_index = ptp_clock_index(qoriq_ptp->clock); - - ptp_qoriq_create_debugfs(qoriq_ptp); - platform_set_drvdata(dev, qoriq_ptp); + platform_set_drvdata(dev, ptp_qoriq); return 0; no_clock: - iounmap(qoriq_ptp->base); + iounmap(ptp_qoriq->base); no_ioremap: - release_resource(qoriq_ptp->rsrc); + release_resource(ptp_qoriq->rsrc); no_resource: - free_irq(qoriq_ptp->irq, qoriq_ptp); -no_config: + free_irq(ptp_qoriq->irq, ptp_qoriq); no_node: - kfree(qoriq_ptp); + kfree(ptp_qoriq); no_memory: return err; } -static int qoriq_ptp_remove(struct platform_device *dev) +static int ptp_qoriq_remove(struct platform_device *dev) { - struct qoriq_ptp *qoriq_ptp = platform_get_drvdata(dev); - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; - - qoriq_write(®s->ctrl_regs->tmr_temask, 0); - qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); - - ptp_qoriq_remove_debugfs(qoriq_ptp); - ptp_clock_unregister(qoriq_ptp->clock); - iounmap(qoriq_ptp->base); - release_resource(qoriq_ptp->rsrc); - free_irq(qoriq_ptp->irq, qoriq_ptp); - kfree(qoriq_ptp); + struct ptp_qoriq *ptp_qoriq = platform_get_drvdata(dev); + ptp_qoriq_free(ptp_qoriq); + release_resource(ptp_qoriq->rsrc); + kfree(ptp_qoriq); return 0; } @@ -616,16 +641,16 @@ static const struct of_device_id match_table[] = { }; MODULE_DEVICE_TABLE(of, match_table); -static struct platform_driver qoriq_ptp_driver = { +static struct platform_driver ptp_qoriq_driver = { .driver = { .name = "ptp_qoriq", .of_match_table = match_table, }, - .probe = qoriq_ptp_probe, - .remove = qoriq_ptp_remove, + .probe = ptp_qoriq_probe, + .remove = ptp_qoriq_remove, }; -module_platform_driver(qoriq_ptp_driver); +module_platform_driver(ptp_qoriq_driver); MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>"); MODULE_DESCRIPTION("PTP clock for Freescale QorIQ 1588 timer"); diff --git a/drivers/ptp/ptp_qoriq_debugfs.c b/drivers/ptp/ptp_qoriq_debugfs.c index 970595021088..e8dddcedf288 100644 --- a/drivers/ptp/ptp_qoriq_debugfs.c +++ b/drivers/ptp/ptp_qoriq_debugfs.c @@ -7,11 +7,11 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); *val = ctrl & PP1L ? 1 : 0; return 0; @@ -19,17 +19,17 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val) static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); if (val == 0) ctrl &= ~PP1L; else ctrl |= PP1L; - qoriq_write(®s->ctrl_regs->tmr_ctrl, ctrl); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, ctrl); return 0; } @@ -38,11 +38,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper1_fops, ptp_qoriq_fiper1_lpbk_get, static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); *val = ctrl & PP2L ? 1 : 0; return 0; @@ -50,52 +50,52 @@ static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val) static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); if (val == 0) ctrl &= ~PP2L; else ctrl |= PP2L; - qoriq_write(®s->ctrl_regs->tmr_ctrl, ctrl); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, ctrl); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper2_fops, ptp_qoriq_fiper2_lpbk_get, ptp_qoriq_fiper2_lpbk_set, "%llu\n"); -void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp) +void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq) { struct dentry *root; - root = debugfs_create_dir(dev_name(qoriq_ptp->dev), NULL); + root = debugfs_create_dir(dev_name(ptp_qoriq->dev), NULL); if (IS_ERR(root)) return; if (!root) goto err_root; - qoriq_ptp->debugfs_root = root; + ptp_qoriq->debugfs_root = root; if (!debugfs_create_file_unsafe("fiper1-loopback", 0600, root, - qoriq_ptp, &ptp_qoriq_fiper1_fops)) + ptp_qoriq, &ptp_qoriq_fiper1_fops)) goto err_node; if (!debugfs_create_file_unsafe("fiper2-loopback", 0600, root, - qoriq_ptp, &ptp_qoriq_fiper2_fops)) + ptp_qoriq, &ptp_qoriq_fiper2_fops)) goto err_node; return; err_node: debugfs_remove_recursive(root); - qoriq_ptp->debugfs_root = NULL; + ptp_qoriq->debugfs_root = NULL; err_root: - dev_err(qoriq_ptp->dev, "failed to initialize debugfs\n"); + dev_err(ptp_qoriq->dev, "failed to initialize debugfs\n"); } -void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp) +void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq) { - debugfs_remove_recursive(qoriq_ptp->debugfs_root); - qoriq_ptp->debugfs_root = NULL; + debugfs_remove_recursive(ptp_qoriq->debugfs_root); + ptp_qoriq->debugfs_root = NULL; } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 4e7b55a14b1a..6e294b4d3635 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4469,6 +4469,14 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp) usrparm.psf_data &= 0x7fffffffULL; usrparm.rssd_result &= 0x7fffffffULL; } + /* at least 2 bytes are accessed and should be allocated */ + if (usrparm.psf_data_len < 2) { + DBF_DEV_EVENT(DBF_WARNING, device, + "Symmetrix ioctl invalid data length %d", + usrparm.psf_data_len); + rc = -EINVAL; + goto out; + } /* alloc I/O data area */ psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA); rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 48ea0004a56d..5a699746c357 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -248,7 +248,8 @@ static inline int ap_test_config(unsigned int *field, unsigned int nr) static inline int ap_test_config_card_id(unsigned int id) { if (!ap_configuration) /* QCI not supported */ - return 1; + /* only ids 0...3F may be probed */ + return id < 0x40 ? 1 : 0; return ap_test_config(ap_configuration->apm, id); } diff --git a/drivers/s390/net/Makefile b/drivers/s390/net/Makefile index f2d6bbe57a6f..bc55ec316adb 100644 --- a/drivers/s390/net/Makefile +++ b/drivers/s390/net/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_NETIUCV) += netiucv.o fsm.o obj-$(CONFIG_SMSGIUCV) += smsgiucv.o obj-$(CONFIG_SMSGIUCV_EVENT) += smsgiucv_app.o obj-$(CONFIG_LCS) += lcs.o -qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o +qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o qeth_ethtool.o obj-$(CONFIG_QETH) += qeth.o qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o obj-$(CONFIG_QETH_L2) += qeth_l2.o diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index d65650ef6b41..c0c46be0b251 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -18,10 +18,10 @@ #include <linux/in6.h> #include <linux/bitops.h> #include <linux/seq_file.h> -#include <linux/ethtool.h> #include <linux/hashtable.h> #include <linux/ip.h> #include <linux/refcount.h> +#include <linux/workqueue.h> #include <net/ipv6.h> #include <net/if_inet6.h> @@ -34,6 +34,8 @@ #include <asm/ccwgroup.h> #include <asm/sysinfo.h> +#include <uapi/linux/if_link.h> + #include "qeth_core_mpc.h" /** @@ -112,59 +114,6 @@ static inline u32 qeth_get_device_id(struct ccw_device *cdev) #define CCW_DEVID(cdev) (qeth_get_device_id(cdev)) #define CARD_DEVID(card) (CCW_DEVID(CARD_RDEV(card))) -/** - * card stuff - */ -struct qeth_perf_stats { - unsigned int bufs_rec; - unsigned int bufs_sent; - unsigned int buf_elements_sent; - - unsigned int skbs_sent_pack; - unsigned int bufs_sent_pack; - - unsigned int sc_dp_p; - unsigned int sc_p_dp; - /* qdio_cq_handler: number of times called, time spent in */ - __u64 cq_start_time; - unsigned int cq_cnt; - unsigned int cq_time; - /* qdio_input_handler: number of times called, time spent in */ - __u64 inbound_start_time; - unsigned int inbound_cnt; - unsigned int inbound_time; - /* qeth_send_packet: number of times called, time spent in */ - __u64 outbound_start_time; - unsigned int outbound_cnt; - unsigned int outbound_time; - /* qdio_output_handler: number of times called, time spent in */ - __u64 outbound_handler_start_time; - unsigned int outbound_handler_cnt; - unsigned int outbound_handler_time; - /* number of calls to and time spent in do_QDIO for inbound queue */ - __u64 inbound_do_qdio_start_time; - unsigned int inbound_do_qdio_cnt; - unsigned int inbound_do_qdio_time; - /* number of calls to and time spent in do_QDIO for outbound queues */ - __u64 outbound_do_qdio_start_time; - unsigned int outbound_do_qdio_cnt; - unsigned int outbound_do_qdio_time; - unsigned int large_send_bytes; - unsigned int large_send_cnt; - unsigned int sg_skbs_sent; - /* initial values when measuring starts */ - unsigned long initial_rx_packets; - unsigned long initial_tx_packets; - /* inbound scatter gather data */ - unsigned int sg_skbs_rx; - unsigned int sg_frags_rx; - unsigned int sg_alloc_page_rx; - unsigned int tx_csum; - unsigned int tx_lin; - unsigned int tx_linfail; - unsigned int rx_csum; -}; - /* Routing stuff */ struct qeth_routing_info { enum qeth_routing_types type; @@ -494,10 +443,54 @@ enum qeth_out_q_states { QETH_OUT_Q_LOCKED_FLUSH, }; +#define QETH_CARD_STAT_ADD(_c, _stat, _val) ((_c)->stats._stat += (_val)) +#define QETH_CARD_STAT_INC(_c, _stat) QETH_CARD_STAT_ADD(_c, _stat, 1) + +#define QETH_TXQ_STAT_ADD(_q, _stat, _val) ((_q)->stats._stat += (_val)) +#define QETH_TXQ_STAT_INC(_q, _stat) QETH_TXQ_STAT_ADD(_q, _stat, 1) + +struct qeth_card_stats { + u64 rx_bufs; + u64 rx_skb_csum; + u64 rx_sg_skbs; + u64 rx_sg_frags; + u64 rx_sg_alloc_page; + + /* rtnl_link_stats64 */ + u64 rx_packets; + u64 rx_bytes; + u64 rx_errors; + u64 rx_dropped; + u64 rx_multicast; + u64 tx_errors; +}; + +struct qeth_out_q_stats { + u64 bufs; + u64 bufs_pack; + u64 buf_elements; + u64 skbs_pack; + u64 skbs_sg; + u64 skbs_csum; + u64 skbs_tso; + u64 skbs_linearized; + u64 skbs_linearized_fail; + u64 tso_bytes; + u64 packing_mode_switch; + + /* rtnl_link_stats64 */ + u64 tx_packets; + u64 tx_bytes; + u64 tx_errors; + u64 tx_dropped; + u64 tx_carrier_errors; +}; + struct qeth_qdio_out_q { struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qdio_outbuf_state *bufstates; /* convenience pointer */ + struct qeth_out_q_stats stats; int queue_no; struct qeth_card *card; atomic_t state; @@ -527,7 +520,7 @@ struct qeth_qdio_info { /* output */ int no_out_queues; - struct qeth_qdio_out_q **out_qs; + struct qeth_qdio_out_q *out_qs[QETH_MAX_QUEUES]; struct qdio_outbuf_state *out_bufstates; /* priority queueing */ @@ -594,8 +587,8 @@ struct qeth_channel; struct qeth_cmd_buffer { enum qeth_cmd_buffer_state state; struct qeth_channel *channel; + struct qeth_reply *reply; unsigned char *data; - int rc; void (*callback)(struct qeth_card *card, struct qeth_channel *channel, struct qeth_cmd_buffer *iob); }; @@ -701,7 +694,6 @@ struct qeth_card_options { struct qeth_vnicc_info vnicc; /* VNICC options */ int fake_broadcast; enum qeth_discipline_id layer; - int performance_stats; int rx_sg_cb; enum qeth_ipa_isolation_modes isolation; enum qeth_ipa_isolation_modes prev_isolation; @@ -777,13 +769,13 @@ struct qeth_card { struct qeth_channel data; struct net_device *dev; - struct net_device_stats stats; - + struct qeth_card_stats stats; struct qeth_card_info info; struct qeth_token token; struct qeth_seqno seqno; struct qeth_card_options options; + struct workqueue_struct *event_wq; wait_queue_head_t wait_q; spinlock_t mclock; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -800,7 +792,6 @@ struct qeth_card { struct list_head cmd_waiter_list; /* QDIO buffer handling */ struct qeth_qdio_info qdio; - struct qeth_perf_stats perf_stats; int read_or_write_problem; struct qeth_osn_info osn_info; struct qeth_discipline *discipline; @@ -857,11 +848,6 @@ static inline int qeth_get_elements_for_range(addr_t start, addr_t end) return PFN_UP(end) - PFN_DOWN(start); } -static inline int qeth_get_micros(void) -{ - return (int) (get_tod_clock() >> 12); -} - static inline int qeth_get_ip_version(struct sk_buff *skb) { struct vlan_ethhdr *veth = vlan_eth_hdr(skb); @@ -886,8 +872,7 @@ static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb, if ((card->dev->features & NETIF_F_RXCSUM) && (flags & QETH_HDR_EXT_CSUM_TRANSP_REQ)) { skb->ip_summed = CHECKSUM_UNNECESSARY; - if (card->options.performance_stats) - card->perf_stats.rx_csum++; + QETH_CARD_STAT_INC(card, rx_skb_csum); } else { skb->ip_summed = CHECKSUM_NONE; } @@ -949,12 +934,13 @@ static inline struct qeth_qdio_out_q *qeth_get_tx_queue(struct qeth_card *card, extern struct qeth_discipline qeth_l2_discipline; extern struct qeth_discipline qeth_l3_discipline; +extern const struct ethtool_ops qeth_ethtool_ops; +extern const struct ethtool_ops qeth_osn_ethtool_ops; extern const struct attribute_group *qeth_generic_attr_groups[]; extern const struct attribute_group *qeth_osn_attr_groups[]; extern const struct attribute_group qeth_device_attr_group; extern const struct attribute_group qeth_device_blkt_group; extern const struct device_type qeth_generic_devtype; -extern struct workqueue_struct *qeth_wq; int qeth_card_hw_is_reachable(struct qeth_card *); const char *qeth_get_cardname_short(struct qeth_card *); @@ -993,33 +979,25 @@ void qeth_clear_working_pool_list(struct qeth_card *); void qeth_clear_cmd_buffers(struct qeth_channel *); void qeth_clear_qdio_buffers(struct qeth_card *); void qeth_setadp_promisc_mode(struct qeth_card *); -struct net_device_stats *qeth_get_stats(struct net_device *); int qeth_setadpparms_change_macaddr(struct qeth_card *); void qeth_tx_timeout(struct net_device *); void qeth_prepare_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *); void qeth_release_buffer(struct qeth_channel *, struct qeth_cmd_buffer *); -void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob); +void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, + u16 cmd_length); struct qeth_cmd_buffer *qeth_wait_for_buffer(struct qeth_channel *); int qeth_query_switch_attributes(struct qeth_card *card, struct qeth_switch_info *sw_info); -int qeth_send_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *, - int (*reply_cb)(struct qeth_card *, struct qeth_reply*, unsigned long), - void *reply_param); +int qeth_query_card_info(struct qeth_card *card, + struct carrier_info *carrier_info); unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset); int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, struct sk_buff *skb, struct qeth_hdr *hdr, unsigned int offset, unsigned int hd_len, int elements_needed); int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -int qeth_core_get_sset_count(struct net_device *, int); -void qeth_core_get_ethtool_stats(struct net_device *, - struct ethtool_stats *, u64 *); -void qeth_core_get_strings(struct net_device *, u32, u8 *); -void qeth_core_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); void qeth_dbf_longtext(debug_info_t *id, int level, char *text, ...); -int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd); int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback); int qeth_configure_cq(struct qeth_card *, enum qeth_cq); int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action); @@ -1036,14 +1014,16 @@ netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); netdev_features_t qeth_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); +void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); int qeth_open(struct net_device *dev); int qeth_stop(struct net_device *dev); int qeth_vm_request_mac(struct qeth_card *card); int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, struct qeth_qdio_out_q *queue, int ipv, int cast_type, - void (*fill_header)(struct qeth_card *card, struct qeth_hdr *hdr, - struct sk_buff *skb, int ipv, int cast_type, + void (*fill_header)(struct qeth_qdio_out_q *queue, + struct qeth_hdr *hdr, struct sk_buff *skb, + int ipv, int cast_type, unsigned int data_len)); /* exports for OSN */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index dcc06e48b70b..4708df39f129 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -74,8 +74,7 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *queue, static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf); static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int); -struct workqueue_struct *qeth_wq; -EXPORT_SYMBOL_GPL(qeth_wq); +static struct workqueue_struct *qeth_wq; int qeth_card_hw_is_reachable(struct qeth_card *card) { @@ -540,6 +539,7 @@ static int __qeth_issue_next_read(struct qeth_card *card) QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n", rc, CARD_DEVID(card)); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); card->read_or_write_problem = 1; qeth_schedule_recovery(card); wake_up(&card->wait_q); @@ -566,6 +566,7 @@ static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card) if (reply) { refcount_set(&reply->refcnt, 1); atomic_set(&reply->received, 0); + init_waitqueue_head(&reply->wait_q); } return reply; } @@ -581,6 +582,26 @@ static void qeth_put_reply(struct qeth_reply *reply) kfree(reply); } +static void qeth_enqueue_reply(struct qeth_card *card, struct qeth_reply *reply) +{ + spin_lock_irq(&card->lock); + list_add_tail(&reply->list, &card->cmd_waiter_list); + spin_unlock_irq(&card->lock); +} + +static void qeth_dequeue_reply(struct qeth_card *card, struct qeth_reply *reply) +{ + spin_lock_irq(&card->lock); + list_del(&reply->list); + spin_unlock_irq(&card->lock); +} + +static void qeth_notify_reply(struct qeth_reply *reply) +{ + atomic_inc(&reply->received); + wake_up(&reply->wait_q); +} + static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc, struct qeth_card *card) { @@ -657,19 +678,15 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, void qeth_clear_ipacmd_list(struct qeth_card *card) { - struct qeth_reply *reply, *r; + struct qeth_reply *reply; unsigned long flags; QETH_CARD_TEXT(card, 4, "clipalst"); spin_lock_irqsave(&card->lock, flags); - list_for_each_entry_safe(reply, r, &card->cmd_waiter_list, list) { - qeth_get_reply(reply); + list_for_each_entry(reply, &card->cmd_waiter_list, list) { reply->rc = -EIO; - atomic_inc(&reply->received); - list_del_init(&reply->list); - wake_up(&reply->wait_q); - qeth_put_reply(reply); + qeth_notify_reply(reply); } spin_unlock_irqrestore(&card->lock, flags); } @@ -726,7 +743,10 @@ void qeth_release_buffer(struct qeth_channel *channel, spin_lock_irqsave(&channel->iob_lock, flags); iob->state = BUF_STATE_FREE; iob->callback = qeth_send_control_data_cb; - iob->rc = 0; + if (iob->reply) { + qeth_put_reply(iob->reply); + iob->reply = NULL; + } spin_unlock_irqrestore(&channel->iob_lock, flags); wake_up(&channel->wait_q); } @@ -739,6 +759,17 @@ static void qeth_release_buffer_cb(struct qeth_card *card, qeth_release_buffer(channel, iob); } +static void qeth_cancel_cmd(struct qeth_cmd_buffer *iob, int rc) +{ + struct qeth_reply *reply = iob->reply; + + if (reply) { + reply->rc = rc; + qeth_notify_reply(reply); + } + qeth_release_buffer(iob->channel, iob); +} + static struct qeth_cmd_buffer *qeth_get_buffer(struct qeth_channel *channel) { struct qeth_cmd_buffer *buffer = NULL; @@ -774,9 +805,9 @@ static void qeth_send_control_data_cb(struct qeth_card *card, struct qeth_cmd_buffer *iob) { struct qeth_ipa_cmd *cmd = NULL; - struct qeth_reply *reply, *r; + struct qeth_reply *reply = NULL; + struct qeth_reply *r; unsigned long flags; - int keep_reply; int rc = 0; QETH_CARD_TEXT(card, 4, "sndctlcb"); @@ -808,44 +839,40 @@ static void qeth_send_control_data_cb(struct qeth_card *card, goto out; } + /* match against pending cmd requests */ spin_lock_irqsave(&card->lock, flags); - list_for_each_entry_safe(reply, r, &card->cmd_waiter_list, list) { - if ((reply->seqno == QETH_IDX_COMMAND_SEQNO) || - ((cmd) && (reply->seqno == cmd->hdr.seqno))) { + list_for_each_entry(r, &card->cmd_waiter_list, list) { + if ((r->seqno == QETH_IDX_COMMAND_SEQNO) || + (cmd && (r->seqno == cmd->hdr.seqno))) { + reply = r; + /* take the object outside the lock */ qeth_get_reply(reply); - list_del_init(&reply->list); - spin_unlock_irqrestore(&card->lock, flags); - keep_reply = 0; - if (reply->callback != NULL) { - if (cmd) { - reply->offset = (__u16)((char *)cmd - - (char *)iob->data); - keep_reply = reply->callback(card, - reply, - (unsigned long)cmd); - } else - keep_reply = reply->callback(card, - reply, - (unsigned long)iob); - } - if (cmd) - reply->rc = (u16) cmd->hdr.return_code; - else if (iob->rc) - reply->rc = iob->rc; - if (keep_reply) { - spin_lock_irqsave(&card->lock, flags); - list_add_tail(&reply->list, - &card->cmd_waiter_list); - spin_unlock_irqrestore(&card->lock, flags); - } else { - atomic_inc(&reply->received); - wake_up(&reply->wait_q); - } - qeth_put_reply(reply); - goto out; + break; } } spin_unlock_irqrestore(&card->lock, flags); + + if (!reply) + goto out; + + if (!reply->callback) { + rc = 0; + } else { + if (cmd) { + reply->offset = (u16)((char *)cmd - (char *)iob->data); + rc = reply->callback(card, reply, (unsigned long)cmd); + } else { + rc = reply->callback(card, reply, (unsigned long)iob); + } + } + + if (rc <= 0) { + reply->rc = rc; + qeth_notify_reply(reply); + } + + qeth_put_reply(reply); + out: memcpy(&card->seqno.pdu_hdr_ack, QETH_PDU_HEADER_SEQ_NO(iob->data), @@ -976,9 +1003,8 @@ static int qeth_get_problem(struct qeth_card *card, struct ccw_device *cdev, return 0; } -static long qeth_check_irb_error(struct qeth_card *card, - struct ccw_device *cdev, unsigned long intparm, - struct irb *irb) +static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev, + unsigned long intparm, struct irb *irb) { if (!IS_ERR(irb)) return 0; @@ -989,7 +1015,7 @@ static long qeth_check_irb_error(struct qeth_card *card, CCW_DEVID(cdev)); QETH_CARD_TEXT(card, 2, "ckirberr"); QETH_CARD_TEXT_(card, 2, " rc%d", -EIO); - break; + return -EIO; case -ETIMEDOUT: dev_warn(&cdev->dev, "A hardware operation timed out" " on the device\n"); @@ -1001,14 +1027,14 @@ static long qeth_check_irb_error(struct qeth_card *card, wake_up(&card->wait_q); } } - break; + return -ETIMEDOUT; default: QETH_DBF_MESSAGE(2, "unknown error %ld on channel %x\n", PTR_ERR(irb), CCW_DEVID(cdev)); QETH_CARD_TEXT(card, 2, "ckirberr"); QETH_CARD_TEXT(card, 2, " rc???"); + return PTR_ERR(irb); } - return PTR_ERR(irb); } static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, @@ -1043,10 +1069,11 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, if (qeth_intparm_is_iob(intparm)) iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm); - if (qeth_check_irb_error(card, cdev, intparm, irb)) { + rc = qeth_check_irb_error(card, cdev, intparm, irb); + if (rc) { /* IO was terminated, free its resources. */ if (iob) - qeth_release_buffer(iob->channel, iob); + qeth_cancel_cmd(iob, rc); atomic_set(&channel->irq_pending, 0); wake_up(&card->wait_q); return; @@ -1101,6 +1128,8 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, rc = qeth_get_problem(card, cdev, irb); if (rc) { card->read_or_write_problem = 1; + if (iob) + qeth_cancel_cmd(iob, rc); qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); goto out; @@ -1262,7 +1291,6 @@ static int qeth_setup_channel(struct qeth_channel *channel, bool alloc_buffers) channel->iob[cnt].state = BUF_STATE_FREE; channel->iob[cnt].channel = channel; channel->iob[cnt].callback = qeth_send_control_data_cb; - channel->iob[cnt].rc = 0; } if (cnt < QETH_CMD_BUFFER_NO) { qeth_clean_channel(channel); @@ -1439,6 +1467,10 @@ static struct qeth_card *qeth_alloc_card(struct ccwgroup_device *gdev) CARD_RDEV(card) = gdev->cdev[0]; CARD_WDEV(card) = gdev->cdev[1]; CARD_DDEV(card) = gdev->cdev[2]; + + card->event_wq = alloc_ordered_workqueue("%s", 0, dev_name(&gdev->dev)); + if (!card->event_wq) + goto out_wq; if (qeth_setup_channel(&card->read, true)) goto out_ip; if (qeth_setup_channel(&card->write, true)) @@ -1454,6 +1486,8 @@ out_data: out_channel: qeth_clean_channel(&card->read); out_ip: + destroy_workqueue(card->event_wq); +out_wq: dev_set_drvdata(&gdev->dev, NULL); kfree(card); out: @@ -1782,6 +1816,7 @@ static int qeth_idx_activate_get_answer(struct qeth_card *card, QETH_DBF_MESSAGE(2, "Error2 in activating channel rc=%d\n", rc); QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@ -1851,6 +1886,7 @@ static int qeth_idx_activate_channel(struct qeth_card *card, rc); QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@ -1981,7 +2017,7 @@ void qeth_prepare_control_data(struct qeth_card *card, int len, card->seqno.pdu_hdr++; memcpy(QETH_PDU_HEADER_ACK_SEQ_NO(iob->data), &card->seqno.pdu_hdr_ack, QETH_SEQ_NO_LENGTH); - QETH_DBF_HEX(CTRL, 2, iob->data, QETH_DBF_CTRL_LEN); + QETH_DBF_HEX(CTRL, 2, iob->data, min(len, QETH_DBF_CTRL_LEN)); } EXPORT_SYMBOL_GPL(qeth_prepare_control_data); @@ -1998,24 +2034,22 @@ EXPORT_SYMBOL_GPL(qeth_prepare_control_data); * for the IPA commands. * @reply_param: private pointer passed to the callback * - * Returns the value of the `return_code' field of the response - * block returned from the hardware, or other error indication. - * Value of zero indicates successful execution of the command. - * * Callback function gets called one or more times, with cb_cmd * pointing to the response returned by the hardware. Callback - * function must return non-zero if more reply blocks are expected, - * and zero if the last or only reply block is received. Callback - * function can get the value of the reply_param pointer from the + * function must return + * > 0 if more reply blocks are expected, + * 0 if the last or only reply block is received, and + * < 0 on error. + * Callback function can get the value of the reply_param pointer from the * field 'param' of the structure qeth_reply. */ -int qeth_send_control_data(struct qeth_card *card, int len, - struct qeth_cmd_buffer *iob, - int (*reply_cb)(struct qeth_card *cb_card, - struct qeth_reply *cb_reply, - unsigned long cb_cmd), - void *reply_param) +static int qeth_send_control_data(struct qeth_card *card, int len, + struct qeth_cmd_buffer *iob, + int (*reply_cb)(struct qeth_card *cb_card, + struct qeth_reply *cb_reply, + unsigned long cb_cmd), + void *reply_param) { struct qeth_channel *channel = iob->channel; int rc; @@ -2031,12 +2065,15 @@ int qeth_send_control_data(struct qeth_card *card, int len, } reply = qeth_alloc_reply(card); if (!reply) { + qeth_release_buffer(channel, iob); return -ENOMEM; } reply->callback = reply_cb; reply->param = reply_param; - init_waitqueue_head(&reply->wait_q); + /* pairs with qeth_release_buffer(): */ + qeth_get_reply(reply); + iob->reply = reply; while (atomic_cmpxchg(&channel->irq_pending, 0, 1)) ; @@ -2051,9 +2088,7 @@ int qeth_send_control_data(struct qeth_card *card, int len, } qeth_prepare_control_data(card, len, iob); - spin_lock_irq(&card->lock); - list_add_tail(&reply->list, &card->cmd_waiter_list); - spin_unlock_irq(&card->lock); + qeth_enqueue_reply(card, reply); timeout = jiffies + event_timeout; @@ -2066,10 +2101,8 @@ int qeth_send_control_data(struct qeth_card *card, int len, QETH_DBF_MESSAGE(2, "qeth_send_control_data on device %x: ccw_device_start rc = %i\n", CARD_DEVID(card), rc); QETH_CARD_TEXT_(card, 2, " err%d", rc); - spin_lock_irq(&card->lock); - list_del_init(&reply->list); + qeth_dequeue_reply(card, reply); qeth_put_reply(reply); - spin_unlock_irq(&card->lock); qeth_release_buffer(channel, iob); atomic_set(&channel->irq_pending, 0); wake_up(&card->wait_q); @@ -2091,21 +2124,16 @@ int qeth_send_control_data(struct qeth_card *card, int len, } } + qeth_dequeue_reply(card, reply); rc = reply->rc; qeth_put_reply(reply); return rc; time_err: - reply->rc = -ETIME; - spin_lock_irq(&card->lock); - list_del_init(&reply->list); - spin_unlock_irq(&card->lock); - atomic_inc(&reply->received); - rc = reply->rc; + qeth_dequeue_reply(card, reply); qeth_put_reply(reply); - return rc; + return -ETIME; } -EXPORT_SYMBOL_GPL(qeth_send_control_data); static int qeth_cm_enable_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) @@ -2310,9 +2338,8 @@ static int qeth_ulp_setup_cb(struct qeth_card *card, struct qeth_reply *reply, QETH_DBF_TEXT(SETUP, 2, "olmlimit"); dev_err(&card->gdev->dev, "A connection could not be " "established because of an OLM limit\n"); - iob->rc = -EMLINK; + return -EMLINK; } - QETH_DBF_TEXT_(SETUP, 2, " rc%d", iob->rc); return 0; } @@ -2362,11 +2389,12 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) return 0; } -static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q) +static void qeth_free_output_queue(struct qeth_qdio_out_q *q) { if (!q) return; + qeth_clear_outq_buffers(q, 1); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); kfree(q); } @@ -2405,12 +2433,6 @@ static int qeth_alloc_qdio_buffers(struct qeth_card *card) goto out_freeinq; /* outbound */ - card->qdio.out_qs = - kcalloc(card->qdio.no_out_queues, - sizeof(struct qeth_qdio_out_q *), - GFP_KERNEL); - if (!card->qdio.out_qs) - goto out_freepool; for (i = 0; i < card->qdio.no_out_queues; ++i) { card->qdio.out_qs[i] = qeth_alloc_qdio_out_buf(); if (!card->qdio.out_qs[i]) @@ -2441,12 +2463,9 @@ out_freeoutqbufs: } out_freeoutq: while (i > 0) { - qeth_free_qdio_out_buf(card->qdio.out_qs[--i]); - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); + qeth_free_output_queue(card->qdio.out_qs[--i]); + card->qdio.out_qs[i] = NULL; } - kfree(card->qdio.out_qs); - card->qdio.out_qs = NULL; -out_freepool: qeth_free_buffer_pool(card); out_freeinq: qeth_free_qdio_queue(card->qdio.in_q); @@ -2475,13 +2494,9 @@ static void qeth_free_qdio_buffers(struct qeth_card *card) /* inbound buffer pool */ qeth_free_buffer_pool(card); /* free outbound qdio_qs */ - if (card->qdio.out_qs) { - for (i = 0; i < card->qdio.no_out_queues; ++i) { - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - qeth_free_qdio_out_buf(card->qdio.out_qs[i]); - } - kfree(card->qdio.out_qs); - card->qdio.out_qs = NULL; + for (i = 0; i < card->qdio.no_out_queues; i++) { + qeth_free_output_queue(card->qdio.out_qs[i]); + card->qdio.out_qs[i] = NULL; } } @@ -2688,8 +2703,7 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry( } else { free_page((unsigned long)entry->elements[i]); entry->elements[i] = page_address(page); - if (card->options.performance_stats) - card->perf_stats.sg_alloc_page_rx++; + QETH_CARD_STAT_INC(card, rx_sg_alloc_page); } } } @@ -2808,14 +2822,20 @@ static void qeth_fill_ipacmd_header(struct qeth_card *card, cmd->hdr.prot_version = prot; } -void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob) +void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, + u16 cmd_length) { + u16 total_length = IPA_PDU_HEADER_SIZE + cmd_length; u8 prot_type = qeth_mpc_select_prot_type(card); memcpy(iob->data, IPA_PDU_HEADER, IPA_PDU_HEADER_SIZE); + memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &total_length, 2); memcpy(QETH_IPA_CMD_PROT_TYPE(iob->data), &prot_type, 1); + memcpy(QETH_IPA_PDU_LEN_PDU1(iob->data), &cmd_length, 2); + memcpy(QETH_IPA_PDU_LEN_PDU2(iob->data), &cmd_length, 2); memcpy(QETH_IPA_CMD_DEST_ADDR(iob->data), &card->token.ulp_connection_r, QETH_MPC_TOKEN_LENGTH); + memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &cmd_length, 2); } EXPORT_SYMBOL_GPL(qeth_prepare_ipa_cmd); @@ -2826,7 +2846,7 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card, iob = qeth_get_buffer(&card->write); if (iob) { - qeth_prepare_ipa_cmd(card, iob); + qeth_prepare_ipa_cmd(card, iob, sizeof(struct qeth_ipa_cmd)); qeth_fill_ipacmd_header(card, __ipa_cmd(iob), ipacmd, prot); } else { dev_warn(&card->gdev->dev, @@ -2839,6 +2859,14 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card, } EXPORT_SYMBOL_GPL(qeth_get_ipacmd_buffer); +static int qeth_send_ipa_cmd_cb(struct qeth_card *card, + struct qeth_reply *reply, unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + + return (cmd->hdr.return_code) ? -EIO : 0; +} + /** * qeth_send_ipa_cmd() - send an IPA command * @@ -2850,11 +2878,15 @@ int qeth_send_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, unsigned long), void *reply_param) { + u16 length; int rc; QETH_CARD_TEXT(card, 4, "sendipa"); - rc = qeth_send_control_data(card, IPA_CMD_LENGTH, - iob, reply_cb, reply_param); + + if (reply_cb == NULL) + reply_cb = qeth_send_ipa_cmd_cb; + memcpy(&length, QETH_IPA_PDU_LEN_TOTAL(iob->data), 2); + rc = qeth_send_control_data(card, length, iob, reply_cb, reply_param); if (rc == -ETIME) { qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); @@ -2863,9 +2895,19 @@ int qeth_send_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, } EXPORT_SYMBOL_GPL(qeth_send_ipa_cmd); +static int qeth_send_startlan_cb(struct qeth_card *card, + struct qeth_reply *reply, unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + + if (cmd->hdr.return_code == IPA_RC_LAN_OFFLINE) + return -ENETDOWN; + + return (cmd->hdr.return_code) ? -EIO : 0; +} + static int qeth_send_startlan(struct qeth_card *card) { - int rc; struct qeth_cmd_buffer *iob; QETH_DBF_TEXT(SETUP, 2, "strtlan"); @@ -2873,8 +2915,7 @@ static int qeth_send_startlan(struct qeth_card *card) iob = qeth_get_ipacmd_buffer(card, IPA_CMD_STARTLAN, 0); if (!iob) return -ENOMEM; - rc = qeth_send_ipa_cmd(card, iob, NULL, NULL); - return rc; + return qeth_send_ipa_cmd(card, iob, qeth_send_startlan_cb, NULL); } static int qeth_setadpparms_inspect_rc(struct qeth_ipa_cmd *cmd) @@ -2892,7 +2933,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 3, "quyadpcb"); if (qeth_setadpparms_inspect_rc(cmd)) - return 0; + return -EIO; if (cmd->data.setadapterparms.data.query_cmds_supp.lan_type & 0x7f) { card->info.link_type = @@ -2947,19 +2988,18 @@ static int qeth_query_ipassists_cb(struct qeth_card *card, cmd = (struct qeth_ipa_cmd *) data; switch (cmd->hdr.return_code) { + case IPA_RC_SUCCESS: + break; case IPA_RC_NOTSUPP: case IPA_RC_L2_UNSUPPORTED_CMD: QETH_DBF_TEXT(SETUP, 2, "ipaunsup"); card->options.ipa4.supported_funcs |= IPA_SETADAPTERPARMS; card->options.ipa6.supported_funcs |= IPA_SETADAPTERPARMS; - return 0; + return -EOPNOTSUPP; default: - if (cmd->hdr.return_code) { - QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n", - CARD_DEVID(card), - cmd->hdr.return_code); - return 0; - } + QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n", + CARD_DEVID(card), cmd->hdr.return_code); + return -EIO; } if (cmd->hdr.prot_version == QETH_PROT_IPV4) { @@ -2997,7 +3037,7 @@ static int qeth_query_switch_attributes_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "qswiatcb"); if (qeth_setadpparms_inspect_rc(cmd)) - return 0; + return -EIO; sw_info = (struct qeth_switch_info *)reply->param; attrs = &cmd->data.setadapterparms.data.query_switch_attributes; @@ -3029,15 +3069,15 @@ int qeth_query_switch_attributes(struct qeth_card *card, static int qeth_query_setdiagass_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; - __u16 rc; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + u16 rc = cmd->hdr.return_code; - cmd = (struct qeth_ipa_cmd *)data; - rc = cmd->hdr.return_code; - if (rc) + if (rc) { QETH_CARD_TEXT_(card, 2, "diagq:%x", rc); - else - card->info.diagass_support = cmd->data.diagass.ext; + return -EIO; + } + + card->info.diagass_support = cmd->data.diagass.ext; return 0; } @@ -3084,13 +3124,13 @@ static void qeth_get_trap_id(struct qeth_card *card, struct qeth_trap_id *tid) static int qeth_hw_trap_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; - __u16 rc; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + u16 rc = cmd->hdr.return_code; - cmd = (struct qeth_ipa_cmd *)data; - rc = cmd->hdr.return_code; - if (rc) + if (rc) { QETH_CARD_TEXT_(card, 2, "trapc:%x", rc); + return -EIO; + } return 0; } @@ -3139,7 +3179,7 @@ static int qeth_check_qdio_errors(struct qeth_card *card, buf->element[14].sflags); QETH_CARD_TEXT_(card, 2, " qerr=%X", qdio_error); if ((buf->element[15].sflags) == 0x12) { - card->stats.rx_dropped++; + QETH_CARD_STAT_INC(card, rx_dropped); return 0; } else return 1; @@ -3203,17 +3243,8 @@ static void qeth_queue_input_buffer(struct qeth_card *card, int index) * 'index') un-requeued -> this buffer is the first buffer that * will be requeued the next time */ - if (card->options.performance_stats) { - card->perf_stats.inbound_do_qdio_cnt++; - card->perf_stats.inbound_do_qdio_start_time = - qeth_get_micros(); - } rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, queue->next_buf_to_init, count); - if (card->options.performance_stats) - card->perf_stats.inbound_do_qdio_time += - qeth_get_micros() - - card->perf_stats.inbound_do_qdio_start_time; if (rc) { QETH_CARD_TEXT(card, 2, "qinberr"); } @@ -3290,8 +3321,7 @@ static void qeth_switch_to_packing_if_needed(struct qeth_qdio_out_q *queue) >= QETH_HIGH_WATERMARK_PACK){ /* switch non-PACKING -> PACKING */ QETH_CARD_TEXT(queue->card, 6, "np->pack"); - if (queue->card->options.performance_stats) - queue->card->perf_stats.sc_dp_p++; + QETH_TXQ_STAT_INC(queue, packing_mode_switch); queue->do_pack = 1; } } @@ -3310,8 +3340,7 @@ static int qeth_switch_to_nonpacking_if_needed(struct qeth_qdio_out_q *queue) <= QETH_LOW_WATERMARK_PACK) { /* switch PACKING -> non-PACKING */ QETH_CARD_TEXT(queue->card, 6, "pack->np"); - if (queue->card->options.performance_stats) - queue->card->perf_stats.sc_p_dp++; + QETH_TXQ_STAT_INC(queue, packing_mode_switch); queue->do_pack = 0; return qeth_prep_flush_pack_buffer(queue); } @@ -3365,25 +3394,16 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, } } + QETH_TXQ_STAT_ADD(queue, bufs, count); netif_trans_update(queue->card->dev); - if (queue->card->options.performance_stats) { - queue->card->perf_stats.outbound_do_qdio_cnt++; - queue->card->perf_stats.outbound_do_qdio_start_time = - qeth_get_micros(); - } qdio_flags = QDIO_FLAG_SYNC_OUTPUT; if (atomic_read(&queue->set_pci_flags_count)) qdio_flags |= QDIO_FLAG_PCI_OUT; atomic_add(count, &queue->used_buffers); - rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, queue->queue_no, index, count); - if (queue->card->options.performance_stats) - queue->card->perf_stats.outbound_do_qdio_time += - qeth_get_micros() - - queue->card->perf_stats.outbound_do_qdio_start_time; if (rc) { - queue->card->stats.tx_errors += count; + QETH_TXQ_STAT_ADD(queue, tx_errors, count); /* ignore temporary SIGA errors without busy condition */ if (rc == -ENOBUFS) return; @@ -3398,8 +3418,6 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, qeth_schedule_recovery(queue->card); return; } - if (queue->card->options.performance_stats) - queue->card->perf_stats.bufs_sent += count; } static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) @@ -3430,10 +3448,8 @@ static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) if (!flush_cnt && !atomic_read(&queue->set_pci_flags_count)) flush_cnt += qeth_prep_flush_pack_buffer(queue); - if (queue->card->options.performance_stats && - q_was_packing) - queue->card->perf_stats.bufs_sent_pack += - flush_cnt; + if (q_was_packing) + QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_cnt); if (flush_cnt) qeth_flush_buffers(queue, index, flush_cnt); atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); @@ -3488,7 +3504,7 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err, int rc; if (!qeth_is_cq(card, queue)) - goto out; + return; QETH_CARD_TEXT_(card, 5, "qcqhe%d", first_element); QETH_CARD_TEXT_(card, 5, "qcqhc%d", count); @@ -3497,12 +3513,7 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err, if (qdio_err) { netif_stop_queue(card->dev); qeth_schedule_recovery(card); - goto out; - } - - if (card->options.performance_stats) { - card->perf_stats.cq_cnt++; - card->perf_stats.cq_start_time = qeth_get_micros(); + return; } for (i = first_element; i < first_element + count; ++i) { @@ -3530,14 +3541,6 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err, } card->qdio.c_q->next_buf_to_init = (card->qdio.c_q->next_buf_to_init + count) % QDIO_MAX_BUFFERS_PER_Q; - - if (card->options.performance_stats) { - int delta_t = qeth_get_micros(); - delta_t -= card->perf_stats.cq_start_time; - card->perf_stats.cq_time += delta_t; - } -out: - return; } static void qeth_qdio_input_handler(struct ccw_device *ccwdev, @@ -3573,11 +3576,7 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev, qeth_schedule_recovery(card); return; } - if (card->options.performance_stats) { - card->perf_stats.outbound_handler_cnt++; - card->perf_stats.outbound_handler_start_time = - qeth_get_micros(); - } + for (i = first_element; i < (first_element + count); ++i) { int bidx = i % QDIO_MAX_BUFFERS_PER_Q; buffer = queue->bufs[bidx]; @@ -3623,9 +3622,6 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev, qeth_check_outbound_queue(queue); netif_wake_queue(queue->card->dev); - if (card->options.performance_stats) - card->perf_stats.outbound_handler_time += qeth_get_micros() - - card->perf_stats.outbound_handler_start_time; } /* We cannot use outbound queue 3 for unicast packets on HiperSockets */ @@ -3746,11 +3742,12 @@ EXPORT_SYMBOL_GPL(qeth_count_elements); * The number of needed buffer elements is returned in @elements. * Error to create the hdr is indicated by returning with < 0. */ -static int qeth_add_hw_header(struct qeth_card *card, struct sk_buff *skb, - struct qeth_hdr **hdr, unsigned int hdr_len, - unsigned int proto_len, unsigned int *elements) +static int qeth_add_hw_header(struct qeth_qdio_out_q *queue, + struct sk_buff *skb, struct qeth_hdr **hdr, + unsigned int hdr_len, unsigned int proto_len, + unsigned int *elements) { - const unsigned int max_elements = QETH_MAX_BUFFER_ELEMENTS(card); + const unsigned int max_elements = QETH_MAX_BUFFER_ELEMENTS(queue->card); const unsigned int contiguous = proto_len ? proto_len : 1; unsigned int __elements; addr_t start, end; @@ -3789,15 +3786,12 @@ check_layout: } rc = skb_linearize(skb); - if (card->options.performance_stats) { - if (rc) - card->perf_stats.tx_linfail++; - else - card->perf_stats.tx_lin++; - } - if (rc) + if (rc) { + QETH_TXQ_STAT_INC(queue, skbs_linearized_fail); return rc; + } + QETH_TXQ_STAT_INC(queue, skbs_linearized); /* Linearization changed the layout, re-evaluate: */ goto check_layout; } @@ -3921,9 +3915,8 @@ static int qeth_fill_buffer(struct qeth_qdio_out_q *queue, QETH_CARD_TEXT(queue->card, 6, "fillbfnp"); } else { QETH_CARD_TEXT(queue->card, 6, "fillbfpa"); - if (queue->card->options.performance_stats) - queue->card->perf_stats.skbs_sent_pack++; + QETH_TXQ_STAT_INC(queue, skbs_pack); /* If the buffer still has free elements, keep using it. */ if (buf->next_element_to_fill < QETH_MAX_BUFFER_ELEMENTS(queue->card)) @@ -4037,8 +4030,8 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, } out: /* at this point the queue is UNLOCKED again */ - if (queue->card->options.performance_stats && do_pack) - queue->card->perf_stats.bufs_sent_pack += flush_count; + if (do_pack) + QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_count); return rc; } @@ -4062,8 +4055,9 @@ static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr, int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, struct qeth_qdio_out_q *queue, int ipv, int cast_type, - void (*fill_header)(struct qeth_card *card, struct qeth_hdr *hdr, - struct sk_buff *skb, int ipv, int cast_type, + void (*fill_header)(struct qeth_qdio_out_q *queue, + struct qeth_hdr *hdr, struct sk_buff *skb, + int ipv, int cast_type, unsigned int data_len)) { unsigned int proto_len, hw_hdr_len; @@ -4088,7 +4082,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, if (rc) return rc; - push_len = qeth_add_hw_header(card, skb, &hdr, hw_hdr_len, proto_len, + push_len = qeth_add_hw_header(queue, skb, &hdr, hw_hdr_len, proto_len, &elements); if (push_len < 0) return push_len; @@ -4098,7 +4092,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, data_offset = push_len + proto_len; } memset(hdr, 0, hw_hdr_len); - fill_header(card, hdr, skb, ipv, cast_type, frame_len); + fill_header(queue, hdr, skb, ipv, cast_type, frame_len); if (is_tso) qeth_fill_tso_ext((struct qeth_hdr_tso *) hdr, frame_len - proto_len, skb, proto_len); @@ -4115,14 +4109,12 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, } if (!rc) { - if (card->options.performance_stats) { - card->perf_stats.buf_elements_sent += elements; - if (is_sg) - card->perf_stats.sg_skbs_sent++; - if (is_tso) { - card->perf_stats.large_send_bytes += frame_len; - card->perf_stats.large_send_cnt++; - } + QETH_TXQ_STAT_ADD(queue, buf_elements, elements); + if (is_sg) + QETH_TXQ_STAT_INC(queue, skbs_sg); + if (is_tso) { + QETH_TXQ_STAT_INC(queue, skbs_tso); + QETH_TXQ_STAT_ADD(queue, tso_bytes, frame_len); } } else { if (!push_len) @@ -4149,7 +4141,7 @@ static int qeth_setadp_promisc_mode_cb(struct qeth_card *card, setparms->data.mode = SET_PROMISC_MODE_OFF; } card->info.promisc_mode = setparms->data.mode; - return 0; + return (cmd->hdr.return_code) ? -EIO : 0; } void qeth_setadp_promisc_mode(struct qeth_card *card) @@ -4181,18 +4173,6 @@ void qeth_setadp_promisc_mode(struct qeth_card *card) } EXPORT_SYMBOL_GPL(qeth_setadp_promisc_mode); -struct net_device_stats *qeth_get_stats(struct net_device *dev) -{ - struct qeth_card *card; - - card = dev->ml_priv; - - QETH_CARD_TEXT(card, 5, "getstat"); - - return &card->stats; -} -EXPORT_SYMBOL_GPL(qeth_get_stats); - static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { @@ -4201,12 +4181,15 @@ static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "chgmaccb"); if (qeth_setadpparms_inspect_rc(cmd)) - return 0; + return -EIO; adp_cmd = &cmd->data.setadapterparms; + if (!is_valid_ether_addr(adp_cmd->data.change_addr.addr)) + return -EADDRNOTAVAIL; + if (IS_LAYER2(card) && IS_OSD(card) && !IS_VM_NIC(card) && !(adp_cmd->hdr.flags & QETH_SETADP_FLAGS_VIRTUAL_MAC)) - return 0; + return -EADDRNOTAVAIL; ether_addr_copy(card->dev->dev_addr, adp_cmd->data.change_addr.addr); return 0; @@ -4245,7 +4228,7 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "setaccb"); if (cmd->hdr.return_code) - return 0; + return -EIO; qeth_setadpparms_inspect_rc(cmd); access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl; @@ -4319,7 +4302,7 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, card->options.isolation = card->options.prev_isolation; break; } - return 0; + return (cmd->hdr.return_code) ? -EIO : 0; } static int qeth_setadpparms_set_access_ctrl(struct qeth_card *card, @@ -4383,7 +4366,7 @@ void qeth_tx_timeout(struct net_device *dev) card = dev->ml_priv; QETH_CARD_TEXT(card, 4, "txtimeo"); - card->stats.tx_errors++; + QETH_CARD_STAT_INC(card, tx_errors); qeth_schedule_recovery(card); } EXPORT_SYMBOL_GPL(qeth_tx_timeout); @@ -4453,27 +4436,6 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum) return rc; } -static int qeth_send_ipa_snmp_cmd(struct qeth_card *card, - struct qeth_cmd_buffer *iob, int len, - int (*reply_cb)(struct qeth_card *, struct qeth_reply *, - unsigned long), - void *reply_param) -{ - u16 s1, s2; - - QETH_CARD_TEXT(card, 4, "sendsnmp"); - - /* adjust PDU length fields in IPA_PDU_HEADER */ - s1 = (u32) IPA_PDU_HEADER_SIZE + len; - s2 = (u32) len; - memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &s1, 2); - memcpy(QETH_IPA_PDU_LEN_PDU1(iob->data), &s2, 2); - memcpy(QETH_IPA_PDU_LEN_PDU2(iob->data), &s2, 2); - memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &s2, 2); - return qeth_send_control_data(card, IPA_PDU_HEADER_SIZE + len, iob, - reply_cb, reply_param); -} - static int qeth_snmp_command_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long sdata) { @@ -4491,13 +4453,13 @@ static int qeth_snmp_command_cb(struct qeth_card *card, if (cmd->hdr.return_code) { QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code); - return 0; + return -EIO; } if (cmd->data.setadapterparms.hdr.return_code) { cmd->hdr.return_code = cmd->data.setadapterparms.hdr.return_code; QETH_CARD_TEXT_(card, 4, "scer2%x", cmd->hdr.return_code); - return 0; + return -EIO; } data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data)); if (cmd->data.setadapterparms.hdr.seq_no == 1) { @@ -4512,9 +4474,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card, /* check if there is enough room in userspace */ if ((qinfo->udata_len - qinfo->udata_offset) < data_len) { - QETH_CARD_TEXT_(card, 4, "scer3%i", -ENOMEM); - cmd->hdr.return_code = IPA_RC_ENOMEM; - return 0; + QETH_CARD_TEXT_(card, 4, "scer3%i", -ENOSPC); + return -ENOSPC; } QETH_CARD_TEXT_(card, 4, "snore%i", cmd->data.setadapterparms.hdr.used_total); @@ -4579,10 +4540,13 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata) rc = -ENOMEM; goto out; } + + /* for large requests, fix-up the length fields: */ + qeth_prepare_ipa_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len); + cmd = __ipa_cmd(iob); memcpy(&cmd->data.setadapterparms.data.snmp, &ureq->cmd, req_len); - rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len, - qeth_snmp_command_cb, (void *)&qinfo); + rc = qeth_send_ipa_cmd(card, iob, qeth_snmp_command_cb, &qinfo); if (rc) QETH_DBF_MESSAGE(2, "SNMP command failed on device %x: (%#x)\n", CARD_DEVID(card), rc); @@ -4606,16 +4570,14 @@ static int qeth_setadpparms_query_oat_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 3, "qoatcb"); if (qeth_setadpparms_inspect_rc(cmd)) - return 0; + return -EIO; priv = (struct qeth_qoat_priv *)reply->param; resdatalen = cmd->data.setadapterparms.hdr.cmdlength; resdata = (char *)data + 28; - if (resdatalen > (priv->buffer_len - priv->response_len)) { - cmd->hdr.return_code = IPA_RC_FFFF; - return 0; - } + if (resdatalen > (priv->buffer_len - priv->response_len)) + return -ENOSPC; memcpy((priv->buffer + priv->response_len), resdata, resdatalen); @@ -4688,9 +4650,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata) if (copy_to_user(udata, &oat_data, sizeof(struct qeth_query_oat_data))) rc = -EFAULT; - } else - if (rc == IPA_RC_FFFF) - rc = -EFAULT; + } out_free: vfree(priv.buffer); @@ -4707,7 +4667,7 @@ static int qeth_query_card_info_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "qcrdincb"); if (qeth_setadpparms_inspect_rc(cmd)) - return 0; + return -EIO; card_info = &cmd->data.setadapterparms.data.card_info; carrier_info->card_type = card_info->card_type; @@ -4716,8 +4676,8 @@ static int qeth_query_card_info_cb(struct qeth_card *card, return 0; } -static int qeth_query_card_info(struct qeth_card *card, - struct carrier_info *carrier_info) +int qeth_query_card_info(struct qeth_card *card, + struct carrier_info *carrier_info) { struct qeth_cmd_buffer *iob; @@ -4994,6 +4954,7 @@ static void qeth_core_free_card(struct qeth_card *card) qeth_clean_channel(&card->read); qeth_clean_channel(&card->write); qeth_clean_channel(&card->data); + destroy_workqueue(card->event_wq); qeth_free_qdio_buffers(card); unregister_service_level(&card->qeth_service_level); dev_set_drvdata(&card->gdev->dev, NULL); @@ -5108,12 +5069,10 @@ retriable: rc = qeth_send_startlan(card); if (rc) { QETH_DBF_TEXT_(SETUP, 2, "6err%d", rc); - if (rc == IPA_RC_LAN_OFFLINE) { - dev_warn(&card->gdev->dev, - "The LAN is offline\n"); + if (rc == -ENETDOWN) { + dev_warn(&card->gdev->dev, "The LAN is offline\n"); *carrier_ok = false; } else { - rc = -ENODEV; goto out; } } else { @@ -5265,7 +5224,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "unexeob"); QETH_CARD_HEX(card, 2, buffer, sizeof(void *)); dev_kfree_skb_any(skb); - card->stats.rx_errors++; + QETH_CARD_STAT_INC(card, rx_errors); return NULL; } element++; @@ -5277,16 +5236,17 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, } *__element = element; *__offset = offset; - if (use_rx_sg && card->options.performance_stats) { - card->perf_stats.sg_skbs_rx++; - card->perf_stats.sg_frags_rx += skb_shinfo(skb)->nr_frags; + if (use_rx_sg) { + QETH_CARD_STAT_INC(card, rx_sg_skbs); + QETH_CARD_STAT_ADD(card, rx_sg_frags, + skb_shinfo(skb)->nr_frags); } return skb; no_mem: if (net_ratelimit()) { QETH_CARD_TEXT(card, 2, "noskbmem"); } - card->stats.rx_dropped++; + QETH_CARD_STAT_INC(card, rx_dropped); return NULL; } EXPORT_SYMBOL_GPL(qeth_core_get_next_skb); @@ -5299,11 +5259,6 @@ int qeth_poll(struct napi_struct *napi, int budget) int done; int new_budget = budget; - if (card->options.performance_stats) { - card->perf_stats.inbound_cnt++; - card->perf_stats.inbound_start_time = qeth_get_micros(); - } - while (1) { if (!card->rx.b_count) { card->rx.qdio_err = 0; @@ -5332,8 +5287,7 @@ int qeth_poll(struct napi_struct *napi, int budget) done = 1; if (done) { - if (card->options.performance_stats) - card->perf_stats.bufs_rec++; + QETH_CARD_STAT_INC(card, rx_bufs); qeth_put_buffer_pool_entry(card, buffer->pool_entry); qeth_queue_input_buffer(card, card->rx.b_index); @@ -5361,9 +5315,6 @@ int qeth_poll(struct napi_struct *napi, int budget) if (qdio_start_irq(card->data.ccwdev, 0)) napi_schedule(&card->napi); out: - if (card->options.performance_stats) - card->perf_stats.inbound_time += qeth_get_micros() - - card->perf_stats.inbound_start_time; return work_done; } EXPORT_SYMBOL_GPL(qeth_poll); @@ -5383,7 +5334,7 @@ static int qeth_setassparms_get_caps_cb(struct qeth_card *card, struct qeth_ipa_caps *caps = reply->param; if (qeth_setassparms_inspect_rc(cmd)) - return 0; + return -EIO; caps->supported = cmd->data.setassparms.data.caps.supported; caps->enabled = cmd->data.setassparms.data.caps.enabled; @@ -5393,18 +5344,18 @@ static int qeth_setassparms_get_caps_cb(struct qeth_card *card, int qeth_setassparms_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 4, "defadpcb"); - cmd = (struct qeth_ipa_cmd *) data; - if (cmd->hdr.return_code == 0) { - cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code; - if (cmd->hdr.prot_version == QETH_PROT_IPV4) - card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled; - if (cmd->hdr.prot_version == QETH_PROT_IPV6) - card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled; - } + if (cmd->hdr.return_code) + return -EIO; + + cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code; + if (cmd->hdr.prot_version == QETH_PROT_IPV4) + card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled; + if (cmd->hdr.prot_version == QETH_PROT_IPV6) + card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled; return 0; } EXPORT_SYMBOL_GPL(qeth_setassparms_cb); @@ -5650,7 +5601,10 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) SET_NETDEV_DEV(dev, &card->gdev->dev); netif_carrier_off(dev); - if (!IS_OSN(card)) { + if (IS_OSN(card)) { + dev->ethtool_ops = &qeth_osn_ethtool_ops; + } else { + dev->ethtool_ops = &qeth_ethtool_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->hw_features |= NETIF_F_SG; dev->vlan_features |= NETIF_F_SG; @@ -5896,9 +5850,6 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!card) return -ENODEV; - if (card->info.type == QETH_CARD_TYPE_OSN) - return -EPERM; - switch (cmd) { case SIOC_QETH_ADP_SET_SNMP_CONTROL: rc = qeth_snmp_command(card, rq->ifr_ifru.ifru_data); @@ -5938,452 +5889,91 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } EXPORT_SYMBOL_GPL(qeth_do_ioctl); -static struct { - const char str[ETH_GSTRING_LEN]; -} qeth_ethtool_stats_keys[] = { -/* 0 */{"rx skbs"}, - {"rx buffers"}, - {"tx skbs"}, - {"tx buffers"}, - {"tx skbs no packing"}, - {"tx buffers no packing"}, - {"tx skbs packing"}, - {"tx buffers packing"}, - {"tx sg skbs"}, - {"tx buffer elements"}, -/* 10 */{"rx sg skbs"}, - {"rx sg frags"}, - {"rx sg page allocs"}, - {"tx large kbytes"}, - {"tx large count"}, - {"tx pk state ch n->p"}, - {"tx pk state ch p->n"}, - {"tx pk watermark low"}, - {"tx pk watermark high"}, - {"queue 0 buffer usage"}, -/* 20 */{"queue 1 buffer usage"}, - {"queue 2 buffer usage"}, - {"queue 3 buffer usage"}, - {"rx poll time"}, - {"rx poll count"}, - {"rx do_QDIO time"}, - {"rx do_QDIO count"}, - {"tx handler time"}, - {"tx handler count"}, - {"tx time"}, -/* 30 */{"tx count"}, - {"tx do_QDIO time"}, - {"tx do_QDIO count"}, - {"tx csum"}, - {"tx lin"}, - {"tx linfail"}, - {"cq handler count"}, - {"cq handler time"}, - {"rx csum"} -}; - -int qeth_core_get_sset_count(struct net_device *dev, int stringset) -{ - switch (stringset) { - case ETH_SS_STATS: - return (sizeof(qeth_ethtool_stats_keys) / ETH_GSTRING_LEN); - default: - return -EINVAL; - } -} -EXPORT_SYMBOL_GPL(qeth_core_get_sset_count); - -void qeth_core_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) -{ - struct qeth_card *card = dev->ml_priv; - data[0] = card->stats.rx_packets - - card->perf_stats.initial_rx_packets; - data[1] = card->perf_stats.bufs_rec; - data[2] = card->stats.tx_packets - - card->perf_stats.initial_tx_packets; - data[3] = card->perf_stats.bufs_sent; - data[4] = card->stats.tx_packets - card->perf_stats.initial_tx_packets - - card->perf_stats.skbs_sent_pack; - data[5] = card->perf_stats.bufs_sent - card->perf_stats.bufs_sent_pack; - data[6] = card->perf_stats.skbs_sent_pack; - data[7] = card->perf_stats.bufs_sent_pack; - data[8] = card->perf_stats.sg_skbs_sent; - data[9] = card->perf_stats.buf_elements_sent; - data[10] = card->perf_stats.sg_skbs_rx; - data[11] = card->perf_stats.sg_frags_rx; - data[12] = card->perf_stats.sg_alloc_page_rx; - data[13] = (card->perf_stats.large_send_bytes >> 10); - data[14] = card->perf_stats.large_send_cnt; - data[15] = card->perf_stats.sc_dp_p; - data[16] = card->perf_stats.sc_p_dp; - data[17] = QETH_LOW_WATERMARK_PACK; - data[18] = QETH_HIGH_WATERMARK_PACK; - data[19] = atomic_read(&card->qdio.out_qs[0]->used_buffers); - data[20] = (card->qdio.no_out_queues > 1) ? - atomic_read(&card->qdio.out_qs[1]->used_buffers) : 0; - data[21] = (card->qdio.no_out_queues > 2) ? - atomic_read(&card->qdio.out_qs[2]->used_buffers) : 0; - data[22] = (card->qdio.no_out_queues > 3) ? - atomic_read(&card->qdio.out_qs[3]->used_buffers) : 0; - data[23] = card->perf_stats.inbound_time; - data[24] = card->perf_stats.inbound_cnt; - data[25] = card->perf_stats.inbound_do_qdio_time; - data[26] = card->perf_stats.inbound_do_qdio_cnt; - data[27] = card->perf_stats.outbound_handler_time; - data[28] = card->perf_stats.outbound_handler_cnt; - data[29] = card->perf_stats.outbound_time; - data[30] = card->perf_stats.outbound_cnt; - data[31] = card->perf_stats.outbound_do_qdio_time; - data[32] = card->perf_stats.outbound_do_qdio_cnt; - data[33] = card->perf_stats.tx_csum; - data[34] = card->perf_stats.tx_lin; - data[35] = card->perf_stats.tx_linfail; - data[36] = card->perf_stats.cq_cnt; - data[37] = card->perf_stats.cq_time; - data[38] = card->perf_stats.rx_csum; -} -EXPORT_SYMBOL_GPL(qeth_core_get_ethtool_stats); - -void qeth_core_get_strings(struct net_device *dev, u32 stringset, u8 *data) -{ - switch (stringset) { - case ETH_SS_STATS: - memcpy(data, &qeth_ethtool_stats_keys, - sizeof(qeth_ethtool_stats_keys)); - break; - default: - WARN_ON(1); - break; - } -} -EXPORT_SYMBOL_GPL(qeth_core_get_strings); - -void qeth_core_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) -{ - struct qeth_card *card = dev->ml_priv; - - strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3", - sizeof(info->driver)); - strlcpy(info->version, "1.0", sizeof(info->version)); - strlcpy(info->fw_version, card->info.mcl_level, - sizeof(info->fw_version)); - snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s", - CARD_RDEV_ID(card), CARD_WDEV_ID(card), CARD_DDEV_ID(card)); -} -EXPORT_SYMBOL_GPL(qeth_core_get_drvinfo); - -/* Helper function to fill 'advertising' and 'supported' which are the same. */ -/* Autoneg and full-duplex are supported and advertised unconditionally. */ -/* Always advertise and support all speeds up to specified, and only one */ -/* specified port type. */ -static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd, - int maxspeed, int porttype) -{ - ethtool_link_ksettings_zero_link_mode(cmd, supported); - ethtool_link_ksettings_zero_link_mode(cmd, advertising); - ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising); - - ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); - - switch (porttype) { - case PORT_TP: - ethtool_link_ksettings_add_link_mode(cmd, supported, TP); - ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); - break; - case PORT_FIBRE: - ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); - break; - default: - ethtool_link_ksettings_add_link_mode(cmd, supported, TP); - ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); - WARN_ON_ONCE(1); - } - - /* partially does fall through, to also select lower speeds */ - switch (maxspeed) { - case SPEED_25000: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 25000baseSR_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 25000baseSR_Full); - break; - case SPEED_10000: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10000baseT_Full); - case SPEED_1000: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 1000baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 1000baseT_Half); - case SPEED_100: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 100baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 100baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 100baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 100baseT_Half); - case SPEED_10: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Half); - /* end fallthrough */ - break; - default: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Half); - WARN_ON_ONCE(1); - } -} - -int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) -{ - struct qeth_card *card = netdev->ml_priv; - enum qeth_link_types link_type; - struct carrier_info carrier_info; - int rc; - - if ((card->info.type == QETH_CARD_TYPE_IQD) || (card->info.guestlan)) - link_type = QETH_LINK_TYPE_10GBIT_ETH; - else - link_type = card->info.link_type; - - cmd->base.duplex = DUPLEX_FULL; - cmd->base.autoneg = AUTONEG_ENABLE; - cmd->base.phy_address = 0; - cmd->base.mdio_support = 0; - cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; - cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; - - switch (link_type) { - case QETH_LINK_TYPE_FAST_ETH: - case QETH_LINK_TYPE_LANE_ETH100: - cmd->base.speed = SPEED_100; - cmd->base.port = PORT_TP; - break; - case QETH_LINK_TYPE_GBIT_ETH: - case QETH_LINK_TYPE_LANE_ETH1000: - cmd->base.speed = SPEED_1000; - cmd->base.port = PORT_FIBRE; - break; - case QETH_LINK_TYPE_10GBIT_ETH: - cmd->base.speed = SPEED_10000; - cmd->base.port = PORT_FIBRE; - break; - case QETH_LINK_TYPE_25GBIT_ETH: - cmd->base.speed = SPEED_25000; - cmd->base.port = PORT_FIBRE; - break; - default: - cmd->base.speed = SPEED_10; - cmd->base.port = PORT_TP; - } - qeth_set_cmd_adv_sup(cmd, cmd->base.speed, cmd->base.port); - - /* Check if we can obtain more accurate information. */ - /* If QUERY_CARD_INFO command is not supported or fails, */ - /* just return the heuristics that was filled above. */ - rc = qeth_query_card_info(card, &carrier_info); - if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */ - return 0; - if (rc) /* report error from the hardware operation */ - return rc; - /* on success, fill in the information got from the hardware */ - - netdev_dbg(netdev, - "card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n", - carrier_info.card_type, - carrier_info.port_mode, - carrier_info.port_speed); - - /* Update attributes for which we've obtained more authoritative */ - /* information, leave the rest the way they where filled above. */ - switch (carrier_info.card_type) { - case CARD_INFO_TYPE_1G_COPPER_A: - case CARD_INFO_TYPE_1G_COPPER_B: - cmd->base.port = PORT_TP; - qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port); - break; - case CARD_INFO_TYPE_1G_FIBRE_A: - case CARD_INFO_TYPE_1G_FIBRE_B: - cmd->base.port = PORT_FIBRE; - qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port); - break; - case CARD_INFO_TYPE_10G_FIBRE_A: - case CARD_INFO_TYPE_10G_FIBRE_B: - cmd->base.port = PORT_FIBRE; - qeth_set_cmd_adv_sup(cmd, SPEED_10000, cmd->base.port); - break; - } - - switch (carrier_info.port_mode) { - case CARD_INFO_PORTM_FULLDUPLEX: - cmd->base.duplex = DUPLEX_FULL; - break; - case CARD_INFO_PORTM_HALFDUPLEX: - cmd->base.duplex = DUPLEX_HALF; - break; - } - - switch (carrier_info.port_speed) { - case CARD_INFO_PORTS_10M: - cmd->base.speed = SPEED_10; - break; - case CARD_INFO_PORTS_100M: - cmd->base.speed = SPEED_100; - break; - case CARD_INFO_PORTS_1G: - cmd->base.speed = SPEED_1000; - break; - case CARD_INFO_PORTS_10G: - cmd->base.speed = SPEED_10000; - break; - case CARD_INFO_PORTS_25G: - cmd->base.speed = SPEED_25000; - break; - } - - return 0; -} -EXPORT_SYMBOL_GPL(qeth_core_ethtool_get_link_ksettings); - -/* Callback to handle checksum offload command reply from OSA card. - * Verify that required features have been enabled on the card. - * Return error in hdr->return_code as this value is checked by caller. - * - * Always returns zero to indicate no further messages from the OSA card. - */ -static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card, - struct qeth_reply *reply, - unsigned long data) +static int qeth_start_csum_cb(struct qeth_card *card, struct qeth_reply *reply, + unsigned long data) { struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; - struct qeth_checksum_cmd *chksum_cb = - (struct qeth_checksum_cmd *)reply->param; + u32 *features = reply->param; - QETH_CARD_TEXT(card, 4, "chkdoccb"); if (qeth_setassparms_inspect_rc(cmd)) - return 0; + return -EIO; - memset(chksum_cb, 0, sizeof(*chksum_cb)); - if (cmd->data.setassparms.hdr.command_code == IPA_CMD_ASS_START) { - chksum_cb->supported = - cmd->data.setassparms.data.chksum.supported; - QETH_CARD_TEXT_(card, 3, "strt:%x", chksum_cb->supported); - } - if (cmd->data.setassparms.hdr.command_code == IPA_CMD_ASS_ENABLE) { - chksum_cb->supported = - cmd->data.setassparms.data.chksum.supported; - chksum_cb->enabled = - cmd->data.setassparms.data.chksum.enabled; - QETH_CARD_TEXT_(card, 3, "supp:%x", chksum_cb->supported); - QETH_CARD_TEXT_(card, 3, "enab:%x", chksum_cb->enabled); - } + *features = cmd->data.setassparms.data.flags_32bit; return 0; } -/* Send command to OSA card and check results. */ -static int qeth_ipa_checksum_run_cmd(struct qeth_card *card, - enum qeth_ipa_funcs ipa_func, - __u16 cmd_code, long data, - struct qeth_checksum_cmd *chksum_cb, - enum qeth_prot_versions prot) +static int qeth_set_csum_off(struct qeth_card *card, enum qeth_ipa_funcs cstype, + enum qeth_prot_versions prot) { - struct qeth_cmd_buffer *iob; - - QETH_CARD_TEXT(card, 4, "chkdocmd"); - iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code, - sizeof(__u32), prot); - if (!iob) - return -ENOMEM; - - __ipa_cmd(iob)->data.setassparms.data.flags_32bit = (__u32) data; - return qeth_send_ipa_cmd(card, iob, qeth_ipa_checksum_run_cmd_cb, - chksum_cb); + return qeth_send_simple_setassparms_prot(card, cstype, + IPA_CMD_ASS_STOP, 0, prot); } -static int qeth_send_checksum_on(struct qeth_card *card, int cstype, - enum qeth_prot_versions prot) +static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype, + enum qeth_prot_versions prot) { u32 required_features = QETH_IPA_CHECKSUM_UDP | QETH_IPA_CHECKSUM_TCP; - struct qeth_checksum_cmd chksum_cb; + struct qeth_cmd_buffer *iob; + struct qeth_ipa_caps caps; + u32 features; int rc; - if (prot == QETH_PROT_IPV4) + /* some L3 HW requires combined L3+L4 csum offload: */ + if (IS_LAYER3(card) && prot == QETH_PROT_IPV4 && + cstype == IPA_OUTBOUND_CHECKSUM) required_features |= QETH_IPA_CHECKSUM_IP_HDR; - rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_START, 0, - &chksum_cb, prot); - if (!rc) { - if ((required_features & chksum_cb.supported) != - required_features) - rc = -EIO; - else if (!(QETH_IPA_CHECKSUM_LP2LP & chksum_cb.supported) && - cstype == IPA_INBOUND_CHECKSUM) - dev_warn(&card->gdev->dev, - "Hardware checksumming is performed only if %s and its peer use different OSA Express 3 ports\n", - QETH_CARD_IFNAME(card)); - } - if (rc) { - qeth_send_simple_setassparms_prot(card, cstype, - IPA_CMD_ASS_STOP, 0, prot); - dev_warn(&card->gdev->dev, - "Starting HW IPv%d checksumming for %s failed, using SW checksumming\n", - prot, QETH_CARD_IFNAME(card)); + + iob = qeth_get_setassparms_cmd(card, cstype, IPA_CMD_ASS_START, 0, + prot); + if (!iob) + return -ENOMEM; + + rc = qeth_send_ipa_cmd(card, iob, qeth_start_csum_cb, &features); + if (rc) return rc; + + if ((required_features & features) != required_features) { + qeth_set_csum_off(card, cstype, prot); + return -EOPNOTSUPP; } - rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_ENABLE, - chksum_cb.supported, &chksum_cb, + + iob = qeth_get_setassparms_cmd(card, cstype, IPA_CMD_ASS_ENABLE, 4, prot); - if (!rc) { - if ((required_features & chksum_cb.enabled) != - required_features) - rc = -EIO; + if (!iob) { + qeth_set_csum_off(card, cstype, prot); + return -ENOMEM; } + + if (features & QETH_IPA_CHECKSUM_LP2LP) + required_features |= QETH_IPA_CHECKSUM_LP2LP; + __ipa_cmd(iob)->data.setassparms.data.flags_32bit = required_features; + rc = qeth_send_ipa_cmd(card, iob, qeth_setassparms_get_caps_cb, &caps); if (rc) { - qeth_send_simple_setassparms_prot(card, cstype, - IPA_CMD_ASS_STOP, 0, prot); - dev_warn(&card->gdev->dev, - "Enabling HW IPv%d checksumming for %s failed, using SW checksumming\n", - prot, QETH_CARD_IFNAME(card)); + qeth_set_csum_off(card, cstype, prot); return rc; } + if (!qeth_ipa_caps_supported(&caps, required_features) || + !qeth_ipa_caps_enabled(&caps, required_features)) { + qeth_set_csum_off(card, cstype, prot); + return -EOPNOTSUPP; + } + dev_info(&card->gdev->dev, "HW Checksumming (%sbound IPv%d) enabled\n", cstype == IPA_INBOUND_CHECKSUM ? "in" : "out", prot); + if (!qeth_ipa_caps_enabled(&caps, QETH_IPA_CHECKSUM_LP2LP) && + cstype == IPA_OUTBOUND_CHECKSUM) + dev_warn(&card->gdev->dev, + "Hardware checksumming is performed only if %s and its peer use different OSA Express 3 ports\n", + QETH_CARD_IFNAME(card)); return 0; } static int qeth_set_ipa_csum(struct qeth_card *card, bool on, int cstype, enum qeth_prot_versions prot) { - int rc = (on) ? qeth_send_checksum_on(card, cstype, prot) - : qeth_send_simple_setassparms_prot(card, cstype, - IPA_CMD_ASS_STOP, 0, - prot); - return rc ? -EIO : 0; + return on ? qeth_set_csum_on(card, cstype, prot) : + qeth_set_csum_off(card, cstype, prot); } static int qeth_start_tso_cb(struct qeth_card *card, struct qeth_reply *reply, @@ -6393,7 +5983,7 @@ static int qeth_start_tso_cb(struct qeth_card *card, struct qeth_reply *reply, struct qeth_tso_start_data *tso_data = reply->param; if (qeth_setassparms_inspect_rc(cmd)) - return 0; + return -EIO; tso_data->mss = cmd->data.setassparms.data.tso.mss; tso_data->supported = cmd->data.setassparms.data.tso.supported; @@ -6459,10 +6049,7 @@ static int qeth_set_tso_on(struct qeth_card *card, static int qeth_set_ipa_tso(struct qeth_card *card, bool on, enum qeth_prot_versions prot) { - int rc = on ? qeth_set_tso_on(card, prot) : - qeth_set_tso_off(card, prot); - - return rc ? -EIO : 0; + return on ? qeth_set_tso_on(card, prot) : qeth_set_tso_off(card, prot); } static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on) @@ -6611,6 +6198,33 @@ netdev_features_t qeth_features_check(struct sk_buff *skb, } EXPORT_SYMBOL_GPL(qeth_features_check); +void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct qeth_card *card = dev->ml_priv; + struct qeth_qdio_out_q *queue; + unsigned int i; + + QETH_CARD_TEXT(card, 5, "getstat"); + + stats->rx_packets = card->stats.rx_packets; + stats->rx_bytes = card->stats.rx_bytes; + stats->rx_errors = card->stats.rx_errors; + stats->rx_dropped = card->stats.rx_dropped; + stats->multicast = card->stats.rx_multicast; + stats->tx_errors = card->stats.tx_errors; + + for (i = 0; i < card->qdio.no_out_queues; i++) { + queue = card->qdio.out_qs[i]; + + stats->tx_packets += queue->stats.tx_packets; + stats->tx_bytes += queue->stats.tx_bytes; + stats->tx_errors += queue->stats.tx_errors; + stats->tx_dropped += queue->stats.tx_dropped; + stats->tx_carrier_errors += queue->stats.tx_carrier_errors; + } +} +EXPORT_SYMBOL_GPL(qeth_get_stats64); + int qeth_open(struct net_device *dev) { struct qeth_card *card = dev->ml_priv; diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 16fc51ad0514..e3f4866c158e 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -125,24 +125,13 @@ unsigned char DM_ACT[] = { unsigned char IPA_PDU_HEADER[] = { 0x00, 0xe0, 0x00, 0x00, 0x77, 0x77, 0x77, 0x77, - 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, - (IPA_PDU_HEADER_SIZE+sizeof(struct qeth_ipa_cmd)) / 256, - (IPA_PDU_HEADER_SIZE+sizeof(struct qeth_ipa_cmd)) % 256, + 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xc1, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, - sizeof(struct qeth_ipa_cmd) / 256, - sizeof(struct qeth_ipa_cmd) % 256, - 0x00, - sizeof(struct qeth_ipa_cmd) / 256, - sizeof(struct qeth_ipa_cmd) % 256, - 0x05, - 0x77, 0x77, 0x77, 0x77, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x05, 0x77, 0x77, 0x77, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, - sizeof(struct qeth_ipa_cmd) / 256, - sizeof(struct qeth_ipa_cmd) % 256, - 0x00, 0x00, 0x00, 0x40, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, }; struct ipa_rc_msg { @@ -212,12 +201,10 @@ static const struct ipa_rc_msg qeth_ipa_rc_msg[] = { {IPA_RC_LAN_OFFLINE, "STRTLAN_LAN_DISABLED - LAN offline"}, {IPA_RC_VEPA_TO_VEB_TRANSITION, "Adj. switch disabled port mode RR"}, {IPA_RC_INVALID_IP_VERSION2, "Invalid IP version"}, - {IPA_RC_ENOMEM, "Memory problem"}, + /* default for qeth_get_ipa_msg(): */ {IPA_RC_FFFF, "Unknown Error"} }; - - const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc) { int x; diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index c7fb14a61eed..f8c5d4a9be13 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -21,8 +21,6 @@ extern unsigned char IPA_PDU_HEADER[]; #define QETH_IPA_CMD_DEST_ADDR(buffer) (buffer + 0x2c) -#define IPA_CMD_LENGTH (IPA_PDU_HEADER_SIZE + sizeof(struct qeth_ipa_cmd)) - #define QETH_SEQ_NO_LENGTH 4 #define QETH_MPC_TOKEN_LENGTH 4 #define QETH_MCL_LENGTH 4 @@ -83,6 +81,7 @@ enum qeth_card_types { #define IS_OSD(card) ((card)->info.type == QETH_CARD_TYPE_OSD) #define IS_OSM(card) ((card)->info.type == QETH_CARD_TYPE_OSM) #define IS_OSN(card) ((card)->info.type == QETH_CARD_TYPE_OSN) +#define IS_OSX(card) ((card)->info.type == QETH_CARD_TYPE_OSX) #define IS_VM_NIC(card) ((card)->info.guestlan) #define QETH_MPC_DIFINFO_LEN_INDICATES_LINK_TYPE 0x18 @@ -231,7 +230,6 @@ enum qeth_ipa_return_codes { IPA_RC_LAN_OFFLINE = 0xe080, IPA_RC_VEPA_TO_VEB_TRANSITION = 0xe090, IPA_RC_INVALID_IP_VERSION2 = 0xf001, - IPA_RC_ENOMEM = 0xfffe, IPA_RC_FFFF = 0xffff }; /* for VNIC Characteristics */ @@ -419,12 +417,6 @@ enum qeth_ipa_checksum_bits { QETH_IPA_CHECKSUM_LP2LP = 0x0020 }; -/* IPA Assist checksum offload reply layout. */ -struct qeth_checksum_cmd { - __u32 supported; - __u32 enabled; -} __packed; - enum qeth_ipa_large_send_caps { QETH_IPA_LARGE_SEND_TCP = 0x00000001, }; @@ -440,7 +432,6 @@ struct qeth_ipacmd_setassparms { union { __u32 flags_32bit; struct qeth_ipa_caps caps; - struct qeth_checksum_cmd chksum; struct qeth_arp_cache_entry arp_entry; struct qeth_arp_query_data query_arp; struct qeth_tso_start_data tso; @@ -834,15 +825,10 @@ enum qeth_ipa_arp_return_codes { extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc); extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd); -#define QETH_SETASS_BASE_LEN (IPA_PDU_HEADER_SIZE + \ - sizeof(struct qeth_ipacmd_hdr) + \ - sizeof(struct qeth_ipacmd_setassparms_hdr)) #define QETH_SETADP_BASE_LEN (sizeof(struct qeth_ipacmd_hdr) + \ sizeof(struct qeth_ipacmd_setadpparms_hdr)) #define QETH_SNMP_SETADP_CMDLENGTH 16 -#define QETH_ARP_DATA_SIZE 3968 -#define QETH_ARP_CMD_LEN (QETH_ARP_DATA_SIZE + 8) /* Helper functions */ #define IS_IPA_REPLY(cmd) ((cmd->hdr.initiator == IPA_CMD_INITIATOR_HOST) || \ (cmd->hdr.initiator == IPA_CMD_INITIATOR_OSA_REPLY)) diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 30f61608fa22..8b223cc2c19b 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -316,7 +316,7 @@ static ssize_t qeth_dev_recover_store(struct device *dev, if (!card) return -EINVAL; - if (card->state != CARD_STATE_UP) + if (!qeth_card_hw_is_reachable(card)) return -EPERM; i = simple_strtoul(buf, &tmp, 16); @@ -336,35 +336,36 @@ static ssize_t qeth_dev_performance_stats_show(struct device *dev, if (!card) return -EINVAL; - return sprintf(buf, "%i\n", card->options.performance_stats ? 1:0); + return sprintf(buf, "1\n"); } static ssize_t qeth_dev_performance_stats_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); - char *tmp; - int i, rc = 0; + struct qeth_qdio_out_q *queue; + unsigned int i; + bool reset; + int rc; if (!card) return -EINVAL; - mutex_lock(&card->conf_mutex); - i = simple_strtoul(buf, &tmp, 16); - if ((i == 0) || (i == 1)) { - if (i == card->options.performance_stats) - goto out; - card->options.performance_stats = i; - if (i == 0) - memset(&card->perf_stats, 0, - sizeof(struct qeth_perf_stats)); - card->perf_stats.initial_rx_packets = card->stats.rx_packets; - card->perf_stats.initial_tx_packets = card->stats.tx_packets; - } else - rc = -EINVAL; -out: - mutex_unlock(&card->conf_mutex); - return rc ? rc : count; + rc = kstrtobool(buf, &reset); + if (rc) + return rc; + + if (reset) { + memset(&card->stats, 0, sizeof(card->stats)); + for (i = 0; i < card->qdio.no_out_queues; i++) { + queue = card->qdio.out_qs[i]; + if (!queue) + break; + memset(&queue->stats, 0, sizeof(queue->stats)); + } + } + + return count; } static DEVICE_ATTR(performance_stats, 0644, qeth_dev_performance_stats_show, diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c new file mode 100644 index 000000000000..93a53fed4cf8 --- /dev/null +++ b/drivers/s390/net/qeth_ethtool.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2018 + */ + +#define KMSG_COMPONENT "qeth" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/ethtool.h> +#include "qeth_core.h" + + +#define QETH_TXQ_STAT(_name, _stat) { \ + .name = _name, \ + .offset = offsetof(struct qeth_out_q_stats, _stat) \ +} + +#define QETH_CARD_STAT(_name, _stat) { \ + .name = _name, \ + .offset = offsetof(struct qeth_card_stats, _stat) \ +} + +struct qeth_stats { + char name[ETH_GSTRING_LEN]; + unsigned int offset; +}; + +static const struct qeth_stats txq_stats[] = { + QETH_TXQ_STAT("IO buffers", bufs), + QETH_TXQ_STAT("IO buffer elements", buf_elements), + QETH_TXQ_STAT("packed IO buffers", bufs_pack), + QETH_TXQ_STAT("skbs", tx_packets), + QETH_TXQ_STAT("packed skbs", skbs_pack), + QETH_TXQ_STAT("SG skbs", skbs_sg), + QETH_TXQ_STAT("HW csum skbs", skbs_csum), + QETH_TXQ_STAT("TSO skbs", skbs_tso), + QETH_TXQ_STAT("linearized skbs", skbs_linearized), + QETH_TXQ_STAT("linearized+error skbs", skbs_linearized_fail), + QETH_TXQ_STAT("TSO bytes", tso_bytes), + QETH_TXQ_STAT("Packing mode switches", packing_mode_switch), +}; + +static const struct qeth_stats card_stats[] = { + QETH_CARD_STAT("rx0 IO buffers", rx_bufs), + QETH_CARD_STAT("rx0 HW csum skbs", rx_skb_csum), + QETH_CARD_STAT("rx0 SG skbs", rx_sg_skbs), + QETH_CARD_STAT("rx0 SG page frags", rx_sg_frags), + QETH_CARD_STAT("rx0 SG page allocs", rx_sg_alloc_page), +}; + +#define TXQ_STATS_LEN ARRAY_SIZE(txq_stats) +#define CARD_STATS_LEN ARRAY_SIZE(card_stats) + +static void qeth_add_stat_data(u64 **dst, void *src, + const struct qeth_stats stats[], + unsigned int size) +{ + unsigned int i; + char *stat; + + for (i = 0; i < size; i++) { + stat = (char *)src + stats[i].offset; + **dst = *(u64 *)stat; + (*dst)++; + } +} + +static void qeth_add_stat_strings(u8 **data, const char *prefix, + const struct qeth_stats stats[], + unsigned int size) +{ + unsigned int i; + + for (i = 0; i < size; i++) { + snprintf(*data, ETH_GSTRING_LEN, "%s%s", prefix, stats[i].name); + *data += ETH_GSTRING_LEN; + } +} + +static int qeth_get_sset_count(struct net_device *dev, int stringset) +{ + struct qeth_card *card = dev->ml_priv; + + switch (stringset) { + case ETH_SS_STATS: + return CARD_STATS_LEN + + card->qdio.no_out_queues * TXQ_STATS_LEN; + default: + return -EINVAL; + } +} + +static void qeth_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct qeth_card *card = dev->ml_priv; + unsigned int i; + + qeth_add_stat_data(&data, &card->stats, card_stats, CARD_STATS_LEN); + for (i = 0; i < card->qdio.no_out_queues; i++) + qeth_add_stat_data(&data, &card->qdio.out_qs[i]->stats, + txq_stats, TXQ_STATS_LEN); +} + +static void qeth_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct qeth_card *card = dev->ml_priv; + + param->rx_max_pending = QDIO_MAX_BUFFERS_PER_Q; + param->rx_mini_max_pending = 0; + param->rx_jumbo_max_pending = 0; + param->tx_max_pending = QDIO_MAX_BUFFERS_PER_Q; + + param->rx_pending = card->qdio.in_buf_pool.buf_count; + param->rx_mini_pending = 0; + param->rx_jumbo_pending = 0; + param->tx_pending = QDIO_MAX_BUFFERS_PER_Q; +} + +static void qeth_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct qeth_card *card = dev->ml_priv; + char prefix[ETH_GSTRING_LEN] = ""; + unsigned int i; + + switch (stringset) { + case ETH_SS_STATS: + qeth_add_stat_strings(&data, prefix, card_stats, + CARD_STATS_LEN); + for (i = 0; i < card->qdio.no_out_queues; i++) { + snprintf(prefix, ETH_GSTRING_LEN, "tx%u ", i); + qeth_add_stat_strings(&data, prefix, txq_stats, + TXQ_STATS_LEN); + } + break; + default: + WARN_ON(1); + break; + } +} + +static void qeth_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct qeth_card *card = dev->ml_priv; + + strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3", + sizeof(info->driver)); + strlcpy(info->version, "1.0", sizeof(info->version)); + strlcpy(info->fw_version, card->info.mcl_level, + sizeof(info->fw_version)); + snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s", + CARD_RDEV_ID(card), CARD_WDEV_ID(card), CARD_DDEV_ID(card)); +} + +/* Helper function to fill 'advertising' and 'supported' which are the same. */ +/* Autoneg and full-duplex are supported and advertised unconditionally. */ +/* Always advertise and support all speeds up to specified, and only one */ +/* specified port type. */ +static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd, + int maxspeed, int porttype) +{ + ethtool_link_ksettings_zero_link_mode(cmd, supported); + ethtool_link_ksettings_zero_link_mode(cmd, advertising); + ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising); + + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); + + switch (porttype) { + case PORT_TP: + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); + break; + case PORT_FIBRE: + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); + break; + default: + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); + WARN_ON_ONCE(1); + } + + /* partially does fall through, to also select lower speeds */ + switch (maxspeed) { + case SPEED_25000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 25000baseSR_Full); + break; + case SPEED_10000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10000baseT_Full); + /* fall through */ + case SPEED_1000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 1000baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 1000baseT_Half); + /* fall through */ + case SPEED_100: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 100baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 100baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 100baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 100baseT_Half); + /* fall through */ + case SPEED_10: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10baseT_Half); + break; + default: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10baseT_Half); + WARN_ON_ONCE(1); + } +} + + +static int qeth_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct qeth_card *card = netdev->ml_priv; + enum qeth_link_types link_type; + struct carrier_info carrier_info; + int rc; + + if (IS_IQD(card) || IS_VM_NIC(card)) + link_type = QETH_LINK_TYPE_10GBIT_ETH; + else + link_type = card->info.link_type; + + cmd->base.duplex = DUPLEX_FULL; + cmd->base.autoneg = AUTONEG_ENABLE; + cmd->base.phy_address = 0; + cmd->base.mdio_support = 0; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + + switch (link_type) { + case QETH_LINK_TYPE_FAST_ETH: + case QETH_LINK_TYPE_LANE_ETH100: + cmd->base.speed = SPEED_100; + cmd->base.port = PORT_TP; + break; + case QETH_LINK_TYPE_GBIT_ETH: + case QETH_LINK_TYPE_LANE_ETH1000: + cmd->base.speed = SPEED_1000; + cmd->base.port = PORT_FIBRE; + break; + case QETH_LINK_TYPE_10GBIT_ETH: + cmd->base.speed = SPEED_10000; + cmd->base.port = PORT_FIBRE; + break; + case QETH_LINK_TYPE_25GBIT_ETH: + cmd->base.speed = SPEED_25000; + cmd->base.port = PORT_FIBRE; + break; + default: + cmd->base.speed = SPEED_10; + cmd->base.port = PORT_TP; + } + qeth_set_cmd_adv_sup(cmd, cmd->base.speed, cmd->base.port); + + /* Check if we can obtain more accurate information. */ + /* If QUERY_CARD_INFO command is not supported or fails, */ + /* just return the heuristics that was filled above. */ + rc = qeth_query_card_info(card, &carrier_info); + if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */ + return 0; + if (rc) /* report error from the hardware operation */ + return rc; + /* on success, fill in the information got from the hardware */ + + netdev_dbg(netdev, + "card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n", + carrier_info.card_type, + carrier_info.port_mode, + carrier_info.port_speed); + + /* Update attributes for which we've obtained more authoritative */ + /* information, leave the rest the way they where filled above. */ + switch (carrier_info.card_type) { + case CARD_INFO_TYPE_1G_COPPER_A: + case CARD_INFO_TYPE_1G_COPPER_B: + cmd->base.port = PORT_TP; + qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port); + break; + case CARD_INFO_TYPE_1G_FIBRE_A: + case CARD_INFO_TYPE_1G_FIBRE_B: + cmd->base.port = PORT_FIBRE; + qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port); + break; + case CARD_INFO_TYPE_10G_FIBRE_A: + case CARD_INFO_TYPE_10G_FIBRE_B: + cmd->base.port = PORT_FIBRE; + qeth_set_cmd_adv_sup(cmd, SPEED_10000, cmd->base.port); + break; + } + + switch (carrier_info.port_mode) { + case CARD_INFO_PORTM_FULLDUPLEX: + cmd->base.duplex = DUPLEX_FULL; + break; + case CARD_INFO_PORTM_HALFDUPLEX: + cmd->base.duplex = DUPLEX_HALF; + break; + } + + switch (carrier_info.port_speed) { + case CARD_INFO_PORTS_10M: + cmd->base.speed = SPEED_10; + break; + case CARD_INFO_PORTS_100M: + cmd->base.speed = SPEED_100; + break; + case CARD_INFO_PORTS_1G: + cmd->base.speed = SPEED_1000; + break; + case CARD_INFO_PORTS_10G: + cmd->base.speed = SPEED_10000; + break; + case CARD_INFO_PORTS_25G: + cmd->base.speed = SPEED_25000; + break; + } + + return 0; +} + +const struct ethtool_ops qeth_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_ringparam = qeth_get_ringparam, + .get_strings = qeth_get_strings, + .get_ethtool_stats = qeth_get_ethtool_stats, + .get_sset_count = qeth_get_sset_count, + .get_drvinfo = qeth_get_drvinfo, + .get_link_ksettings = qeth_get_link_ksettings, +}; + +const struct ethtool_ops qeth_osn_ethtool_ops = { + .get_strings = qeth_get_strings, + .get_ethtool_stats = qeth_get_ethtool_stats, + .get_sset_count = qeth_get_sset_count, + .get_drvinfo = qeth_get_drvinfo, +}; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 82f50cc30b0a..2c9714215775 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -35,7 +35,7 @@ static void qeth_l2_vnicc_init(struct qeth_card *card); static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc, u32 *timeout); -static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode) +static int qeth_l2_setdelmac_makerc(struct qeth_card *card, u16 retcode) { int rc; @@ -62,9 +62,6 @@ static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode) case IPA_RC_L2_MAC_NOT_FOUND: rc = -ENOENT; break; - case -ENOMEM: - rc = -ENOMEM; - break; default: rc = -EIO; break; @@ -72,6 +69,15 @@ static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode) return rc; } +static int qeth_l2_send_setdelmac_cb(struct qeth_card *card, + struct qeth_reply *reply, + unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + + return qeth_l2_setdelmac_makerc(card, cmd->hdr.return_code); +} + static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac, enum qeth_ipa_cmds ipacmd) { @@ -85,8 +91,7 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac, cmd = __ipa_cmd(iob); cmd->data.setdelmac.mac_length = ETH_ALEN; ether_addr_copy(cmd->data.setdelmac.mac, mac); - return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob, - NULL, NULL)); + return qeth_send_ipa_cmd(card, iob, qeth_l2_send_setdelmac_cb, NULL); } static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) @@ -169,9 +174,9 @@ static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb) return RTN_UNICAST; } -static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, - struct sk_buff *skb, int ipv, int cast_type, - unsigned int data_len) +static void qeth_l2_fill_header(struct qeth_qdio_out_q *queue, + struct qeth_hdr *hdr, struct sk_buff *skb, + int ipv, int cast_type, unsigned int data_len) { struct vlan_ethhdr *veth = vlan_eth_hdr(skb); @@ -183,8 +188,7 @@ static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2; if (skb->ip_summed == CHECKSUM_PARTIAL) { qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], ipv); - if (card->options.performance_stats) - card->perf_stats.tx_csum++; + QETH_TXQ_STAT_INC(queue, skbs_csum); } } @@ -205,7 +209,7 @@ static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, } } -static int qeth_setdelvlan_makerc(struct qeth_card *card, int retcode) +static int qeth_l2_setdelvlan_makerc(struct qeth_card *card, u16 retcode) { if (retcode) QETH_CARD_TEXT_(card, 2, "err%04x", retcode); @@ -221,8 +225,6 @@ static int qeth_setdelvlan_makerc(struct qeth_card *card, int retcode) return -ENOENT; case IPA_RC_L2_VLAN_ID_NOT_ALLOWED: return -EPERM; - case -ENOMEM: - return -ENOMEM; default: return -EIO; } @@ -240,9 +242,8 @@ static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card, cmd->data.setdelvlan.vlan_id, CARD_DEVID(card), cmd->hdr.return_code); QETH_CARD_TEXT_(card, 2, "L2VL%4x", cmd->hdr.command); - QETH_CARD_TEXT_(card, 2, "err%d", cmd->hdr.return_code); } - return 0; + return qeth_l2_setdelvlan_makerc(card, cmd->hdr.return_code); } static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i, @@ -257,8 +258,7 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i, return -ENOMEM; cmd = __ipa_cmd(iob); cmd->data.setdelvlan.vlan_id = i; - return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob, - qeth_l2_send_setdelvlan_cb, NULL)); + return qeth_send_ipa_cmd(card, iob, qeth_l2_send_setdelvlan_cb, NULL); } static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, @@ -319,6 +319,8 @@ static void qeth_l2_stop_card(struct qeth_card *card, int recovery_mode) qeth_clear_cmd_buffers(&card->read); qeth_clear_cmd_buffers(&card->write); } + + flush_workqueue(card->event_wq); } static int qeth_l2_process_inbound_buffer(struct qeth_card *card, @@ -366,8 +368,8 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card, } work_done++; budget--; - card->stats.rx_packets++; - card->stats.rx_bytes += len; + QETH_CARD_STAT_INC(card, rx_packets); + QETH_CARD_STAT_ADD(card, rx_bytes, len); } return work_done; } @@ -391,7 +393,7 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card) if (!IS_OSN(card)) { rc = qeth_setadpparms_change_macaddr(card); - if (!rc && is_valid_ether_addr(card->dev->dev_addr)) + if (!rc) goto out; QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n", CARD_DEVID(card), rc); @@ -423,7 +425,7 @@ static int qeth_l2_validate_addr(struct net_device *dev) { struct qeth_card *card = dev->ml_priv; - if (IS_OSN(card) || (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) + if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED) return eth_validate_addr(dev); QETH_CARD_TEXT(card, 4, "nomacadr"); @@ -439,9 +441,7 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) QETH_CARD_TEXT(card, 3, "setmac"); - if (card->info.type == QETH_CARD_TYPE_OSN || - card->info.type == QETH_CARD_TYPE_OSM || - card->info.type == QETH_CARD_TYPE_OSX) { + if (IS_OSM(card) || IS_OSX(card)) { QETH_CARD_TEXT(card, 3, "setmcTYP"); return -EOPNOTSUPP; } @@ -535,9 +535,6 @@ static void qeth_l2_set_rx_mode(struct net_device *dev) int i; int rc; - if (card->info.type == QETH_CARD_TYPE_OSN) - return; - QETH_CARD_TEXT(card, 3, "setmulti"); spin_lock_bh(&card->mclock); @@ -623,17 +620,13 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, int tx_bytes = skb->len; int rc; + queue = qeth_get_tx_queue(card, skb, ipv, cast_type); + if (card->state != CARD_STATE_UP) { - card->stats.tx_carrier_errors++; + QETH_TXQ_STAT_INC(queue, tx_carrier_errors); goto tx_drop; } - queue = qeth_get_tx_queue(card, skb, ipv, cast_type); - - if (card->options.performance_stats) { - card->perf_stats.outbound_cnt++; - card->perf_stats.outbound_start_time = qeth_get_micros(); - } netif_stop_queue(dev); if (IS_OSN(card)) @@ -643,11 +636,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, qeth_l2_fill_header); if (!rc) { - card->stats.tx_packets++; - card->stats.tx_bytes += tx_bytes; - if (card->options.performance_stats) - card->perf_stats.outbound_time += qeth_get_micros() - - card->perf_stats.outbound_start_time; + QETH_TXQ_STAT_INC(queue, tx_packets); + QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes); netif_wake_queue(dev); return NETDEV_TX_OK; } else if (rc == -EBUSY) { @@ -655,8 +645,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, } /* else fall through */ tx_drop: - card->stats.tx_dropped++; - card->stats.tx_errors++; + QETH_TXQ_STAT_INC(queue, tx_dropped); + QETH_TXQ_STAT_INC(queue, tx_errors); dev_kfree_skb_any(skb); netif_wake_queue(dev); return NETDEV_TX_OK; @@ -695,30 +685,16 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) if (cgdev->state == CCWGROUP_ONLINE) qeth_l2_set_offline(cgdev); + + cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); } -static const struct ethtool_ops qeth_l2_ethtool_ops = { - .get_link = ethtool_op_get_link, - .get_strings = qeth_core_get_strings, - .get_ethtool_stats = qeth_core_get_ethtool_stats, - .get_sset_count = qeth_core_get_sset_count, - .get_drvinfo = qeth_core_get_drvinfo, - .get_link_ksettings = qeth_core_ethtool_get_link_ksettings, -}; - -static const struct ethtool_ops qeth_l2_osn_ops = { - .get_strings = qeth_core_get_strings, - .get_ethtool_stats = qeth_core_get_ethtool_stats, - .get_sset_count = qeth_core_get_sset_count, - .get_drvinfo = qeth_core_get_drvinfo, -}; - static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_open = qeth_open, .ndo_stop = qeth_stop, - .ndo_get_stats = qeth_get_stats, + .ndo_get_stats64 = qeth_get_stats64, .ndo_start_xmit = qeth_l2_hard_start_xmit, .ndo_features_check = qeth_features_check, .ndo_validate_addr = qeth_l2_validate_addr, @@ -732,20 +708,29 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_set_features = qeth_set_features }; +static const struct net_device_ops qeth_osn_netdev_ops = { + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, + .ndo_get_stats64 = qeth_get_stats64, + .ndo_start_xmit = qeth_l2_hard_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_tx_timeout = qeth_tx_timeout, +}; + static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) { int rc; - card->dev->priv_flags |= IFF_UNICAST_FLT; - card->dev->netdev_ops = &qeth_l2_netdev_ops; - if (card->info.type == QETH_CARD_TYPE_OSN) { - card->dev->ethtool_ops = &qeth_l2_osn_ops; + if (IS_OSN(card)) { + card->dev->netdev_ops = &qeth_osn_netdev_ops; card->dev->flags |= IFF_NOARP; - } else { - card->dev->ethtool_ops = &qeth_l2_ethtool_ops; - card->dev->needed_headroom = sizeof(struct qeth_hdr); + goto add_napi; } + card->dev->needed_headroom = sizeof(struct qeth_hdr); + card->dev->netdev_ops = &qeth_l2_netdev_ops; + card->dev->priv_flags |= IFF_UNICAST_FLT; + if (IS_OSM(card)) { card->dev->features |= NETIF_F_VLAN_CHALLENGED; } else { @@ -786,6 +771,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1)); } +add_napi: netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); rc = register_netdev(card->dev); if (!rc && carrier_ok) @@ -1120,20 +1106,14 @@ static int qeth_osn_send_control_data(struct qeth_card *card, int len, } static int qeth_osn_send_ipa_cmd(struct qeth_card *card, - struct qeth_cmd_buffer *iob, int data_len) + struct qeth_cmd_buffer *iob) { - u16 s1, s2; + u16 length; QETH_CARD_TEXT(card, 4, "osndipa"); - qeth_prepare_ipa_cmd(card, iob); - s1 = (u16)(IPA_PDU_HEADER_SIZE + data_len); - s2 = (u16)data_len; - memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &s1, 2); - memcpy(QETH_IPA_PDU_LEN_PDU1(iob->data), &s2, 2); - memcpy(QETH_IPA_PDU_LEN_PDU2(iob->data), &s2, 2); - memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &s2, 2); - return qeth_osn_send_control_data(card, s1, iob); + memcpy(&length, QETH_IPA_PDU_LEN_TOTAL(iob->data), 2); + return qeth_osn_send_control_data(card, length, iob); } int qeth_osn_assist(struct net_device *dev, void *data, int data_len) @@ -1150,8 +1130,9 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len) if (!qeth_card_hw_is_reachable(card)) return -ENODEV; iob = qeth_wait_for_buffer(&card->write); + qeth_prepare_ipa_cmd(card, iob, (u16) data_len); memcpy(__ipa_cmd(iob), data, data_len); - return qeth_osn_send_ipa_cmd(card, iob, data_len); + return qeth_osn_send_ipa_cmd(card, iob); } EXPORT_SYMBOL(qeth_osn_assist); @@ -1330,7 +1311,7 @@ static void qeth_bridge_state_change(struct qeth_card *card, data->card = card; memcpy(&data->qports, qports, sizeof(struct qeth_sbp_state_change) + extrasize); - queue_work(qeth_wq, &data->worker); + queue_work(card->event_wq, &data->worker); } struct qeth_bridge_host_data { @@ -1402,14 +1383,12 @@ static void qeth_bridge_host_event(struct qeth_card *card, data->card = card; memcpy(&data->hostevs, hostevs, sizeof(struct qeth_ipacmd_addr_change) + extrasize); - queue_work(qeth_wq, &data->worker); + queue_work(card->event_wq, &data->worker); } /* SETBRIDGEPORT support; sending commands */ struct _qeth_sbp_cbctl { - u16 ipa_rc; - u16 cmd_rc; union { u32 supported; struct { @@ -1419,23 +1398,21 @@ struct _qeth_sbp_cbctl { } data; }; -/** - * qeth_bridgeport_makerc() - derive "traditional" error from hardware codes. - * @card: qeth_card structure pointer, for debug messages. - * @cbctl: state structure with hardware return codes. - * @setcmd: IPA command code - * - * Returns negative errno-compatible error indication or 0 on success. - */ static int qeth_bridgeport_makerc(struct qeth_card *card, - struct _qeth_sbp_cbctl *cbctl, enum qeth_ipa_sbp_cmd setcmd) + struct qeth_ipa_cmd *cmd) { + struct qeth_ipacmd_setbridgeport *sbp = &cmd->data.sbp; + enum qeth_ipa_sbp_cmd setcmd = sbp->hdr.command_code; + u16 ipa_rc = cmd->hdr.return_code; + u16 sbp_rc = sbp->hdr.return_code; int rc; - int is_iqd = (card->info.type == QETH_CARD_TYPE_IQD); - if ((is_iqd && (cbctl->ipa_rc == IPA_RC_SUCCESS)) || - (!is_iqd && (cbctl->ipa_rc == cbctl->cmd_rc))) - switch (cbctl->cmd_rc) { + if (ipa_rc == IPA_RC_SUCCESS && sbp_rc == IPA_RC_SUCCESS) + return 0; + + if ((IS_IQD(card) && ipa_rc == IPA_RC_SUCCESS) || + (!IS_IQD(card) && ipa_rc == sbp_rc)) { + switch (sbp_rc) { case IPA_RC_SUCCESS: rc = 0; break; @@ -1499,8 +1476,8 @@ static int qeth_bridgeport_makerc(struct qeth_card *card, default: rc = -EIO; } - else - switch (cbctl->ipa_rc) { + } else { + switch (ipa_rc) { case IPA_RC_NOTSUPP: rc = -EOPNOTSUPP; break; @@ -1510,10 +1487,11 @@ static int qeth_bridgeport_makerc(struct qeth_card *card, default: rc = -EIO; } + } if (rc) { - QETH_CARD_TEXT_(card, 2, "SBPi%04x", cbctl->ipa_rc); - QETH_CARD_TEXT_(card, 2, "SBPc%04x", cbctl->cmd_rc); + QETH_CARD_TEXT_(card, 2, "SBPi%04x", ipa_rc); + QETH_CARD_TEXT_(card, 2, "SBPc%04x", sbp_rc); } return rc; } @@ -1545,15 +1523,15 @@ static int qeth_bridgeport_query_support_cb(struct qeth_card *card, { struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct _qeth_sbp_cbctl *cbctl = (struct _qeth_sbp_cbctl *)reply->param; + int rc; + QETH_CARD_TEXT(card, 2, "brqsupcb"); - cbctl->ipa_rc = cmd->hdr.return_code; - cbctl->cmd_rc = cmd->data.sbp.hdr.return_code; - if ((cbctl->ipa_rc == 0) && (cbctl->cmd_rc == 0)) { - cbctl->data.supported = - cmd->data.sbp.data.query_cmds_supp.supported_cmds; - } else { - cbctl->data.supported = 0; - } + rc = qeth_bridgeport_makerc(card, cmd); + if (rc) + return rc; + + cbctl->data.supported = + cmd->data.sbp.data.query_cmds_supp.supported_cmds; return 0; } @@ -1574,12 +1552,11 @@ static void qeth_bridgeport_query_support(struct qeth_card *card) sizeof(struct qeth_sbp_query_cmds_supp)); if (!iob) return; + if (qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_support_cb, - (void *)&cbctl) || - qeth_bridgeport_makerc(card, &cbctl, - IPA_SBP_QUERY_COMMANDS_SUPPORTED)) { - /* non-zero makerc signifies failure, and produce messages */ + &cbctl)) { card->options.sbp.role = QETH_SBP_ROLE_NONE; + card->options.sbp.supported_funcs = 0; return; } card->options.sbp.supported_funcs = cbctl.data.supported; @@ -1591,16 +1568,16 @@ static int qeth_bridgeport_query_ports_cb(struct qeth_card *card, struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_sbp_query_ports *qports = &cmd->data.sbp.data.query_ports; struct _qeth_sbp_cbctl *cbctl = (struct _qeth_sbp_cbctl *)reply->param; + int rc; QETH_CARD_TEXT(card, 2, "brqprtcb"); - cbctl->ipa_rc = cmd->hdr.return_code; - cbctl->cmd_rc = cmd->data.sbp.hdr.return_code; - if ((cbctl->ipa_rc != 0) || (cbctl->cmd_rc != 0)) - return 0; + rc = qeth_bridgeport_makerc(card, cmd); + if (rc) + return rc; + if (qports->entry_length != sizeof(struct qeth_sbp_port_entry)) { - cbctl->cmd_rc = 0xffff; QETH_CARD_TEXT_(card, 2, "SBPs%04x", qports->entry_length); - return 0; + return -EINVAL; } /* first entry contains the state of the local port */ if (qports->num_entries > 0) { @@ -1625,7 +1602,6 @@ static int qeth_bridgeport_query_ports_cb(struct qeth_card *card, int qeth_bridgeport_query_ports(struct qeth_card *card, enum qeth_sbp_roles *role, enum qeth_sbp_states *state) { - int rc = 0; struct qeth_cmd_buffer *iob; struct _qeth_sbp_cbctl cbctl = { .data = { @@ -1642,22 +1618,18 @@ int qeth_bridgeport_query_ports(struct qeth_card *card, iob = qeth_sbp_build_cmd(card, IPA_SBP_QUERY_BRIDGE_PORTS, 0); if (!iob) return -ENOMEM; - rc = qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_ports_cb, - (void *)&cbctl); - if (rc < 0) - return rc; - return qeth_bridgeport_makerc(card, &cbctl, IPA_SBP_QUERY_BRIDGE_PORTS); + + return qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_ports_cb, + &cbctl); } static int qeth_bridgeport_set_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; - struct _qeth_sbp_cbctl *cbctl = (struct _qeth_sbp_cbctl *)reply->param; + QETH_CARD_TEXT(card, 2, "brsetrcb"); - cbctl->ipa_rc = cmd->hdr.return_code; - cbctl->cmd_rc = cmd->data.sbp.hdr.return_code; - return 0; + return qeth_bridgeport_makerc(card, cmd); } /** @@ -1669,10 +1641,8 @@ static int qeth_bridgeport_set_cb(struct qeth_card *card, */ int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role) { - int rc = 0; int cmdlength; struct qeth_cmd_buffer *iob; - struct _qeth_sbp_cbctl cbctl; enum qeth_ipa_sbp_cmd setcmd; QETH_CARD_TEXT(card, 2, "brsetrol"); @@ -1697,11 +1667,8 @@ int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role) iob = qeth_sbp_build_cmd(card, setcmd, cmdlength); if (!iob) return -ENOMEM; - rc = qeth_send_ipa_cmd(card, iob, qeth_bridgeport_set_cb, - (void *)&cbctl); - if (rc < 0) - return rc; - return qeth_bridgeport_makerc(card, &cbctl, setcmd); + + return qeth_send_ipa_cmd(card, iob, qeth_bridgeport_set_cb, NULL); } /** @@ -1805,7 +1772,7 @@ static bool qeth_bridgeport_is_in_use(struct qeth_card *card) /* VNIC Characteristics support */ /* handle VNICC IPA command return codes; convert to error codes */ -static int qeth_l2_vnicc_makerc(struct qeth_card *card, int ipa_rc) +static int qeth_l2_vnicc_makerc(struct qeth_card *card, u16 ipa_rc) { int rc; @@ -1863,7 +1830,7 @@ static int qeth_l2_vnicc_request_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "vniccrcb"); if (cmd->hdr.return_code) - return 0; + return qeth_l2_vnicc_makerc(card, cmd->hdr.return_code); /* return results to caller */ card->options.vnicc.sup_chars = rep->hdr.sup; card->options.vnicc.cur_chars = rep->hdr.cur; @@ -1884,7 +1851,6 @@ static int qeth_l2_vnicc_request(struct qeth_card *card, struct qeth_ipacmd_vnicc *req; struct qeth_cmd_buffer *iob; struct qeth_ipa_cmd *cmd; - int rc; QETH_CARD_TEXT(card, 2, "vniccreq"); @@ -1927,10 +1893,7 @@ static int qeth_l2_vnicc_request(struct qeth_card *card, } /* send request */ - rc = qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, - (void *) cbctl); - - return qeth_l2_vnicc_makerc(card, rc); + return qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, cbctl); } /* VNICC query VNIC characteristics request */ diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 59535ecb1487..07c3149e228c 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -253,8 +253,7 @@ static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) } else rc = qeth_l3_register_addr_entry(card, addr); - if (!rc || (rc == IPA_RC_DUPLICATE_IP_ADDRESS) || - (rc == IPA_RC_LAN_OFFLINE)) { + if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; if (addr->ref_counter < 1) { qeth_l3_deregister_addr_entry(card, addr); @@ -338,10 +337,28 @@ static void qeth_l3_recover_ip(struct qeth_card *card) } +static int qeth_l3_setdelip_cb(struct qeth_card *card, struct qeth_reply *reply, + unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + + switch (cmd->hdr.return_code) { + case IPA_RC_SUCCESS: + return 0; + case IPA_RC_DUPLICATE_IP_ADDRESS: + return -EADDRINUSE; + case IPA_RC_MC_ADDR_NOT_FOUND: + return -ENOENT; + case IPA_RC_LAN_OFFLINE: + return -ENETDOWN; + default: + return -EIO; + } +} + static int qeth_l3_send_setdelmc(struct qeth_card *card, struct qeth_ipaddr *addr, int ipacmd) { - int rc; struct qeth_cmd_buffer *iob; struct qeth_ipa_cmd *cmd; @@ -358,9 +375,7 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card, else memcpy(&cmd->data.setdelipm.ip4, &addr->u.a4.addr, 4); - rc = qeth_send_ipa_cmd(card, iob, NULL, NULL); - - return rc; + return qeth_send_ipa_cmd(card, iob, qeth_l3_setdelip_cb, NULL); } static void qeth_l3_fill_netmask(u8 *netmask, unsigned int len) @@ -422,7 +437,7 @@ static int qeth_l3_send_setdelip(struct qeth_card *card, cmd->data.setdelip4.flags = flags; } - return qeth_send_ipa_cmd(card, iob, NULL, NULL); + return qeth_send_ipa_cmd(card, iob, qeth_l3_setdelip_cb, NULL); } static int qeth_l3_send_setrouting(struct qeth_card *card, @@ -942,12 +957,13 @@ static int qeth_l3_start_ipassists(struct qeth_card *card) static int qeth_l3_iqd_read_initial_mac_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; - cmd = (struct qeth_ipa_cmd *) data; - if (cmd->hdr.return_code == 0) - ether_addr_copy(card->dev->dev_addr, - cmd->data.create_destroy_addr.unique_id); + if (cmd->hdr.return_code) + return -EIO; + + ether_addr_copy(card->dev->dev_addr, + cmd->data.create_destroy_addr.unique_id); return 0; } @@ -975,19 +991,18 @@ static int qeth_l3_iqd_read_initial_mac(struct qeth_card *card) static int qeth_l3_get_unique_id_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; - cmd = (struct qeth_ipa_cmd *) data; - if (cmd->hdr.return_code == 0) + if (cmd->hdr.return_code == 0) { card->info.unique_id = *((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]); - else { - card->info.unique_id = UNIQUE_ID_IF_CREATE_ADDR_FAILED | - UNIQUE_ID_NOT_BY_CARD; - dev_warn(&card->gdev->dev, "The network adapter failed to " - "generate a unique ID\n"); + return 0; } - return 0; + + card->info.unique_id = UNIQUE_ID_IF_CREATE_ADDR_FAILED | + UNIQUE_ID_NOT_BY_CARD; + dev_warn(&card->gdev->dev, "The network adapter failed to generate a unique ID\n"); + return -EIO; } static int qeth_l3_get_unique_id(struct qeth_card *card) @@ -1070,7 +1085,7 @@ qeth_diags_trace_cb(struct qeth_card *card, struct qeth_reply *reply, cmd->data.diagass.action, CARD_DEVID(card)); } - return 0; + return rc ? -EIO : 0; } static int @@ -1300,12 +1315,11 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, ip_eth_mc_map(ip_hdr(skb)->daddr, tg_addr); else ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr); - - card->stats.multicast++; + QETH_CARD_STAT_INC(card, rx_multicast); break; case QETH_CAST_BROADCAST: ether_addr_copy(tg_addr, card->dev->broadcast); - card->stats.multicast++; + QETH_CARD_STAT_INC(card, rx_multicast); break; default: if (card->options.sniffer) @@ -1386,8 +1400,8 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card, } work_done++; budget--; - card->stats.rx_packets++; - card->stats.rx_bytes += len; + QETH_CARD_STAT_INC(card, rx_packets); + QETH_CARD_STAT_ADD(card, rx_bytes, len); } return work_done; } @@ -1428,6 +1442,8 @@ static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode) qeth_clear_cmd_buffers(&card->read); qeth_clear_cmd_buffers(&card->write); } + + flush_workqueue(card->event_wq); } /* @@ -1479,14 +1495,14 @@ static void qeth_l3_set_rx_mode(struct net_device *dev) switch (addr->disp_flag) { case QETH_DISP_ADDR_DELETE: rc = qeth_l3_deregister_addr_entry(card, addr); - if (!rc || rc == IPA_RC_MC_ADDR_NOT_FOUND) { + if (!rc || rc == -ENOENT) { hash_del(&addr->hnode); kfree(addr); } break; case QETH_DISP_ADDR_ADD: rc = qeth_l3_register_addr_entry(card, addr); - if (rc && rc != IPA_RC_LAN_OFFLINE) { + if (rc && rc != -ENETDOWN) { hash_del(&addr->hnode); kfree(addr); break; @@ -1507,7 +1523,7 @@ static void qeth_l3_set_rx_mode(struct net_device *dev) qeth_l3_handle_promisc_mode(card); } -static int qeth_l3_arp_makerc(int rc) +static int qeth_l3_arp_makerc(u16 rc) { switch (rc) { case IPA_RC_SUCCESS: @@ -1524,8 +1540,18 @@ static int qeth_l3_arp_makerc(int rc) } } +static int qeth_l3_arp_cmd_cb(struct qeth_card *card, struct qeth_reply *reply, + unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + + qeth_setassparms_cb(card, reply, data); + return qeth_l3_arp_makerc(cmd->hdr.return_code); +} + static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries) { + struct qeth_cmd_buffer *iob; int rc; QETH_CARD_TEXT(card, 3, "arpstnoe"); @@ -1540,13 +1566,19 @@ static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries) if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) { return -EOPNOTSUPP; } - rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING, - IPA_CMD_ASS_ARP_SET_NO_ENTRIES, - no_entries); + + iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, + IPA_CMD_ASS_ARP_SET_NO_ENTRIES, 4, + QETH_PROT_IPV4); + if (!iob) + return -ENOMEM; + + __ipa_cmd(iob)->data.setassparms.data.flags_32bit = (u32) no_entries; + rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_cmd_cb, NULL); if (rc) QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on device %x: %#x\n", CARD_DEVID(card), rc); - return qeth_l3_arp_makerc(rc); + return rc; } static __u32 get_arp_entry_size(struct qeth_card *card, @@ -1597,7 +1629,6 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card, struct qeth_ipa_cmd *cmd; struct qeth_arp_query_data *qdata; struct qeth_arp_query_info *qinfo; - int i; int e; int entrybytes_done; int stripped_bytes; @@ -1611,13 +1642,13 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card, if (cmd->hdr.return_code) { QETH_CARD_TEXT(card, 4, "arpcberr"); QETH_CARD_TEXT_(card, 4, "%i", cmd->hdr.return_code); - return 0; + return qeth_l3_arp_makerc(cmd->hdr.return_code); } if (cmd->data.setassparms.hdr.return_code) { cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code; QETH_CARD_TEXT(card, 4, "setaperr"); QETH_CARD_TEXT_(card, 4, "%i", cmd->hdr.return_code); - return 0; + return qeth_l3_arp_makerc(cmd->hdr.return_code); } qdata = &cmd->data.setassparms.data.query_arp; QETH_CARD_TEXT_(card, 4, "anoen%i", qdata->no_entries); @@ -1644,9 +1675,9 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card, break; if ((qinfo->udata_len - qinfo->udata_offset) < esize) { - QETH_CARD_TEXT_(card, 4, "qaer3%i", -ENOMEM); - cmd->hdr.return_code = IPA_RC_ENOMEM; - goto out_error; + QETH_CARD_TEXT_(card, 4, "qaer3%i", -ENOSPC); + memset(qinfo->udata, 0, 4); + return -ENOSPC; } memcpy(qinfo->udata + qinfo->udata_offset, @@ -1669,10 +1700,6 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card, memcpy(qinfo->udata + QETH_QARP_MASK_OFFSET, &qdata->reply_bits, 2); QETH_CARD_TEXT_(card, 4, "rc%i", 0); return 0; -out_error: - i = 0; - memcpy(qinfo->udata, &i, 4); - return 0; } static int qeth_l3_query_arp_cache_info(struct qeth_card *card, @@ -1694,13 +1721,11 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card, return -ENOMEM; cmd = __ipa_cmd(iob); cmd->data.setassparms.data.query_arp.request_bits = 0x000F; - rc = qeth_send_control_data(card, - QETH_SETASS_BASE_LEN + QETH_ARP_CMD_LEN, - iob, qeth_l3_arp_query_cb, qinfo); + rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_query_cb, qinfo); if (rc) QETH_DBF_MESSAGE(2, "Error while querying ARP cache on device %x: %#x\n", CARD_DEVID(card), rc); - return qeth_l3_arp_makerc(rc); + return rc; } static int qeth_l3_arp_query(struct qeth_card *card, char __user *udata) @@ -1782,16 +1807,16 @@ static int qeth_l3_arp_modify_entry(struct qeth_card *card, cmd_entry = &__ipa_cmd(iob)->data.setassparms.data.arp_entry; ether_addr_copy(cmd_entry->macaddr, entry->macaddr); memcpy(cmd_entry->ipaddr, entry->ipaddr, 4); - rc = qeth_send_ipa_cmd(card, iob, qeth_setassparms_cb, NULL); + rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_cmd_cb, NULL); if (rc) QETH_DBF_MESSAGE(2, "Could not modify (cmd: %#x) ARP entry on device %x: %#x\n", arp_cmd, CARD_DEVID(card), rc); - - return qeth_l3_arp_makerc(rc); + return rc; } static int qeth_l3_arp_flush_cache(struct qeth_card *card) { + struct qeth_cmd_buffer *iob; int rc; QETH_CARD_TEXT(card, 3, "arpflush"); @@ -1806,12 +1831,18 @@ static int qeth_l3_arp_flush_cache(struct qeth_card *card) if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) { return -EOPNOTSUPP; } - rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING, - IPA_CMD_ASS_ARP_FLUSH_CACHE, 0); + + iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, + IPA_CMD_ASS_ARP_FLUSH_CACHE, 0, + QETH_PROT_IPV4); + if (!iob) + return -ENOMEM; + + rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_cmd_cb, NULL); if (rc) QETH_DBF_MESSAGE(2, "Could not flush ARP cache on device %x: %#x\n", CARD_DEVID(card), rc); - return qeth_l3_arp_makerc(rc); + return rc; } static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) @@ -1913,12 +1944,13 @@ static u8 qeth_l3_cast_type_to_flag(int cast_type) return QETH_CAST_UNICAST; } -static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, - struct sk_buff *skb, int ipv, int cast_type, - unsigned int data_len) +static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue, + struct qeth_hdr *hdr, struct sk_buff *skb, + int ipv, int cast_type, unsigned int data_len) { struct qeth_hdr_layer3 *l3_hdr = &hdr->hdr.l3; struct vlan_ethhdr *veth = vlan_eth_hdr(skb); + struct qeth_card *card = queue->card; hdr->hdr.l3.length = data_len; @@ -1940,8 +1972,7 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, /* some HW requires combined L3+L4 csum offload: */ if (ipv == 4) hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_CSUM_HDR_REQ; - if (card->options.performance_stats) - card->perf_stats.tx_csum++; + QETH_TXQ_STAT_INC(queue, skbs_csum); } } @@ -2042,6 +2073,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, int tx_bytes = skb->len; int rc; + queue = qeth_get_tx_queue(card, skb, ipv, cast_type); + if (IS_IQD(card)) { if (card->options.sniffer) goto tx_drop; @@ -2052,19 +2085,13 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, } if (card->state != CARD_STATE_UP) { - card->stats.tx_carrier_errors++; + QETH_TXQ_STAT_INC(queue, tx_carrier_errors); goto tx_drop; } if (cast_type == RTN_BROADCAST && !card->info.broadcast_capable) goto tx_drop; - queue = qeth_get_tx_queue(card, skb, ipv, cast_type); - - if (card->options.performance_stats) { - card->perf_stats.outbound_cnt++; - card->perf_stats.outbound_start_time = qeth_get_micros(); - } netif_stop_queue(dev); if (ipv == 4 || IS_IQD(card)) @@ -2074,11 +2101,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, qeth_l3_fill_header); if (!rc) { - card->stats.tx_packets++; - card->stats.tx_bytes += tx_bytes; - if (card->options.performance_stats) - card->perf_stats.outbound_time += qeth_get_micros() - - card->perf_stats.outbound_start_time; + QETH_TXQ_STAT_INC(queue, tx_packets); + QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes); netif_wake_queue(dev); return NETDEV_TX_OK; } else if (rc == -EBUSY) { @@ -2086,22 +2110,13 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, } /* else fall through */ tx_drop: - card->stats.tx_dropped++; - card->stats.tx_errors++; + QETH_TXQ_STAT_INC(queue, tx_dropped); + QETH_TXQ_STAT_INC(queue, tx_errors); dev_kfree_skb_any(skb); netif_wake_queue(dev); return NETDEV_TX_OK; } -static const struct ethtool_ops qeth_l3_ethtool_ops = { - .get_link = ethtool_op_get_link, - .get_strings = qeth_core_get_strings, - .get_ethtool_stats = qeth_core_get_ethtool_stats, - .get_sset_count = qeth_core_get_sset_count, - .get_drvinfo = qeth_core_get_drvinfo, - .get_link_ksettings = qeth_core_ethtool_get_link_ksettings, -}; - /* * we need NOARP for IPv4 but we want neighbor solicitation for IPv6. Setting * NOARP on the netdevice is no option because it also turns off neighbor @@ -2138,7 +2153,7 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb, static const struct net_device_ops qeth_l3_netdev_ops = { .ndo_open = qeth_open, .ndo_stop = qeth_stop, - .ndo_get_stats = qeth_get_stats, + .ndo_get_stats64 = qeth_get_stats64, .ndo_start_xmit = qeth_l3_hard_start_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l3_set_rx_mode, @@ -2153,7 +2168,7 @@ static const struct net_device_ops qeth_l3_netdev_ops = { static const struct net_device_ops qeth_l3_osa_netdev_ops = { .ndo_open = qeth_open, .ndo_stop = qeth_stop, - .ndo_get_stats = qeth_get_stats, + .ndo_get_stats64 = qeth_get_stats64, .ndo_start_xmit = qeth_l3_hard_start_xmit, .ndo_features_check = qeth_l3_osa_features_check, .ndo_validate_addr = eth_validate_addr, @@ -2223,7 +2238,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok) return -ENODEV; card->dev->needed_headroom = headroom; - card->dev->ethtool_ops = &qeth_l3_ethtool_ops; card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; @@ -2278,6 +2292,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) if (cgdev->state == CCWGROUP_ONLINE) qeth_l3_set_offline(cgdev); + cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); qeth_l3_clear_ip_htable(card, 0); diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 9cf30d124b9e..e390f8c6d5f3 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -403,7 +403,6 @@ struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *ccw_device) goto failed; /* report size limit per scatter-gather segment */ - adapter->dma_parms.max_segment_size = ZFCP_QDIO_SBALE_LEN; adapter->ccw_device->dev.dma_parms = &adapter->dma_parms; adapter->stat_read_buf_num = FSF_STATUS_READS_RECOM; diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 00acc7144bbc..f4f6a07c5222 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -428,6 +428,8 @@ static struct scsi_host_template zfcp_scsi_host_template = { .max_sectors = (((QDIO_MAX_ELEMENTS_PER_BUFFER - 1) * ZFCP_QDIO_MAX_SBALS_PER_REQ) - 2) * 8, /* GCD, adjusted later */ + /* report size limit per scatter-gather segment */ + .max_segment_size = ZFCP_QDIO_SBALE_LEN, .dma_boundary = ZFCP_QDIO_SBALE_LEN - 1, .shost_attrs = zfcp_sysfs_shost_attrs, .sdev_attrs = zfcp_sysfs_sdev_attrs, diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 128d658d472a..16957d7ac414 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -295,7 +295,7 @@ NCR_700_detect(struct scsi_host_template *tpnt, if(tpnt->sdev_attrs == NULL) tpnt->sdev_attrs = NCR_700_dev_attrs; - memory = dma_alloc_attrs(hostdata->dev, TOTAL_MEM_SIZE, &pScript, + memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript, GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); if(memory == NULL) { printk(KERN_ERR "53c700: Failed to allocate memory for driver, detaching\n"); diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index f83f79b07b50..07efcb9b5b94 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -280,7 +280,7 @@ static ssize_t asd_show_dev_rev(struct device *dev, return snprintf(buf, PAGE_SIZE, "%s\n", asd_dev_rev[asd_ha->revision_id]); } -static DEVICE_ATTR(revision, S_IRUGO, asd_show_dev_rev, NULL); +static DEVICE_ATTR(aic_revision, S_IRUGO, asd_show_dev_rev, NULL); static ssize_t asd_show_dev_bios_build(struct device *dev, struct device_attribute *attr,char *buf) @@ -477,7 +477,7 @@ static int asd_create_dev_attrs(struct asd_ha_struct *asd_ha) { int err; - err = device_create_file(&asd_ha->pcidev->dev, &dev_attr_revision); + err = device_create_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision); if (err) return err; @@ -499,13 +499,13 @@ err_update_bios: err_biosb: device_remove_file(&asd_ha->pcidev->dev, &dev_attr_bios_build); err_rev: - device_remove_file(&asd_ha->pcidev->dev, &dev_attr_revision); + device_remove_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision); return err; } static void asd_remove_dev_attrs(struct asd_ha_struct *asd_ha) { - device_remove_file(&asd_ha->pcidev->dev, &dev_attr_revision); + device_remove_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision); device_remove_file(&asd_ha->pcidev->dev, &dev_attr_bios_build); device_remove_file(&asd_ha->pcidev->dev, &dev_attr_pcba_sn); device_remove_file(&asd_ha->pcidev->dev, &dev_attr_update_bios); diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 350257c13a5b..bc9f2a2365f4 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -240,6 +240,7 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba) return NULL; } + cmgr->hba = hba; cmgr->free_list = kcalloc(arr_sz, sizeof(*cmgr->free_list), GFP_KERNEL); if (!cmgr->free_list) { @@ -256,7 +257,6 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba) goto mem_err; } - cmgr->hba = hba; cmgr->cmds = (struct bnx2fc_cmd **)(cmgr + 1); for (i = 0; i < arr_sz; i++) { @@ -295,7 +295,7 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba) /* Allocate pool of io_bdts - one for each bnx2fc_cmd */ mem_size = num_ios * sizeof(struct io_bdt *); - cmgr->io_bdt_pool = kmalloc(mem_size, GFP_KERNEL); + cmgr->io_bdt_pool = kzalloc(mem_size, GFP_KERNEL); if (!cmgr->io_bdt_pool) { printk(KERN_ERR PFX "failed to alloc io_bdt_pool\n"); goto mem_err; diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index bfa13e3b191c..c8bad2c093b8 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -3687,6 +3687,7 @@ static int cxlflash_probe(struct pci_dev *pdev, host->max_cmd_len = CXLFLASH_MAX_CDB_LEN; cfg = shost_priv(host); + cfg->state = STATE_PROBING; cfg->host = host; rc = alloc_mem(cfg); if (rc) { @@ -3775,6 +3776,7 @@ out: return rc; out_remove: + cfg->state = STATE_PROBED; cxlflash_remove(pdev); goto out; } diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index be83590ed955..ff943f477d6f 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1726,14 +1726,14 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, fc_frame_payload_op(fp) != ELS_LS_ACC) { FC_LPORT_DBG(lport, "FLOGI not accepted or bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; } flp = fc_frame_payload_get(fp, sizeof(*flp)); if (!flp) { FC_LPORT_DBG(lport, "FLOGI bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; } mfs = ntohs(flp->fl_csp.sp_bb_data) & @@ -1743,7 +1743,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, FC_LPORT_DBG(lport, "FLOGI bad mfs:%hu response, " "lport->mfs:%hu\n", mfs, lport->mfs); fc_lport_error(lport, fp); - goto err; + goto out; } if (mfs <= lport->mfs) { diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 9192a1d9dec6..dfba4921b265 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -184,7 +184,6 @@ void fc_rport_destroy(struct kref *kref) struct fc_rport_priv *rdata; rdata = container_of(kref, struct fc_rport_priv, kref); - WARN_ON(!list_empty(&rdata->peers)); kfree_rcu(rdata, rcu); } EXPORT_SYMBOL(fc_rport_destroy); diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 661512bec3ac..e27f4df24021 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -62,7 +62,7 @@ /* make sure inq_product_rev string corresponds to this version */ #define SDEBUG_VERSION "0188" /* format to fit INQUIRY revision field */ -static const char *sdebug_version_date = "20180128"; +static const char *sdebug_version_date = "20190125"; #define MY_NAME "scsi_debug" @@ -735,7 +735,7 @@ static inline bool scsi_debug_lbp(void) (sdebug_lbpu || sdebug_lbpws || sdebug_lbpws10); } -static void *fake_store(unsigned long long lba) +static void *lba2fake_store(unsigned long long lba) { lba = do_div(lba, sdebug_store_sectors); @@ -2514,8 +2514,8 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba, return ret; } -/* If fake_store(lba,num) compares equal to arr(num), then copy top half of - * arr into fake_store(lba,num) and return true. If comparison fails then +/* If lba2fake_store(lba,num) compares equal to arr(num), then copy top half of + * arr into lba2fake_store(lba,num) and return true. If comparison fails then * return false. */ static bool comp_write_worker(u64 lba, u32 num, const u8 *arr) { @@ -2643,7 +2643,7 @@ static int prot_verify_read(struct scsi_cmnd *SCpnt, sector_t start_sec, if (sdt->app_tag == cpu_to_be16(0xffff)) continue; - ret = dif_verify(sdt, fake_store(sector), sector, ei_lba); + ret = dif_verify(sdt, lba2fake_store(sector), sector, ei_lba); if (ret) { dif_errors++; return ret; @@ -3261,10 +3261,12 @@ err_out: static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, u32 ei_lba, bool unmap, bool ndob) { + int ret; unsigned long iflags; unsigned long long i; - int ret; - u64 lba_off; + u32 lb_size = sdebug_sector_size; + u64 block, lbaa; + u8 *fs1p; ret = check_device_access_params(scp, lba, num); if (ret) @@ -3276,31 +3278,30 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, unmap_region(lba, num); goto out; } - - lba_off = lba * sdebug_sector_size; + lbaa = lba; + block = do_div(lbaa, sdebug_store_sectors); /* if ndob then zero 1 logical block, else fetch 1 logical block */ + fs1p = fake_storep + (block * lb_size); if (ndob) { - memset(fake_storep + lba_off, 0, sdebug_sector_size); + memset(fs1p, 0, lb_size); ret = 0; } else - ret = fetch_to_dev_buffer(scp, fake_storep + lba_off, - sdebug_sector_size); + ret = fetch_to_dev_buffer(scp, fs1p, lb_size); if (-1 == ret) { write_unlock_irqrestore(&atomic_rw, iflags); return DID_ERROR << 16; - } else if (sdebug_verbose && !ndob && (ret < sdebug_sector_size)) + } else if (sdebug_verbose && !ndob && (ret < lb_size)) sdev_printk(KERN_INFO, scp->device, "%s: %s: lb size=%u, IO sent=%d bytes\n", - my_name, "write same", - sdebug_sector_size, ret); + my_name, "write same", lb_size, ret); /* Copy first sector to remaining blocks */ - for (i = 1 ; i < num ; i++) - memcpy(fake_storep + ((lba + i) * sdebug_sector_size), - fake_storep + lba_off, - sdebug_sector_size); - + for (i = 1 ; i < num ; i++) { + lbaa = lba + i; + block = do_div(lbaa, sdebug_store_sectors); + memmove(fake_storep + (block * lb_size), fs1p, lb_size); + } if (scsi_debug_lbp()) map_region(lba, num); out: diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 83365b29a4d8..fff86940388b 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -462,12 +462,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) sdkp->device->use_10_for_rw = 0; /* - * If something changed, revalidate the disk zone bitmaps once we have - * the capacity, that is on the second revalidate execution during disk - * scan and always during normal revalidate. + * Revalidate the disk zone bitmaps once the block device capacity is + * set on the second revalidate execution during disk scan and if + * something changed when executing a normal revalidate. */ - if (sdkp->first_scan) + if (sdkp->first_scan) { + sdkp->zone_blocks = zone_blocks; + sdkp->nr_zones = nr_zones; return 0; + } + if (sdkp->zone_blocks != zone_blocks || sdkp->nr_zones != nr_zones || disk->queue->nr_zones != nr_zones) { diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 52c153cd795a..636f83f781f5 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1143,18 +1143,19 @@ static void qm_mr_process_task(struct work_struct *work); static irqreturn_t portal_isr(int irq, void *ptr) { struct qman_portal *p = ptr; - - u32 clear = QM_DQAVAIL_MASK | p->irq_sources; u32 is = qm_in(&p->p, QM_REG_ISR) & p->irq_sources; + u32 clear = 0; if (unlikely(!is)) return IRQ_NONE; /* DQRR-handling if it's interrupt-driven */ - if (is & QM_PIRQ_DQRI) + if (is & QM_PIRQ_DQRI) { __poll_portal_fast(p, QMAN_POLL_LIMIT); + clear = QM_DQAVAIL_MASK | QM_PIRQ_DQRI; + } /* Handling of anything else that's interrupt-driven */ - clear |= __poll_portal_slow(p, is); + clear |= __poll_portal_slow(p, is) & QM_PIRQ_SLOW; qm_out(&p->p, QM_REG_ISR, clear); return IRQ_HANDLED; } diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index e559f4c25cf7..1b3943b71254 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -643,14 +643,7 @@ static void ethsw_teardown_irqs(struct fsl_mc_device *sw_dev) static int swdev_port_attr_get(struct net_device *netdev, struct switchdev_attr *attr) { - struct ethsw_port_priv *port_priv = netdev_priv(netdev); - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - attr->u.brport_flags = - (port_priv->ethsw_data->learning ? BR_LEARNING : 0) | - (port_priv->flood ? BR_FLOOD : 0); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD; break; diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index 2848fa71a33d..d6248eecf123 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -170,7 +170,7 @@ int cvm_oct_phy_setup_device(struct net_device *dev) return -ENODEV; priv->last_link = 0; - phy_start_aneg(phydev); + phy_start(phydev); return 0; no_phy: diff --git a/drivers/staging/speakup/spk_ttyio.c b/drivers/staging/speakup/spk_ttyio.c index c92bbd05516e..005de0024dd4 100644 --- a/drivers/staging/speakup/spk_ttyio.c +++ b/drivers/staging/speakup/spk_ttyio.c @@ -265,7 +265,8 @@ static void spk_ttyio_send_xchar(char ch) return; } - speakup_tty->ops->send_xchar(speakup_tty, ch); + if (speakup_tty->ops->send_xchar) + speakup_tty->ops->send_xchar(speakup_tty, ch); mutex_unlock(&speakup_tty_mutex); } @@ -277,7 +278,8 @@ static void spk_ttyio_tiocmset(unsigned int set, unsigned int clear) return; } - speakup_tty->ops->tiocmset(speakup_tty, set, clear); + if (speakup_tty->ops->tiocmset) + speakup_tty->ops->tiocmset(speakup_tty, set, clear); mutex_unlock(&speakup_tty_mutex); } diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 72016d0dfca5..8e7fffbb8802 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -852,6 +852,12 @@ static ssize_t pi_prot_type_store(struct config_item *item, return count; } +/* always zero, but attr needs to remain RW to avoid userspace breakage */ +static ssize_t pi_prot_format_show(struct config_item *item, char *page) +{ + return snprintf(page, PAGE_SIZE, "0\n"); +} + static ssize_t pi_prot_format_store(struct config_item *item, const char *page, size_t count) { @@ -1132,7 +1138,7 @@ CONFIGFS_ATTR(, emulate_3pc); CONFIGFS_ATTR(, emulate_pr); CONFIGFS_ATTR(, pi_prot_type); CONFIGFS_ATTR_RO(, hw_pi_prot_type); -CONFIGFS_ATTR_WO(, pi_prot_format); +CONFIGFS_ATTR(, pi_prot_format); CONFIGFS_ATTR(, pi_prot_verify); CONFIGFS_ATTR(, enforce_pr_isids); CONFIGFS_ATTR(, is_nonrot); diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index dfd23245f778..6fff16113628 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -774,7 +774,7 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy) cdev = __cpufreq_cooling_register(np, policy, capacitance); if (IS_ERR(cdev)) { - pr_err("cpu_cooling: cpu%d is not running as cooling device: %ld\n", + pr_err("cpu_cooling: cpu%d failed to register as cooling device: %ld\n", policy->cpu, PTR_ERR(cdev)); cdev = NULL; } diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 4bfdb4a1e47d..2df059cc07e2 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -867,14 +867,14 @@ __init *thermal_of_build_thermal_zone(struct device_node *np) ret = of_property_read_u32(np, "polling-delay-passive", &prop); if (ret < 0) { - pr_err("missing polling-delay-passive property\n"); + pr_err("%pOFn: missing polling-delay-passive property\n", np); goto free_tz; } tz->passive_delay = prop; ret = of_property_read_u32(np, "polling-delay", &prop); if (ret < 0) { - pr_err("missing polling-delay property\n"); + pr_err("%pOFn: missing polling-delay property\n", np); goto free_tz; } tz->polling_delay = prop; diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index e2c407656fa6..c1fdbc0b6840 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -357,6 +357,9 @@ static int mtk8250_probe_of(struct platform_device *pdev, struct uart_port *p, if (dmacnt == 2) { data->dma = devm_kzalloc(&pdev->dev, sizeof(*data->dma), GFP_KERNEL); + if (!data->dma) + return -ENOMEM; + data->dma->fn = mtk8250_dma_filter; data->dma->rx_size = MTK_UART_RX_SIZE; data->dma->rxconf.src_maxburst = MTK_UART_RX_TRIGGER; diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index f80a300b5d68..48bd694a5fa1 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3420,6 +3420,11 @@ static int serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board) { int num_iomem, num_port, first_port = -1, i; + int rc; + + rc = serial_pci_is_class_communication(dev); + if (rc) + return rc; /* * Should we try to make guesses for multiport serial devices later? @@ -3647,10 +3652,6 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) board = &pci_boards[ent->driver_data]; - rc = serial_pci_is_class_communication(dev); - if (rc) - return rc; - rc = serial_pci_is_blacklisted(dev); if (rc) return rc; diff --git a/drivers/tty/serial/earlycon-riscv-sbi.c b/drivers/tty/serial/earlycon-riscv-sbi.c index e1a551aae336..ce81523c3113 100644 --- a/drivers/tty/serial/earlycon-riscv-sbi.c +++ b/drivers/tty/serial/earlycon-riscv-sbi.c @@ -10,13 +10,16 @@ #include <linux/serial_core.h> #include <asm/sbi.h> -static void sbi_console_write(struct console *con, - const char *s, unsigned int n) +static void sbi_putc(struct uart_port *port, int c) { - int i; + sbi_console_putchar(c); +} - for (i = 0; i < n; ++i) - sbi_console_putchar(s[i]); +static void sbi_console_write(struct console *con, + const char *s, unsigned n) +{ + struct earlycon_device *dev = con->data; + uart_console_write(&dev->port, s, n, sbi_putc); } static int __init early_sbi_setup(struct earlycon_device *device, diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 5c01bb6d1c24..556f50aa1b58 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -130,6 +130,9 @@ static void uart_start(struct tty_struct *tty) struct uart_port *port; unsigned long flags; + if (!state) + return; + port = uart_port_lock(state, flags); __uart_start(tty); uart_port_unlock(port, flags); @@ -727,6 +730,9 @@ static void uart_unthrottle(struct tty_struct *tty) upstat_t mask = UPSTAT_SYNC_FIFO; struct uart_port *port; + if (!state) + return; + port = uart_port_ref(state); if (!port) return; diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 8df0fd824520..64bbeb7d7e0c 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1921,7 +1921,7 @@ out_nomem: static void sci_free_irq(struct sci_port *port) { - int i; + int i, j; /* * Intentionally in reverse order so we iterate over the muxed @@ -1937,6 +1937,13 @@ static void sci_free_irq(struct sci_port *port) if (unlikely(irq < 0)) continue; + /* Check if already freed (irq was muxed) */ + for (j = 0; j < i; j++) + if (port->irqs[j] == irq) + j = i + 1; + if (j > i) + continue; + free_irq(port->irqs[i], port); kfree(port->irqstr[i]); diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index cb7fcd7c0ad8..c1e9ea621f41 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -78,7 +78,7 @@ static int dwc3_exynos_probe(struct platform_device *pdev) for (i = 0; i < exynos->num_clks; i++) { ret = clk_prepare_enable(exynos->clks[i]); if (ret) { - while (--i > 0) + while (i-- > 0) clk_disable_unprepare(exynos->clks[i]); return ret; } @@ -223,7 +223,7 @@ static int dwc3_exynos_resume(struct device *dev) for (i = 0; i < exynos->num_clks; i++) { ret = clk_prepare_enable(exynos->clks[i]); if (ret) { - while (--i > 0) + while (i-- > 0) clk_disable_unprepare(exynos->clks[i]); return ret; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index bed2ff42780b..6c9b76bcc2e1 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1119,7 +1119,7 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep, unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); unsigned int rem = length % maxp; - if (rem && usb_endpoint_dir_out(dep->endpoint.desc)) { + if ((!length || rem) && usb_endpoint_dir_out(dep->endpoint.desc)) { struct dwc3 *dwc = dep->dwc; struct dwc3_trb *trb; diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index 660878a19505..b77f3126580e 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -2083,7 +2083,7 @@ static irqreturn_t net2272_irq(int irq, void *_dev) #if defined(PLX_PCI_RDK2) /* see if PCI int for us by checking irqstat */ intcsr = readl(dev->rdk2.fpga_base_addr + RDK2_IRQSTAT); - if (!intcsr & (1 << NET2272_PCI_IRQ)) { + if (!(intcsr & (1 << NET2272_PCI_IRQ))) { spin_unlock(&dev->lock); return IRQ_NONE; } diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index eae8b1b1b45b..ffe462a657b1 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -452,13 +452,10 @@ void musb_g_tx(struct musb *musb, u8 epnum) } if (request) { - u8 is_dma = 0; - bool short_packet = false; trace_musb_req_tx(req); if (dma && (csr & MUSB_TXCSR_DMAENAB)) { - is_dma = 1; csr |= MUSB_TXCSR_P_WZC_BITS; csr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_P_UNDERRUN | MUSB_TXCSR_TXPKTRDY | MUSB_TXCSR_AUTOSET); @@ -476,16 +473,8 @@ void musb_g_tx(struct musb *musb, u8 epnum) */ if ((request->zero && request->length) && (request->length % musb_ep->packet_sz == 0) - && (request->actual == request->length)) - short_packet = true; + && (request->actual == request->length)) { - if ((musb_dma_inventra(musb) || musb_dma_ux500(musb)) && - (is_dma && (!dma->desired_mode || - (request->actual & - (musb_ep->packet_sz - 1))))) - short_packet = true; - - if (short_packet) { /* * On DMA completion, FIFO may not be * available yet... diff --git a/drivers/usb/musb/musbhsdma.c b/drivers/usb/musb/musbhsdma.c index a688f7f87829..5fc6825745f2 100644 --- a/drivers/usb/musb/musbhsdma.c +++ b/drivers/usb/musb/musbhsdma.c @@ -346,12 +346,10 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data) channel->status = MUSB_DMA_STATUS_FREE; /* completed */ - if ((devctl & MUSB_DEVCTL_HM) - && (musb_channel->transmit) - && ((channel->desired_mode == 0) - || (channel->actual_len & - (musb_channel->max_packet_sz - 1))) - ) { + if (musb_channel->transmit && + (!channel->desired_mode || + (channel->actual_len % + musb_channel->max_packet_sz))) { u8 epnum = musb_channel->epnum; int offset = musb->io.ep_offset(epnum, MUSB_TXCSR); @@ -363,11 +361,14 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data) */ musb_ep_select(mbase, epnum); txcsr = musb_readw(mbase, offset); - txcsr &= ~(MUSB_TXCSR_DMAENAB + if (channel->desired_mode == 1) { + txcsr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_AUTOSET); - musb_writew(mbase, offset, txcsr); - /* Send out the packet */ - txcsr &= ~MUSB_TXCSR_DMAMODE; + musb_writew(mbase, offset, txcsr); + /* Send out the packet */ + txcsr &= ~MUSB_TXCSR_DMAMODE; + txcsr |= MUSB_TXCSR_DMAENAB; + } txcsr |= MUSB_TXCSR_TXPKTRDY; musb_writew(mbase, offset, txcsr); } diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index d7312eed6088..91ea3083e7ad 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -21,7 +21,7 @@ config AB8500_USB config FSL_USB2_OTG bool "Freescale USB OTG Transceiver Driver" - depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM + depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM=y && PM depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y' select USB_PHY help diff --git a/drivers/usb/phy/phy-am335x.c b/drivers/usb/phy/phy-am335x.c index 27bdb7222527..f5f0568d8533 100644 --- a/drivers/usb/phy/phy-am335x.c +++ b/drivers/usb/phy/phy-am335x.c @@ -61,9 +61,6 @@ static int am335x_phy_probe(struct platform_device *pdev) if (ret) return ret; - ret = usb_add_phy_dev(&am_phy->usb_phy_gen.phy); - if (ret) - return ret; am_phy->usb_phy_gen.phy.init = am335x_init; am_phy->usb_phy_gen.phy.shutdown = am335x_shutdown; @@ -82,7 +79,7 @@ static int am335x_phy_probe(struct platform_device *pdev) device_set_wakeup_enable(dev, false); phy_ctrl_power(am_phy->phy_ctrl, am_phy->id, am_phy->dr_mode, false); - return 0; + return usb_add_phy_dev(&am_phy->usb_phy_gen.phy); } static int am335x_phy_remove(struct platform_device *pdev) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 4bc29b586698..f1c39a3c7534 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2297,7 +2297,8 @@ static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port) pdo_pps_apdo_max_voltage(snk)); port->pps_data.max_curr = min_pps_apdo_current(src, snk); port->pps_data.out_volt = min(port->pps_data.max_volt, - port->pps_data.out_volt); + max(port->pps_data.min_volt, + port->pps_data.out_volt)); port->pps_data.op_curr = min(port->pps_data.max_curr, port->pps_data.op_curr); } diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cd7e755484e3..a0b07c331255 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -152,7 +152,12 @@ struct vring_virtqueue { /* Available for packed ring */ struct { /* Actual memory layout for this queue. */ - struct vring_packed vring; + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; /* Driver ring wrap counter. */ bool avail_wrap_counter; @@ -1609,6 +1614,9 @@ static struct virtqueue *vring_create_virtqueue_packed( !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); + if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) + vq->weak_barriers = false; + vq->packed.ring_dma_addr = ring_dma_addr; vq->packed.driver_event_dma_addr = driver_event_dma_addr; vq->packed.device_event_dma_addr = device_event_dma_addr; @@ -2079,6 +2087,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); + if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) + vq->weak_barriers = false; + vq->split.queue_dma_addr = 0; vq->split.queue_size_in_bytes = 0; @@ -2213,6 +2224,8 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_RING_PACKED: break; + case VIRTIO_F_ORDER_PLATFORM: + break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i); @@ -1436,6 +1436,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) if (unlikely(!req->ki_filp)) return -EBADF; req->ki_complete = aio_complete_rw; + req->private = NULL; req->ki_pos = iocb->aio_offset; req->ki_flags = iocb_flags(req->ki_filp); if (iocb->aio_flags & IOCB_FLAG_RESFD) diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c index d441244b79df..28d9c2b1b3bb 100644 --- a/fs/autofs/expire.c +++ b/fs/autofs/expire.c @@ -596,7 +596,6 @@ int autofs_expire_run(struct super_block *sb, pkt.len = dentry->d_name.len; memcpy(pkt.name, dentry->d_name.name, pkt.len); pkt.name[pkt.len] = '\0'; - dput(dentry); if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) ret = -EFAULT; @@ -609,6 +608,8 @@ int autofs_expire_run(struct super_block *sb, complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); + dput(dentry); + return ret; } diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 0e8ea2d9a2bb..078992eee299 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -266,8 +266,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) } root_inode = autofs_get_inode(s, S_IFDIR | 0755); root = d_make_root(root_inode); - if (!root) + if (!root) { + ret = -ENOMEM; goto fail_ino; + } pipe = NULL; root->d_fsdata = ino; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index d0078cbb718b..7cde3f46ad26 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -42,14 +42,10 @@ static int load_script(struct linux_binprm *bprm) fput(bprm->file); bprm->file = NULL; - for (cp = bprm->buf+2;; cp++) { - if (cp >= bprm->buf + BINPRM_BUF_SIZE) - return -ENOEXEC; - if (!*cp || (*cp == '\n')) - break; - } + bprm->buf[BINPRM_BUF_SIZE - 1] = '\0'; + if ((cp = strchr(bprm->buf, '\n')) == NULL) + cp = bprm->buf+BINPRM_BUF_SIZE-1; *cp = '\0'; - while (cp > bprm->buf) { cp--; if ((*cp == ' ') || (*cp == '\t')) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f64aad613727..5a6c39b44c84 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, return 0; } +static struct extent_buffer *alloc_tree_block_no_bg_flush( + struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 parent_start, + const struct btrfs_disk_key *disk_key, + int level, + u64 hint, + u64 empty_size) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_buffer *ret; + + /* + * If we are COWing a node/leaf from the extent, chunk, device or free + * space trees, make sure that we do not finish block group creation of + * pending block groups. We do this to avoid a deadlock. + * COWing can result in allocation of a new chunk, and flushing pending + * block groups (btrfs_create_pending_block_groups()) can be triggered + * when finishing allocation of a new chunk. Creation of a pending block + * group modifies the extent, chunk, device and free space trees, + * therefore we could deadlock with ourselves since we are holding a + * lock on an extent buffer that btrfs_create_pending_block_groups() may + * try to COW later. + * For similar reasons, we also need to delay flushing pending block + * groups when splitting a leaf or node, from one of those trees, since + * we are holding a write lock on it and its parent or when inserting a + * new root node for one of those trees. + */ + if (root == fs_info->extent_root || + root == fs_info->chunk_root || + root == fs_info->dev_root || + root == fs_info->free_space_root) + trans->can_flush_pending_bgs = false; + + ret = btrfs_alloc_tree_block(trans, root, parent_start, + root->root_key.objectid, disk_key, level, + hint, empty_size); + trans->can_flush_pending_bgs = true; + + return ret; +} + /* * does the dirty work in cow of a single block. The parent block (if * supplied) is updated to point to the new cow copy. The new buffer is marked @@ -1015,28 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) parent_start = parent->start; - /* - * If we are COWing a node/leaf from the extent, chunk, device or free - * space trees, make sure that we do not finish block group creation of - * pending block groups. We do this to avoid a deadlock. - * COWing can result in allocation of a new chunk, and flushing pending - * block groups (btrfs_create_pending_block_groups()) can be triggered - * when finishing allocation of a new chunk. Creation of a pending block - * group modifies the extent, chunk, device and free space trees, - * therefore we could deadlock with ourselves since we are holding a - * lock on an extent buffer that btrfs_create_pending_block_groups() may - * try to COW later. - */ - if (root == fs_info->extent_root || - root == fs_info->chunk_root || - root == fs_info->dev_root || - root == fs_info->free_space_root) - trans->can_flush_pending_bgs = false; - - cow = btrfs_alloc_tree_block(trans, root, parent_start, - root->root_key.objectid, &disk_key, level, - search_start, empty_size); - trans->can_flush_pending_bgs = true; + cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key, + level, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -3345,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); - c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &lower_key, level, root->node->start, 0); + c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level, + root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -3475,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, mid = (c_nritems + 1) / 2; btrfs_node_key(c, &disk_key, mid); - split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &disk_key, level, c->start, 0); + split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level, + c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -4260,8 +4282,8 @@ again: else btrfs_item_key(l, &disk_key, mid); - right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &disk_key, 0, l->start, 0); + right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c5586ffd1426..0a3f122dd61f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, flags | SB_RDONLY, device_name, data); if (IS_ERR(mnt_root)) { root = ERR_CAST(mnt_root); + kfree(subvol_name); goto out; } @@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (error < 0) { root = ERR_PTR(error); mntput(mnt_root); + kfree(subvol_name); goto out; } } } if (IS_ERR(mnt_root)) { root = ERR_CAST(mnt_root); + kfree(subvol_name); goto out; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 127fa1535f58..4ec2b660d014 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_chunk_metadata(trans); - if (lock && should_end_transaction(trans) && - READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { - spin_lock(&info->trans_lock); - if (cur_trans->state == TRANS_STATE_RUNNING) - cur_trans->state = TRANS_STATE_BLOCKED; - spin_unlock(&info->trans_lock); - } - if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { if (throttle) return btrfs_commit_transaction(trans); @@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) kmem_cache_free(btrfs_trans_handle_cachep, trans); } +/* + * Release reserved delayed ref space of all pending block groups of the + * transaction and remove them from the list + */ +static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_block_group_cache *block_group, *tmp; + + list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + btrfs_delayed_refs_rsv_release(fs_info, 1); + list_del_init(&block_group->bg_list); + } +} + static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) { /* @@ -2270,6 +2277,7 @@ scrub_continue: btrfs_scrub_continue(fs_info); cleanup_transaction: btrfs_trans_release_metadata(trans); + btrfs_cleanup_pending_block_groups(trans); btrfs_trans_release_chunk_metadata(trans); trans->block_rsv = NULL; btrfs_warn(fs_info, "Skipping commit of aborted transaction."); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3e4f8f88353e..15561926ab32 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, else fs_devices = alloc_fs_devices(disk_super->fsid, NULL); - fs_devices->fsid_change = fsid_change_in_progress; - if (IS_ERR(fs_devices)) return ERR_CAST(fs_devices); + fs_devices->fsid_change = fsid_change_in_progress; + mutex_lock(&fs_devices->device_list_mutex); list_add(&fs_devices->fs_list, &fs_uuids); diff --git a/fs/buffer.c b/fs/buffer.c index 52d024bfdbc1..48318fb74938 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -200,6 +200,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) struct buffer_head *head; struct page *page; int all_mapped = 1; + static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1); index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); @@ -227,15 +228,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) * file io on the block device and getblk. It gets dealt with * elsewhere, don't buffer_error if we had some unmapped buffers */ - if (all_mapped) { - printk("__find_get_block_slow() failed. " - "block=%llu, b_blocknr=%llu\n", - (unsigned long long)block, - (unsigned long long)bh->b_blocknr); - printk("b_state=0x%08lx, b_size=%zu\n", - bh->b_state, bh->b_size); - printk("device %pg blocksize: %d\n", bdev, - 1 << bd_inode->i_blkbits); + ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE); + if (all_mapped && __ratelimit(&last_warned)) { + printk("__find_get_block_slow() failed. block=%llu, " + "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, " + "device %pg blocksize: %d\n", + (unsigned long long)block, + (unsigned long long)bh->b_blocknr, + bh->b_state, bh->b_size, bdev, + 1 << bd_inode->i_blkbits); } out_unlock: spin_unlock(&bd_mapping->private_lock); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index d1f9c2f3f575..7652551a1fc4 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.16" +#define CIFS_VERSION "2.17" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2c7689f3998d..659ce1b92c44 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2696,6 +2696,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, rc = cifs_write_allocate_pages(wdata->pages, nr_pages); if (rc) { + kvfree(wdata->pages); kfree(wdata); add_credits_and_wake_if(server, credits, 0); break; @@ -2707,6 +2708,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, if (rc) { for (i = 0; i < nr_pages; i++) put_page(wdata->pages[i]); + kvfree(wdata->pages); kfree(wdata); add_credits_and_wake_if(server, credits, 0); break; @@ -3386,8 +3388,12 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, } rc = cifs_read_allocate_pages(rdata, npages); - if (rc) - goto error; + if (rc) { + kvfree(rdata->pages); + kfree(rdata); + add_credits_and_wake_if(server, credits, 0); + break; + } rdata->tailsz = PAGE_SIZE; } @@ -3407,7 +3413,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, if (!rdata->cfile->invalidHandle || !(rc = cifs_reopen_file(rdata->cfile, true))) rc = server->ops->async_readv(rdata); -error: if (rc) { add_credits_and_wake_if(server, rdata->credits, 0); kref_put(&rdata->refcount, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 153238fc4fa9..6f96e2292856 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -866,7 +866,9 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, FILE_READ_EA, FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, - SMB2_MAX_EA_BUF, + CIFSMaxBufSize - + MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE, &rsp_iov, &buftype, cifs_sb); if (rc) { /* diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2ff209ec4fab..77b3aaa39b35 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3241,8 +3241,17 @@ smb2_readv_callback(struct mid_q_entry *mid) rdata->mr = NULL; } #endif - if (rdata->result) + if (rdata->result && rdata->result != -ENODATA) { cifs_stats_fail_inc(tcon, SMB2_READ_HE); + trace_smb3_read_err(0 /* xid */, + rdata->cfile->fid.persistent_fid, + tcon->tid, tcon->ses->Suid, rdata->offset, + rdata->bytes, rdata->result); + } else + trace_smb3_read_done(0 /* xid */, + rdata->cfile->fid.persistent_fid, + tcon->tid, tcon->ses->Suid, + rdata->offset, rdata->got_bytes); queue_work(cifsiod_wq, &rdata->work); DeleteMidQEntry(mid); @@ -3317,13 +3326,11 @@ smb2_async_readv(struct cifs_readdata *rdata) if (rc) { kref_put(&rdata->refcount, cifs_readdata_release); cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); - trace_smb3_read_err(rc, 0 /* xid */, io_parms.persistent_fid, - io_parms.tcon->tid, io_parms.tcon->ses->Suid, - io_parms.offset, io_parms.length); - } else - trace_smb3_read_done(0 /* xid */, io_parms.persistent_fid, - io_parms.tcon->tid, io_parms.tcon->ses->Suid, - io_parms.offset, io_parms.length); + trace_smb3_read_err(0 /* xid */, io_parms.persistent_fid, + io_parms.tcon->tid, + io_parms.tcon->ses->Suid, + io_parms.offset, io_parms.length, rc); + } cifs_small_buf_release(buf); return rc; @@ -3367,10 +3374,11 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, if (rc != -ENODATA) { cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); cifs_dbg(VFS, "Send error in read = %d\n", rc); + trace_smb3_read_err(xid, req->PersistentFileId, + io_parms->tcon->tid, ses->Suid, + io_parms->offset, io_parms->length, + rc); } - trace_smb3_read_err(rc, xid, req->PersistentFileId, - io_parms->tcon->tid, ses->Suid, - io_parms->offset, io_parms->length); free_rsp_buf(resp_buftype, rsp_iov.iov_base); return rc == -ENODATA ? 0 : rc; } else @@ -3459,8 +3467,17 @@ smb2_writev_callback(struct mid_q_entry *mid) wdata->mr = NULL; } #endif - if (wdata->result) + if (wdata->result) { cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); + trace_smb3_write_err(0 /* no xid */, + wdata->cfile->fid.persistent_fid, + tcon->tid, tcon->ses->Suid, wdata->offset, + wdata->bytes, wdata->result); + } else + trace_smb3_write_done(0 /* no xid */, + wdata->cfile->fid.persistent_fid, + tcon->tid, tcon->ses->Suid, + wdata->offset, wdata->bytes); queue_work(cifsiod_wq, &wdata->work); DeleteMidQEntry(mid); @@ -3602,10 +3619,7 @@ smb2_async_writev(struct cifs_writedata *wdata, wdata->bytes, rc); kref_put(&wdata->refcount, release); cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); - } else - trace_smb3_write_done(0 /* no xid */, req->PersistentFileId, - tcon->tid, tcon->ses->Suid, wdata->offset, - wdata->bytes); + } async_writev_out: cifs_small_buf_release(req); @@ -3831,8 +3845,8 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) { srch_inf->endOfSearch = true; rc = 0; - } - cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); + } else + cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); goto qdir_exit; } @@ -4427,8 +4441,8 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); - please_key_low = (__u64 *)req->LeaseKey; - please_key_high = (__u64 *)(req->LeaseKey+8); + please_key_low = (__u64 *)lease_key; + please_key_high = (__u64 *)(lease_key+8); if (rc) { cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 7a2d0a2255e6..538e2299805f 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -84,8 +84,9 @@ #define NUMBER_OF_SMB2_COMMANDS 0x0013 -/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ -#define MAX_SMB2_HDR_SIZE 0x00b0 +/* 52 transform hdr + 64 hdr + 88 create rsp */ +#define SMB2_TRANSFORM_HEADER_SIZE 52 +#define MAX_SMB2_HDR_SIZE 204 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) #define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) @@ -648,6 +649,13 @@ struct smb2_create_req { __u8 Buffer[0]; } __packed; +/* + * Maximum size of a SMB2_CREATE response is 64 (smb2 header) + + * 88 (fixed part of create response) + 520 (path) + 150 (contexts) + + * 2 bytes of padding. + */ +#define MAX_SMB2_CREATE_RESPONSE_SIZE 824 + struct smb2_create_rsp { struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 89 */ @@ -996,6 +1004,11 @@ struct smb2_close_req { __u64 VolatileFileId; /* opaque endianness */ } __packed; +/* + * Maximum size of a SMB2_CLOSE response is 64 (smb2 header) + 60 (data) + */ +#define MAX_SMB2_CLOSE_RESPONSE_SIZE 124 + struct smb2_close_rsp { struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* 60 */ @@ -1398,8 +1411,6 @@ struct smb2_file_link_info { /* encoding of request for level 11 */ char FileName[0]; /* Name to be assigned to new link */ } __packed; /* level 11 Set */ -#define SMB2_MAX_EA_BUF 65536 - struct smb2_file_full_ea_info { /* encoding of response for level 15 */ __le32 next_entry_offset; __u8 flags; diff --git a/fs/dcache.c b/fs/dcache.c index 2593153471cf..aac41adf4743 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -119,6 +119,7 @@ struct dentry_stat_t dentry_stat = { static DEFINE_PER_CPU(long, nr_dentry); static DEFINE_PER_CPU(long, nr_dentry_unused); +static DEFINE_PER_CPU(long, nr_dentry_negative); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) @@ -152,11 +153,22 @@ static long get_nr_dentry_unused(void) return sum < 0 ? 0 : sum; } +static long get_nr_dentry_negative(void) +{ + int i; + long sum = 0; + + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry_negative, i); + return sum < 0 ? 0 : sum; +} + int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); dentry_stat.nr_unused = get_nr_dentry_unused(); + dentry_stat.nr_negative = get_nr_dentry_negative(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #endif @@ -317,6 +329,8 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; + if (dentry->d_flags & DCACHE_LRU_LIST) + this_cpu_inc(nr_dentry_negative); } static void dentry_free(struct dentry *dentry) @@ -371,6 +385,11 @@ static void dentry_unlink_inode(struct dentry * dentry) * The per-cpu "nr_dentry_unused" counters are updated with * the DCACHE_LRU_LIST bit. * + * The per-cpu "nr_dentry_negative" counters are only updated + * when deleted from or added to the per-superblock LRU list, not + * from/to the shrink list. That is to avoid an unneeded dec/inc + * pair when moving from LRU to shrink list in select_collect(). + * * These helper functions make sure we always follow the * rules. d_lock must be held by the caller. */ @@ -380,6 +399,8 @@ static void d_lru_add(struct dentry *dentry) D_FLAG_VERIFY(dentry, 0); dentry->d_flags |= DCACHE_LRU_LIST; this_cpu_inc(nr_dentry_unused); + if (d_is_negative(dentry)) + this_cpu_inc(nr_dentry_negative); WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } @@ -388,6 +409,8 @@ static void d_lru_del(struct dentry *dentry) D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); + if (d_is_negative(dentry)) + this_cpu_dec(nr_dentry_negative); WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } @@ -418,6 +441,8 @@ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry) D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); + if (d_is_negative(dentry)) + this_cpu_dec(nr_dentry_negative); list_lru_isolate(lru, &dentry->d_lru); } @@ -426,6 +451,8 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags |= DCACHE_SHRINK_LIST; + if (d_is_negative(dentry)) + this_cpu_dec(nr_dentry_negative); list_lru_isolate_move(lru, &dentry->d_lru, list); } @@ -1188,15 +1215,11 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, */ void shrink_dcache_sb(struct super_block *sb) { - long freed; - do { LIST_HEAD(dispose); - freed = list_lru_walk(&sb->s_dentry_lru, + list_lru_walk(&sb->s_dentry_lru, dentry_lru_isolate_shrink, &dispose, 1024); - - this_cpu_sub(nr_dentry_unused, freed); shrink_dentry_list(&dispose); } while (list_lru_count(&sb->s_dentry_lru) > 0); } @@ -1820,6 +1843,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) WARN_ON(d_in_lookup(dentry)); spin_lock(&dentry->d_lock); + /* + * Decrement negative dentry count if it was in the LRU list. + */ + if (dentry->d_flags & DCACHE_LRU_LIST) + this_cpu_dec(nr_dentry_negative); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 13b01351dd1c..29c68c5d44d5 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -324,7 +324,7 @@ static struct dentry *failed_creating(struct dentry *dentry) inode_unlock(d_inode(dentry->d_parent)); dput(dentry); simple_release_fs(&debugfs_mount, &debugfs_mount_count); - return NULL; + return ERR_PTR(-ENOMEM); } static struct dentry *end_creating(struct dentry *dentry) @@ -347,7 +347,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, dentry = start_creating(name, parent); if (IS_ERR(dentry)) - return NULL; + return dentry; inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -386,7 +386,8 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -464,7 +465,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -495,7 +497,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_size); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -506,7 +509,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) struct inode *inode; if (IS_ERR(dentry)) - return NULL; + return dentry; inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -545,7 +548,7 @@ struct dentry *debugfs_create_automount(const char *name, struct inode *inode; if (IS_ERR(dentry)) - return NULL; + return dentry; inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -581,8 +584,8 @@ EXPORT_SYMBOL(debugfs_create_automount); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the symbolic * link is to be removed (no automatic cleanup happens if your module is - * unloaded, you are responsible here.) If an error occurs, %NULL will be - * returned. + * unloaded, you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) + * will be returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -594,12 +597,12 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, struct inode *inode; char *link = kstrdup(target, GFP_KERNEL); if (!link) - return NULL; + return ERR_PTR(-ENOMEM); dentry = start_creating(name, parent); if (IS_ERR(dentry)) { kfree(link); - return NULL; + return dentry; } inode = debugfs_get_inode(dentry->d_sb); @@ -787,6 +790,13 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *dentry = NULL, *trap; struct name_snapshot old_name; + if (IS_ERR(old_dir)) + return old_dir; + if (IS_ERR(new_dir)) + return new_dir; + if (IS_ERR_OR_NULL(old_dentry)) + return old_dentry; + trap = lock_rename(new_dir, old_dir); /* Source or destination directories don't exist? */ if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir)) @@ -820,7 +830,9 @@ exit: if (dentry && !IS_ERR(dentry)) dput(dentry); unlock_rename(new_dir, old_dir); - return NULL; + if (IS_ERR(dentry)) + return dentry; + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(debugfs_rename); diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 82377017130f..d31b6c72b476 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); + /* + * We must skip inodes in unusual state. We may also skip + * inodes without pages but we deliberately won't in case + * we need to reschedule to avoid softlockups. + */ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || - (inode->i_mapping->nrpages == 0)) { + (inode->i_mapping->nrpages == 0 && !need_resched())) { spin_unlock(&inode->i_lock); continue; } @@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); + cond_resched(); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 712f00995390..5508baa11bb6 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -116,16 +116,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } - ret = file_write_and_wait_range(file, start, end); - if (ret) - return ret; - if (!journal) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL - }; - - ret = ext4_write_inode(inode, &wbc); + ret = __generic_file_fsync(file, start, end, datasync); if (!ret) ret = ext4_sync_parent(inode); if (test_opt(inode->i_sb, BARRIER)) @@ -133,6 +125,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } + ret = file_write_and_wait_range(file, start, end); + if (ret) + return ret; /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a5e516a40e7a..809c0f2f9942 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1742,7 +1742,6 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.h.nodeid = outarg->nodeid; req->in.numargs = 2; req->in.argpages = 1; - req->page_descs[0].offset = offset; req->end = fuse_retrieve_end; index = outarg->offset >> PAGE_SHIFT; @@ -1757,6 +1756,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, this_num = min_t(unsigned, num, PAGE_SIZE - offset); req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].offset = offset; req->page_descs[req->num_pages].length = this_num; req->num_pages++; @@ -2077,8 +2077,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ret = fuse_dev_do_write(fud, &cs, len); + pipe_lock(pipe); for (idx = 0; idx < nbuf; idx++) pipe_buf_release(pipe, &bufs[idx]); + pipe_unlock(pipe); out: kvfree(bufs); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ffaffe18352a..a59c16bd90ac 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1782,7 +1782,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, spin_unlock(&fc->lock); dec_wb_stat(&bdi->wb, WB_WRITEBACK); - dec_node_page_state(page, NR_WRITEBACK_TEMP); + dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP); wb_writeout_inc(&bdi->wb); fuse_writepage_free(fc, new_req); fuse_request_free(new_req); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 76baaa6be393..c2d4099429be 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -628,6 +628,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); fc->user_ns = get_user_ns(user_ns); + fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; } EXPORT_SYMBOL_GPL(fuse_conn_init); @@ -1162,7 +1163,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->user_id = d.user_id; fc->group_id = d.group_id; fc->max_read = max_t(unsigned, 4096, d.max_read); - fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; /* Used by get_root_inode() */ sb->s_fs_info = fc; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index f15b4c57c4bd..78510ab91835 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -28,7 +28,6 @@ #include "util.h" #include "trans.h" #include "dir.h" -#include "lops.h" struct workqueue_struct *gfs2_freeze_wq; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 5bfaf381921a..b8830fda51e8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -733,7 +733,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, lh->lh_crc = cpu_to_be32(crc); gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr); - gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags); + gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, op_flags); log_flush_wait(sdp); } @@ -810,7 +810,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) gfs2_ordered_write(sdp); lops_before_commit(sdp, tr); - gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE); + gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0); if (sdp->sd_log_head != sdp->sd_log_flush_head) { log_flush_wait(sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 94dcab655bc0..2295042bc625 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -17,9 +17,7 @@ #include <linux/bio.h> #include <linux/fs.h> #include <linux/list_sort.h> -#include <linux/blkdev.h> -#include "bmap.h" #include "dir.h" #include "gfs2.h" #include "incore.h" @@ -195,6 +193,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec, /** * gfs2_end_log_write - end of i/o to the log * @bio: The bio + * @error: Status of i/o request * * Each bio_vec contains either data from the pagecache or data * relating to the log itself. Here we iterate over the bio_vec @@ -231,19 +230,20 @@ static void gfs2_end_log_write(struct bio *bio) /** * gfs2_log_submit_bio - Submit any pending log bio * @biop: Address of the bio pointer - * @opf: REQ_OP | op_flags + * @op: REQ_OP + * @op_flags: req_flag_bits * * Submit any pending part-built or full bio to the block device. If * there is no pending bio, then this is a no-op. */ -void gfs2_log_submit_bio(struct bio **biop, int opf) +void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags) { struct bio *bio = *biop; if (bio) { struct gfs2_sbd *sdp = bio->bi_private; atomic_inc(&sdp->sd_log_in_flight); - bio->bi_opf = opf; + bio_set_op_attrs(bio, op, op_flags); submit_bio(bio); *biop = NULL; } @@ -304,7 +304,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno, nblk >>= sdp->sd_fsb2bb_shift; if (blkno == nblk && !flush) return bio; - gfs2_log_submit_bio(biop, op); + gfs2_log_submit_bio(biop, op, 0); } *biop = gfs2_log_alloc_bio(sdp, blkno, end_io); @@ -375,184 +375,6 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page) gfs2_log_bmap(sdp)); } -/** - * gfs2_end_log_read - end I/O callback for reads from the log - * @bio: The bio - * - * Simply unlock the pages in the bio. The main thread will wait on them and - * process them in order as necessary. - */ - -static void gfs2_end_log_read(struct bio *bio) -{ - struct page *page; - struct bio_vec *bvec; - int i; - - bio_for_each_segment_all(bvec, bio, i) { - page = bvec->bv_page; - if (bio->bi_status) { - int err = blk_status_to_errno(bio->bi_status); - - SetPageError(page); - mapping_set_error(page->mapping, err); - } - unlock_page(page); - } - - bio_put(bio); -} - -/** - * gfs2_jhead_pg_srch - Look for the journal head in a given page. - * @jd: The journal descriptor - * @page: The page to look in - * - * Returns: 1 if found, 0 otherwise. - */ - -static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head, - struct page *page) -{ - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - struct gfs2_log_header_host uninitialized_var(lh); - void *kaddr = kmap_atomic(page); - unsigned int offset; - bool ret = false; - - for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) { - if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) { - if (lh.lh_sequence > head->lh_sequence) - *head = lh; - else { - ret = true; - break; - } - } - } - kunmap_atomic(kaddr); - return ret; -} - -/** - * gfs2_jhead_process_page - Search/cleanup a page - * @jd: The journal descriptor - * @index: Index of the page to look into - * @done: If set, perform only cleanup, else search and set if found. - * - * Find the page with 'index' in the journal's mapping. Search the page for - * the journal head if requested (cleanup == false). Release refs on the - * page so the page cache can reclaim it (put_page() twice). We grabbed a - * reference on this page two times, first when we did a find_or_create_page() - * to obtain the page to add it to the bio and second when we do a - * find_get_page() here to get the page to wait on while I/O on it is being - * completed. - * This function is also used to free up a page we might've grabbed but not - * used. Maybe we added it to a bio, but not submitted it for I/O. Or we - * submitted the I/O, but we already found the jhead so we only need to drop - * our references to the page. - */ - -static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index, - struct gfs2_log_header_host *head, - bool *done) -{ - struct page *page; - - page = find_get_page(jd->jd_inode->i_mapping, index); - wait_on_page_locked(page); - - if (PageError(page)) - *done = true; - - if (!*done) - *done = gfs2_jhead_pg_srch(jd, head, page); - - put_page(page); /* Once for find_get_page */ - put_page(page); /* Once more for find_or_create_page */ -} - -/** - * gfs2_find_jhead - find the head of a log - * @jd: The journal descriptor - * @head: The log descriptor for the head of the log is returned here - * - * Do a search of a journal by reading it in large chunks using bios and find - * the valid log entry with the highest sequence number. (i.e. the log head) - * - * Returns: 0 on success, errno otherwise - */ - -int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) -{ - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - struct address_space *mapping = jd->jd_inode->i_mapping; - struct gfs2_journal_extent *je; - u32 block, read_idx = 0, submit_idx = 0, index = 0; - int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift; - int blocks_per_page = 1 << shift, sz, ret = 0; - struct bio *bio = NULL; - struct page *page; - bool done = false; - errseq_t since; - - memset(head, 0, sizeof(*head)); - if (list_empty(&jd->extent_list)) - gfs2_map_journal_extents(sdp, jd); - - since = filemap_sample_wb_err(mapping); - list_for_each_entry(je, &jd->extent_list, list) { - for (block = 0; block < je->blocks; block += blocks_per_page) { - index = (je->lblock + block) >> shift; - - page = find_or_create_page(mapping, index, GFP_NOFS); - if (!page) { - ret = -ENOMEM; - done = true; - goto out; - } - - if (bio) { - sz = bio_add_page(bio, page, PAGE_SIZE, 0); - if (sz == PAGE_SIZE) - goto page_added; - submit_idx = index; - submit_bio(bio); - bio = NULL; - } - - bio = gfs2_log_alloc_bio(sdp, - je->dblock + (index << shift), - gfs2_end_log_read); - bio->bi_opf = REQ_OP_READ; - sz = bio_add_page(bio, page, PAGE_SIZE, 0); - gfs2_assert_warn(sdp, sz == PAGE_SIZE); - -page_added: - if (submit_idx <= read_idx + BIO_MAX_PAGES) { - /* Keep at least one bio in flight */ - continue; - } - - gfs2_jhead_process_page(jd, read_idx++, head, &done); - if (done) - goto out; /* found */ - } - } - -out: - if (bio) - submit_bio(bio); - while (read_idx <= index) - gfs2_jhead_process_page(jd, read_idx++, head, &done); - - if (!ret) - ret = filemap_check_wb_err(mapping, since); - - return ret; -} - static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type, u32 ld_length, u32 ld_data1) { diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 331160fc568b..711c4d89c063 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -30,10 +30,8 @@ extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp); extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page, unsigned size, unsigned offset, u64 blkno); extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); -extern void gfs2_log_submit_bio(struct bio **biop, int opf); +extern void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags); extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); -extern int gfs2_find_jhead(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head); static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1179763f6370..b041cb8ae383 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -41,7 +41,6 @@ #include "dir.h" #include "meta_io.h" #include "trace_gfs2.h" -#include "lops.h" #define DO 0 #define UNDO 1 diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 7389e445a7a7..2dac43065382 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -182,6 +182,129 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, } /** + * find_good_lh - find a good log header + * @jd: the journal + * @blk: the segment to start searching from + * @lh: the log header to fill in + * @forward: if true search forward in the log, else search backward + * + * Call get_log_header() to get a log header for a segment, but if the + * segment is bad, either scan forward or backward until we find a good one. + * + * Returns: errno + */ + +static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, + struct gfs2_log_header_host *head) +{ + unsigned int orig_blk = *blk; + int error; + + for (;;) { + error = get_log_header(jd, *blk, head); + if (error <= 0) + return error; + + if (++*blk == jd->jd_blocks) + *blk = 0; + + if (*blk == orig_blk) { + gfs2_consist_inode(GFS2_I(jd->jd_inode)); + return -EIO; + } + } +} + +/** + * jhead_scan - make sure we've found the head of the log + * @jd: the journal + * @head: this is filled in with the log descriptor of the head + * + * At this point, seg and lh should be either the head of the log or just + * before. Scan forward until we find the head. + * + * Returns: errno + */ + +static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) +{ + unsigned int blk = head->lh_blkno; + struct gfs2_log_header_host lh; + int error; + + for (;;) { + if (++blk == jd->jd_blocks) + blk = 0; + + error = get_log_header(jd, blk, &lh); + if (error < 0) + return error; + if (error == 1) + continue; + + if (lh.lh_sequence == head->lh_sequence) { + gfs2_consist_inode(GFS2_I(jd->jd_inode)); + return -EIO; + } + if (lh.lh_sequence < head->lh_sequence) + break; + + *head = lh; + } + + return 0; +} + +/** + * gfs2_find_jhead - find the head of a log + * @jd: the journal + * @head: the log descriptor for the head of the log is returned here + * + * Do a binary search of a journal and find the valid log entry with the + * highest sequence number. (i.e. the log head) + * + * Returns: errno + */ + +int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) +{ + struct gfs2_log_header_host lh_1, lh_m; + u32 blk_1, blk_2, blk_m; + int error; + + blk_1 = 0; + blk_2 = jd->jd_blocks - 1; + + for (;;) { + blk_m = (blk_1 + blk_2) / 2; + + error = find_good_lh(jd, &blk_1, &lh_1); + if (error) + return error; + + error = find_good_lh(jd, &blk_m, &lh_m); + if (error) + return error; + + if (blk_1 == blk_m || blk_m == blk_2) + break; + + if (lh_1.lh_sequence <= lh_m.lh_sequence) + blk_1 = blk_m; + else + blk_2 = blk_m; + } + + error = jhead_scan(jd, &lh_1); + if (error) + return error; + + *head = lh_1; + + return error; +} + +/** * foreach_descriptor - go through the active part of the log * @jd: the journal * @start: the first log header in the active region diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 99575ab81202..11d81248be85 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -27,6 +27,8 @@ extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); +extern int gfs2_find_jhead(struct gfs2_jdesc *jd, + struct gfs2_log_header_host *head); extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); extern void gfs2_recover_func(struct work_struct *work); extern int __get_log_header(struct gfs2_sbd *sdp, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 831d7cb5a49c..17a8d3b43990 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1780,9 +1780,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, goto next_iter; } if (ret == -E2BIG) { - n += rbm->bii - initial_bii; rbm->bii = 0; rbm->offset = 0; + n += (rbm->bii - initial_bii); goto res_covered_end_of_rgrp; } return ret; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index d4b11c903971..ca71163ff7cf 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -45,7 +45,6 @@ #include "util.h" #include "sys.h" #include "xattr.h" -#include "lops.h" #define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) diff --git a/fs/inode.c b/fs/inode.c index 0cd47fe0dbe5..73432e64f874 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -730,11 +730,8 @@ static enum lru_status inode_lru_isolate(struct list_head *item, return LRU_REMOVED; } - /* - * Recently referenced inodes and inodes with many attached pages - * get one more pass. - */ - if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) { + /* recently referenced inodes get one more pass */ + if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; spin_unlock(&inode->i_lock); return LRU_ROTATE; diff --git a/fs/iomap.c b/fs/iomap.c index a3088fae567b..897c60215dd1 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -116,6 +116,12 @@ iomap_page_create(struct inode *inode, struct page *page) atomic_set(&iop->read_count, 0); atomic_set(&iop->write_count, 0); bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE); + + /* + * migrate_page_move_mapping() assumes that pages with private data have + * their count elevated by 1. + */ + get_page(page); set_page_private(page, (unsigned long)iop); SetPagePrivate(page); return iop; @@ -132,6 +138,7 @@ iomap_page_release(struct page *page) WARN_ON_ONCE(atomic_read(&iop->write_count)); ClearPagePrivate(page); set_page_private(page, 0); + put_page(page); kfree(iop); } @@ -569,8 +576,10 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage, if (page_has_private(page)) { ClearPagePrivate(page); + get_page(newpage); set_page_private(newpage, page_private(page)); set_page_private(page, 0); + put_page(page); SetPagePrivate(newpage); } @@ -1804,6 +1813,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, loff_t pos = iocb->ki_pos, start = pos; loff_t end = iocb->ki_pos + count - 1, ret = 0; unsigned int flags = IOMAP_DIRECT; + bool wait_for_completion = is_sync_kiocb(iocb); struct blk_plug plug; struct iomap_dio *dio; @@ -1823,7 +1833,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->end_io = end_io; dio->error = 0; dio->flags = 0; - dio->wait_for_completion = is_sync_kiocb(iocb); dio->submit.iter = iter; dio->submit.waiter = current; @@ -1878,7 +1887,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio_warn_stale_pagecache(iocb->ki_filp); ret = 0; - if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion && + if (iov_iter_rw(iter) == WRITE && !wait_for_completion && !inode->i_sb->s_dio_done_wq) { ret = sb_init_dio_done_wq(inode->i_sb); if (ret < 0) @@ -1894,7 +1903,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (ret <= 0) { /* magic error code to fall back to buffered I/O */ if (ret == -ENOTBLK) { - dio->wait_for_completion = true; + wait_for_completion = true; ret = 0; } break; @@ -1916,8 +1925,24 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (dio->flags & IOMAP_DIO_WRITE_FUA) dio->flags &= ~IOMAP_DIO_NEED_SYNC; + /* + * We are about to drop our additional submission reference, which + * might be the last reference to the dio. There are three three + * different ways we can progress here: + * + * (a) If this is the last reference we will always complete and free + * the dio ourselves. + * (b) If this is not the last reference, and we serve an asynchronous + * iocb, we must never touch the dio after the decrement, the + * I/O completion handler will complete and free it. + * (c) If this is not the last reference, but we serve a synchronous + * iocb, the I/O completion handler will wake us up on the drop + * of the final reference, and we will complete and free it here + * after we got woken by the I/O completion handler. + */ + dio->wait_for_completion = wait_for_completion; if (!atomic_dec_and_test(&dio->ref)) { - if (!dio->wait_for_completion) + if (!wait_for_completion) return -EIOCBQUEUED; for (;;) { @@ -1934,9 +1959,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __set_current_state(TASK_RUNNING); } - ret = iomap_dio_complete(dio); - - return ret; + return iomap_dio_complete(dio); out_free_dio: kfree(dio); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 22ce3c8a2f46..0570391eaa16 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1895,6 +1895,11 @@ static int nfs_parse_devname(const char *dev_name, size_t len; char *end; + if (unlikely(!dev_name || !*dev_name)) { + dfprintk(MOUNT, "NFS: device name not specified\n"); + return -EINVAL; + } + /* Is the host name protected with square brakcets? */ if (*dev_name == '[') { end = strchr(++dev_name, ']'); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5a0bbf917a32..f12cb31a41e5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -621,11 +621,12 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, nfs_set_page_writeback(page); WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); - ret = 0; + ret = req->wb_context->error; /* If there is a fatal error that covers this write, just exit */ - if (nfs_error_is_fatal_on_server(req->wb_context->error)) + if (nfs_error_is_fatal_on_server(ret)) goto out_launder; + ret = 0; if (!nfs_pageio_add_request(pgio, req)) { ret = pgio->pg_error; /* @@ -635,9 +636,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, nfs_context_set_write_error(req->wb_context, ret); if (nfs_error_is_fatal_on_server(ret)) goto out_launder; - } + } else + ret = -EAGAIN; nfs_redirty_request(req); - ret = -EAGAIN; } else nfs_add_stats(page_file_mapping(page)->host, NFSIOS_WRITEPAGES, 1); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 9824e32b2f23..7dc98e14655d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -557,9 +557,11 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, loff_t cloned; cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); + if (cloned < 0) + return nfserrno(cloned); if (count && cloned != count) - cloned = -EINVAL; - return nfserrno(cloned < 0 ? cloned : 0); + return nfserrno(-EINVAL); + return 0; } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8ae109429a88..e39bac94dead 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -256,7 +256,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &proc_misc_dentry_ops); + d_set_d_op(dentry, de->proc_dops); return d_splice_alias(inode, dentry); } read_unlock(&proc_subdir_lock); @@ -429,6 +429,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, INIT_LIST_HEAD(&ent->pde_openers); proc_set_user(ent, (*parent)->uid, (*parent)->gid); + ent->proc_dops = &proc_misc_dentry_ops; + out: return ent; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5185d7f6a51e..95b14196f284 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -44,6 +44,7 @@ struct proc_dir_entry { struct completion *pde_unload_completion; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; + const struct dentry_operations *proc_dops; union { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index d5e0fcb3439e..a7b12435519e 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -38,6 +38,22 @@ static struct net *get_proc_net(const struct inode *inode) return maybe_get_net(PDE_NET(PDE(inode))); } +static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 0; +} + +static const struct dentry_operations proc_net_dentry_ops = { + .d_revalidate = proc_net_d_revalidate, + .d_delete = always_delete_dentry, +}; + +static void pde_force_lookup(struct proc_dir_entry *pde) +{ + /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ + pde->proc_dops = &proc_net_dentry_ops; +} + static int seq_open_net(struct inode *inode, struct file *file) { unsigned int state_size = PDE(inode)->state_size; @@ -90,6 +106,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_seq_fops; p->seq_ops = ops; p->state_size = state_size; @@ -133,6 +150,7 @@ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_seq_fops; p->seq_ops = ops; p->state_size = state_size; @@ -181,6 +199,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_single_fops; p->single_show = show; return proc_register(parent, p); @@ -223,6 +242,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_single_fops; p->single_show = show; p->write = write; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f0ec9edab2f3..85b0ef890b28 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -423,7 +423,7 @@ struct mem_size_stats { }; static void smaps_account(struct mem_size_stats *mss, struct page *page, - bool compound, bool young, bool dirty) + bool compound, bool young, bool dirty, bool locked) { int i, nr = compound ? 1 << compound_order(page) : 1; unsigned long size = nr * PAGE_SIZE; @@ -450,24 +450,31 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, else mss->private_clean += size; mss->pss += (u64)size << PSS_SHIFT; + if (locked) + mss->pss_locked += (u64)size << PSS_SHIFT; return; } for (i = 0; i < nr; i++, page++) { int mapcount = page_mapcount(page); + unsigned long pss = (PAGE_SIZE << PSS_SHIFT); if (mapcount >= 2) { if (dirty || PageDirty(page)) mss->shared_dirty += PAGE_SIZE; else mss->shared_clean += PAGE_SIZE; - mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; + mss->pss += pss / mapcount; + if (locked) + mss->pss_locked += pss / mapcount; } else { if (dirty || PageDirty(page)) mss->private_dirty += PAGE_SIZE; else mss->private_clean += PAGE_SIZE; - mss->pss += PAGE_SIZE << PSS_SHIFT; + mss->pss += pss; + if (locked) + mss->pss_locked += pss; } } } @@ -490,6 +497,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; + bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; if (pte_present(*pte)) { @@ -532,7 +540,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte)); + smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -541,6 +549,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; + bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page; /* FOLL_DUMP will return -EFAULT on huge zero page */ @@ -555,7 +564,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, /* pass */; else VM_BUG_ON_PAGE(1, page); - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd)); + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -737,11 +746,8 @@ static void smap_gather_stats(struct vm_area_struct *vma, } } #endif - /* mmap_sem is held in m_start */ walk_page_vma(vma, &smaps_walk); - if (vma->vm_flags & VM_LOCKED) - mss->pss_locked += mss->pss; } #define SEQ_PUT_DEC(str, val) \ diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 1c8eecfe52b8..6acf1bfa0bfe 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -768,18 +768,23 @@ xrep_findroot_block( if (!uuid_equal(&btblock->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) goto out; + /* + * Read verifiers can reference b_ops, so we set the pointer + * here. If the verifier fails we'll reset the buffer state + * to what it was before we touched the buffer. + */ + bp->b_ops = fab->buf_ops; fab->buf_ops->verify_read(bp); if (bp->b_error) { + bp->b_ops = NULL; bp->b_error = 0; goto out; } /* * Some read verifiers will (re)set b_ops, so we must be - * careful not to blow away any such assignment. + * careful not to change b_ops after running the verifier. */ - if (!bp->b_ops) - bp->b_ops = fab->buf_ops; } /* diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 338b9d9984e0..d9048bcea49c 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -449,6 +449,7 @@ xfs_map_blocks( } wpc->imap = imap; + xfs_trim_extent_eof(&wpc->imap, ip); trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap); return 0; allocate_blocks: @@ -459,6 +460,7 @@ allocate_blocks: ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF || imap.br_startoff + imap.br_blockcount <= cow_fsb); wpc->imap = imap; + xfs_trim_extent_eof(&wpc->imap, ip); trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap); return 0; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index eedc5e0156ff..4f5f2ff3f70f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -776,10 +776,26 @@ _xfs_buf_read( } /* + * Set buffer ops on an unchecked buffer and validate it, if possible. + * * If the caller passed in an ops structure and the buffer doesn't have ops * assigned, set the ops and use them to verify the contents. If the contents * cannot be verified, we'll clear XBF_DONE. We assume the buffer has no * recorded errors and is already in XBF_DONE state. + * + * Under normal operations, every in-core buffer must have buffer ops assigned + * to them when the buffer is read in from disk so that we can validate the + * metadata. + * + * However, there are two scenarios where one can encounter in-core buffers + * that don't have buffer ops. The first is during log recovery of buffers on + * a V4 filesystem, though these buffers are purged at the end of recovery. + * + * The other is online repair, which tries to match arbitrary metadata blocks + * with btree types in order to find the root. If online repair doesn't match + * the buffer with /any/ btree type, the buffer remains in memory in DONE state + * with no ops, and a subsequent read_buf call from elsewhere will not set the + * ops. This function helps us fix this situation. */ int xfs_buf_ensure_ops( @@ -1536,8 +1552,7 @@ __xfs_buf_submit( xfs_buf_ioerror(bp, -EIO); bp->b_flags &= ~XBF_DONE; xfs_buf_stale(bp); - if (bp->b_flags & XBF_ASYNC) - xfs_buf_ioend(bp); + xfs_buf_ioend(bp); return -EIO; } diff --git a/include/uapi/asm-generic/shmparam.h b/include/asm-generic/shmparam.h index 8b78c0ba08b1..8b78c0ba08b1 100644 --- a/include/uapi/asm-generic/shmparam.h +++ b/include/asm-generic/shmparam.h diff --git a/include/dt-bindings/clock/imx8mq-clock.h b/include/dt-bindings/clock/imx8mq-clock.h index b53be41929be..04f7ac345984 100644 --- a/include/dt-bindings/clock/imx8mq-clock.h +++ b/include/dt-bindings/clock/imx8mq-clock.h @@ -350,7 +350,7 @@ #define IMX8MQ_CLK_VPU_G2_ROOT 241 /* SCCG PLL GATE */ -#define IMX8MQ_SYS1_PLL_OUT 232 +#define IMX8MQ_SYS1_PLL_OUT 242 #define IMX8MQ_SYS2_PLL_OUT 243 #define IMX8MQ_SYS3_PLL_OUT 244 #define IMX8MQ_DRAM_PLL_OUT 245 @@ -372,24 +372,24 @@ /* txesc clock */ #define IMX8MQ_CLK_DSI_IPG_DIV 256 -#define IMX8MQ_CLK_TMU_ROOT 265 +#define IMX8MQ_CLK_TMU_ROOT 257 /* Display root clocks */ -#define IMX8MQ_CLK_DISP_AXI_ROOT 266 -#define IMX8MQ_CLK_DISP_APB_ROOT 267 -#define IMX8MQ_CLK_DISP_RTRM_ROOT 268 +#define IMX8MQ_CLK_DISP_AXI_ROOT 258 +#define IMX8MQ_CLK_DISP_APB_ROOT 259 +#define IMX8MQ_CLK_DISP_RTRM_ROOT 260 -#define IMX8MQ_CLK_OCOTP_ROOT 269 +#define IMX8MQ_CLK_OCOTP_ROOT 261 -#define IMX8MQ_CLK_DRAM_ALT_ROOT 270 -#define IMX8MQ_CLK_DRAM_CORE 271 +#define IMX8MQ_CLK_DRAM_ALT_ROOT 262 +#define IMX8MQ_CLK_DRAM_CORE 263 -#define IMX8MQ_CLK_MU_ROOT 272 -#define IMX8MQ_VIDEO2_PLL_OUT 273 +#define IMX8MQ_CLK_MU_ROOT 264 +#define IMX8MQ_VIDEO2_PLL_OUT 265 -#define IMX8MQ_CLK_CLKO2 274 +#define IMX8MQ_CLK_CLKO2 266 -#define IMX8MQ_CLK_NAND_USDHC_BUS_RAWNAND_CLK 275 +#define IMX8MQ_CLK_NAND_USDHC_BUS_RAWNAND_CLK 267 -#define IMX8MQ_CLK_END 276 +#define IMX8MQ_CLK_END 268 #endif /* __DT_BINDINGS_CLOCK_IMX8MQ_H */ diff --git a/include/dt-bindings/clock/marvell,mmp2.h b/include/dt-bindings/clock/marvell,mmp2.h index 7b24fc791146..228a5e234af0 100644 --- a/include/dt-bindings/clock/marvell,mmp2.h +++ b/include/dt-bindings/clock/marvell,mmp2.h @@ -71,7 +71,6 @@ #define MMP2_CLK_CCIC1_MIX 117 #define MMP2_CLK_CCIC1_PHY 118 #define MMP2_CLK_CCIC1_SPHY 119 -#define MMP2_CLK_SP 120 #define MMP2_NR_CLKS 200 #endif diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8804753805ac..7bb2d8de9f30 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,7 +116,13 @@ extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); static inline sector_t blk_rq_trace_sector(struct request *rq) { - return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq); + /* + * Tracing should ignore starting sector for passthrough requests and + * requests where starting sector didn't get set. + */ + if (blk_rq_is_passthrough(rq) || blk_rq_pos(rq) == (sector_t)-1) + return 0; + return blk_rq_pos(rq); } static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 218df7f4d3e1..5041357d0297 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -180,12 +180,10 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); -extern void cpu_smt_check_topology_early(void); extern void cpu_smt_check_topology(void); #else # define cpu_smt_control (CPU_SMT_ENABLED) static inline void cpu_smt_disable(bool force) { } -static inline void cpu_smt_check_topology_early(void) { } static inline void cpu_smt_check_topology(void) { } #endif diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ef4b70f64f33..60996e64c579 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -62,9 +62,10 @@ extern const struct qstr slash_name; struct dentry_stat_t { long nr_dentry; long nr_unused; - long age_limit; /* age in seconds */ - long want_pages; /* pages requested by system */ - long dummy[2]; + long age_limit; /* age in seconds */ + long want_pages; /* pages requested by system */ + long nr_negative; /* # of unused negative dentries */ + long dummy; /* Reserved for future use */ }; extern struct dentry_stat_t dentry_stat; diff --git a/include/linux/filter.h b/include/linux/filter.h index 7317376734f7..95e2d7ebdf21 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -611,8 +611,8 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb) return qdisc_skb_cb(skb)->data; } -static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, - struct sk_buff *skb) +static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); u8 cb_saved[BPF_SKB_CB_LEN]; @@ -631,15 +631,30 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, return res; } +static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u32 res; + + preempt_disable(); + res = __bpf_prog_run_save_cb(prog, skb); + preempt_enable(); + return res; +} + static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); + u32 res; if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN); - return BPF_PROG_RUN(prog, skb); + preempt_disable(); + res = BPF_PROG_RUN(prog, skb); + preempt_enable(); + return res; } static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, diff --git a/include/linux/fs.h b/include/linux/fs.h index 811c77743dad..29d8e2cfed0e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1479,11 +1479,12 @@ struct super_block { struct user_namespace *s_user_ns; /* - * Keep the lru lists last in the structure so they always sit on their - * own individual cachelines. + * The list_lru structure is essentially just a pointer to a table + * of per-node lru lists, each of which has its own spinlock. + * There is no need to put them into separate cachelines. */ - struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; - struct list_lru s_inode_lru ____cacheline_aligned_in_smp; + struct list_lru s_dentry_lru; + struct list_lru s_inode_lru; struct rcu_head rcu; struct work_struct destroy_work; diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 94e9797e434c..f127adb71041 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -7,6 +7,7 @@ #define __PTP_QORIQ_H__ #include <linux/io.h> +#include <linux/interrupt.h> #include <linux/ptp_clock_kernel.h> /* @@ -49,7 +50,7 @@ struct etts_regs { u32 tmr_etts2_l; /* Timestamp of general purpose external trigger */ }; -struct qoriq_ptp_registers { +struct ptp_qoriq_registers { struct ctrl_regs __iomem *ctrl_regs; struct alarm_regs __iomem *alarm_regs; struct fiper_regs __iomem *fiper_regs; @@ -57,15 +58,15 @@ struct qoriq_ptp_registers { }; /* Offset definitions for the four register groups */ -#define CTRL_REGS_OFFSET 0x0 -#define ALARM_REGS_OFFSET 0x40 -#define FIPER_REGS_OFFSET 0x80 -#define ETTS_REGS_OFFSET 0xa0 +#define ETSEC_CTRL_REGS_OFFSET 0x0 +#define ETSEC_ALARM_REGS_OFFSET 0x40 +#define ETSEC_FIPER_REGS_OFFSET 0x80 +#define ETSEC_ETTS_REGS_OFFSET 0xa0 -#define FMAN_CTRL_REGS_OFFSET 0x80 -#define FMAN_ALARM_REGS_OFFSET 0xb8 -#define FMAN_FIPER_REGS_OFFSET 0xd0 -#define FMAN_ETTS_REGS_OFFSET 0xe0 +#define CTRL_REGS_OFFSET 0x80 +#define ALARM_REGS_OFFSET 0xb8 +#define FIPER_REGS_OFFSET 0xd0 +#define ETTS_REGS_OFFSET 0xe0 /* Bit definitions for the TMR_CTRL register */ @@ -136,9 +137,9 @@ struct qoriq_ptp_registers { #define DEFAULT_FIPER1_PERIOD 1000000000 #define DEFAULT_FIPER2_PERIOD 100000 -struct qoriq_ptp { +struct ptp_qoriq { void __iomem *base; - struct qoriq_ptp_registers regs; + struct ptp_qoriq_registers regs; spinlock_t lock; /* protects regs */ struct ptp_clock *clock; struct ptp_clock_info caps; @@ -156,28 +157,48 @@ struct qoriq_ptp { u32 cksel; u32 tmr_fiper1; u32 tmr_fiper2; + u32 (*read)(unsigned __iomem *addr); + void (*write)(unsigned __iomem *addr, u32 val); }; -static inline u32 qoriq_read(unsigned __iomem *addr) +static inline u32 qoriq_read_be(unsigned __iomem *addr) { - u32 val; - - val = ioread32be(addr); - return val; + return ioread32be(addr); } -static inline void qoriq_write(unsigned __iomem *addr, u32 val) +static inline void qoriq_write_be(unsigned __iomem *addr, u32 val) { iowrite32be(val, addr); } +static inline u32 qoriq_read_le(unsigned __iomem *addr) +{ + return ioread32(addr); +} + +static inline void qoriq_write_le(unsigned __iomem *addr, u32 val) +{ + iowrite32(val, addr); +} + +irqreturn_t ptp_qoriq_isr(int irq, void *priv); +int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, + const struct ptp_clock_info caps); +void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq); +int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm); +int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta); +int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts); +int ptp_qoriq_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts); +int ptp_qoriq_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on); #ifdef CONFIG_DEBUG_FS -void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp); -void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp); +void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq); +void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq); #else -static inline void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp) +static inline void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq) { } -static inline void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp) +static inline void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq) { } #endif diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index 8663f216c563..2d6100edf204 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -24,7 +24,10 @@ #ifdef CONFIG_DEBUG_FS +#include <linux/kfifo.h> + #define HID_DEBUG_BUFSIZE 512 +#define HID_DEBUG_FIFOSIZE 512 void hid_dump_input(struct hid_device *, struct hid_usage *, __s32); void hid_dump_report(struct hid_device *, int , u8 *, int); @@ -37,11 +40,8 @@ void hid_debug_init(void); void hid_debug_exit(void); void hid_debug_event(struct hid_device *, char *); - struct hid_debug_list { - char *hid_debug_buf; - int head; - int tail; + DECLARE_KFIFO_PTR(hid_debug_fifo, char); struct fasync_struct *fasync; struct hid_device *hdev; struct list_head node; @@ -64,4 +64,3 @@ struct hid_debug_list { #endif #endif - diff --git a/include/linux/ide.h b/include/linux/ide.h index e7d29ae633cd..971cf76a78a0 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -615,6 +615,7 @@ struct ide_drive_s { /* current sense rq and buffer */ bool sense_rq_armed; + bool sense_rq_active; struct request *sense_rq; struct request_sense sense_data; @@ -1219,6 +1220,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(struct timer_list *t); extern irqreturn_t ide_intr(int irq, void *dev_id); extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +extern blk_status_t ide_issue_rq(ide_drive_t *, struct request *, bool); extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq); void ide_init_disk(struct gendisk *, ide_drive_t *); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 071b4cbdf010..c848a7cc502e 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -319,7 +319,7 @@ #define GITS_TYPER_PLPIS (1UL << 0) #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 -#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0xf) + 1) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 07da5c6c5ba0..368267c1b71b 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -21,14 +21,16 @@ struct vmem_altmap; * walkers which rely on the fully initialized page->flags and others * should use this rather than pfn_valid && pfn_to_page */ -#define pfn_to_online_page(pfn) \ -({ \ - struct page *___page = NULL; \ - unsigned long ___nr = pfn_to_section_nr(pfn); \ - \ - if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\ - ___page = pfn_to_page(pfn); \ - ___page; \ +#define pfn_to_online_page(pfn) \ +({ \ + struct page *___page = NULL; \ + unsigned long ___pfn = pfn; \ + unsigned long ___nr = pfn_to_section_nr(___pfn); \ + \ + if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \ + pfn_valid_within(___pfn)) \ + ___page = pfn_to_page(___pfn); \ + ___page; \ }) /* diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8c4a820bd4c1..f93a5598b942 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -67,7 +67,7 @@ #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) -#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld)) +#define MLX5_ADDR_OF(typ, p, fld) ((void *)((uint8_t *)(p) + MLX5_BYTE_OFF(typ, fld))) /* insert a value to a struct */ #define MLX5_SET(typ, p, fld, v) do { \ @@ -342,6 +342,8 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, + MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE = 0xe, + MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, @@ -591,7 +593,7 @@ struct mlx5_eqe_cmd { }; struct mlx5_eqe_page_req { - u8 rsvd0[2]; + __be16 ec_function; __be16 func_id; __be32 num_pages; __be32 rsvd1[5]; @@ -1201,6 +1203,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap) +#define MLX5_CAP_ODP_MAX(mdev, cap)\ + MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap) + #define MLX5_CAP_VECTOR_CALC(mdev, cap) \ MLX5_GET(vector_calc_cap, \ mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54299251d40d..c2de50f02b33 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -522,6 +522,7 @@ struct mlx5_priv { atomic_t reg_pages; struct list_head free_list; int vfs_pages; + int peer_pf_pages; struct mlx5_core_health health; @@ -652,6 +653,7 @@ struct mlx5_core_dev { u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; + u8 embedded_cpu; } caps; u64 sys_image_guid; phys_addr_t iseg_base; @@ -850,11 +852,30 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +struct mlx5_async_ctx { + struct mlx5_core_dev *dev; + atomic_t num_inflight; + struct wait_queue_head wait; +}; + +struct mlx5_async_work; + +typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); + +struct mlx5_async_work { + struct mlx5_async_ctx *ctx; + mlx5_async_cbk_t user_callback; +}; + +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx); +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx); +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work); + int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context); int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); @@ -885,9 +906,10 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context); + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); @@ -897,14 +919,12 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *out, int outlen); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port); int mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages); + s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); @@ -1058,11 +1078,29 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); } -#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) -#define MLX5_VPORT_MANAGER(mdev) \ - (MLX5_CAP_GEN(mdev, vport_group_manager) && \ - (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ - mlx5_core_is_pf(mdev)) +static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) +{ + return dev->caps.embedded_cpu; +} + +static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev) +{ + return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager); +} + +static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists); +} + +#define MLX5_HOST_PF_MAX_VFS (127u) +static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) +{ + if (mlx5_core_is_ecpf_esw_manager(dev)) + return MLX5_HOST_PF_MAX_VFS; + else + return pci_sriov_get_totalvfs(dev->pdev); +} static inline int mlx5_get_gid_table_len(u16 param) { diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index fab5121ffb8f..96d8435421de 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -22,6 +22,12 @@ enum { NUM_REP_TYPES, }; +enum { + REP_UNREGISTERED, + REP_REGISTERED, + REP_LOADED, +}; + struct mlx5_eswitch_rep; struct mlx5_eswitch_rep_if { int (*load)(struct mlx5_core_dev *dev, @@ -29,7 +35,7 @@ struct mlx5_eswitch_rep_if { void (*unload)(struct mlx5_eswitch_rep *rep); void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); void *priv; - bool valid; + u8 state; }; struct mlx5_eswitch_rep { @@ -40,13 +46,10 @@ struct mlx5_eswitch_rep { u32 vlan_refcount; }; -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *rep_if, - u8 rep_type); -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - u8 rep_type); +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep_if *rep_if, + u8 rep_type); +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type); void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, int vport, u8 rep_type); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 35fe5217b244..b7bb774b57b0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -72,6 +72,7 @@ enum { enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, + MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, }; @@ -141,6 +142,7 @@ enum { MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, + MLX5_CMD_OP_QUERY_HOST_PARAMS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -629,7 +631,8 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x17]; + u8 reserved_at_5[0x16]; + u8 ecpf_vport_exists[0x1]; u8 counter_eswitch_affinity[0x1]; u8 merged_eswitch[0x1]; u8 nic_vport_node_guid_modify[0x1]; @@ -831,7 +834,9 @@ struct mlx5_ifc_odp_cap_bits { struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; - u8 reserved_at_e0[0x720]; + struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps; + + u8 reserved_at_100[0x700]; }; struct mlx5_ifc_calc_op { @@ -4438,7 +4443,8 @@ struct mlx5_ifc_query_pages_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 num_pages[0x20]; @@ -4457,7 +4463,8 @@ struct mlx5_ifc_query_pages_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -5877,7 +5884,8 @@ struct mlx5_ifc_manage_pages_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 input_num_entries[0x20]; @@ -6055,7 +6063,8 @@ struct mlx5_ifc_enable_hca_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6099,7 +6108,8 @@ struct mlx5_ifc_disable_hca_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -7817,21 +7827,23 @@ struct mlx5_ifc_ptys_reg_bits { u8 proto_mask[0x3]; u8 an_status[0x4]; - u8 reserved_at_24[0x3c]; + u8 reserved_at_24[0x1c]; + + u8 ext_eth_proto_capability[0x20]; u8 eth_proto_capability[0x20]; u8 ib_link_width_capability[0x10]; u8 ib_proto_capability[0x10]; - u8 reserved_at_a0[0x20]; + u8 ext_eth_proto_admin[0x20]; u8 eth_proto_admin[0x20]; u8 ib_link_width_admin[0x10]; u8 ib_proto_admin[0x10]; - u8 reserved_at_100[0x20]; + u8 ext_eth_proto_oper[0x20]; u8 eth_proto_oper[0x20]; @@ -8280,7 +8292,9 @@ struct mlx5_ifc_mpegc_reg_bits { struct mlx5_ifc_pcam_enhanced_features_bits { u8 reserved_at_0[0x6d]; u8 rx_icrc_encapsulated_counter[0x1]; - u8 reserved_at_6e[0x8]; + u8 reserved_at_6e[0x4]; + u8 ptys_extended_ethernet[0x1]; + u8 reserved_at_73[0x3]; u8 pfcc_mask[0x1]; u8 reserved_at_77[0x3]; u8 per_lane_error_counters[0x1]; @@ -8746,7 +8760,8 @@ struct mlx5_ifc_initial_seg_bits { u8 initializing[0x1]; u8 reserved_at_fe1[0x4]; u8 nic_interface_supported[0x3]; - u8 reserved_at_fe8[0x18]; + u8 embedded_cpu[0x1]; + u8 reserved_at_fe9[0x17]; struct mlx5_ifc_health_buffer_bits health_buffer; @@ -9513,4 +9528,44 @@ struct mlx5_ifc_mtrc_ctrl_bits { u8 reserved_at_80[0x180]; }; +struct mlx5_ifc_host_params_context_bits { + u8 host_number[0x8]; + u8 reserved_at_8[0x8]; + u8 host_num_of_vfs[0x10]; + + u8 reserved_at_20[0x10]; + u8 host_pci_bus[0x10]; + + u8 reserved_at_40[0x10]; + u8 host_pci_device[0x10]; + + u8 reserved_at_60[0x10]; + u8 host_pci_function[0x10]; + + u8 reserved_at_80[0x180]; +}; + +struct mlx5_ifc_query_host_params_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_query_host_params_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_host_params_context_bits host_params_context; + + u8 reserved_at_280[0x180]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index bf4bc01ffb0c..814fa194663b 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -92,6 +92,22 @@ enum mlx5e_link_mode { MLX5E_LINK_MODES_NUMBER, }; +enum mlx5e_ext_link_mode { + MLX5E_SGMII_100M = 0, + MLX5E_1000BASE_X_SGMII = 1, + MLX5E_5GBASE_R = 3, + MLX5E_10GBASE_XFI_XAUI_1 = 4, + MLX5E_40GBASE_XLAUI_4_XLPPI_4 = 5, + MLX5E_25GAUI_1_25GBASE_CR_KR = 6, + MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2 = 7, + MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR = 8, + MLX5E_CAUI_4_100GBASE_CR4_KR4 = 9, + MLX5E_100GAUI_2_100GBASE_CR2_KR2 = 10, + MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, + MLX5E_400GAUI_8 = 15, + MLX5E_EXT_LINK_MODES_NUMBER, +}; + enum mlx5e_connector_type { MLX5E_PORT_UNKNOWN = 0, MLX5E_PORT_NONE = 1, @@ -106,31 +122,23 @@ enum mlx5e_connector_type { }; #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) +#define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ + (ext ? MLX5_GET(reg, out, ext_##field) : \ + MLX5_GET(reg, out, field)) int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask); -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask); int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port); int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, u8 local_port); -int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, - u32 *proto_oper, u8 local_port); -int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin, int proto_mask); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); -void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, - u8 *an_status, - u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 9c694808c212..0eef548b9946 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,15 +36,38 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/device.h> +#define MLX5_VPORT_PF_PLACEHOLDER (1u) +#define MLX5_VPORT_UPLINK_PLACEHOLDER (1u) +#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev) (mlx5_ecpf_vport_exists(mdev)) + +#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER + \ + MLX5_VPORT_UPLINK_PLACEHOLDER + \ + MLX5_VPORT_ECPF_PLACEHOLDER(mdev)) + +#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS(mdev) + \ + mlx5_core_max_vfs(mdev)) + +#define MLX5_VPORT_MANAGER(mdev) \ + (MLX5_CAP_GEN(mdev, vport_group_manager) && \ + (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ + mlx5_core_is_pf(mdev)) + enum { MLX5_CAP_INLINE_MODE_L2, MLX5_CAP_INLINE_MODE_VPORT_CONTEXT, MLX5_CAP_INLINE_MODE_NOT_REQUIRED, }; +enum { + MLX5_VPORT_PF = 0x0, + MLX5_VPORT_FIRST_VF = 0x1, + MLX5_VPORT_ECPF = 0xfffe, + MLX5_VPORT_UPLINK = 0xffff +}; + u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport, u8 state); + u16 vport, u8 other_vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, @@ -60,7 +83,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, - u32 vport, u64 node_guid); + u16 vport, u64 node_guid); int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, @@ -78,7 +101,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int *list_size); @@ -87,7 +110,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, u8 addr_list[][ETH_ALEN], int list_size); int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, - u32 vport, + u16 vport, int *promisc_uc, int *promisc_mc, int *promisc_all); @@ -96,7 +119,7 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, int promisc_mc, int promisc_all); int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, u16 vlans[], int *size); int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, @@ -106,7 +129,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, - u64 *rx_discard_vport_down, + u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, int vf, u8 port_num, void *out, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2c471a2c43fa..0a36a22228e7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -95,6 +95,13 @@ struct page { */ unsigned long private; }; + struct { /* page_pool used by netstack */ + /** + * @dma_addr: might require a 64-bit value even on + * 32-bit architectures. + */ + dma_addr_t dma_addr; + }; struct { /* slab, slob and slub */ union { struct list_head slab_list; /* uses lru */ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index de7377815b6b..8ef330027b13 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -308,6 +308,7 @@ struct mmc_card { unsigned int nr_parts; unsigned int bouncesz; /* Bounce buffer size */ + struct workqueue_struct *complete_wq; /* Private workqueue */ }; static inline bool mmc_large_sector(struct mmc_card *card) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6aedaf1e9a25..aab4d9f6613d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1491,6 +1491,7 @@ struct net_device_ops { * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device + * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1522,6 +1523,7 @@ enum netdev_priv_flags { IFF_NO_RX_HANDLER = 1<<26, IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, + IFF_L3MDEV_RX_HANDLER = 1<<29, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1552,6 +1554,7 @@ enum netdev_priv_flags { #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE +#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER /** * struct net_device - The DEVICE structure. @@ -4560,6 +4563,11 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } +static inline bool netif_has_l3_rx_handler(const struct net_device *dev) +{ + return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; +} + static inline bool netif_is_l3_master(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_MASTER; diff --git a/include/linux/objagg.h b/include/linux/objagg.h index 34f38c186ea0..78021777df46 100644 --- a/include/linux/objagg.h +++ b/include/linux/objagg.h @@ -6,14 +6,19 @@ struct objagg_ops { size_t obj_size; + bool (*delta_check)(void *priv, const void *parent_obj, + const void *obj); + int (*hints_obj_cmp)(const void *obj1, const void *obj2); void * (*delta_create)(void *priv, void *parent_obj, void *obj); void (*delta_destroy)(void *priv, void *delta_priv); - void * (*root_create)(void *priv, void *obj); + void * (*root_create)(void *priv, void *obj, unsigned int root_id); +#define OBJAGG_OBJ_ROOT_ID_INVALID UINT_MAX void (*root_destroy)(void *priv, void *root_priv); }; struct objagg; struct objagg_obj; +struct objagg_hints; const void *objagg_obj_root_priv(const struct objagg_obj *objagg_obj); const void *objagg_obj_delta_priv(const struct objagg_obj *objagg_obj); @@ -21,7 +26,8 @@ const void *objagg_obj_raw(const struct objagg_obj *objagg_obj); struct objagg_obj *objagg_obj_get(struct objagg *objagg, void *obj); void objagg_obj_put(struct objagg *objagg, struct objagg_obj *objagg_obj); -struct objagg *objagg_create(const struct objagg_ops *ops, void *priv); +struct objagg *objagg_create(const struct objagg_ops *ops, + struct objagg_hints *hints, void *priv); void objagg_destroy(struct objagg *objagg); struct objagg_obj_stats { @@ -36,6 +42,7 @@ struct objagg_obj_stats_info { }; struct objagg_stats { + unsigned int root_count; unsigned int stats_info_count; struct objagg_obj_stats_info stats_info[]; }; @@ -43,4 +50,14 @@ struct objagg_stats { const struct objagg_stats *objagg_stats_get(struct objagg *objagg); void objagg_stats_put(const struct objagg_stats *objagg_stats); +enum objagg_opt_algo_type { + OBJAGG_OPT_ALGO_SIMPLE_GREEDY, +}; + +struct objagg_hints *objagg_hints_get(struct objagg *objagg, + enum objagg_opt_algo_type opt_algo_type); +void objagg_hints_put(struct objagg_hints *objagg_hints); +const struct objagg_stats * +objagg_hints_stats_get(struct objagg_hints *objagg_hints); + #endif diff --git a/include/linux/phy.h b/include/linux/phy.h index 237dd035858a..bf1070c2a53b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -502,6 +502,12 @@ struct phy_driver { */ int (*probe)(struct phy_device *phydev); + /* + * Probe the hardware to determine what abilities it has. + * Should only set phydev->supported. + */ + int (*get_features)(struct phy_device *phydev); + /* PHY Power Management */ int (*suspend)(struct phy_device *phydev); int (*resume)(struct phy_device *phydev); @@ -667,13 +673,8 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask, bool exact); size_t phy_speeds(unsigned int *speeds, size_t size, unsigned long *mask); - -static inline bool __phy_is_started(struct phy_device *phydev) -{ - WARN_ON(!mutex_is_locked(&phydev->lock)); - - return phydev->state >= PHY_UP; -} +void of_set_phy_supported(struct phy_device *phydev); +void of_set_phy_eee_broken(struct phy_device *phydev); /** * phy_is_started - Convenience function to check whether PHY is started @@ -681,13 +682,7 @@ static inline bool __phy_is_started(struct phy_device *phydev) */ static inline bool phy_is_started(struct phy_device *phydev) { - bool started; - - mutex_lock(&phydev->lock); - started = __phy_is_started(phydev); - mutex_unlock(&phydev->lock); - - return started; + return phydev->state >= PHY_UP; } void phy_resolve_aneg_linkmode(struct phy_device *phydev); @@ -793,13 +788,21 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); */ int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); +int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, + u16 set); +int phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, + u16 set); int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); +int __phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); +int phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, - u16 mask, u16 set); + u16 mask, u16 set); int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, - u16 mask, u16 set); + u16 mask, u16 set); /** * __phy_set_bits - Convenience function for setting bits in a PHY register @@ -1094,12 +1097,13 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); int genphy_c45_aneg_done(struct phy_device *phydev); -int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask); +int genphy_c45_read_link(struct phy_device *phydev); int genphy_c45_read_lpa(struct phy_device *phydev); int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); +int genphy_c45_pma_read_abilities(struct phy_device *phydev); /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 021fc6595856..f57059e4353f 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -220,6 +220,7 @@ void phylink_ethtool_get_pauseparam(struct phylink *, int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); +int phylink_init_eee(struct phylink *, bool); int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 54af4eef169f..fed5be706bc9 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -105,7 +105,7 @@ static inline bool pm_runtime_callbacks_present(struct device *dev) static inline void pm_runtime_mark_last_busy(struct device *dev) { - WRITE_ONCE(dev->power.last_busy, ktime_to_ns(ktime_get())); + WRITE_ONCE(dev->power.last_busy, ktime_get_mono_fast_ns()); } static inline bool pm_runtime_is_irq_safe(struct device *dev) diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 35170f74ed80..f6165d304b4d 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -643,6 +643,7 @@ struct qed_dev_info { u16 mtu; bool wol_support; + bool smart_an; /* MBI version */ u32 mbi_version; diff --git a/include/linux/sched.h b/include/linux/sched.h index d2f90fa92468..bba3afb4e9bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -995,7 +995,7 @@ struct task_struct { /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; #endif -#ifdef CONFIG_X86_RESCTRL +#ifdef CONFIG_X86_CPU_RESCTRL u32 closid; u32 rmid; #endif diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ec912d01126f..ecdc6542070f 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ +#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/include/linux/signal.h b/include/linux/signal.h index cc7e2c1cd444..9702016734b1 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -392,7 +392,7 @@ extern bool unhandled_signal(struct task_struct *tsk, int sig); #endif #define siginmask(sig, mask) \ - ((sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) + ((sig) > 0 && (sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) #define SIG_KERNEL_ONLY_MASK (\ rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 831846617d07..a41e84f7730c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1889,12 +1889,12 @@ static inline void __skb_queue_before(struct sk_buff_head *list, * * A buffer cannot be placed on two lists at the same time. */ -void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_after(list, (struct sk_buff *)list, newsk); } +void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); /** * __skb_queue_tail - queue a buffer at the list tail @@ -1906,12 +1906,12 @@ static inline void __skb_queue_head(struct sk_buff_head *list, * * A buffer cannot be placed on two lists at the same time. */ -void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_before(list, (struct sk_buff *)list, newsk); } +void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); /* * remove sk_buff from list. _Must_ be called atomically, and with @@ -1938,7 +1938,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) * so must be used with appropriate locks held only. The head item is * returned or %NULL if the list is empty. */ -struct sk_buff *skb_dequeue(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek(list); @@ -1946,6 +1945,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) __skb_unlink(skb, list); return skb; } +struct sk_buff *skb_dequeue(struct sk_buff_head *list); /** * __skb_dequeue_tail - remove from the tail of the queue @@ -1955,7 +1955,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) * so must be used with appropriate locks held only. The tail item is * returned or %NULL if the list is empty. */ -struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek_tail(list); @@ -1963,6 +1962,7 @@ static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) __skb_unlink(skb, list); return skb; } +struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline bool skb_is_nonlinear(const struct sk_buff *skb) @@ -2653,13 +2653,13 @@ static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) * the list and one reference dropped. This function does not take the * list lock and the caller must hold the relevant locks to use it. */ -void skb_queue_purge(struct sk_buff_head *list); static inline void __skb_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; while ((skb = __skb_dequeue(list)) != NULL) kfree_skb(skb); } +void skb_queue_purge(struct sk_buff_head *list); unsigned int skb_rbtree_purge(struct rb_root *root); @@ -3028,7 +3028,7 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) } /** - * skb_put_padto - increase size and pad an skbuff up to a minimal size + * __skb_put_padto - increase size and pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * @free_on_error: free buffer on error diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7ddfc65586b0..4335bd771ce5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -184,6 +184,7 @@ struct plat_stmmacenet_data { struct clk *pclk; struct clk *clk_ptp_ref; unsigned int clk_ptp_rate; + unsigned int clk_ref_rate; struct reset_control *stmmac_rst; struct stmmac_axi *axi; int has_gmac4; diff --git a/include/net/act_api.h b/include/net/act_api.h index dbc795ec659e..c745e9ccfab2 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -80,7 +80,7 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) struct tc_action_ops { struct list_head head; char kind[IFNAMSIZ]; - __u32 type; /* TBD to match kind */ + enum tca_id id; /* identifier should match kind */ size_t size; struct module *owner; int (*act)(struct sk_buff *, const struct tc_action *, diff --git a/include/net/devlink.h b/include/net/devlink.h index c12ad6e9095d..c6d88759b7d5 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -358,6 +358,7 @@ struct devlink_param_item { const struct devlink_param *param; union devlink_param_value driverinit_value; bool driverinit_value_valid; + bool published; }; enum devlink_param_generic_id { @@ -369,17 +370,12 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, - DEVLINK_PARAM_GENERIC_ID_WOL, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1, }; -enum devlink_param_wol_types { - DEVLINK_PARAM_WAKE_MAGIC = (1 << 0), -}; - #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME "internal_error_reset" #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL @@ -404,9 +400,6 @@ enum devlink_param_wol_types { #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy" #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8 -#define DEVLINK_PARAM_GENERIC_WOL_NAME "wake_on_lan" -#define DEVLINK_PARAM_GENERIC_WOL_TYPE DEVLINK_PARAM_TYPE_U8 - #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -434,6 +427,8 @@ enum devlink_param_wol_types { #define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID "board.id" /* Revision of board design */ #define DEVLINK_INFO_VERSION_GENERIC_BOARD_REV "board.rev" +/* Maker of the board */ +#define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE "board.manufacture" /* Control processor FW version */ #define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT "fw.mgmt" @@ -618,6 +613,8 @@ int devlink_params_register(struct devlink *devlink, void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count); +void devlink_params_publish(struct devlink *devlink); +void devlink_params_unpublish(struct devlink *devlink); int devlink_port_params_register(struct devlink_port *devlink_port, const struct devlink_param *params, size_t params_count); @@ -724,6 +721,14 @@ static inline void devlink_unregister(struct devlink *devlink) { } +static inline void devlink_params_publish(struct devlink *devlink) +{ +} + +static inline void devlink_params_unpublish(struct devlink *devlink) +{ +} + static inline void devlink_free(struct devlink *devlink) { kfree(devlink); diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 23166caa0da5..d035183c8d03 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -171,7 +171,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action) } #define flow_action_for_each(__i, __act, __actions) \ - for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[__i++]) + for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[++__i]) struct flow_rule { struct flow_match match; @@ -195,9 +195,9 @@ struct flow_stats { static inline void flow_stats_update(struct flow_stats *flow_stats, u64 bytes, u64 pkts, u64 lastused) { - flow_stats->pkts = pkts; - flow_stats->bytes = bytes; - flow_stats->lastused = lastused; + flow_stats->pkts += pkts; + flow_stats->bytes += bytes; + flow_stats->lastused = max_t(u64, flow_stats->lastused, lastused); } #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 00b5e7825508..74ff688568a0 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -39,6 +39,7 @@ struct inet_peer { u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ + u32 n_redirects; unsigned long rate_last; /* * Once inet_peer is queued for deletion (refcnt == 0), following field diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 78fa0ac4613c..5175fd63cd82 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -153,7 +153,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(skb->dev)) master = netdev_master_upper_dev_get_rcu(skb->dev); - else if (netif_is_l3_master(skb->dev)) + else if (netif_is_l3_master(skb->dev) || + netif_has_l3_rx_handler(skb->dev)) master = skb->dev; if (master && master->l3mdev_ops->l3mdev_l3_rcv) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 45eba7d7ab38..a66fcd316734 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -469,9 +469,7 @@ struct nft_set_binding { int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); + struct nft_set_binding *binding, bool commit); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); /** @@ -721,6 +719,13 @@ struct nft_expr_type { #define NFT_EXPR_STATEFUL 0x1 #define NFT_EXPR_GC 0x2 +enum nft_trans_phase { + NFT_TRANS_PREPARE, + NFT_TRANS_ABORT, + NFT_TRANS_COMMIT, + NFT_TRANS_RELEASE +}; + /** * struct nft_expr_ops - nf_tables expression operations * @@ -750,7 +755,8 @@ struct nft_expr_ops { void (*activate)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*deactivate)(const struct nft_ctx *ctx, - const struct nft_expr *expr); + const struct nft_expr *expr, + enum nft_trans_phase phase); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*destroy_clone)(const struct nft_ctx *ctx, @@ -1335,12 +1341,15 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_set *set; u32 set_id; + bool bound; }; #define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) +#define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound) struct nft_trans_chain { bool update; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index cb8be396a11f..6a530bef9253 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -44,6 +44,10 @@ bool tcf_queue_work(struct rcu_work *rwork, work_func_t func); struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index); void tcf_chain_put_by_act(struct tcf_chain *chain); +struct tcf_chain *tcf_get_next_chain(struct tcf_block *block, + struct tcf_chain *chain); +struct tcf_proto *tcf_get_next_proto(struct tcf_chain *chain, + struct tcf_proto *tp, bool rtnl_held); void tcf_block_netif_keep_dst(struct tcf_block *block); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, @@ -412,7 +416,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts, bool ovr, + struct tcf_exts *exts, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7a4957599874..e50b729f8691 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -12,6 +12,7 @@ #include <linux/list.h> #include <linux/refcount.h> #include <linux/workqueue.h> +#include <linux/mutex.h> #include <net/gen_stats.h> #include <net/rtnetlink.h> @@ -178,6 +179,7 @@ static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) } struct Qdisc_class_ops { + unsigned int flags; /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); int (*graft)(struct Qdisc *, unsigned long cl, @@ -209,6 +211,13 @@ struct Qdisc_class_ops { struct gnet_dump *); }; +/* Qdisc_class_ops flag values */ + +/* Implements API that doesn't require rtnl lock */ +enum qdisc_class_ops_flags { + QDISC_CLASS_OPS_DOIT_UNLOCKED = 1, +}; + struct Qdisc_ops { struct Qdisc_ops *next; const struct Qdisc_class_ops *cl_ops; @@ -272,19 +281,21 @@ struct tcf_proto_ops { const struct tcf_proto *, struct tcf_result *); int (*init)(struct tcf_proto*); - void (*destroy)(struct tcf_proto *tp, + void (*destroy)(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack); void* (*get)(struct tcf_proto*, u32 handle); + void (*put)(struct tcf_proto *tp, void *f); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - void **, bool, + void **, bool, bool, struct netlink_ext_ack *); int (*delete)(struct tcf_proto *tp, void *arg, - bool *last, + bool *last, bool rtnl_held, struct netlink_ext_ack *); - void (*walk)(struct tcf_proto*, struct tcf_walker *arg); + void (*walk)(struct tcf_proto *tp, + struct tcf_walker *arg, bool rtnl_held); int (*reoffload)(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack); @@ -297,12 +308,18 @@ struct tcf_proto_ops { /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, - struct sk_buff *skb, struct tcmsg*); + struct sk_buff *skb, struct tcmsg*, + bool); int (*tmplt_dump)(struct sk_buff *skb, struct net *net, void *tmplt_priv); struct module *owner; + int flags; +}; + +enum tcf_proto_ops_flags { + TCF_PROTO_OPS_DOIT_UNLOCKED = 1, }; struct tcf_proto { @@ -321,6 +338,12 @@ struct tcf_proto { void *data; const struct tcf_proto_ops *ops; struct tcf_chain *chain; + /* Lock protects tcf_proto shared state and can be used by unlocked + * classifiers to protect their private data. + */ + spinlock_t lock; + bool deleting; + refcount_t refcnt; struct rcu_head rcu; }; @@ -340,6 +363,8 @@ struct qdisc_skb_cb { typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); struct tcf_chain { + /* Protects filter_chain. */ + struct mutex filter_chain_lock; struct tcf_proto __rcu *filter_chain; struct list_head list; struct tcf_block *block; @@ -347,11 +372,16 @@ struct tcf_chain { unsigned int refcnt; unsigned int action_refcnt; bool explicitly_created; + bool flushing; const struct tcf_proto_ops *tmplt_ops; void *tmplt_priv; }; struct tcf_block { + /* Lock protects tcf_block and lifetime-management data of chains + * attached to the block (refcnt, action_refcnt, explicitly_created). + */ + struct mutex lock; struct list_head chain_list; u32 index; /* block index for shared blocks */ refcount_t refcnt; @@ -369,6 +399,34 @@ struct tcf_block { struct rcu_head rcu; }; +#ifdef CONFIG_PROVE_LOCKING +static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain) +{ + return lockdep_is_held(&chain->filter_chain_lock); +} + +static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) +{ + return lockdep_is_held(&tp->lock); +} +#else +static inline bool lockdep_tcf_chain_is_locked(struct tcf_block *chain) +{ + return true; +} + +static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) +{ + return true; +} +#endif /* #ifdef CONFIG_PROVE_LOCKING */ + +#define tcf_chain_dereference(p, chain) \ + rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain)) + +#define tcf_proto_dereference(p, tp) \ + rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp)) + static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) { if (*flags & TCA_CLS_FLAGS_IN_HW) diff --git a/include/net/sock.h b/include/net/sock.h index 6679f3c120b0..328cb7cb7b0b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1278,7 +1278,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk) percpu_counter_inc(sk->sk_prot->sockets_allocated); } -static inline int +static inline u64 sk_sockets_allocated_read_positive(struct sock *sk) { return percpu_counter_read_positive(sk->sk_prot->sockets_allocated); diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 32d2454c0479..68269e4581b7 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -21,7 +21,7 @@ struct tcf_csum { static inline bool is_tcf_csum(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_CSUM) + if (a->ops && a->ops->id == TCA_ID_CSUM) return true; #endif return false; diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index ef8dd0db70ce..ee8d005f56fc 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -22,7 +22,7 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act, #ifdef CONFIG_NET_CLS_ACT struct tcf_gact *gact; - if (a->ops && a->ops->type != TCA_ACT_GACT) + if (a->ops && a->ops->id != TCA_ID_GACT) return false; gact = to_gact(a); diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index a2e9cbca5c9e..c757585a05b0 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -17,7 +17,7 @@ struct tcf_mirred { static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_MIRRED) + if (a->ops && a->ops->id == TCA_ID_MIRRED) return to_mirred(a)->tcfm_eaction == TCA_EGRESS_REDIR; #endif return false; @@ -26,7 +26,7 @@ static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a) static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_MIRRED) + if (a->ops && a->ops->id == TCA_ID_MIRRED) return to_mirred(a)->tcfm_eaction == TCA_EGRESS_MIRROR; #endif return false; diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index fac3ad4a86de..748cf87a4d7e 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -23,7 +23,7 @@ struct tcf_pedit { static inline bool is_tcf_pedit(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_PEDIT) + if (a->ops && a->ops->id == TCA_ID_PEDIT) return true; #endif return false; diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index 01dbfea32672..0a559d4b6f0f 100644 --- a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -20,7 +20,7 @@ struct tcf_sample { static inline bool is_tcf_sample(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - return a->ops && a->ops->type == TCA_ACT_SAMPLE; + return a->ops && a->ops->id == TCA_ID_SAMPLE; #else return false; #endif diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 911bbac838a2..85c5c4756d92 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -44,7 +44,7 @@ static inline bool is_tcf_skbedit_mark(const struct tc_action *a) #ifdef CONFIG_NET_CLS_ACT u32 flags; - if (a->ops && a->ops->type == TCA_ACT_SKBEDIT) { + if (a->ops && a->ops->id == TCA_ID_SKBEDIT) { rcu_read_lock(); flags = rcu_dereference(to_skbedit(a)->params)->flags; rcu_read_unlock(); diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 46b8c7f1c8d5..23d5b8b19f3e 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -34,7 +34,7 @@ static inline bool is_tcf_tunnel_set(const struct tc_action *a) struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); - if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; #endif return false; @@ -46,7 +46,7 @@ static inline bool is_tcf_tunnel_release(const struct tc_action *a) struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); - if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; #endif return false; diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 22ae260d6869..fe39ed502bef 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -30,7 +30,7 @@ struct tcf_vlan { static inline bool is_tcf_vlan(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_VLAN) + if (a->ops && a->ops->id == TCA_ID_VLAN) return true; #endif return false; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a3ceed3a040a..80debf5982ac 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2579,9 +2579,10 @@ struct ib_device { const struct uapi_definition *driver_def; enum rdma_driver_id driver_id; + /* - * Provides synchronization between device unregistration and netlink - * commands on a device. To be used only by core. + * Positive refcount indicates that the device is currently + * registered and cannot be unregistered. */ refcount_t refcount; struct completion unreg_completion; @@ -3926,6 +3927,25 @@ static inline bool ib_access_writable(int access_flags) int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); +/** + * ib_device_try_get: Hold a registration lock + * device: The device to lock + * + * A device under an active registration lock cannot become unregistered. It + * is only possible to obtain a registration lock on a device that is fully + * registered, otherwise this function returns false. + * + * The registration lock is only necessary for actions which require the + * device to still be registered. Uses that only require the device pointer to + * be valid should use get_device(&ibdev->dev) to hold the memory. + * + */ +static inline bool ib_device_try_get(struct ib_device *dev) +{ + return refcount_inc_not_zero(&dev->refcount); +} + +void ib_device_put(struct ib_device *device); struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index 0cdc3999ecfa..c5188ff724d1 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -173,7 +173,11 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream) if (snd_BUG_ON(!stream)) return; - stream->runtime->state = SNDRV_PCM_STATE_SETUP; + if (stream->direction == SND_COMPRESS_PLAYBACK) + stream->runtime->state = SNDRV_PCM_STATE_SETUP; + else + stream->runtime->state = SNDRV_PCM_STATE_PREPARED; + wake_up(&stream->runtime->sleep); } diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 7fa48b100936..cc7c8d42d4fd 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -68,6 +68,7 @@ struct hda_bus { unsigned int response_reset:1; /* controller was reset */ unsigned int in_reset:1; /* during reset operation */ unsigned int no_response_fallback:1; /* don't fallback at RIRB error */ + unsigned int bus_probing :1; /* during probing process */ int primary_dig_out_type; /* primary digital out PCM type */ unsigned int mixer_assigned; /* codec addr for mixer name */ diff --git a/include/trace/events/mlxsw.h b/include/trace/events/mlxsw.h index 6c2bafcade18..a5ce6df9dc49 100644 --- a/include/trace/events/mlxsw.h +++ b/include/trace/events/mlxsw.h @@ -11,6 +11,7 @@ struct mlxsw_sp; struct mlxsw_sp_acl_atcam_region; +struct mlxsw_sp_acl_tcam_vregion; TRACE_EVENT(mlxsw_sp_acl_atcam_entry_add_ctcam_spill, TP_PROTO(const struct mlxsw_sp *mlxsw_sp, @@ -32,6 +33,66 @@ TRACE_EVENT(mlxsw_sp_acl_atcam_entry_add_ctcam_spill, __entry->mlxsw_sp, __entry->aregion) ); +TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash, + TP_PROTO(const struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_acl_tcam_vregion *vregion), + + TP_ARGS(mlxsw_sp, vregion), + + TP_STRUCT__entry( + __field(const void *, mlxsw_sp) + __field(const void *, vregion) + ), + + TP_fast_assign( + __entry->mlxsw_sp = mlxsw_sp; + __entry->vregion = vregion; + ), + + TP_printk("mlxsw_sp %p, vregion %p", + __entry->mlxsw_sp, __entry->vregion) +); + +TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate, + TP_PROTO(const struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_acl_tcam_vregion *vregion), + + TP_ARGS(mlxsw_sp, vregion), + + TP_STRUCT__entry( + __field(const void *, mlxsw_sp) + __field(const void *, vregion) + ), + + TP_fast_assign( + __entry->mlxsw_sp = mlxsw_sp; + __entry->vregion = vregion; + ), + + TP_printk("mlxsw_sp %p, vregion %p", + __entry->mlxsw_sp, __entry->vregion) +); + +TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_dis, + TP_PROTO(const struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_acl_tcam_vregion *vregion), + + TP_ARGS(mlxsw_sp, vregion), + + TP_STRUCT__entry( + __field(const void *, mlxsw_sp) + __field(const void *, vregion) + ), + + TP_fast_assign( + __entry->mlxsw_sp = mlxsw_sp; + __entry->vregion = vregion; + ), + + TP_printk("mlxsw_sp %p, vregion %p", + __entry->mlxsw_sp, __entry->vregion) +); + #endif /* _MLXSW_TRACEPOINT_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 7eb2936a8e22..c99336f4eefe 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -229,7 +229,7 @@ struct batadv_ogm_packet { * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the general header * @ttl: time to live for this packet, part of the general header - * @flags: reseved for routing relevant flags - currently always 0 + * @flags: reserved for routing relevant flags - currently always 0 * @seqno: sequence number * @orig: originator mac address * @tvlv_len: length of the appended tvlv buffer (in bytes) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index a28e76a7e0a2..305bf316dd03 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -27,6 +27,7 @@ #define BATADV_NL_NAME "batadv" +#define BATADV_NL_MCAST_GROUP_CONFIG "config" #define BATADV_NL_MCAST_GROUP_TPMETER "tpmeter" /** @@ -139,6 +140,20 @@ enum batadv_mcast_flags_priv { }; /** + * enum batadv_gw_modes - gateway mode of node + */ +enum batadv_gw_modes { + /** @BATADV_GW_MODE_OFF: gw mode disabled */ + BATADV_GW_MODE_OFF, + + /** @BATADV_GW_MODE_CLIENT: send DHCP requests to gw servers */ + BATADV_GW_MODE_CLIENT, + + /** @BATADV_GW_MODE_SERVER: announce itself as gatway server */ + BATADV_GW_MODE_SERVER, +}; + +/** * enum batadv_nl_attrs - batman-adv netlink attributes */ enum batadv_nl_attrs { @@ -344,6 +359,138 @@ enum batadv_nl_attrs { */ BATADV_ATTR_MCAST_FLAGS_PRIV, + /** + * @BATADV_ATTR_VLANID: VLAN id on top of soft interface + */ + BATADV_ATTR_VLANID, + + /** + * @BATADV_ATTR_AGGREGATED_OGMS_ENABLED: whether the batman protocol + * messages of the mesh interface shall be aggregated or not. + */ + BATADV_ATTR_AGGREGATED_OGMS_ENABLED, + + /** + * @BATADV_ATTR_AP_ISOLATION_ENABLED: whether the data traffic going + * from a wireless client to another wireless client will be silently + * dropped. + */ + BATADV_ATTR_AP_ISOLATION_ENABLED, + + /** + * @BATADV_ATTR_ISOLATION_MARK: the isolation mark which is used to + * classify clients as "isolated" by the Extended Isolation feature. + */ + BATADV_ATTR_ISOLATION_MARK, + + /** + * @BATADV_ATTR_ISOLATION_MASK: the isolation (bit)mask which is used to + * classify clients as "isolated" by the Extended Isolation feature. + */ + BATADV_ATTR_ISOLATION_MASK, + + /** + * @BATADV_ATTR_BONDING_ENABLED: whether the data traffic going through + * the mesh will be sent using multiple interfaces at the same time. + */ + BATADV_ATTR_BONDING_ENABLED, + + /** + * @BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED: whether the bridge loop + * avoidance feature is enabled. This feature detects and avoids loops + * between the mesh and devices bridged with the soft interface + */ + BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, + + /** + * @BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED: whether the distributed + * arp table feature is enabled. This feature uses a distributed hash + * table to answer ARP requests without flooding the request through + * the whole mesh. + */ + BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED, + + /** + * @BATADV_ATTR_FRAGMENTATION_ENABLED: whether the data traffic going + * through the mesh will be fragmented or silently discarded if the + * packet size exceeds the outgoing interface MTU. + */ + BATADV_ATTR_FRAGMENTATION_ENABLED, + + /** + * @BATADV_ATTR_GW_BANDWIDTH_DOWN: defines the download bandwidth which + * is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set + * to 'server'. + */ + BATADV_ATTR_GW_BANDWIDTH_DOWN, + + /** + * @BATADV_ATTR_GW_BANDWIDTH_UP: defines the upload bandwidth which + * is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set + * to 'server'. + */ + BATADV_ATTR_GW_BANDWIDTH_UP, + + /** + * @BATADV_ATTR_GW_MODE: defines the state of the gateway features. + * Possible values are specified in enum batadv_gw_modes + */ + BATADV_ATTR_GW_MODE, + + /** + * @BATADV_ATTR_GW_SEL_CLASS: defines the selection criteria this node + * will use to choose a gateway if gw_mode was set to 'client'. + */ + BATADV_ATTR_GW_SEL_CLASS, + + /** + * @BATADV_ATTR_HOP_PENALTY: defines the penalty which will be applied + * to an originator message's tq-field on every hop. + */ + BATADV_ATTR_HOP_PENALTY, + + /** + * @BATADV_ATTR_LOG_LEVEL: bitmask with to define which debug messages + * should be send to the debug log/trace ring buffer + */ + BATADV_ATTR_LOG_LEVEL, + + /** + * @BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED: whether multicast + * optimizations should be replaced by simple broadcast-like flooding + * of multicast packets. If set to non-zero then all nodes in the mesh + * are going to use classic flooding for any multicast packet with no + * optimizations. + */ + BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, + + /** + * @BATADV_ATTR_NETWORK_CODING_ENABLED: whether Network Coding (using + * some magic to send fewer wifi packets but still the same content) is + * enabled or not. + */ + BATADV_ATTR_NETWORK_CODING_ENABLED, + + /** + * @BATADV_ATTR_ORIG_INTERVAL: defines the interval in milliseconds in + * which batman sends its protocol messages. + */ + BATADV_ATTR_ORIG_INTERVAL, + + /** + * @BATADV_ATTR_ELP_INTERVAL: defines the interval in milliseconds in + * which batman emits probing packets for neighbor sensing (ELP). + */ + BATADV_ATTR_ELP_INTERVAL, + + /** + * @BATADV_ATTR_THROUGHPUT_OVERRIDE: defines the throughput value to be + * used by B.A.T.M.A.N. V when estimating the link throughput using + * this interface. If the value is set to 0 then batman-adv will try to + * estimate the throughput by itself. + */ + BATADV_ATTR_THROUGHPUT_OVERRIDE, + /* add attributes above here, update the policy in netlink.c */ /** @@ -372,10 +519,14 @@ enum batadv_nl_commands { BATADV_CMD_UNSPEC, /** - * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv - * device + * @BATADV_CMD_GET_MESH: Get attributes from softif/mesh + */ + BATADV_CMD_GET_MESH, + + /** + * @BATADV_CMD_GET_MESH_INFO: Alias for @BATADV_CMD_GET_MESH */ - BATADV_CMD_GET_MESH_INFO, + BATADV_CMD_GET_MESH_INFO = BATADV_CMD_GET_MESH, /** * @BATADV_CMD_TP_METER: Start a tp meter session @@ -393,9 +544,15 @@ enum batadv_nl_commands { BATADV_CMD_GET_ROUTING_ALGOS, /** - * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces + * @BATADV_CMD_GET_HARDIF: Get attributes from a hardif of the + * current softif */ - BATADV_CMD_GET_HARDIFS, + BATADV_CMD_GET_HARDIF, + + /** + * @BATADV_CMD_GET_HARDIFS: Alias for @BATADV_CMD_GET_HARDIF + */ + BATADV_CMD_GET_HARDIFS = BATADV_CMD_GET_HARDIF, /** * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations @@ -443,6 +600,29 @@ enum batadv_nl_commands { */ BATADV_CMD_GET_MCAST_FLAGS, + /** + * @BATADV_CMD_SET_MESH: Set attributes for softif/mesh + */ + BATADV_CMD_SET_MESH, + + /** + * @BATADV_CMD_SET_HARDIF: Set attributes for hardif of the + * current softif + */ + BATADV_CMD_SET_HARDIF, + + /** + * @BATADV_CMD_GET_VLAN: Get attributes from a VLAN of the + * current softif + */ + BATADV_CMD_GET_VLAN, + + /** + * @BATADV_CMD_SET_VLAN: Set attributes for VLAN of the + * current softif + */ + BATADV_CMD_SET_VLAN, + /* add new commands above here */ /** diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index d955b9e32288..28491dac074b 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -3,6 +3,7 @@ #define _UAPI_LINUX_ERRQUEUE_H #include <linux/types.h> +#include <linux/time_types.h> struct sock_extended_err { __u32 ee_errno; diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 14565d703291..e8baca85bac6 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -137,15 +137,21 @@ enum { INET_DIAG_TCLASS, INET_DIAG_SKMEMINFO, INET_DIAG_SHUTDOWN, - INET_DIAG_DCTCPINFO, - INET_DIAG_PROTOCOL, /* response attribute only */ + + /* + * Next extenstions cannot be requested in struct inet_diag_req_v2: + * its field idiag_ext has only 8 bits. + */ + + INET_DIAG_DCTCPINFO, /* request as INET_DIAG_VEGASINFO */ + INET_DIAG_PROTOCOL, /* response attribute only */ INET_DIAG_SKV6ONLY, INET_DIAG_LOCALS, INET_DIAG_PEERS, INET_DIAG_PAD, - INET_DIAG_MARK, - INET_DIAG_BBRINFO, - INET_DIAG_CLASS_ID, + INET_DIAG_MARK, /* only with CAP_NET_ADMIN */ + INET_DIAG_BBRINFO, /* request as INET_DIAG_VEGASINFO */ + INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */ INET_DIAG_MD5SIG, __INET_DIAG_MAX, }; diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index d435b00d64ad..0a552061ff1c 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -45,6 +45,7 @@ #define MDIO_AN_ADVERTISE 16 /* AN advertising (base page) */ #define MDIO_AN_LPA 19 /* AN LP abilities (base page) */ #define MDIO_PCS_EEE_ABLE 20 /* EEE Capability register */ +#define MDIO_PMA_NG_EXTABLE 21 /* 2.5G/5G PMA/PMD extended ability */ #define MDIO_PCS_EEE_WK_ERR 22 /* EEE wake error counter */ #define MDIO_PHYXS_LNSTAT 24 /* PHY XGXS lane state */ #define MDIO_AN_EEE_ADV 60 /* EEE advertisement */ @@ -92,6 +93,10 @@ #define MDIO_CTRL1_SPEED10G (MDIO_CTRL1_SPEEDSELEXT | 0x00) /* 10PASS-TS/2BASE-TL */ #define MDIO_CTRL1_SPEED10P2B (MDIO_CTRL1_SPEEDSELEXT | 0x04) +/* 2.5 Gb/s */ +#define MDIO_CTRL1_SPEED2_5G (MDIO_CTRL1_SPEEDSELEXT | 0x18) +/* 5 Gb/s */ +#define MDIO_CTRL1_SPEED5G (MDIO_CTRL1_SPEEDSELEXT | 0x1c) /* Status register 1. */ #define MDIO_STAT1_LPOWERABLE 0x0002 /* Low-power ability */ @@ -115,6 +120,7 @@ /* Device present registers. */ #define MDIO_DEVS_PRESENT(devad) (1 << (devad)) +#define MDIO_DEVS_C22PRESENT MDIO_DEVS_PRESENT(0) #define MDIO_DEVS_PMAPMD MDIO_DEVS_PRESENT(MDIO_MMD_PMAPMD) #define MDIO_DEVS_WIS MDIO_DEVS_PRESENT(MDIO_MMD_WIS) #define MDIO_DEVS_PCS MDIO_DEVS_PRESENT(MDIO_MMD_PCS) @@ -123,6 +129,8 @@ #define MDIO_DEVS_TC MDIO_DEVS_PRESENT(MDIO_MMD_TC) #define MDIO_DEVS_AN MDIO_DEVS_PRESENT(MDIO_MMD_AN) #define MDIO_DEVS_C22EXT MDIO_DEVS_PRESENT(MDIO_MMD_C22EXT) +#define MDIO_DEVS_VEND1 MDIO_DEVS_PRESENT(MDIO_MMD_VEND1) +#define MDIO_DEVS_VEND2 MDIO_DEVS_PRESENT(MDIO_MMD_VEND2) /* Control register 2. */ #define MDIO_PMA_CTRL2_TYPE 0x000f /* PMA/PMD type selection */ @@ -142,6 +150,8 @@ #define MDIO_PMA_CTRL2_1000BKX 0x000d /* 1000BASE-KX type */ #define MDIO_PMA_CTRL2_100BTX 0x000e /* 100BASE-TX type */ #define MDIO_PMA_CTRL2_10BT 0x000f /* 10BASE-T type */ +#define MDIO_PMA_CTRL2_2_5GBT 0x0030 /* 2.5GBaseT type */ +#define MDIO_PMA_CTRL2_5GBT 0x0031 /* 5GBaseT type */ #define MDIO_PCS_CTRL2_TYPE 0x0003 /* PCS type selection */ #define MDIO_PCS_CTRL2_10GBR 0x0000 /* 10GBASE-R type */ #define MDIO_PCS_CTRL2_10GBX 0x0001 /* 10GBASE-X type */ @@ -195,6 +205,7 @@ #define MDIO_PMA_EXTABLE_1000BKX 0x0040 /* 1000BASE-KX ability */ #define MDIO_PMA_EXTABLE_100BTX 0x0080 /* 100BASE-TX ability */ #define MDIO_PMA_EXTABLE_10BT 0x0100 /* 10BASE-T ability */ +#define MDIO_PMA_EXTABLE_NBT 0x4000 /* 2.5/5GBASE-T ability */ /* PHY XGXS lane state register. */ #define MDIO_PHYXS_LNSTAT_SYNC0 0x0001 @@ -231,9 +242,13 @@ #define MDIO_PCS_10GBRT_STAT2_BER 0x3f00 /* AN 10GBASE-T control register. */ +#define MDIO_AN_10GBT_CTRL_ADV2_5G 0x0080 /* Advertise 2.5GBASE-T */ +#define MDIO_AN_10GBT_CTRL_ADV5G 0x0100 /* Advertise 5GBASE-T */ #define MDIO_AN_10GBT_CTRL_ADV10G 0x1000 /* Advertise 10GBASE-T */ /* AN 10GBASE-T status register. */ +#define MDIO_AN_10GBT_STAT_LP2_5G 0x0020 /* LP is 2.5GBT capable */ +#define MDIO_AN_10GBT_STAT_LP5G 0x0040 /* LP is 5GBT capable */ #define MDIO_AN_10GBT_STAT_LPTRR 0x0200 /* LP training reset req. */ #define MDIO_AN_10GBT_STAT_LPLTABLE 0x0400 /* LP loop timing ability */ #define MDIO_AN_10GBT_STAT_LP10G 0x0800 /* LP is 10GBT capable */ @@ -262,6 +277,10 @@ #define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */ #define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */ +/* 2.5G/5G Extended abilities register. */ +#define MDIO_PMA_NG_EXTABLE_2_5GBT 0x0001 /* 2.5GBASET ability */ +#define MDIO_PMA_NG_EXTABLE_5GBT 0x0002 /* 5GBASET ability */ + /* LASI RX_ALARM control/status registers. */ #define MDIO_PMA_LASI_RX_PHYXSLFLT 0x0001 /* PHY XS RX local fault */ #define MDIO_PMA_LASI_RX_PCSLFLT 0x0008 /* PCS RX local fault */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 02ac251be8c4..51a0496f78ea 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -63,12 +63,49 @@ enum { #define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2) #define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN +/* These macros are put here for binary compatibility with userspace apps that + * make use of them. For kernel code and new userspace apps, use the TCA_ID_* + * versions. + */ +#define TCA_ACT_GACT 5 +#define TCA_ACT_IPT 6 +#define TCA_ACT_PEDIT 7 +#define TCA_ACT_MIRRED 8 +#define TCA_ACT_NAT 9 +#define TCA_ACT_XT 10 +#define TCA_ACT_SKBEDIT 11 +#define TCA_ACT_VLAN 12 +#define TCA_ACT_BPF 13 +#define TCA_ACT_CONNMARK 14 +#define TCA_ACT_SKBMOD 15 +#define TCA_ACT_CSUM 16 +#define TCA_ACT_TUNNEL_KEY 17 +#define TCA_ACT_SIMP 22 +#define TCA_ACT_IFE 25 +#define TCA_ACT_SAMPLE 26 + /* Action type identifiers*/ -enum { - TCA_ID_UNSPEC=0, - TCA_ID_POLICE=1, +enum tca_id { + TCA_ID_UNSPEC = 0, + TCA_ID_POLICE = 1, + TCA_ID_GACT = TCA_ACT_GACT, + TCA_ID_IPT = TCA_ACT_IPT, + TCA_ID_PEDIT = TCA_ACT_PEDIT, + TCA_ID_MIRRED = TCA_ACT_MIRRED, + TCA_ID_NAT = TCA_ACT_NAT, + TCA_ID_XT = TCA_ACT_XT, + TCA_ID_SKBEDIT = TCA_ACT_SKBEDIT, + TCA_ID_VLAN = TCA_ACT_VLAN, + TCA_ID_BPF = TCA_ACT_BPF, + TCA_ID_CONNMARK = TCA_ACT_CONNMARK, + TCA_ID_SKBMOD = TCA_ACT_SKBMOD, + TCA_ID_CSUM = TCA_ACT_CSUM, + TCA_ID_TUNNEL_KEY = TCA_ACT_TUNNEL_KEY, + TCA_ID_SIMP = TCA_ACT_SIMP, + TCA_ID_IFE = TCA_ACT_IFE, + TCA_ID_SAMPLE = TCA_ACT_SAMPLE, /* other actions go here */ - __TCA_ID_MAX=255 + __TCA_ID_MAX = 255 }; #define TCA_ID_MAX __TCA_ID_MAX diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 6e89a5df49a4..653c4f94f76e 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -13,8 +13,6 @@ #include <linux/pkt_cls.h> -#define TCA_ACT_BPF 13 - struct tc_act_bpf { tc_gen; }; diff --git a/include/uapi/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h index 80caa47b1933..9f8f6f709feb 100644 --- a/include/uapi/linux/tc_act/tc_connmark.h +++ b/include/uapi/linux/tc_act/tc_connmark.h @@ -5,8 +5,6 @@ #include <linux/types.h> #include <linux/pkt_cls.h> -#define TCA_ACT_CONNMARK 14 - struct tc_connmark { tc_gen; __u16 zone; diff --git a/include/uapi/linux/tc_act/tc_csum.h b/include/uapi/linux/tc_act/tc_csum.h index 0ecf4d29e2f3..94b2044929de 100644 --- a/include/uapi/linux/tc_act/tc_csum.h +++ b/include/uapi/linux/tc_act/tc_csum.h @@ -5,8 +5,6 @@ #include <linux/types.h> #include <linux/pkt_cls.h> -#define TCA_ACT_CSUM 16 - enum { TCA_CSUM_UNSPEC, TCA_CSUM_PARMS, diff --git a/include/uapi/linux/tc_act/tc_gact.h b/include/uapi/linux/tc_act/tc_gact.h index 94273c3b81b0..37e5392e02c7 100644 --- a/include/uapi/linux/tc_act/tc_gact.h +++ b/include/uapi/linux/tc_act/tc_gact.h @@ -5,7 +5,6 @@ #include <linux/types.h> #include <linux/pkt_cls.h> -#define TCA_ACT_GACT 5 struct tc_gact { tc_gen; diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h index 2f48490ef386..8c401f185675 100644 --- a/include/uapi/linux/tc_act/tc_ife.h +++ b/include/uapi/linux/tc_act/tc_ife.h @@ -6,7 +6,6 @@ #include <linux/pkt_cls.h> #include <linux/ife.h> -#define TCA_ACT_IFE 25 /* Flag bits for now just encoding/decoding; mutually exclusive */ #define IFE_ENCODE 1 #define IFE_DECODE 0 diff --git a/include/uapi/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h index b743c8bddd13..c48d7da6750d 100644 --- a/include/uapi/linux/tc_act/tc_ipt.h +++ b/include/uapi/linux/tc_act/tc_ipt.h @@ -4,9 +4,6 @@ #include <linux/pkt_cls.h> -#define TCA_ACT_IPT 6 -#define TCA_ACT_XT 10 - enum { TCA_IPT_UNSPEC, TCA_IPT_TABLE, diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h index 5dd671cf5776..2500a0005d05 100644 --- a/include/uapi/linux/tc_act/tc_mirred.h +++ b/include/uapi/linux/tc_act/tc_mirred.h @@ -5,7 +5,6 @@ #include <linux/types.h> #include <linux/pkt_cls.h> -#define TCA_ACT_MIRRED 8 #define TCA_EGRESS_REDIR 1 /* packet redirect to EGRESS*/ #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */ #define TCA_INGRESS_REDIR 3 /* packet redirect to INGRESS*/ diff --git a/include/uapi/linux/tc_act/tc_nat.h b/include/uapi/linux/tc_act/tc_nat.h index 086be842587b..21399c2c6130 100644 --- a/include/uapi/linux/tc_act/tc_nat.h +++ b/include/uapi/linux/tc_act/tc_nat.h @@ -5,8 +5,6 @@ #include <linux/pkt_cls.h> #include <linux/types.h> -#define TCA_ACT_NAT 9 - enum { TCA_NAT_UNSPEC, TCA_NAT_PARMS, diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h index 24ec792dacc1..f3e61b04fa01 100644 --- a/include/uapi/linux/tc_act/tc_pedit.h +++ b/include/uapi/linux/tc_act/tc_pedit.h @@ -5,8 +5,6 @@ #include <linux/types.h> #include <linux/pkt_cls.h> -#define TCA_ACT_PEDIT 7 - enum { TCA_PEDIT_UNSPEC, TCA_PEDIT_TM, diff --git a/include/uapi/linux/tc_act/tc_sample.h b/include/uapi/linux/tc_act/tc_sample.h index bd7e9f03abd2..fee1bcc20793 100644 --- a/include/uapi/linux/tc_act/tc_sample.h +++ b/include/uapi/linux/tc_act/tc_sample.h @@ -6,8 +6,6 @@ #include <linux/pkt_cls.h> #include <linux/if_ether.h> -#define TCA_ACT_SAMPLE 26 - struct tc_sample { tc_gen; }; diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index 6de6071ebed6..800e93377218 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -23,8 +23,6 @@ #include <linux/pkt_cls.h> -#define TCA_ACT_SKBEDIT 11 - #define SKBEDIT_F_PRIORITY 0x1 #define SKBEDIT_F_QUEUE_MAPPING 0x2 #define SKBEDIT_F_MARK 0x4 diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h index 38c072f66f2f..c525b3503797 100644 --- a/include/uapi/linux/tc_act/tc_skbmod.h +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -13,8 +13,6 @@ #include <linux/pkt_cls.h> -#define TCA_ACT_SKBMOD 15 - #define SKBMOD_F_DMAC 0x1 #define SKBMOD_F_SMAC 0x2 #define SKBMOD_F_ETYPE 0x4 diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index be384d63e1b5..41c8b462c177 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -14,8 +14,6 @@ #include <linux/pkt_cls.h> -#define TCA_ACT_TUNNEL_KEY 17 - #define TCA_TUNNEL_KEY_ACT_SET 1 #define TCA_TUNNEL_KEY_ACT_RELEASE 2 diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index 0d7b5fd6605b..168995b54a70 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -13,8 +13,6 @@ #include <linux/pkt_cls.h> -#define TCA_ACT_VLAN 12 - #define TCA_VLAN_ACT_POP 1 #define TCA_VLAN_ACT_PUSH 2 #define TCA_VLAN_ACT_MODIFY 3 diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index b8ad1b86b942..958932effc5e 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -3,7 +3,7 @@ #define _UAPI_LINUX_TIME_H #include <linux/types.h> - +#include <linux/time_types.h> #ifndef _STRUCT_TIMESPEC #define _STRUCT_TIMESPEC @@ -23,7 +23,6 @@ struct timezone { int tz_dsttime; /* type of dst correction */ }; - /* * Names of the interval timers, and structure * defining a timer setting: @@ -42,39 +41,6 @@ struct itimerval { struct timeval it_value; /* current value */ }; -#ifndef __kernel_timespec -struct __kernel_timespec { - __kernel_time64_t tv_sec; /* seconds */ - long long tv_nsec; /* nanoseconds */ -}; -#endif - -#ifndef __kernel_itimerspec -struct __kernel_itimerspec { - struct __kernel_timespec it_interval; /* timer period */ - struct __kernel_timespec it_value; /* timer expiration */ -}; -#endif - -/* - * legacy timeval structure, only embedded in structures that - * traditionally used 'timeval' to pass time intervals (not absolute - * times). Do not add new users. If user space fails to compile - * here, this is probably because it is not y2038 safe and needs to - * be changed to use another interface. - */ -#ifndef __kernel_old_timeval -struct __kernel_old_timeval { - __kernel_long_t tv_sec; - __kernel_long_t tv_usec; -}; -#endif - -struct __kernel_sock_timeval { - __s64 tv_sec; - __s64 tv_usec; -}; - /* * The IDs of the various system clocks (for POSIX.1b interval timers): */ diff --git a/include/uapi/linux/time_types.h b/include/uapi/linux/time_types.h new file mode 100644 index 000000000000..459070c61d47 --- /dev/null +++ b/include/uapi/linux/time_types.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_TIME_TYPES_H +#define _UAPI_LINUX_TIME_TYPES_H + +#include <linux/types.h> + +#ifndef __kernel_timespec +struct __kernel_timespec { + __kernel_time64_t tv_sec; /* seconds */ + long long tv_nsec; /* nanoseconds */ +}; +#endif + +#ifndef __kernel_itimerspec +struct __kernel_itimerspec { + struct __kernel_timespec it_interval; /* timer period */ + struct __kernel_timespec it_value; /* timer expiration */ +}; +#endif + +/* + * legacy timeval structure, only embedded in structures that + * traditionally used 'timeval' to pass time intervals (not absolute + * times). Do not add new users. If user space fails to compile + * here, this is probably because it is not y2038 safe and needs to + * be changed to use another interface. + */ +#ifndef __kernel_old_timeval +struct __kernel_old_timeval { + __kernel_long_t tv_sec; + __kernel_long_t tv_usec; +}; +#endif + +struct __kernel_sock_timeval { + __s64 tv_sec; + __s64 tv_usec; +}; + +#endif /* _UAPI_LINUX_TIME_TYPES_H */ diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 1196e1c1d4f6..ff8e7dc9d4dd 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -79,6 +79,12 @@ #define VIRTIO_F_RING_PACKED 34 /* + * This feature indicates that memory accesses by the driver and the + * device are ordered in a way described by the platform. + */ +#define VIRTIO_F_ORDER_PLATFORM 36 + +/* * Does the device support Single Root I/O Virtualization? */ #define VIRTIO_F_SR_IOV 37 diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 2414f8af26b3..4c4e24c291a5 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -213,14 +213,4 @@ struct vring_packed_desc { __le16 flags; }; -struct vring_packed { - unsigned int num; - - struct vring_packed_desc *desc; - - struct vring_packed_desc_event *driver; - - struct vring_packed_desc_event *device; -}; - #endif /* _UAPI_LINUX_VIRTIO_RING_H */ diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index ef3c7ec793a7..eb76b38a00d4 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -52,6 +52,11 @@ struct hns_roce_ib_create_srq { __aligned_u64 que_addr; }; +struct hns_roce_ib_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + struct hns_roce_ib_create_qp { __aligned_u64 buf_addr; __aligned_u64 db_addr; diff --git a/init/Kconfig b/init/Kconfig index 513fa544a134..c9386a365eea 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -512,6 +512,17 @@ config PSI_DEFAULT_DISABLED per default but can be enabled through passing psi=1 on the kernel commandline during boot. + This feature adds some code to the task wakeup and sleep + paths of the scheduler. The overhead is too low to affect + common scheduling-intense workloads in practice (such as + webservers, memcache), but it does show up in artificial + scheduler stress tests, such as hackbench. + + If you are paranoid and not sure what the kernel will be + used for, say Y. + + Say N if unsure. + endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION @@ -825,7 +836,7 @@ config CGROUP_PIDS PIDs controller is designed to stop this from happening. It should be noted that organisational operations (such as attaching - to a cgroup hierarchy will *not* be blocked by the PIDs controller), + to a cgroup hierarchy) will *not* be blocked by the PIDs controller, since the PIDs limit only affects a process's ability to fork, not to attach to a cgroup. diff --git a/init/main.c b/init/main.c index e2e80ca3165a..c86a1c8f19f4 100644 --- a/init/main.c +++ b/init/main.c @@ -695,7 +695,6 @@ asmlinkage __visible void __init start_kernel(void) initrd_start = 0; } #endif - page_ext_init(); kmemleak_init(); setup_per_cpu_pageset(); numa_policy_init(); @@ -1131,6 +1130,8 @@ static noinline void __init kernel_init_freeable(void) sched_init_smp(); page_alloc_init_late(); + /* Initialize page ext after all struct pages are initialized. */ + page_ext_init(); do_basic_setup(); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7019c1f05cab..bd3921b1514b 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1530,7 +1530,8 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, /* "typedef void new_void", "const void"...etc */ if (!btf_type_is_void(next_type) && - !btf_type_is_fwd(next_type)) { + !btf_type_is_fwd(next_type) && + !btf_type_is_func_proto(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index d78cfec5807d..4e807973aa80 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -573,7 +573,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, bpf_compute_and_save_data_end(skb, &saved_data_end); ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, - bpf_prog_run_save_cb); + __bpf_prog_run_save_cb); bpf_restore_data_end(skb, saved_data_end); __skb_pull(skb, offset); skb->sk = save_sk; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 937776531998..fed15cf94dca 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) } if (htab_is_prealloc(htab)) { - pcpu_freelist_push(&htab->freelist, &l->fnode); + __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); l->htab = htab; @@ -739,7 +739,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, } else { struct pcpu_freelist_node *l; - l = pcpu_freelist_pop(&htab->freelist); + l = __pcpu_freelist_pop(&htab->freelist); if (!l) return ERR_PTR(-E2BIG); l_new = container_of(l, struct htab_elem, fnode); diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 673fa6fe2d73..0c1b4ba9e90e 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s) free_percpu(s->freelist); } -static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, - struct pcpu_freelist_node *node) +static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, + struct pcpu_freelist_node *node) { raw_spin_lock(&head->lock); node->next = head->first; @@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, raw_spin_unlock(&head->lock); } -void pcpu_freelist_push(struct pcpu_freelist *s, +void __pcpu_freelist_push(struct pcpu_freelist *s, struct pcpu_freelist_node *node) { struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); - __pcpu_freelist_push(head, node); + ___pcpu_freelist_push(head, node); +} + +void pcpu_freelist_push(struct pcpu_freelist *s, + struct pcpu_freelist_node *node) +{ + unsigned long flags; + + local_irq_save(flags); + __pcpu_freelist_push(s, node); + local_irq_restore(flags); } void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, @@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, for_each_possible_cpu(cpu) { again: head = per_cpu_ptr(s->freelist, cpu); - __pcpu_freelist_push(head, buf); + ___pcpu_freelist_push(head, buf); i++; buf += elem_size; if (i == nr_elems) @@ -74,14 +84,12 @@ again: local_irq_restore(flags); } -struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; - unsigned long flags; int orig_cpu, cpu; - local_irq_save(flags); orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); @@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) node = head->first; if (node) { head->first = node->next; - raw_spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock(&head->lock); return node; } raw_spin_unlock(&head->lock); cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; - if (cpu == orig_cpu) { - local_irq_restore(flags); + if (cpu == orig_cpu) return NULL; - } } } + +struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +{ + struct pcpu_freelist_node *ret; + unsigned long flags; + + local_irq_save(flags); + ret = __pcpu_freelist_pop(s); + local_irq_restore(flags); + return ret; +} diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h index 3049aae8ea1e..c3960118e617 100644 --- a/kernel/bpf/percpu_freelist.h +++ b/kernel/bpf/percpu_freelist.h @@ -22,8 +22,12 @@ struct pcpu_freelist_node { struct pcpu_freelist_node *next; }; +/* pcpu_freelist_* do spin_lock_irqsave. */ void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); +/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */ +void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *); void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, u32 nr_elems); int pcpu_freelist_init(struct pcpu_freelist *); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0834958f1dc4..ec7c552af76b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -740,8 +740,13 @@ static int map_lookup_elem(union bpf_attr *attr) if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_lookup_elem(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + goto done; + } + + preempt_disable(); + this_cpu_inc(bpf_prog_active); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { err = bpf_percpu_hash_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value); @@ -777,7 +782,10 @@ static int map_lookup_elem(union bpf_attr *attr) } rcu_read_unlock(); } + this_cpu_dec(bpf_prog_active); + preempt_enable(); +done: if (err) goto free_value; diff --git a/kernel/cpu.c b/kernel/cpu.c index 91d5c38eb7e5..d1c6d152da89 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -376,9 +376,6 @@ void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; -EXPORT_SYMBOL_GPL(cpu_smt_control); - -static bool cpu_smt_available __read_mostly; void __init cpu_smt_disable(bool force) { @@ -397,25 +394,11 @@ void __init cpu_smt_disable(bool force) /* * The decision whether SMT is supported can only be done after the full - * CPU identification. Called from architecture code before non boot CPUs - * are brought up. - */ -void __init cpu_smt_check_topology_early(void) -{ - if (!topology_smt_supported()) - cpu_smt_control = CPU_SMT_NOT_SUPPORTED; -} - -/* - * If SMT was disabled by BIOS, detect it here, after the CPUs have been - * brought online. This ensures the smt/l1tf sysfs entries are consistent - * with reality. cpu_smt_available is set to true during the bringup of non - * boot CPUs when a SMT sibling is detected. Note, this may overwrite - * cpu_smt_control's previous setting. + * CPU identification. Called from architecture code. */ void __init cpu_smt_check_topology(void) { - if (!cpu_smt_available) + if (!topology_smt_supported()) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; } @@ -428,18 +411,10 @@ early_param("nosmt", smt_cmdline_disable); static inline bool cpu_smt_allowed(unsigned int cpu) { - if (topology_is_primary_thread(cpu)) + if (cpu_smt_control == CPU_SMT_ENABLED) return true; - /* - * If the CPU is not a 'primary' thread and the booted_once bit is - * set then the processor has SMT support. Store this information - * for the late check of SMT support in cpu_smt_check_topology(). - */ - if (per_cpu(cpuhp_state, cpu).booted_once) - cpu_smt_available = true; - - if (cpu_smt_control == CPU_SMT_ENABLED) + if (topology_is_primary_thread(cpu)) return true; /* @@ -2090,10 +2065,8 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) { + if (!ret) cpu_smt_control = ctrlval; - arch_smt_update(); - } cpu_maps_update_done(); return ret; } @@ -2104,7 +2077,6 @@ static int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; - arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cd13a30f732..e5ede6918050 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -436,18 +436,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - - if (ret || !write) - return ret; - + int ret; + int perf_cpu = sysctl_perf_cpu_time_max_percent; /* * If throttling is disabled don't allow the write: */ - if (sysctl_perf_cpu_time_max_percent == 100 || - sysctl_perf_cpu_time_max_percent == 0) + if (write && (perf_cpu == 100 || perf_cpu == 0)) return -EINVAL; + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; update_perf_cpu_limits(); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 4a9937076331..309ef5a64af5 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -734,6 +734,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) size = sizeof(struct ring_buffer); size += nr_pages * sizeof(void *); + if (order_base_2(size) >= MAX_ORDER) + goto fail; + rb = kzalloc(size, GFP_KERNEL); if (!rb) goto fail; diff --git a/kernel/exit.c b/kernel/exit.c index 3fb7be001964..2639a30a8aa5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -558,12 +558,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p) return NULL; } -static struct task_struct *find_child_reaper(struct task_struct *father) +static struct task_struct *find_child_reaper(struct task_struct *father, + struct list_head *dead) __releases(&tasklist_lock) __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); struct task_struct *reaper = pid_ns->child_reaper; + struct task_struct *p, *n; if (likely(reaper != father)) return reaper; @@ -579,6 +581,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father) panic("Attempted to kill init! exitcode=0x%08x\n", father->signal->group_exit_code ?: father->exit_code); } + + list_for_each_entry_safe(p, n, dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } + zap_pid_ns_processes(pid_ns); write_lock_irq(&tasklist_lock); @@ -668,7 +676,7 @@ static void forget_original_parent(struct task_struct *father, exit_ptrace(father, dead); /* Can drop and reacquire tasklist_lock */ - reaper = find_child_reaper(father); + reaper = find_child_reaper(father, dead); if (list_empty(&father->children)) return; diff --git a/kernel/futex.c b/kernel/futex.c index fdd312da0992..a0514e01c3eb 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2221,11 +2221,11 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) * decrement the counter at queue_unlock() when some error has * occurred and we don't end up adding the task to the list. */ - hb_waiters_inc(hb); + hb_waiters_inc(hb); /* implies smp_mb(); (A) */ q->lock_ptr = &hb->lock; - spin_lock(&hb->lock); /* implies smp_mb(); (A) */ + spin_lock(&hb->lock); return hb; } @@ -2861,35 +2861,39 @@ retry_private: * and BUG when futex_unlock_pi() interleaves with this. * * Therefore acquire wait_lock while holding hb->lock, but drop the - * latter before calling rt_mutex_start_proxy_lock(). This still fully - * serializes against futex_unlock_pi() as that does the exact same - * lock handoff sequence. + * latter before calling __rt_mutex_start_proxy_lock(). This + * interleaves with futex_unlock_pi() -- which does a similar lock + * handoff -- such that the latter can observe the futex_q::pi_state + * before __rt_mutex_start_proxy_lock() is done. */ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); spin_unlock(q.lock_ptr); + /* + * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter + * such that futex_unlock_pi() is guaranteed to observe the waiter when + * it sees the futex_q::pi_state. + */ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); if (ret) { if (ret == 1) ret = 0; - - spin_lock(q.lock_ptr); - goto no_block; + goto cleanup; } - if (unlikely(to)) hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); +cleanup: spin_lock(q.lock_ptr); /* - * If we failed to acquire the lock (signal/timeout), we must + * If we failed to acquire the lock (deadlock/signal/timeout), we must * first acquire the hb->lock before removing the lock from the - * rt_mutex waitqueue, such that we can keep the hb and rt_mutex - * wait lists consistent. + * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait + * lists consistent. * * In particular; it is important that futex_unlock_pi() can not * observe this inconsistency. @@ -3013,6 +3017,10 @@ retry: * there is no point where we hold neither; and therefore * wake_futex_pi() must observe a state consistent with what we * observed. + * + * In particular; this forces __rt_mutex_start_proxy() to + * complete such that we're guaranteed to observe the + * rt_waiter. Also see the WARN in wake_futex_pi(). */ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 581edcc63c26..978d63a8261c 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, rt_mutex_set_owner(lock, NULL); } +/** + * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take + * @waiter: the pre-initialized rt_mutex_waiter + * @task: the task to prepare + * + * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock + * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. + * + * NOTE: does _NOT_ remove the @waiter on failure; must either call + * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. + * + * Returns: + * 0 - task blocked on lock + * 1 - acquired the lock for task, caller should wake it up + * <0 - error + * + * Special API call for PI-futex support. + */ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task) { int ret; + lockdep_assert_held(&lock->wait_lock); + if (try_to_take_rt_mutex(lock, task, NULL)) return 1; @@ -1749,9 +1770,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, ret = 0; } - if (unlikely(ret)) - remove_waiter(lock, waiter); - debug_rt_mutex_print_deadlock(waiter); return ret; @@ -1763,12 +1781,18 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, * @waiter: the pre-initialized rt_mutex_waiter * @task: the task to prepare * + * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock + * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. + * + * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter + * on failure. + * * Returns: * 0 - task blocked on lock * 1 - acquired the lock for task, caller should wake it up * <0 - error * - * Special API call for FUTEX_REQUEUE_PI support. + * Special API call for PI-futex support. */ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, @@ -1778,6 +1802,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, raw_spin_lock_irq(&lock->wait_lock); ret = __rt_mutex_start_proxy_lock(lock, waiter, task); + if (unlikely(ret)) + remove_waiter(lock, waiter); raw_spin_unlock_irq(&lock->wait_lock); return ret; @@ -1845,7 +1871,8 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, * @lock: the rt_mutex we were woken on * @waiter: the pre-initialized rt_mutex_waiter * - * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). + * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or + * rt_mutex_wait_proxy_lock(). * * Unless we acquired the lock; we're still enqueued on the wait-list and can * in fact still be granted ownership until we're removed. Therefore we can diff --git a/kernel/relay.c b/kernel/relay.c index 04f248644e06..9e0f52375487 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -428,6 +428,8 @@ static struct dentry *relay_create_buf_file(struct rchan *chan, dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, buf, &chan->is_global); + if (IS_ERR(dentry)) + dentry = NULL; kfree(tmpname); @@ -461,7 +463,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) dentry = chan->cb->create_buf_file(NULL, NULL, S_IRUSR, buf, &chan->is_global); - if (WARN_ON(dentry)) + if (IS_ERR_OR_NULL(dentry)) goto free_buf; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 50aa2aba69bd..310d0637fe4b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p #ifdef CONFIG_SCHED_SMT DEFINE_STATIC_KEY_FALSE(sched_smt_present); +EXPORT_SYMBOL_GPL(sched_smt_present); static inline void set_idle_cores(int cpu, int val) { diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index fe24de3fbc93..c3484785b179 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -124,6 +124,7 @@ * sampling of the aggregate task states would be. */ +#include "../workqueue_internal.h" #include <linux/sched/loadavg.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> @@ -480,9 +481,6 @@ static void psi_group_change(struct psi_group *group, int cpu, groupc->tasks[t]++; write_seqcount_end(&groupc->seq); - - if (!delayed_work_pending(&group->clock_work)) - schedule_delayed_work(&group->clock_work, PSI_FREQ); } static struct psi_group *iterate_groups(struct task_struct *task, void **iter) @@ -513,6 +511,7 @@ void psi_task_change(struct task_struct *task, int clear, int set) { int cpu = task_cpu(task); struct psi_group *group; + bool wake_clock = true; void *iter = NULL; if (!task->pid) @@ -530,8 +529,22 @@ void psi_task_change(struct task_struct *task, int clear, int set) task->psi_flags &= ~clear; task->psi_flags |= set; - while ((group = iterate_groups(task, &iter))) + /* + * Periodic aggregation shuts off if there is a period of no + * task changes, so we wake it back up if necessary. However, + * don't do this if the task change is the aggregation worker + * itself going to sleep, or we'll ping-pong forever. + */ + if (unlikely((clear & TSK_RUNNING) && + (task->flags & PF_WQ_WORKER) && + wq_worker_last_func(task) == psi_update_work)) + wake_clock = false; + + while ((group = iterate_groups(task, &iter))) { psi_group_change(group, cpu, clear, set); + if (wake_clock && !delayed_work_pending(&group->clock_work)) + schedule_delayed_work(&group->clock_work, PSI_FREQ); + } } void psi_memstall_tick(struct task_struct *task, int cpu) diff --git a/kernel/signal.c b/kernel/signal.c index e1d7ad8e6ab1..57b7771e20d7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -688,6 +688,48 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *in } EXPORT_SYMBOL_GPL(dequeue_signal); +static int dequeue_synchronous_signal(kernel_siginfo_t *info) +{ + struct task_struct *tsk = current; + struct sigpending *pending = &tsk->pending; + struct sigqueue *q, *sync = NULL; + + /* + * Might a synchronous signal be in the queue? + */ + if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) + return 0; + + /* + * Return the first synchronous signal in the queue. + */ + list_for_each_entry(q, &pending->list, list) { + /* Synchronous signals have a postive si_code */ + if ((q->info.si_code > SI_USER) && + (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { + sync = q; + goto next; + } + } + return 0; +next: + /* + * Check if there is another siginfo for the same signal. + */ + list_for_each_entry_continue(q, &pending->list, list) { + if (q->info.si_signo == sync->info.si_signo) + goto still_pending; + } + + sigdelset(&pending->signal, sync->info.si_signo); + recalc_sigpending(); +still_pending: + list_del_init(&sync->list); + copy_siginfo(info, &sync->info); + __sigqueue_free(sync); + return info->si_signo; +} + /* * Tell a process that it has a new active signal.. * @@ -1057,10 +1099,9 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc result = TRACE_SIGNAL_DELIVERED; /* - * Skip useless siginfo allocation for SIGKILL SIGSTOP, - * and kernel threads. + * Skip useless siginfo allocation for SIGKILL and kernel threads. */ - if (sig_kernel_only(sig) || (t->flags & PF_KTHREAD)) + if ((sig == SIGKILL) || (t->flags & PF_KTHREAD)) goto out_set; /* @@ -2394,6 +2435,14 @@ relock: goto relock; } + /* Has this task already been marked for death? */ + if (signal_group_exit(signal)) { + ksig->info.si_signo = signr = SIGKILL; + sigdelset(¤t->pending.signal, SIGKILL); + recalc_sigpending(); + goto fatal; + } + for (;;) { struct k_sigaction *ka; @@ -2407,7 +2456,15 @@ relock: goto relock; } - signr = dequeue_signal(current, ¤t->blocked, &ksig->info); + /* + * Signals generated by the execution of an instruction + * need to be delivered before any other pending signals + * so that the instruction pointer in the signal stack + * frame points to the faulting instruction. + */ + signr = dequeue_synchronous_signal(&ksig->info); + if (!signr) + signr = dequeue_signal(current, ¤t->blocked, &ksig->info); if (!signr) break; /* will return 0 */ @@ -2489,6 +2546,7 @@ relock: continue; } + fatal: spin_unlock_irq(&sighand->siglock); /* diff --git a/kernel/smp.c b/kernel/smp.c index 163c451af42e..f4cf1b0bb3b8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -584,8 +584,6 @@ void __init smp_init(void) num_nodes, (num_nodes > 1 ? "s" : ""), num_cpus, (num_cpus > 1 ? "s" : "")); - /* Final decision about SMT support */ - cpu_smt_check_topology(); /* Any cleanup work */ smp_cpus_done(setup_max_cpus); } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8b068adb9da1..f1a86a0d881d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1204,22 +1204,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) { - int err; - - mutex_lock(&bpf_event_mutex); - err = __bpf_probe_register(btp, prog); - mutex_unlock(&bpf_event_mutex); - return err; + return __bpf_probe_register(btp, prog); } int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) { - int err; - - mutex_lock(&bpf_event_mutex); - err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); - mutex_unlock(&bpf_event_mutex); - return err; + return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); } int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 5c56afc17cf8..4737bb8c07a3 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -180,10 +180,12 @@ store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs, if (unlikely(arg->dynamic)) *dl = make_data_loc(maxlen, dyndata - base); ret = process_fetch_insn(arg->code, regs, dl, base); - if (unlikely(ret < 0 && arg->dynamic)) + if (unlikely(ret < 0 && arg->dynamic)) { *dl = make_data_loc(0, dyndata - base); - else + } else { dyndata += ret; + maxlen -= ret; + } } } diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e335576b9411..9bde07c06362 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -5,7 +5,7 @@ * Copyright (C) IBM Corporation, 2010-2012 * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com> */ -#define pr_fmt(fmt) "trace_kprobe: " fmt +#define pr_fmt(fmt) "trace_uprobe: " fmt #include <linux/ctype.h> #include <linux/module.h> @@ -160,6 +160,13 @@ fetch_store_string(unsigned long addr, void *dest, void *base) if (ret >= 0) { if (ret == maxlen) dst[ret - 1] = '\0'; + else + /* + * Include the terminating null byte. In this case it + * was copied by strncpy_from_user but not accounted + * for in ret. + */ + ret++; *(u32 *)dest = make_data_loc(ret, (void *)dst - base); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 392be4b252f6..fc5d23d752a5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -910,6 +910,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) } /** + * wq_worker_last_func - retrieve worker's last work function + * + * Determine the last function a worker executed. This is called from + * the scheduler to get a worker's last known identity. + * + * CONTEXT: + * spin_lock_irq(rq->lock) + * + * Return: + * The last work function %current executed as a worker, NULL if it + * hasn't executed any work yet. + */ +work_func_t wq_worker_last_func(struct task_struct *task) +{ + struct worker *worker = kthread_data(task); + + return worker->last_func; +} + +/** * worker_set_flags - set worker flags and adjust nr_running accordingly * @worker: self * @flags: flags to set @@ -2184,6 +2204,9 @@ __acquires(&pool->lock) if (unlikely(cpu_intensive)) worker_clr_flags(worker, WORKER_CPU_INTENSIVE); + /* tag the worker for identification in schedule() */ + worker->last_func = worker->current_func; + /* we're done with it, release */ hash_del(&worker->hentry); worker->current_work = NULL; diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 66fbb5a9e633..cb68b03ca89a 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -53,6 +53,9 @@ struct worker { /* used only by rescuers to point to the target workqueue */ struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ + + /* used by the scheduler to determine a worker's last known identity */ + work_func_t last_func; }; /** @@ -67,9 +70,10 @@ static inline struct worker *current_wq_worker(void) /* * Scheduler hooks for concurrency managed workqueue. Only to be used from - * sched/core.c and workqueue.c. + * sched/ and workqueue.c. */ void wq_worker_waking_up(struct task_struct *task, int cpu); struct task_struct *wq_worker_sleeping(struct task_struct *task); +work_func_t wq_worker_last_func(struct task_struct *task); #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ diff --git a/lib/objagg.c b/lib/objagg.c index c9b457a91153..576be22e86de 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -4,6 +4,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/rhashtable.h> +#include <linux/idr.h> #include <linux/list.h> #include <linux/sort.h> #include <linux/objagg.h> @@ -11,6 +12,34 @@ #define CREATE_TRACE_POINTS #include <trace/events/objagg.h> +struct objagg_hints { + struct rhashtable node_ht; + struct rhashtable_params ht_params; + struct list_head node_list; + unsigned int node_count; + unsigned int root_count; + unsigned int refcount; + const struct objagg_ops *ops; +}; + +struct objagg_hints_node { + struct rhash_head ht_node; /* member of objagg_hints->node_ht */ + struct list_head list; /* member of objagg_hints->node_list */ + struct objagg_hints_node *parent; + unsigned int root_id; + struct objagg_obj_stats_info stats_info; + unsigned long obj[0]; +}; + +static struct objagg_hints_node * +objagg_hints_lookup(struct objagg_hints *objagg_hints, void *obj) +{ + if (!objagg_hints) + return NULL; + return rhashtable_lookup_fast(&objagg_hints->node_ht, obj, + objagg_hints->ht_params); +} + struct objagg { const struct objagg_ops *ops; void *priv; @@ -18,6 +47,8 @@ struct objagg { struct rhashtable_params ht_params; struct list_head obj_list; unsigned int obj_count; + struct ida root_ida; + struct objagg_hints *hints; }; struct objagg_obj { @@ -30,6 +61,7 @@ struct objagg_obj { void *delta_priv; /* user delta private */ void *root_priv; /* user root private */ }; + unsigned int root_id; unsigned int refcount; /* counts number of users of this object * including nested objects */ @@ -130,7 +162,8 @@ static struct objagg_obj *objagg_obj_lookup(struct objagg *objagg, void *obj) static int objagg_obj_parent_assign(struct objagg *objagg, struct objagg_obj *objagg_obj, - struct objagg_obj *parent) + struct objagg_obj *parent, + bool take_parent_ref) { void *delta_priv; @@ -144,7 +177,8 @@ static int objagg_obj_parent_assign(struct objagg *objagg, */ objagg_obj->parent = parent; objagg_obj->delta_priv = delta_priv; - objagg_obj_ref_inc(objagg_obj->parent); + if (take_parent_ref) + objagg_obj_ref_inc(objagg_obj->parent); trace_objagg_obj_parent_assign(objagg, objagg_obj, parent, parent->refcount); @@ -164,7 +198,7 @@ static int objagg_obj_parent_lookup_assign(struct objagg *objagg, if (!objagg_obj_is_root(objagg_obj_cur)) continue; err = objagg_obj_parent_assign(objagg, objagg_obj, - objagg_obj_cur); + objagg_obj_cur, true); if (!err) return 0; } @@ -184,16 +218,68 @@ static void objagg_obj_parent_unassign(struct objagg *objagg, __objagg_obj_put(objagg, objagg_obj->parent); } +static int objagg_obj_root_id_alloc(struct objagg *objagg, + struct objagg_obj *objagg_obj, + struct objagg_hints_node *hnode) +{ + unsigned int min, max; + int root_id; + + /* In case there are no hints available, the root id is invalid. */ + if (!objagg->hints) { + objagg_obj->root_id = OBJAGG_OBJ_ROOT_ID_INVALID; + return 0; + } + + if (hnode) { + min = hnode->root_id; + max = hnode->root_id; + } else { + /* For objects with no hint, start after the last + * hinted root_id. + */ + min = objagg->hints->root_count; + max = ~0; + } + + root_id = ida_alloc_range(&objagg->root_ida, min, max, GFP_KERNEL); + + if (root_id < 0) + return root_id; + objagg_obj->root_id = root_id; + return 0; +} + +static void objagg_obj_root_id_free(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + if (!objagg->hints) + return; + ida_free(&objagg->root_ida, objagg_obj->root_id); +} + static int objagg_obj_root_create(struct objagg *objagg, - struct objagg_obj *objagg_obj) + struct objagg_obj *objagg_obj, + struct objagg_hints_node *hnode) { - objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, - objagg_obj->obj); - if (IS_ERR(objagg_obj->root_priv)) - return PTR_ERR(objagg_obj->root_priv); + int err; + err = objagg_obj_root_id_alloc(objagg, objagg_obj, hnode); + if (err) + return err; + objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, + objagg_obj->obj, + objagg_obj->root_id); + if (IS_ERR(objagg_obj->root_priv)) { + err = PTR_ERR(objagg_obj->root_priv); + goto err_root_create; + } trace_objagg_obj_root_create(objagg, objagg_obj); return 0; + +err_root_create: + objagg_obj_root_id_free(objagg, objagg_obj); + return err; } static void objagg_obj_root_destroy(struct objagg *objagg, @@ -201,19 +287,69 @@ static void objagg_obj_root_destroy(struct objagg *objagg, { trace_objagg_obj_root_destroy(objagg, objagg_obj); objagg->ops->root_destroy(objagg->priv, objagg_obj->root_priv); + objagg_obj_root_id_free(objagg, objagg_obj); +} + +static struct objagg_obj *__objagg_obj_get(struct objagg *objagg, void *obj); + +static int objagg_obj_init_with_hints(struct objagg *objagg, + struct objagg_obj *objagg_obj, + bool *hint_found) +{ + struct objagg_hints_node *hnode; + struct objagg_obj *parent; + int err; + + hnode = objagg_hints_lookup(objagg->hints, objagg_obj->obj); + if (!hnode) { + *hint_found = false; + return 0; + } + *hint_found = true; + + if (!hnode->parent) + return objagg_obj_root_create(objagg, objagg_obj, hnode); + + parent = __objagg_obj_get(objagg, hnode->parent->obj); + if (IS_ERR(parent)) + return PTR_ERR(parent); + + err = objagg_obj_parent_assign(objagg, objagg_obj, parent, false); + if (err) { + *hint_found = false; + err = 0; + goto err_parent_assign; + } + + return 0; + +err_parent_assign: + objagg_obj_put(objagg, parent); + return err; } static int objagg_obj_init(struct objagg *objagg, struct objagg_obj *objagg_obj) { + bool hint_found; int err; + /* First, try to use hints if they are available and + * if they provide result. + */ + err = objagg_obj_init_with_hints(objagg, objagg_obj, &hint_found); + if (err) + return err; + + if (hint_found) + return 0; + /* Try to find if the object can be aggregated under an existing one. */ err = objagg_obj_parent_lookup_assign(objagg, objagg_obj); if (!err) return 0; /* If aggregation is not possible, make the object a root. */ - return objagg_obj_root_create(objagg, objagg_obj); + return objagg_obj_root_create(objagg, objagg_obj, NULL); } static void objagg_obj_fini(struct objagg *objagg, @@ -349,8 +485,9 @@ EXPORT_SYMBOL(objagg_obj_put); /** * objagg_create - creates a new objagg instance - * @ops: user-specific callbacks - * @priv: pointer to a private data passed to the ops + * @ops: user-specific callbacks + * @objagg_hints: hints, can be NULL + * @priv: pointer to a private data passed to the ops * * Note: all locking must be provided by the caller. * @@ -374,18 +511,25 @@ EXPORT_SYMBOL(objagg_obj_put); * Returns a pointer to newly created objagg instance in case of success, * otherwise it returns pointer error using ERR_PTR macro. */ -struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) +struct objagg *objagg_create(const struct objagg_ops *ops, + struct objagg_hints *objagg_hints, void *priv) { struct objagg *objagg; int err; if (WARN_ON(!ops || !ops->root_create || !ops->root_destroy || - !ops->delta_create || !ops->delta_destroy)) + !ops->delta_check || !ops->delta_create || + !ops->delta_destroy)) return ERR_PTR(-EINVAL); + objagg = kzalloc(sizeof(*objagg), GFP_KERNEL); if (!objagg) return ERR_PTR(-ENOMEM); objagg->ops = ops; + if (objagg_hints) { + objagg->hints = objagg_hints; + objagg_hints->refcount++; + } objagg->priv = priv; INIT_LIST_HEAD(&objagg->obj_list); @@ -397,6 +541,8 @@ struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) if (err) goto err_rhashtable_init; + ida_init(&objagg->root_ida); + trace_objagg_create(objagg); return objagg; @@ -415,8 +561,11 @@ EXPORT_SYMBOL(objagg_create); void objagg_destroy(struct objagg *objagg) { trace_objagg_destroy(objagg); + ida_destroy(&objagg->root_ida); WARN_ON(!list_empty(&objagg->obj_list)); rhashtable_destroy(&objagg->obj_ht); + if (objagg->hints) + objagg_hints_put(objagg->hints); kfree(objagg); } EXPORT_SYMBOL(objagg_destroy); @@ -472,6 +621,8 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg) objagg_stats->stats_info[i].objagg_obj = objagg_obj; objagg_stats->stats_info[i].is_root = objagg_obj_is_root(objagg_obj); + if (objagg_stats->stats_info[i].is_root) + objagg_stats->root_count++; i++; } objagg_stats->stats_info_count = i; @@ -485,7 +636,7 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg) EXPORT_SYMBOL(objagg_stats_get); /** - * objagg_stats_puts - puts stats of the objagg instance + * objagg_stats_put - puts stats of the objagg instance * @objagg_stats: objagg instance stats * * Note: all locking must be provided by the caller. @@ -496,6 +647,410 @@ void objagg_stats_put(const struct objagg_stats *objagg_stats) } EXPORT_SYMBOL(objagg_stats_put); +static struct objagg_hints_node * +objagg_hints_node_create(struct objagg_hints *objagg_hints, + struct objagg_obj *objagg_obj, size_t obj_size, + struct objagg_hints_node *parent_hnode) +{ + unsigned int user_count = objagg_obj->stats.user_count; + struct objagg_hints_node *hnode; + int err; + + hnode = kzalloc(sizeof(*hnode) + obj_size, GFP_KERNEL); + if (!hnode) + return ERR_PTR(-ENOMEM); + memcpy(hnode->obj, &objagg_obj->obj, obj_size); + hnode->stats_info.stats.user_count = user_count; + hnode->stats_info.stats.delta_user_count = user_count; + if (parent_hnode) { + parent_hnode->stats_info.stats.delta_user_count += user_count; + } else { + hnode->root_id = objagg_hints->root_count++; + hnode->stats_info.is_root = true; + } + hnode->stats_info.objagg_obj = objagg_obj; + + err = rhashtable_insert_fast(&objagg_hints->node_ht, &hnode->ht_node, + objagg_hints->ht_params); + if (err) + goto err_ht_insert; + + list_add(&hnode->list, &objagg_hints->node_list); + hnode->parent = parent_hnode; + objagg_hints->node_count++; + + return hnode; + +err_ht_insert: + kfree(hnode); + return ERR_PTR(err); +} + +static void objagg_hints_flush(struct objagg_hints *objagg_hints) +{ + struct objagg_hints_node *hnode, *tmp; + + list_for_each_entry_safe(hnode, tmp, &objagg_hints->node_list, list) { + list_del(&hnode->list); + rhashtable_remove_fast(&objagg_hints->node_ht, &hnode->ht_node, + objagg_hints->ht_params); + kfree(hnode); + } +} + +struct objagg_tmp_node { + struct objagg_obj *objagg_obj; + bool crossed_out; +}; + +struct objagg_tmp_graph { + struct objagg_tmp_node *nodes; + unsigned long nodes_count; + unsigned long *edges; +}; + +static int objagg_tmp_graph_edge_index(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + return index * graph->nodes_count + parent_index; +} + +static void objagg_tmp_graph_edge_set(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + int edge_index = objagg_tmp_graph_edge_index(graph, index, + parent_index); + + __set_bit(edge_index, graph->edges); +} + +static bool objagg_tmp_graph_is_edge(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + int edge_index = objagg_tmp_graph_edge_index(graph, index, + parent_index); + + return test_bit(edge_index, graph->edges); +} + +static unsigned int objagg_tmp_graph_node_weight(struct objagg_tmp_graph *graph, + unsigned int index) +{ + struct objagg_tmp_node *node = &graph->nodes[index]; + unsigned int weight = node->objagg_obj->stats.user_count; + int j; + + /* Node weight is sum of node users and all other nodes users + * that this node can represent with delta. + */ + + for (j = 0; j < graph->nodes_count; j++) { + if (!objagg_tmp_graph_is_edge(graph, index, j)) + continue; + node = &graph->nodes[j]; + if (node->crossed_out) + continue; + weight += node->objagg_obj->stats.user_count; + } + return weight; +} + +static int objagg_tmp_graph_node_max_weight(struct objagg_tmp_graph *graph) +{ + struct objagg_tmp_node *node; + unsigned int max_weight = 0; + unsigned int weight; + int max_index = -1; + int i; + + for (i = 0; i < graph->nodes_count; i++) { + node = &graph->nodes[i]; + if (node->crossed_out) + continue; + weight = objagg_tmp_graph_node_weight(graph, i); + if (weight >= max_weight) { + max_weight = weight; + max_index = i; + } + } + return max_index; +} + +static struct objagg_tmp_graph *objagg_tmp_graph_create(struct objagg *objagg) +{ + unsigned int nodes_count = objagg->obj_count; + struct objagg_tmp_graph *graph; + struct objagg_tmp_node *node; + struct objagg_tmp_node *pnode; + struct objagg_obj *objagg_obj; + size_t alloc_size; + int i, j; + + graph = kzalloc(sizeof(*graph), GFP_KERNEL); + if (!graph) + return NULL; + + graph->nodes = kcalloc(nodes_count, sizeof(*graph->nodes), GFP_KERNEL); + if (!graph->nodes) + goto err_nodes_alloc; + graph->nodes_count = nodes_count; + + alloc_size = BITS_TO_LONGS(nodes_count * nodes_count) * + sizeof(unsigned long); + graph->edges = kzalloc(alloc_size, GFP_KERNEL); + if (!graph->edges) + goto err_edges_alloc; + + i = 0; + list_for_each_entry(objagg_obj, &objagg->obj_list, list) { + node = &graph->nodes[i++]; + node->objagg_obj = objagg_obj; + } + + /* Assemble a temporary graph. Insert edge X->Y in case Y can be + * in delta of X. + */ + for (i = 0; i < nodes_count; i++) { + for (j = 0; j < nodes_count; j++) { + if (i == j) + continue; + pnode = &graph->nodes[i]; + node = &graph->nodes[j]; + if (objagg->ops->delta_check(objagg->priv, + pnode->objagg_obj->obj, + node->objagg_obj->obj)) { + objagg_tmp_graph_edge_set(graph, i, j); + + } + } + } + return graph; + +err_edges_alloc: + kfree(graph->nodes); +err_nodes_alloc: + kfree(graph); + return NULL; +} + +static void objagg_tmp_graph_destroy(struct objagg_tmp_graph *graph) +{ + kfree(graph->edges); + kfree(graph->nodes); + kfree(graph); +} + +static int +objagg_opt_simple_greedy_fillup_hints(struct objagg_hints *objagg_hints, + struct objagg *objagg) +{ + struct objagg_hints_node *hnode, *parent_hnode; + struct objagg_tmp_graph *graph; + struct objagg_tmp_node *node; + int index; + int j; + int err; + + graph = objagg_tmp_graph_create(objagg); + if (!graph) + return -ENOMEM; + + /* Find the nodes from the ones that can accommodate most users + * and cross them out of the graph. Save them to the hint list. + */ + while ((index = objagg_tmp_graph_node_max_weight(graph)) != -1) { + node = &graph->nodes[index]; + node->crossed_out = true; + hnode = objagg_hints_node_create(objagg_hints, + node->objagg_obj, + objagg->ops->obj_size, + NULL); + if (IS_ERR(hnode)) { + err = PTR_ERR(hnode); + goto out; + } + parent_hnode = hnode; + for (j = 0; j < graph->nodes_count; j++) { + if (!objagg_tmp_graph_is_edge(graph, index, j)) + continue; + node = &graph->nodes[j]; + if (node->crossed_out) + continue; + node->crossed_out = true; + hnode = objagg_hints_node_create(objagg_hints, + node->objagg_obj, + objagg->ops->obj_size, + parent_hnode); + if (IS_ERR(hnode)) { + err = PTR_ERR(hnode); + goto out; + } + } + } + + err = 0; +out: + objagg_tmp_graph_destroy(graph); + return err; +} + +struct objagg_opt_algo { + int (*fillup_hints)(struct objagg_hints *objagg_hints, + struct objagg *objagg); +}; + +static const struct objagg_opt_algo objagg_opt_simple_greedy = { + .fillup_hints = objagg_opt_simple_greedy_fillup_hints, +}; + + +static const struct objagg_opt_algo *objagg_opt_algos[] = { + [OBJAGG_OPT_ALGO_SIMPLE_GREEDY] = &objagg_opt_simple_greedy, +}; + +static int objagg_hints_obj_cmp(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct rhashtable *ht = arg->ht; + struct objagg_hints *objagg_hints = + container_of(ht, struct objagg_hints, node_ht); + const struct objagg_ops *ops = objagg_hints->ops; + const char *ptr = obj; + + ptr += ht->p.key_offset; + return ops->hints_obj_cmp ? ops->hints_obj_cmp(ptr, arg->key) : + memcmp(ptr, arg->key, ht->p.key_len); +} + +/** + * objagg_hints_get - obtains hints instance + * @objagg: objagg instance + * @opt_algo_type: type of hints finding algorithm + * + * Note: all locking must be provided by the caller. + * + * According to the algo type, the existing objects of objagg instance + * are going to be went-through to assemble an optimal tree. We call this + * tree hints. These hints can be later on used for creation of + * a new objagg instance. There, the future object creations are going + * to be consulted with these hints in order to find out, where exactly + * the new object should be put as a root or delta. + * + * Returns a pointer to hints instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. + */ +struct objagg_hints *objagg_hints_get(struct objagg *objagg, + enum objagg_opt_algo_type opt_algo_type) +{ + const struct objagg_opt_algo *algo = objagg_opt_algos[opt_algo_type]; + struct objagg_hints *objagg_hints; + int err; + + objagg_hints = kzalloc(sizeof(*objagg_hints), GFP_KERNEL); + if (!objagg_hints) + return ERR_PTR(-ENOMEM); + + objagg_hints->ops = objagg->ops; + objagg_hints->refcount = 1; + + INIT_LIST_HEAD(&objagg_hints->node_list); + + objagg_hints->ht_params.key_len = objagg->ops->obj_size; + objagg_hints->ht_params.key_offset = + offsetof(struct objagg_hints_node, obj); + objagg_hints->ht_params.head_offset = + offsetof(struct objagg_hints_node, ht_node); + objagg_hints->ht_params.obj_cmpfn = objagg_hints_obj_cmp; + + err = rhashtable_init(&objagg_hints->node_ht, &objagg_hints->ht_params); + if (err) + goto err_rhashtable_init; + + err = algo->fillup_hints(objagg_hints, objagg); + if (err) + goto err_fillup_hints; + + if (WARN_ON(objagg_hints->node_count != objagg->obj_count)) { + err = -EINVAL; + goto err_node_count_check; + } + + return objagg_hints; + +err_node_count_check: +err_fillup_hints: + objagg_hints_flush(objagg_hints); + rhashtable_destroy(&objagg_hints->node_ht); +err_rhashtable_init: + kfree(objagg_hints); + return ERR_PTR(err); +} +EXPORT_SYMBOL(objagg_hints_get); + +/** + * objagg_hints_put - puts hints instance + * @objagg_hints: objagg hints instance + * + * Note: all locking must be provided by the caller. + */ +void objagg_hints_put(struct objagg_hints *objagg_hints) +{ + if (--objagg_hints->refcount) + return; + objagg_hints_flush(objagg_hints); + rhashtable_destroy(&objagg_hints->node_ht); + kfree(objagg_hints); +} +EXPORT_SYMBOL(objagg_hints_put); + +/** + * objagg_hints_stats_get - obtains stats of the hints instance + * @objagg_hints: hints instance + * + * Note: all locking must be provided by the caller. + * + * The returned structure contains statistics of all objects + * currently in use, ordered by following rules: + * 1) Root objects are always on lower indexes than the rest. + * 2) Objects with higher delta user count are always on lower + * indexes. + * 3) In case multiple objects have the same delta user count, + * the objects are ordered by user count. + * + * Returns a pointer to stats instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. + */ +const struct objagg_stats * +objagg_hints_stats_get(struct objagg_hints *objagg_hints) +{ + struct objagg_stats *objagg_stats; + struct objagg_hints_node *hnode; + int i; + + objagg_stats = kzalloc(struct_size(objagg_stats, stats_info, + objagg_hints->node_count), + GFP_KERNEL); + if (!objagg_stats) + return ERR_PTR(-ENOMEM); + + i = 0; + list_for_each_entry(hnode, &objagg_hints->node_list, list) { + memcpy(&objagg_stats->stats_info[i], &hnode->stats_info, + sizeof(objagg_stats->stats_info[0])); + if (objagg_stats->stats_info[i].is_root) + objagg_stats->root_count++; + i++; + } + objagg_stats->stats_info_count = i; + + sort(objagg_stats->stats_info, objagg_stats->stats_info_count, + sizeof(struct objagg_obj_stats_info), + objagg_stats_info_sort_cmp_func, NULL); + + return objagg_stats; +} +EXPORT_SYMBOL(objagg_hints_stats_get); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); MODULE_DESCRIPTION("Object aggregation manager"); diff --git a/lib/test_kmod.c b/lib/test_kmod.c index d82d022111e0..9cf77628fc91 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -632,7 +632,7 @@ static void __kmod_config_free(struct test_config *config) config->test_driver = NULL; kfree_const(config->test_fs); - config->test_driver = NULL; + config->test_fs = NULL; } static void kmod_config_free(struct kmod_test_device *test_dev) diff --git a/lib/test_objagg.c b/lib/test_objagg.c index ab57144bb0cd..72c1abfa154d 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -87,6 +87,15 @@ static void world_obj_put(struct world *world, struct objagg *objagg, #define MAX_KEY_ID_DIFF 5 +static bool delta_check(void *priv, const void *parent_obj, const void *obj) +{ + const struct tokey *parent_key = parent_obj; + const struct tokey *key = obj; + int diff = key->id - parent_key->id; + + return diff >= 0 && diff <= MAX_KEY_ID_DIFF; +} + static void *delta_create(void *priv, void *parent_obj, void *obj) { struct tokey *parent_key = parent_obj; @@ -95,7 +104,7 @@ static void *delta_create(void *priv, void *parent_obj, void *obj) int diff = key->id - parent_key->id; struct delta *delta; - if (diff < 0 || diff > MAX_KEY_ID_DIFF) + if (!delta_check(priv, parent_obj, obj)) return ERR_PTR(-EINVAL); delta = kzalloc(sizeof(*delta), GFP_KERNEL); @@ -115,7 +124,7 @@ static void delta_destroy(void *priv, void *delta_priv) kfree(delta); } -static void *root_create(void *priv, void *obj) +static void *root_create(void *priv, void *obj, unsigned int id) { struct world *world = priv; struct tokey *key = obj; @@ -268,6 +277,12 @@ stats_put: return err; } +static bool delta_check_dummy(void *priv, const void *parent_obj, + const void *obj) +{ + return false; +} + static void *delta_create_dummy(void *priv, void *parent_obj, void *obj) { return ERR_PTR(-EOPNOTSUPP); @@ -279,6 +294,7 @@ static void delta_destroy_dummy(void *priv, void *delta_priv) static const struct objagg_ops nodelta_ops = { .obj_size = sizeof(struct tokey), + .delta_check = delta_check_dummy, .delta_create = delta_create_dummy, .delta_destroy = delta_destroy_dummy, .root_create = root_create, @@ -292,7 +308,7 @@ static int test_nodelta(void) int i; int err; - objagg = objagg_create(&nodelta_ops, &world); + objagg = objagg_create(&nodelta_ops, NULL, &world); if (IS_ERR(objagg)) return PTR_ERR(objagg); @@ -357,6 +373,7 @@ err_stats_second_zero: static const struct objagg_ops delta_ops = { .obj_size = sizeof(struct tokey), + .delta_check = delta_check, .delta_create = delta_create, .delta_destroy = delta_destroy, .root_create = root_create, @@ -728,8 +745,10 @@ static int check_expect_stats(struct objagg *objagg, int err; stats = objagg_stats_get(objagg); - if (IS_ERR(stats)) + if (IS_ERR(stats)) { + *errmsg = "objagg_stats_get() failed."; return PTR_ERR(stats); + } err = __check_expect_stats(stats, expect_stats, errmsg); objagg_stats_put(stats); return err; @@ -769,7 +788,6 @@ static int test_delta_action_item(struct world *world, if (err) goto errout; - errmsg = NULL; err = check_expect_stats(objagg, &action_item->expect_stats, &errmsg); if (err) { pr_err("Key %u: Stats: %s\n", action_item->key_id, errmsg); @@ -793,7 +811,7 @@ static int test_delta(void) int i; int err; - objagg = objagg_create(&delta_ops, &world); + objagg = objagg_create(&delta_ops, NULL, &world); if (IS_ERR(objagg)) return PTR_ERR(objagg); @@ -815,6 +833,170 @@ err_do_action_item: return err; } +struct hints_case { + const unsigned int *key_ids; + size_t key_ids_count; + struct expect_stats expect_stats; + struct expect_stats expect_stats_hints; +}; + +static const unsigned int hints_case_key_ids[] = { + 1, 7, 3, 5, 3, 1, 30, 8, 8, 5, 6, 8, +}; + +static const struct hints_case hints_case = { + .key_ids = hints_case_key_ids, + .key_ids_count = ARRAY_SIZE(hints_case_key_ids), + .expect_stats = + EXPECT_STATS(7, ROOT(1, 2, 7), ROOT(7, 1, 4), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(3, 2), + DELTA(5, 2), DELTA(6, 1)), + .expect_stats_hints = + EXPECT_STATS(7, ROOT(3, 2, 9), ROOT(1, 2, 2), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(5, 2), + DELTA(6, 1), DELTA(7, 1)), +}; + +static void __pr_debug_stats(const struct objagg_stats *stats) +{ + int i; + + for (i = 0; i < stats->stats_info_count; i++) + pr_debug("Stat index %d key %u: u %d, d %d, %s\n", i, + obj_to_key_id(stats->stats_info[i].objagg_obj), + stats->stats_info[i].stats.user_count, + stats->stats_info[i].stats.delta_user_count, + stats->stats_info[i].is_root ? "root" : "noroot"); +} + +static void pr_debug_stats(struct objagg *objagg) +{ + const struct objagg_stats *stats; + + stats = objagg_stats_get(objagg); + if (IS_ERR(stats)) + return; + __pr_debug_stats(stats); + objagg_stats_put(stats); +} + +static void pr_debug_hints_stats(struct objagg_hints *objagg_hints) +{ + const struct objagg_stats *stats; + + stats = objagg_hints_stats_get(objagg_hints); + if (IS_ERR(stats)) + return; + __pr_debug_stats(stats); + objagg_stats_put(stats); +} + +static int check_expect_hints_stats(struct objagg_hints *objagg_hints, + const struct expect_stats *expect_stats, + const char **errmsg) +{ + const struct objagg_stats *stats; + int err; + + stats = objagg_hints_stats_get(objagg_hints); + if (IS_ERR(stats)) + return PTR_ERR(stats); + err = __check_expect_stats(stats, expect_stats, errmsg); + objagg_stats_put(stats); + return err; +} + +static int test_hints_case(const struct hints_case *hints_case) +{ + struct objagg_obj *objagg_obj; + struct objagg_hints *hints; + struct world world2 = {}; + struct world world = {}; + struct objagg *objagg2; + struct objagg *objagg; + const char *errmsg; + int i; + int err; + + objagg = objagg_create(&delta_ops, NULL, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world, objagg, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world_obj_get; + } + } + + pr_debug_stats(objagg); + err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); + if (err) { + pr_err("Stats: %s\n", errmsg); + goto err_check_expect_stats; + } + + hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + err = PTR_ERR(hints); + goto err_hints_get; + } + + pr_debug_hints_stats(hints); + err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints, + &errmsg); + if (err) { + pr_err("Hints stats: %s\n", errmsg); + goto err_check_expect_hints_stats; + } + + objagg2 = objagg_create(&delta_ops, hints, &world2); + if (IS_ERR(objagg2)) + return PTR_ERR(objagg2); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world2, objagg2, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world2_obj_get; + } + } + + pr_debug_stats(objagg2); + err = check_expect_stats(objagg2, &hints_case->expect_stats_hints, + &errmsg); + if (err) { + pr_err("Stats2: %s\n", errmsg); + goto err_check_expect_stats2; + } + + err = 0; + +err_check_expect_stats2: +err_world2_obj_get: + for (i--; i >= 0; i--) + world_obj_put(&world2, objagg, hints_case->key_ids[i]); + objagg_hints_put(hints); + objagg_destroy(objagg2); + i = hints_case->key_ids_count; +err_check_expect_hints_stats: +err_hints_get: +err_check_expect_stats: +err_world_obj_get: + for (i--; i >= 0; i--) + world_obj_put(&world, objagg, hints_case->key_ids[i]); + + objagg_destroy(objagg); + return err; +} +static int test_hints(void) +{ + return test_hints_case(&hints_case); +} + static int __init test_objagg_init(void) { int err; @@ -822,7 +1004,10 @@ static int __init test_objagg_init(void) err = test_nodelta(); if (err) return err; - return test_delta(); + err = test_delta(); + if (err) + return err; + return test_hints(); } static void __exit test_objagg_exit(void) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 6a8ac7626797..e52f8cafe227 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -541,38 +541,45 @@ static unsigned int __init print_ht(struct rhltable *rhlt) static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects, int cnt, bool slow) { - struct rhltable rhlt; + struct rhltable *rhlt; unsigned int i, ret; const char *key; int err = 0; - err = rhltable_init(&rhlt, &test_rht_params_dup); - if (WARN_ON(err)) + rhlt = kmalloc(sizeof(*rhlt), GFP_KERNEL); + if (WARN_ON(!rhlt)) + return -EINVAL; + + err = rhltable_init(rhlt, &test_rht_params_dup); + if (WARN_ON(err)) { + kfree(rhlt); return err; + } for (i = 0; i < cnt; i++) { rhl_test_objects[i].value.tid = i; - key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead); + key = rht_obj(&rhlt->ht, &rhl_test_objects[i].list_node.rhead); key += test_rht_params_dup.key_offset; if (slow) { - err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key, + err = PTR_ERR(rhashtable_insert_slow(&rhlt->ht, key, &rhl_test_objects[i].list_node.rhead)); if (err == -EAGAIN) err = 0; } else - err = rhltable_insert(&rhlt, + err = rhltable_insert(rhlt, &rhl_test_objects[i].list_node, test_rht_params_dup); if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast")) goto skip_print; } - ret = print_ht(&rhlt); + ret = print_ht(rhlt); WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast"); skip_print: - rhltable_destroy(&rhlt); + rhltable_destroy(rhlt); + kfree(rhlt); return 0; } @@ -1674,7 +1674,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, if (!pmd_present(pmd)) return 0; - if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) { + if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) || + pmd_devmap(pmd))) { /* * NUMA hinting faults need to be handled in the GUP * slowpath for accounting purposes and so that they diff --git a/mm/hugetlb.c b/mm/hugetlb.c index df2e7dd5ff17..afef61656c1e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4268,7 +4268,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, break; } if (ret & VM_FAULT_RETRY) { - if (nonblocking) + if (nonblocking && + !(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *nonblocking = 0; *nr_pages = 0; /* diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 0a14fcff70ed..e2bb06c1b45e 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -5,6 +5,7 @@ UBSAN_SANITIZE_generic.o := n UBSAN_SANITIZE_tags.o := n KCOV_INSTRUMENT := n +CFLAGS_REMOVE_common.o = -pg CFLAGS_REMOVE_generic.o = -pg # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 7c72f2a95785..831be5ff5f4d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -372,7 +372,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail, if (fail || tk->addr_valid == 0) { pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", pfn, tk->tsk->comm, tk->tsk->pid); - force_sig(SIGKILL, tk->tsk); + do_send_sig_info(SIGKILL, SEND_SIG_PRIV, + tk->tsk, PIDTYPE_PID); } /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b9a667d36c55..124e794867c5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1233,7 +1233,8 @@ static bool is_pageblock_removable_nolock(struct page *page) bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { struct page *page = pfn_to_page(start_pfn); - struct page *end_page = page + nr_pages; + unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page))); + struct page *end_page = pfn_to_page(end_pfn); /* Check the starting page of each pageblock within the range */ for (; page < end_page; page = next_active_pageblock(page)) { @@ -1273,6 +1274,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, i++; if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn) continue; + /* Check if we got outside of the zone */ + if (zone && !zone_spans_pfn(zone, pfn + i)) + return 0; page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0; @@ -1301,23 +1305,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, static unsigned long scan_movable_pages(unsigned long start, unsigned long end) { unsigned long pfn; - struct page *page; + for (pfn = start; pfn < end; pfn++) { - if (pfn_valid(pfn)) { - page = pfn_to_page(pfn); - if (PageLRU(page)) - return pfn; - if (__PageMovable(page)) - return pfn; - if (PageHuge(page)) { - if (hugepage_migration_supported(page_hstate(page)) && - page_huge_active(page)) - return pfn; - else - pfn = round_up(pfn + 1, - 1 << compound_order(page)) - 1; - } - } + struct page *page, *head; + unsigned long skip; + + if (!pfn_valid(pfn)) + continue; + page = pfn_to_page(pfn); + if (PageLRU(page)) + return pfn; + if (__PageMovable(page)) + return pfn; + + if (!PageHuge(page)) + continue; + head = compound_head(page); + if (hugepage_migration_supported(page_hstate(head)) && + page_huge_active(head)) + return pfn; + skip = (1 << compound_order(head)) - (page - head); + pfn += skip - 1; } return 0; } @@ -1344,7 +1352,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; struct page *page; - int not_managed = 0; int ret = 0; LIST_HEAD(source); @@ -1392,7 +1399,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) else ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE); if (!ret) { /* Success */ - put_page(page); list_add_tail(&page->lru, &source); if (!__PageMovable(page)) inc_node_page_state(page, NR_ISOLATED_ANON + @@ -1401,22 +1407,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) } else { pr_warn("failed to isolate pfn %lx\n", pfn); dump_page(page, "isolation failed"); - put_page(page); - /* Because we don't have big zone->lock. we should - check this again here. */ - if (page_count(page)) { - not_managed++; - ret = -EBUSY; - break; - } } + put_page(page); } if (!list_empty(&source)) { - if (not_managed) { - putback_movable_pages(&source); - goto out; - } - /* Allocate a new page from the nearest neighbor node */ ret = migrate_pages(&source, new_node_page, NULL, 0, MIGRATE_SYNC, MR_MEMORY_HOTPLUG); @@ -1429,7 +1423,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) putback_movable_pages(&source); } } -out: + return ret; } @@ -1576,7 +1570,6 @@ static int __ref __offline_pages(unsigned long start_pfn, we assume this for now. .*/ if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) { - mem_hotplug_done(); ret = -EINVAL; reason = "multizone range"; goto failed_removal; @@ -1591,7 +1584,6 @@ static int __ref __offline_pages(unsigned long start_pfn, MIGRATE_MOVABLE, SKIP_HWPOISON | REPORT_FAILURE); if (ret) { - mem_hotplug_done(); reason = "failure to isolate range"; goto failed_removal; } diff --git a/mm/migrate.c b/mm/migrate.c index a16b15090df3..d4fd680be3b0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -709,7 +709,6 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, /* Simple case, sync compaction */ if (mode != MIGRATE_ASYNC) { do { - get_bh(bh); lock_buffer(bh); bh = bh->b_this_page; @@ -720,18 +719,15 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, /* async case, we cannot block on lock_buffer so use trylock_buffer */ do { - get_bh(bh); if (!trylock_buffer(bh)) { /* * We failed to lock the buffer and cannot stall in * async migration. Release the taken locks */ struct buffer_head *failed_bh = bh; - put_bh(failed_bh); bh = head; while (bh != failed_bh) { unlock_buffer(bh); - put_bh(bh); bh = bh->b_this_page; } return false; @@ -818,7 +814,6 @@ unlock_buffers: bh = head; do { unlock_buffer(bh); - put_bh(bh); bh = bh->b_this_page; } while (bh != head); @@ -1135,10 +1130,13 @@ out: * If migration is successful, decrease refcount of the newpage * which will not free the page because new page owner increased * refcounter. As well, if it is LRU page, add the page to LRU - * list in here. + * list in here. Use the old state of the isolated source page to + * determine if we migrated a LRU page. newpage was already unlocked + * and possibly modified by its owner - don't rely on the page + * state. */ if (rc == MIGRATEPAGE_SUCCESS) { - if (unlikely(__PageMovable(newpage))) + if (unlikely(!is_lru)) put_page(newpage); else putback_lru_page(newpage); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f0e8cd9edb1a..26ea8636758f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -647,8 +647,8 @@ static int oom_reaper(void *unused) static void wake_oom_reaper(struct task_struct *tsk) { - /* tsk is already queued? */ - if (tsk == oom_reaper_list || tsk->oom_reaper_list) + /* mm is already queued? */ + if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) return; get_task_struct(tsk); @@ -975,6 +975,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message) * still freeing memory. */ read_lock(&tasklist_lock); + + /* + * The task 'p' might have already exited before reaching here. The + * put_task_struct() will free task_struct 'p' while the loop still try + * to access the field of 'p', so, get an extra reference. + */ + get_task_struct(p); for_each_thread(p, t) { list_for_each_entry(child, &t->children, sibling) { unsigned int child_points; @@ -994,6 +1001,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message) } } } + put_task_struct(p); read_unlock(&tasklist_lock); /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 35fdde041f5c..46285d28e43b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4675,11 +4675,11 @@ refill: /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ - page_ref_add(page, size - 1); + page_ref_add(page, size); /* reset page count bias and offset to start of new frag */ nc->pfmemalloc = page_is_pfmemalloc(page); - nc->pagecnt_bias = size; + nc->pagecnt_bias = size + 1; nc->offset = size; } @@ -4695,10 +4695,10 @@ refill: size = nc->size; #endif /* OK, page count is 0, we can safely set it */ - set_page_count(page, size); + set_page_count(page, size + 1); /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size; + nc->pagecnt_bias = size + 1; offset = size - fragsz; } diff --git a/mm/page_ext.c b/mm/page_ext.c index ae44f7adbe07..8c78b8d45117 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -398,10 +398,8 @@ void __init page_ext_init(void) * We know some arch can have a nodes layout such as * -------------pfn--------------> * N0 | N1 | N2 | N0 | N1 | N2|.... - * - * Take into account DEFERRED_STRUCT_PAGE_INIT. */ - if (early_pfn_to_nid(pfn) != nid) + if (pfn_to_nid(pfn) != nid) continue; if (init_section_page_ext(pfn, nid)) goto oom; diff --git a/mm/vmscan.c b/mm/vmscan.c index a714c4f800e9..e979705bbf32 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -491,16 +491,6 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, delta = freeable / 2; } - /* - * Make sure we apply some minimal pressure on default priority - * even on small cgroups. Stale objects are not only consuming memory - * by themselves, but can also hold a reference to a dying cgroup, - * preventing it from being reclaimed. A dying cgroup with all - * corresponding structures like per-cpu stats and kmem caches - * can be really big, so it may lead to a significant waste of memory. - */ - delta = max_t(unsigned long long, delta, min(freeable, batch_size)); - total_scan += delta; if (total_scan < 0) { pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", diff --git a/net/atm/proc.c b/net/atm/proc.c index 0b0495a41bbe..d79221fd4dae 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -134,7 +134,8 @@ static void vcc_seq_stop(struct seq_file *seq, void *v) static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { v = vcc_walk(seq, 1); - *pos += !!PTR_ERR(v); + if (v) + (*pos)++; return v; } diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 7b80f6f8d4dc..a9b7919c9de5 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -104,6 +104,9 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); + /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + dev_put(real_netdev); if (ret == -ENOENT) { /* Node is not associated anymore! It would be diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 899ab051ffce..310a4f353008 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1711,6 +1711,8 @@ static void batadv_dat_put_dhcp(struct batadv_priv *bat_priv, u8 *chaddr, batadv_dat_send_data(bat_priv, skb, yiaddr, vid, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT); + consume_skb(skb); + batadv_dbg(BATADV_DBG_DAT, bat_priv, "Snooped from outgoing DHCPACK (server address): %pI4, %pM (vid: %i)\n", &ip_dst, hw_dst, batadv_print_vid(vid)); diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 0a6b9b30eabd..f5811f61aa92 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -47,7 +47,6 @@ #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> -#include "gateway_common.h" #include "hard-interface.h" #include "log.h" #include "netlink.h" diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 53d03adc9bfc..e064de45e22c 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -28,6 +28,7 @@ #include <linux/stddef.h> #include <linux/string.h> #include <uapi/linux/batadv_packet.h> +#include <uapi/linux/batman_adv.h> #include "gateway_client.h" #include "log.h" diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 89bae100a0b0..128467a0fb89 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -25,12 +25,6 @@ struct net_device; -enum batadv_gw_modes { - BATADV_GW_MODE_OFF, - BATADV_GW_MODE_CLIENT, - BATADV_GW_MODE_SERVER, -}; - /** * enum batadv_bandwidth_units - bandwidth unit types */ diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 28c1fb8d1af0..96ef7c70b4d9 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -20,7 +20,6 @@ #include "main.h" #include <linux/atomic.h> -#include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/gfp.h> @@ -179,8 +178,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) parent_dev = __dev_get_by_index((struct net *)parent_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ - if (WARN(!parent_dev, "Cannot find parent device")) + if (!parent_dev) { + pr_err("Cannot find parent device\n"); return false; + } if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 5fe833cc9507..67a58da2e6a0 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -20,13 +20,17 @@ #include "main.h" #include <linux/atomic.h> +#include <linux/bitops.h> +#include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> +#include <linux/err.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/genetlink.h> #include <linux/gfp.h> #include <linux/if_ether.h> +#include <linux/if_vlan.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> @@ -47,21 +51,54 @@ #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" +#include "gateway_common.h" #include "hard-interface.h" +#include "log.h" #include "multicast.h" +#include "network-coding.h" #include "originator.h" #include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" +struct net; + struct genl_family batadv_netlink_family; /* multicast groups */ enum batadv_netlink_multicast_groups { + BATADV_NL_MCGRP_CONFIG, BATADV_NL_MCGRP_TPMETER, }; +/** + * enum batadv_genl_ops_flags - flags for genl_ops's internal_flags + */ +enum batadv_genl_ops_flags { + /** + * @BATADV_FLAG_NEED_MESH: request requires valid soft interface in + * attribute BATADV_ATTR_MESH_IFINDEX and expects a pointer to it to be + * saved in info->user_ptr[0] + */ + BATADV_FLAG_NEED_MESH = BIT(0), + + /** + * @BATADV_FLAG_NEED_HARDIF: request requires valid hard interface in + * attribute BATADV_ATTR_HARD_IFINDEX and expects a pointer to it to be + * saved in info->user_ptr[1] + */ + BATADV_FLAG_NEED_HARDIF = BIT(1), + + /** + * @BATADV_FLAG_NEED_VLAN: request requires valid vlan in + * attribute BATADV_ATTR_VLANID and expects a pointer to it to be + * saved in info->user_ptr[1] + */ + BATADV_FLAG_NEED_VLAN = BIT(2), +}; + static const struct genl_multicast_group batadv_netlink_mcgrps[] = { + [BATADV_NL_MCGRP_CONFIG] = { .name = BATADV_NL_MCAST_GROUP_CONFIG }, [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER }, }; @@ -104,6 +141,26 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 }, [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 }, [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 }, + [BATADV_ATTR_VLANID] = { .type = NLA_U16 }, + [BATADV_ATTR_AGGREGATED_OGMS_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_AP_ISOLATION_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_ISOLATION_MARK] = { .type = NLA_U32 }, + [BATADV_ATTR_ISOLATION_MASK] = { .type = NLA_U32 }, + [BATADV_ATTR_BONDING_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_FRAGMENTATION_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_GW_BANDWIDTH_DOWN] = { .type = NLA_U32 }, + [BATADV_ATTR_GW_BANDWIDTH_UP] = { .type = NLA_U32 }, + [BATADV_ATTR_GW_MODE] = { .type = NLA_U8 }, + [BATADV_ATTR_GW_SEL_CLASS] = { .type = NLA_U32 }, + [BATADV_ATTR_HOP_PENALTY] = { .type = NLA_U8 }, + [BATADV_ATTR_LOG_LEVEL] = { .type = NLA_U32 }, + [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_NETWORK_CODING_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_ORIG_INTERVAL] = { .type = NLA_U32 }, + [BATADV_ATTR_ELP_INTERVAL] = { .type = NLA_U32 }, + [BATADV_ATTR_THROUGHPUT_OVERRIDE] = { .type = NLA_U32 }, }; /** @@ -122,20 +179,75 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) } /** - * batadv_netlink_mesh_info_put() - fill in generic information about mesh - * interface - * @msg: netlink message to be sent back - * @soft_iface: interface for which the data should be taken + * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute + * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information * - * Return: 0 on success, < 0 on error + * Return: 0 on success or negative error number in case of failure */ -static int -batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) +static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg, + struct batadv_priv *bat_priv) +{ + struct batadv_softif_vlan *vlan; + u8 ap_isolation; + + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (!vlan) + return 0; + + ap_isolation = atomic_read(&vlan->ap_isolation); + batadv_softif_vlan_put(vlan); + + return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED, + !!ap_isolation); +} + +/** + * batadv_option_set_ap_isolation() - Set ap_isolation from genl msg + * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr, + struct batadv_priv *bat_priv) +{ + struct batadv_softif_vlan *vlan; + + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (!vlan) + return -ENOENT; + + atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr)); + batadv_softif_vlan_put(vlan); + + return 0; +} + +/** + * batadv_netlink_mesh_fill() - Fill message with mesh attributes + * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information + * @cmd: type of message to generate + * @portid: Port making netlink request + * @seq: sequence number for message + * @flags: Additional flags for message + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_mesh_fill(struct sk_buff *msg, + struct batadv_priv *bat_priv, + enum batadv_nl_commands cmd, + u32 portid, u32 seq, int flags) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct net_device *soft_iface = bat_priv->soft_iface; struct batadv_hard_iface *primary_if = NULL; struct net_device *hard_iface; - int ret = -ENOBUFS; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); + if (!hdr) + return -ENOBUFS; if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) || nla_put_string(msg, BATADV_ATTR_ALGO_NAME, @@ -146,16 +258,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) soft_iface->dev_addr) || nla_put_u8(msg, BATADV_ATTR_TT_TTVN, (u8)atomic_read(&bat_priv->tt.vn))) - goto out; + goto nla_put_failure; #ifdef CONFIG_BATMAN_ADV_BLA if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC, ntohs(bat_priv->bla.claim_dest.group))) - goto out; + goto nla_put_failure; #endif if (batadv_mcast_mesh_info_put(msg, bat_priv)) - goto out; + goto nla_put_failure; primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { @@ -167,77 +279,345 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) hard_iface->name) || nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN, hard_iface->dev_addr)) - goto out; + goto nla_put_failure; } - ret = 0; + if (nla_put_u8(msg, BATADV_ATTR_AGGREGATED_OGMS_ENABLED, + !!atomic_read(&bat_priv->aggregated_ogms))) + goto nla_put_failure; + + if (batadv_netlink_mesh_fill_ap_isolation(msg, bat_priv)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MARK, + bat_priv->isolation_mark)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MASK, + bat_priv->isolation_mark_mask)) + goto nla_put_failure; + + if (nla_put_u8(msg, BATADV_ATTR_BONDING_ENABLED, + !!atomic_read(&bat_priv->bonding))) + goto nla_put_failure; + +#ifdef CONFIG_BATMAN_ADV_BLA + if (nla_put_u8(msg, BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, + !!atomic_read(&bat_priv->bridge_loop_avoidance))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_BLA */ + +#ifdef CONFIG_BATMAN_ADV_DAT + if (nla_put_u8(msg, BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED, + !!atomic_read(&bat_priv->distributed_arp_table))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_DAT */ + + if (nla_put_u8(msg, BATADV_ATTR_FRAGMENTATION_ENABLED, + !!atomic_read(&bat_priv->fragmentation))) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_DOWN, + atomic_read(&bat_priv->gw.bandwidth_down))) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_UP, + atomic_read(&bat_priv->gw.bandwidth_up))) + goto nla_put_failure; + + if (nla_put_u8(msg, BATADV_ATTR_GW_MODE, + atomic_read(&bat_priv->gw.mode))) + goto nla_put_failure; + + if (bat_priv->algo_ops->gw.get_best_gw_node && + bat_priv->algo_ops->gw.is_eligible) { + /* GW selection class is not available if the routing algorithm + * in use does not implement the GW API + */ + if (nla_put_u32(msg, BATADV_ATTR_GW_SEL_CLASS, + atomic_read(&bat_priv->gw.sel_class))) + goto nla_put_failure; + } + + if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY, + atomic_read(&bat_priv->hop_penalty))) + goto nla_put_failure; + +#ifdef CONFIG_BATMAN_ADV_DEBUG + if (nla_put_u32(msg, BATADV_ATTR_LOG_LEVEL, + atomic_read(&bat_priv->log_level))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_DEBUG */ + +#ifdef CONFIG_BATMAN_ADV_MCAST + if (nla_put_u8(msg, BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, + !atomic_read(&bat_priv->multicast_mode))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_MCAST */ + +#ifdef CONFIG_BATMAN_ADV_NC + if (nla_put_u8(msg, BATADV_ATTR_NETWORK_CODING_ENABLED, + !!atomic_read(&bat_priv->network_coding))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_NC */ + + if (nla_put_u32(msg, BATADV_ATTR_ORIG_INTERVAL, + atomic_read(&bat_priv->orig_interval))) + goto nla_put_failure; - out: if (primary_if) batadv_hardif_put(primary_if); - return ret; + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + if (primary_if) + batadv_hardif_put(primary_if); + + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } /** - * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO - * netlink request - * @skb: received netlink message - * @info: receiver information + * batadv_netlink_notify_mesh() - send softif attributes to listener + * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success, < 0 on error */ -static int -batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info) +int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv) { - struct net *net = genl_info_net(info); - struct net_device *soft_iface; - struct sk_buff *msg = NULL; - void *msg_head; - int ifindex; + struct sk_buff *msg; int ret; - if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) - return -EINVAL; - - ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - if (!ifindex) - return -EINVAL; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { - ret = -ENODEV; - goto out; + ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_SET_MESH, + 0, 0, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; } + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); + + return 0; +} + +/** + * batadv_netlink_get_mesh() - Get softif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct sk_buff *msg; + int ret; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - ret = -ENOMEM; - goto out; + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_GET_MESH, + info->snd_portid, info->snd_seq, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; } - msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq, - &batadv_netlink_family, 0, - BATADV_CMD_GET_MESH_INFO); - if (!msg_head) { - ret = -ENOBUFS; - goto out; + ret = genlmsg_reply(msg, info); + + return ret; +} + +/** + * batadv_netlink_set_mesh() - Set softif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct nlattr *attr; + + if (info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]) { + attr = info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]; + + atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr)); } - ret = batadv_netlink_mesh_info_put(msg, soft_iface); + if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) { + attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]; - out: - if (soft_iface) - dev_put(soft_iface); + batadv_netlink_set_mesh_ap_isolation(attr, bat_priv); + } - if (ret) { - if (msg) - nlmsg_free(msg); - return ret; + if (info->attrs[BATADV_ATTR_ISOLATION_MARK]) { + attr = info->attrs[BATADV_ATTR_ISOLATION_MARK]; + + bat_priv->isolation_mark = nla_get_u32(attr); } - genlmsg_end(msg, msg_head); - return genlmsg_reply(msg, info); + if (info->attrs[BATADV_ATTR_ISOLATION_MASK]) { + attr = info->attrs[BATADV_ATTR_ISOLATION_MASK]; + + bat_priv->isolation_mark_mask = nla_get_u32(attr); + } + + if (info->attrs[BATADV_ATTR_BONDING_ENABLED]) { + attr = info->attrs[BATADV_ATTR_BONDING_ENABLED]; + + atomic_set(&bat_priv->bonding, !!nla_get_u8(attr)); + } + +#ifdef CONFIG_BATMAN_ADV_BLA + if (info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]) { + attr = info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]; + + atomic_set(&bat_priv->bridge_loop_avoidance, + !!nla_get_u8(attr)); + batadv_bla_status_update(bat_priv->soft_iface); + } +#endif /* CONFIG_BATMAN_ADV_BLA */ + +#ifdef CONFIG_BATMAN_ADV_DAT + if (info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]) { + attr = info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]; + + atomic_set(&bat_priv->distributed_arp_table, + !!nla_get_u8(attr)); + batadv_dat_status_update(bat_priv->soft_iface); + } +#endif /* CONFIG_BATMAN_ADV_DAT */ + + if (info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]) { + attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]; + + atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr)); + batadv_update_min_mtu(bat_priv->soft_iface); + } + + if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) { + attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]; + + atomic_set(&bat_priv->gw.bandwidth_down, nla_get_u32(attr)); + batadv_gw_tvlv_container_update(bat_priv); + } + + if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]) { + attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]; + + atomic_set(&bat_priv->gw.bandwidth_up, nla_get_u32(attr)); + batadv_gw_tvlv_container_update(bat_priv); + } + + if (info->attrs[BATADV_ATTR_GW_MODE]) { + u8 gw_mode; + + attr = info->attrs[BATADV_ATTR_GW_MODE]; + gw_mode = nla_get_u8(attr); + + if (gw_mode <= BATADV_GW_MODE_SERVER) { + /* Invoking batadv_gw_reselect() is not enough to really + * de-select the current GW. It will only instruct the + * gateway client code to perform a re-election the next + * time that this is needed. + * + * When gw client mode is being switched off the current + * GW must be de-selected explicitly otherwise no GW_ADD + * uevent is thrown on client mode re-activation. This + * is operation is performed in + * batadv_gw_check_client_stop(). + */ + batadv_gw_reselect(bat_priv); + + /* always call batadv_gw_check_client_stop() before + * changing the gateway state + */ + batadv_gw_check_client_stop(bat_priv); + atomic_set(&bat_priv->gw.mode, gw_mode); + batadv_gw_tvlv_container_update(bat_priv); + } + } + + if (info->attrs[BATADV_ATTR_GW_SEL_CLASS] && + bat_priv->algo_ops->gw.get_best_gw_node && + bat_priv->algo_ops->gw.is_eligible) { + /* setting the GW selection class is allowed only if the routing + * algorithm in use implements the GW API + */ + + u32 sel_class_max = 0xffffffffu; + u32 sel_class; + + attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS]; + sel_class = nla_get_u32(attr); + + if (!bat_priv->algo_ops->gw.store_sel_class) + sel_class_max = BATADV_TQ_MAX_VALUE; + + if (sel_class >= 1 && sel_class <= sel_class_max) { + atomic_set(&bat_priv->gw.sel_class, sel_class); + batadv_gw_reselect(bat_priv); + } + } + + if (info->attrs[BATADV_ATTR_HOP_PENALTY]) { + attr = info->attrs[BATADV_ATTR_HOP_PENALTY]; + + atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr)); + } + +#ifdef CONFIG_BATMAN_ADV_DEBUG + if (info->attrs[BATADV_ATTR_LOG_LEVEL]) { + attr = info->attrs[BATADV_ATTR_LOG_LEVEL]; + + atomic_set(&bat_priv->log_level, + nla_get_u32(attr) & BATADV_DBG_ALL); + } +#endif /* CONFIG_BATMAN_ADV_DEBUG */ + +#ifdef CONFIG_BATMAN_ADV_MCAST + if (info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]) { + attr = info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]; + + atomic_set(&bat_priv->multicast_mode, !nla_get_u8(attr)); + } +#endif /* CONFIG_BATMAN_ADV_MCAST */ + +#ifdef CONFIG_BATMAN_ADV_NC + if (info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]) { + attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]; + + atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr)); + batadv_nc_status_update(bat_priv->soft_iface); + } +#endif /* CONFIG_BATMAN_ADV_NC */ + + if (info->attrs[BATADV_ATTR_ORIG_INTERVAL]) { + u32 orig_interval; + + attr = info->attrs[BATADV_ATTR_ORIG_INTERVAL]; + orig_interval = nla_get_u32(attr); + + orig_interval = min_t(u32, orig_interval, INT_MAX); + orig_interval = max_t(u32, orig_interval, 2 * BATADV_JITTER); + + atomic_set(&bat_priv->orig_interval, orig_interval); + } + + batadv_netlink_notify_mesh(bat_priv); + + return 0; } /** @@ -329,40 +709,24 @@ err_genlmsg: static int batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) { - struct net *net = genl_info_net(info); - struct net_device *soft_iface; - struct batadv_priv *bat_priv; + struct batadv_priv *bat_priv = info->user_ptr[0]; struct sk_buff *msg = NULL; u32 test_length; void *msg_head; - int ifindex; u32 cookie; u8 *dst; int ret; - if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) - return -EINVAL; - if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS]) return -EINVAL; if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]) return -EINVAL; - ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - if (!ifindex) - return -EINVAL; - dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]); test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]); - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { - ret = -ENODEV; - goto out; - } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; @@ -377,15 +741,11 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) goto out; } - bat_priv = netdev_priv(soft_iface); batadv_tp_start(bat_priv, dst, test_length, &cookie); ret = batadv_netlink_tp_meter_put(msg, cookie); out: - if (soft_iface) - dev_put(soft_iface); - if (ret) { if (msg) nlmsg_free(msg); @@ -406,65 +766,53 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) static int batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info) { - struct net *net = genl_info_net(info); - struct net_device *soft_iface; - struct batadv_priv *bat_priv; - int ifindex; + struct batadv_priv *bat_priv = info->user_ptr[0]; u8 *dst; int ret = 0; - if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) - return -EINVAL; - if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS]) return -EINVAL; - ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - if (!ifindex) - return -EINVAL; - dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]); - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { - ret = -ENODEV; - goto out; - } - - bat_priv = netdev_priv(soft_iface); batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL); -out: - if (soft_iface) - dev_put(soft_iface); - return ret; } /** - * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message + * batadv_netlink_hardif_fill() - Fill message with hardif attributes * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information + * @hard_iface: hard interface which was modified + * @cmd: type of message to generate * @portid: Port making netlink request + * @seq: sequence number for message + * @flags: Additional flags for message * @cb: Control block containing additional options - * @hard_iface: Hard interface to dump * - * Return: error code, or 0 on success + * Return: 0 on success or negative error number in case of failure */ -static int -batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, - struct netlink_callback *cb, - struct batadv_hard_iface *hard_iface) +static int batadv_netlink_hardif_fill(struct sk_buff *msg, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + enum batadv_nl_commands cmd, + u32 portid, u32 seq, int flags, + struct netlink_callback *cb) { struct net_device *net_dev = hard_iface->net_dev; void *hdr; - hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq, - &batadv_netlink_family, NLM_F_MULTI, - BATADV_CMD_GET_HARDIFS); + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); if (!hdr) - return -EMSGSIZE; + return -ENOBUFS; + + if (cb) + genl_dump_check_consistent(cb, hdr); - genl_dump_check_consistent(cb, hdr); + if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, + bat_priv->soft_iface->ifindex)) + goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, net_dev->ifindex) || @@ -479,27 +827,137 @@ batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, goto nla_put_failure; } +#ifdef CONFIG_BATMAN_ADV_BATMAN_V + if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL, + atomic_read(&hard_iface->bat_v.elp_interval))) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT_OVERRIDE, + atomic_read(&hard_iface->bat_v.throughput_override))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_BATMAN_V */ + genlmsg_end(msg, hdr); return 0; - nla_put_failure: +nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** - * batadv_netlink_dump_hardifs() - Dump all hard interface into a messages + * batadv_netlink_notify_hardif() - send hardif attributes to listener + * @bat_priv: the bat priv with all the soft interface information + * @hard_iface: hard interface which was modified + * + * Return: 0 on success, < 0 on error + */ +int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface, + BATADV_CMD_SET_HARDIF, 0, 0, 0, NULL); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); + + return 0; +} + +/** + * batadv_netlink_get_hardif() - Get hardif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_get_hardif(struct sk_buff *skb, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface, + BATADV_CMD_GET_HARDIF, + info->snd_portid, info->snd_seq, 0, + NULL); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + ret = genlmsg_reply(msg, info); + + return ret; +} + +/** + * batadv_netlink_set_hardif() - Set hardif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_hardif(struct sk_buff *skb, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + +#ifdef CONFIG_BATMAN_ADV_BATMAN_V + struct nlattr *attr; + + if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) { + attr = info->attrs[BATADV_ATTR_ELP_INTERVAL]; + + atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr)); + } + + if (info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]) { + attr = info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]; + + atomic_set(&hard_iface->bat_v.throughput_override, + nla_get_u32(attr)); + } +#endif /* CONFIG_BATMAN_ADV_BATMAN_V */ + + batadv_netlink_notify_hardif(bat_priv, hard_iface); + + return 0; +} + +/** + * batadv_netlink_dump_hardif() - Dump all hard interface into a messages * @msg: Netlink message to dump into * @cb: Parameters from query * * Return: error code, or length of reply message on success */ static int -batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) +batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct net_device *soft_iface; struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; int ifindex; int portid = NETLINK_CB(cb->skb).portid; int skip = cb->args[0]; @@ -519,6 +977,8 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) return -ENODEV; } + bat_priv = netdev_priv(soft_iface); + rtnl_lock(); cb->seq = batadv_hardif_generation << 1 | 1; @@ -529,8 +989,10 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) if (i++ < skip) continue; - if (batadv_netlink_dump_hardif_entry(msg, portid, cb, - hard_iface)) { + if (batadv_netlink_hardif_fill(msg, bat_priv, hard_iface, + BATADV_CMD_GET_HARDIF, + portid, cb->nlh->nlmsg_seq, + NLM_F_MULTI, cb)) { i--; break; } @@ -545,24 +1007,361 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) return msg->len; } +/** + * batadv_netlink_vlan_fill() - Fill message with vlan attributes + * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information + * @vlan: vlan which was modified + * @cmd: type of message to generate + * @portid: Port making netlink request + * @seq: sequence number for message + * @flags: Additional flags for message + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_vlan_fill(struct sk_buff *msg, + struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan, + enum batadv_nl_commands cmd, + u32 portid, u32 seq, int flags) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); + if (!hdr) + return -ENOBUFS; + + if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, + bat_priv->soft_iface->ifindex)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK)) + goto nla_put_failure; + + if (nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED, + !!atomic_read(&vlan->ap_isolation))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_netlink_notify_vlan() - send vlan attributes to listener + * @bat_priv: the bat priv with all the soft interface information + * @vlan: vlan which was modified + * + * Return: 0 on success, < 0 on error + */ +int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, + BATADV_CMD_SET_VLAN, 0, 0, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); + + return 0; +} + +/** + * batadv_netlink_get_vlan() - Get vlan attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_softif_vlan *vlan = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, BATADV_CMD_GET_VLAN, + info->snd_portid, info->snd_seq, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + ret = genlmsg_reply(msg, info); + + return ret; +} + +/** + * batadv_netlink_set_vlan() - Get vlan attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_softif_vlan *vlan = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct nlattr *attr; + + if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) { + attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]; + + atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr)); + } + + batadv_netlink_notify_vlan(bat_priv, vlan); + + return 0; +} + +/** + * batadv_get_softif_from_info() - Retrieve soft interface from genl attributes + * @net: the applicable net namespace + * @info: receiver information + * + * Return: Pointer to soft interface (with increased refcnt) on success, error + * pointer on error + */ +static struct net_device * +batadv_get_softif_from_info(struct net *net, struct genl_info *info) +{ + struct net_device *soft_iface; + int ifindex; + + if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) + return ERR_PTR(-EINVAL); + + ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface) + return ERR_PTR(-ENODEV); + + if (!batadv_softif_is_valid(soft_iface)) + goto err_put_softif; + + return soft_iface; + +err_put_softif: + dev_put(soft_iface); + + return ERR_PTR(-EINVAL); +} + +/** + * batadv_get_hardif_from_info() - Retrieve hardif from genl attributes + * @bat_priv: the bat priv with all the soft interface information + * @net: the applicable net namespace + * @info: receiver information + * + * Return: Pointer to hard interface (with increased refcnt) on success, error + * pointer on error + */ +static struct batadv_hard_iface * +batadv_get_hardif_from_info(struct batadv_priv *bat_priv, struct net *net, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface; + struct net_device *hard_dev; + unsigned int hardif_index; + + if (!info->attrs[BATADV_ATTR_HARD_IFINDEX]) + return ERR_PTR(-EINVAL); + + hardif_index = nla_get_u32(info->attrs[BATADV_ATTR_HARD_IFINDEX]); + + hard_dev = dev_get_by_index(net, hardif_index); + if (!hard_dev) + return ERR_PTR(-ENODEV); + + hard_iface = batadv_hardif_get_by_netdev(hard_dev); + if (!hard_iface) + goto err_put_harddev; + + if (hard_iface->soft_iface != bat_priv->soft_iface) + goto err_put_hardif; + + /* hard_dev is referenced by hard_iface and not needed here */ + dev_put(hard_dev); + + return hard_iface; + +err_put_hardif: + batadv_hardif_put(hard_iface); +err_put_harddev: + dev_put(hard_dev); + + return ERR_PTR(-EINVAL); +} + +/** + * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes + * @bat_priv: the bat priv with all the soft interface information + * @net: the applicable net namespace + * @info: receiver information + * + * Return: Pointer to vlan on success (with increased refcnt), error pointer + * on error + */ +static struct batadv_softif_vlan * +batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net, + struct genl_info *info) +{ + struct batadv_softif_vlan *vlan; + u16 vid; + + if (!info->attrs[BATADV_ATTR_VLANID]) + return ERR_PTR(-EINVAL); + + vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]); + + vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); + if (!vlan) + return ERR_PTR(-ENOENT); + + return vlan; +} + +/** + * batadv_pre_doit() - Prepare batman-adv genl doit request + * @ops: requested netlink operation + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv = NULL; + struct batadv_softif_vlan *vlan; + struct net_device *soft_iface; + u8 user_ptr1_flags; + u8 mesh_dep_flags; + int ret; + + user_ptr1_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN; + if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1)) + return -EINVAL; + + mesh_dep_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN; + if (WARN_ON((ops->internal_flags & mesh_dep_flags) && + (~ops->internal_flags & BATADV_FLAG_NEED_MESH))) + return -EINVAL; + + if (ops->internal_flags & BATADV_FLAG_NEED_MESH) { + soft_iface = batadv_get_softif_from_info(net, info); + if (IS_ERR(soft_iface)) + return PTR_ERR(soft_iface); + + bat_priv = netdev_priv(soft_iface); + info->user_ptr[0] = bat_priv; + } + + if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF) { + hard_iface = batadv_get_hardif_from_info(bat_priv, net, info); + if (IS_ERR(hard_iface)) { + ret = PTR_ERR(hard_iface); + goto err_put_softif; + } + + info->user_ptr[1] = hard_iface; + } + + if (ops->internal_flags & BATADV_FLAG_NEED_VLAN) { + vlan = batadv_get_vlan_from_info(bat_priv, net, info); + if (IS_ERR(vlan)) { + ret = PTR_ERR(vlan); + goto err_put_softif; + } + + info->user_ptr[1] = vlan; + } + + return 0; + +err_put_softif: + if (bat_priv) + dev_put(bat_priv->soft_iface); + + return ret; +} + +/** + * batadv_post_doit() - End batman-adv genl doit request + * @ops: requested netlink operation + * @skb: Netlink message with request data + * @info: receiver information + */ +static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface; + struct batadv_softif_vlan *vlan; + struct batadv_priv *bat_priv; + + if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF && + info->user_ptr[1]) { + hard_iface = info->user_ptr[1]; + + batadv_hardif_put(hard_iface); + } + + if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) { + vlan = info->user_ptr[1]; + batadv_softif_vlan_put(vlan); + } + + if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) { + bat_priv = info->user_ptr[0]; + dev_put(bat_priv->soft_iface); + } +} + static const struct genl_ops batadv_netlink_ops[] = { { - .cmd = BATADV_CMD_GET_MESH_INFO, - .flags = GENL_ADMIN_PERM, + .cmd = BATADV_CMD_GET_MESH, + /* can be retrieved by unprivileged users */ .policy = batadv_netlink_policy, - .doit = batadv_netlink_get_mesh_info, + .doit = batadv_netlink_get_mesh, + .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER, .flags = GENL_ADMIN_PERM, .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_start, + .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, .flags = GENL_ADMIN_PERM, .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_cancel, + .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, @@ -571,10 +1370,13 @@ static const struct genl_ops batadv_netlink_ops[] = { .dumpit = batadv_algo_dump, }, { - .cmd = BATADV_CMD_GET_HARDIFS, - .flags = GENL_ADMIN_PERM, + .cmd = BATADV_CMD_GET_HARDIF, + /* can be retrieved by unprivileged users */ .policy = batadv_netlink_policy, - .dumpit = batadv_netlink_dump_hardifs, + .dumpit = batadv_netlink_dump_hardif, + .doit = batadv_netlink_get_hardif, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_HARDIF, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, @@ -630,7 +1432,37 @@ static const struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .dumpit = batadv_mcast_flags_dump, }, - + { + .cmd = BATADV_CMD_SET_MESH, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_set_mesh, + .internal_flags = BATADV_FLAG_NEED_MESH, + }, + { + .cmd = BATADV_CMD_SET_HARDIF, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_set_hardif, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_HARDIF, + }, + { + .cmd = BATADV_CMD_GET_VLAN, + /* can be retrieved by unprivileged users */ + .policy = batadv_netlink_policy, + .doit = batadv_netlink_get_vlan, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_VLAN, + }, + { + .cmd = BATADV_CMD_SET_VLAN, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_set_vlan, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_VLAN, + }, }; struct genl_family batadv_netlink_family __ro_after_init = { @@ -639,6 +1471,8 @@ struct genl_family batadv_netlink_family __ro_after_init = { .version = 1, .maxattr = BATADV_ATTR_MAX, .netnsok = true, + .pre_doit = batadv_pre_doit, + .post_doit = batadv_post_doit, .module = THIS_MODULE, .ops = batadv_netlink_ops, .n_ops = ARRAY_SIZE(batadv_netlink_ops), diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 216484b8b82d..7273368544fc 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -34,6 +34,12 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, u8 result, u32 test_time, u64 total_bytes, u32 cookie); +int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv); +int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface); +int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan); + extern struct genl_family batadv_netlink_family; #endif /* _NET_BATMAN_ADV_NETLINK_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index b14fb3462af7..2e367230376b 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -50,13 +50,13 @@ #include <linux/string.h> #include <linux/types.h> #include <uapi/linux/batadv_packet.h> +#include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" #include "debugfs.h" #include "distributed-arp-table.h" #include "gateway_client.h" -#include "gateway_common.h" #include "hard-interface.h" #include "multicast.h" #include "network-coding.h" @@ -222,12 +222,16 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); + + skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); proto = ethhdr->h_proto; switch (ntohs(proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, sizeof(*vhdr))) + goto dropped; vhdr = vlan_eth_hdr(skb); proto = vhdr->h_vlan_encapsulated_proto; diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index e1b816262c53..0b4b3fb778a6 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -40,6 +40,7 @@ #include <linux/stringify.h> #include <linux/workqueue.h> #include <uapi/linux/batadv_packet.h> +#include <uapi/linux/batman_adv.h> #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" @@ -47,6 +48,7 @@ #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "netlink.h" #include "network-coding.h" #include "soft-interface.h" @@ -153,9 +155,14 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ + ssize_t length; \ + \ + length = __batadv_store_bool_attr(buff, count, _post_func, attr,\ + &bat_priv->_name, net_dev); \ \ - return __batadv_store_bool_attr(buff, count, _post_func, attr, \ - &bat_priv->_name, net_dev); \ + batadv_netlink_notify_mesh(bat_priv); \ + \ + return length; \ } #define BATADV_ATTR_SIF_SHOW_BOOL(_name) \ @@ -185,11 +192,16 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ + ssize_t length; \ \ - return __batadv_store_uint_attr(buff, count, _min, _max, \ - _post_func, attr, \ - &bat_priv->_var, net_dev, \ - NULL); \ + length = __batadv_store_uint_attr(buff, count, _min, _max, \ + _post_func, attr, \ + &bat_priv->_var, net_dev, \ + NULL); \ + \ + batadv_netlink_notify_mesh(bat_priv); \ + \ + return length; \ } #define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \ @@ -222,6 +234,11 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \ attr, &vlan->_name, \ bat_priv->soft_iface); \ \ + if (vlan->vid) \ + batadv_netlink_notify_vlan(bat_priv, vlan); \ + else \ + batadv_netlink_notify_mesh(bat_priv); \ + \ batadv_softif_vlan_put(vlan); \ return res; \ } @@ -255,6 +272,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_hard_iface *hard_iface; \ + struct batadv_priv *bat_priv; \ ssize_t length; \ \ hard_iface = batadv_hardif_get_by_netdev(net_dev); \ @@ -267,6 +285,11 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ hard_iface->soft_iface, \ net_dev); \ \ + if (hard_iface->soft_iface) { \ + bat_priv = netdev_priv(hard_iface->soft_iface); \ + batadv_netlink_notify_hardif(bat_priv, hard_iface); \ + } \ + \ batadv_hardif_put(hard_iface); \ return length; \ } @@ -536,6 +559,9 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, batadv_gw_check_client_stop(bat_priv); atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp); batadv_gw_tvlv_container_update(bat_priv); + + batadv_netlink_notify_mesh(bat_priv); + return count; } @@ -562,6 +588,7 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, size_t count) { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + ssize_t length; /* setting the GW selection class is allowed only if the routing * algorithm in use implements the GW API @@ -577,10 +604,14 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff, count); - return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, - batadv_post_gw_reselect, attr, - &bat_priv->gw.sel_class, - bat_priv->soft_iface, NULL); + length = __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, + batadv_post_gw_reselect, attr, + &bat_priv->gw.sel_class, + bat_priv->soft_iface, NULL); + + batadv_netlink_notify_mesh(bat_priv); + + return length; } static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, @@ -600,12 +631,18 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + ssize_t length; if (buff[count - 1] == '\n') buff[count - 1] = '\0'; - return batadv_gw_bandwidth_set(net_dev, buff, count); + length = batadv_gw_bandwidth_set(net_dev, buff, count); + + batadv_netlink_notify_mesh(bat_priv); + + return length; } /** @@ -673,6 +710,8 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj, "New skb mark for extended isolation: %#.8x/%#.8x\n", bat_priv->isolation_mark, bat_priv->isolation_mark_mask); + batadv_netlink_notify_mesh(bat_priv); + return count; } @@ -1077,6 +1116,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct batadv_hard_iface *hard_iface; u32 tp_override; @@ -1107,6 +1147,8 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, atomic_set(&hard_iface->bat_v.throughput_override, tp_override); + batadv_netlink_notify_hardif(bat_priv, hard_iface); + out: batadv_hardif_put(hard_iface); return count; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 1fb885a33c66..4a048fd1cbea 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1018,8 +1018,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, if (!nsrcs) return -EINVAL; - grec_len = sizeof(*grec) + - sizeof(struct in6_addr) * ntohs(*nsrcs); + grec_len = struct_size(grec, grec_src, ntohs(*nsrcs)); if (!ipv6_mc_may_pull(skb, len + grec_len)) return -EINVAL; diff --git a/net/core/devlink.c b/net/core/devlink.c index 7fbdba547d4f..1d7502a5a651 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -116,6 +116,8 @@ static struct devlink *devlink_get_from_attrs(struct net *net, busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); + lockdep_assert_held(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { if (strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0 && @@ -2701,11 +2703,6 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME, .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE, }, - { - .id = DEVLINK_PARAM_GENERIC_ID_WOL, - .name = DEVLINK_PARAM_GENERIC_WOL_NAME, - .type = DEVLINK_PARAM_GENERIC_WOL_TYPE, - }, }; static int devlink_param_generic_verify(const struct devlink_param *param) @@ -2858,6 +2855,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, u32 portid, u32 seq, int flags) { union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1]; + bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {}; const struct devlink_param *param = param_item->param; struct devlink_param_gset_ctx ctx; struct nlattr *param_values_list; @@ -2876,12 +2874,15 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, return -EOPNOTSUPP; param_value[i] = param_item->driverinit_value; } else { + if (!param_item->published) + continue; ctx.cmode = i; err = devlink_param_get(devlink, param, &ctx); if (err) return err; param_value[i] = ctx.val; } + param_value_set[i] = true; } hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); @@ -2916,7 +2917,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, goto param_nest_cancel; for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) { - if (!devlink_param_cmode_is_supported(param, i)) + if (!param_value_set[i]) continue; err = devlink_nl_param_value_fill_one(msg, param->type, i, param_value[i]); @@ -3625,44 +3626,56 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { u64 ret_offset, start_offset, end_offset = 0; - struct nlattr *attrs[DEVLINK_ATTR_MAX + 1]; const struct genl_ops *ops = cb->data; struct devlink_region *region; struct nlattr *chunks_attr; const char *region_name; struct devlink *devlink; + struct nlattr **attrs; bool dump = true; void *hdr; int err; start_offset = *((u64 *)&cb->args[0]); + attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize, attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack); if (err) - goto out; + goto out_free; + mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); - if (IS_ERR(devlink)) - goto out; + if (IS_ERR(devlink)) { + err = PTR_ERR(devlink); + goto out_dev; + } - mutex_lock(&devlink_mutex); mutex_lock(&devlink->lock); if (!attrs[DEVLINK_ATTR_REGION_NAME] || - !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) + !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) { + err = -EINVAL; goto out_unlock; + } region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]); region = devlink_region_get_by_name(devlink, region_name); - if (!region) + if (!region) { + err = -EINVAL; goto out_unlock; + } hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, DEVLINK_CMD_REGION_READ); - if (!hdr) + if (!hdr) { + err = -EMSGSIZE; goto out_unlock; + } err = devlink_nl_put_handle(skb, devlink); if (err) @@ -3673,8 +3686,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS); - if (!chunks_attr) + if (!chunks_attr) { + err = -EMSGSIZE; goto nla_put_failure; + } if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] && attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) { @@ -3697,8 +3712,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; /* Check if there was any progress done to prevent infinite loop */ - if (ret_offset == start_offset) + if (ret_offset == start_offset) { + err = -EINVAL; goto nla_put_failure; + } *((u64 *)&cb->args[0]) = ret_offset; @@ -3706,6 +3723,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, genlmsg_end(skb, hdr); mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); + kfree(attrs); return skb->len; @@ -3713,9 +3731,11 @@ nla_put_failure: genlmsg_cancel(skb, hdr); out_unlock: mutex_unlock(&devlink->lock); +out_dev: mutex_unlock(&devlink_mutex); -out: - return 0; +out_free: + kfree(attrs); + return err; } struct devlink_info_req { @@ -4351,11 +4371,8 @@ static int devlink_fmsg_snd(struct devlink_fmsg *fmsg, err = -EMSGSIZE; goto nla_put_failure; } - err = genlmsg_reply(skb, info); - if (err) - return err; - return 0; + return genlmsg_reply(skb, info); nla_put_failure: nlmsg_free(skb); @@ -5237,6 +5254,14 @@ EXPORT_SYMBOL_GPL(devlink_unregister); */ void devlink_free(struct devlink *devlink) { + WARN_ON(!list_empty(&devlink->reporter_list)); + WARN_ON(!list_empty(&devlink->region_list)); + WARN_ON(!list_empty(&devlink->param_list)); + WARN_ON(!list_empty(&devlink->resource_list)); + WARN_ON(!list_empty(&devlink->dpipe_table_list)); + WARN_ON(!list_empty(&devlink->sb_list)); + WARN_ON(!list_empty(&devlink->port_list)); + kfree(devlink); } EXPORT_SYMBOL_GPL(devlink_free); @@ -5887,6 +5912,48 @@ void devlink_params_unregister(struct devlink *devlink, EXPORT_SYMBOL_GPL(devlink_params_unregister); /** + * devlink_params_publish - publish configuration parameters + * + * @devlink: devlink + * + * Publish previously registered configuration parameters. + */ +void devlink_params_publish(struct devlink *devlink) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (param_item->published) + continue; + param_item->published = true; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_NEW); + } +} +EXPORT_SYMBOL_GPL(devlink_params_publish); + +/** + * devlink_params_unpublish - unpublish configuration parameters + * + * @devlink: devlink + * + * Unpublish previously registered configuration parameters. + */ +void devlink_params_unpublish(struct devlink *devlink) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (!param_item->published) + continue; + param_item->published = false; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_DEL); + } +} +EXPORT_SYMBOL_GPL(devlink_params_unpublish); + +/** * devlink_port_params_register - register port configuration parameters * * @devlink_port: devlink port @@ -6339,7 +6406,7 @@ void devlink_compat_running_version(struct net_device *dev, list_for_each_entry(devlink, &devlink_list, list) { mutex_lock(&devlink->lock); list_for_each_entry(devlink_port, &devlink->port_list, list) { - if (devlink_port->type == DEVLINK_PORT_TYPE_ETH || + if (devlink_port->type == DEVLINK_PORT_TYPE_ETH && devlink_port->type_dev == dev) { __devlink_compat_running_version(devlink, buf, len); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0fbf39239b29..d2c47cdf25da 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -3020,17 +3020,15 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; - if (ext_m_spec->h_dest) { - memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest, - ETH_ALEN); - memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest, - ETH_ALEN); - - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); - match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] = - offsetof(struct ethtool_rx_flow_key, eth_addrs); - } + memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest, + ETH_ALEN); + memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest, + ETH_ALEN); + + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] = + offsetof(struct ethtool_rx_flow_key, eth_addrs); } act = &flow->rule->action.entries[0]; diff --git a/net/core/filter.c b/net/core/filter.c index a78deb2656e1..b584cb42a803 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4127,10 +4127,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 43a932cb609b..5b2252c6d49b 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -136,17 +136,19 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, if (!(pool->p.flags & PP_FLAG_DMA_MAP)) goto skip_dma_map; - /* Setup DMA mapping: use page->private for DMA-addr + /* Setup DMA mapping: use 'struct page' area for storing DMA-addr + * since dma_addr_t can be either 32 or 64 bits and does not always fit + * into page private data (i.e 32bit cpu with 64bit DMA caps) * This mapping is kept for lifetime of page, until leaving pool. */ - dma = dma_map_page(pool->p.dev, page, 0, - (PAGE_SIZE << pool->p.order), - pool->p.dma_dir); + dma = dma_map_page_attrs(pool->p.dev, page, 0, + (PAGE_SIZE << pool->p.order), + pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(pool->p.dev, dma)) { put_page(page); return NULL; } - set_page_private(page, dma); /* page->private = dma; */ + page->dma_addr = dma; skip_dma_map: /* When page just alloc'ed is should/must have refcnt 1. */ @@ -175,13 +177,17 @@ EXPORT_SYMBOL(page_pool_alloc_pages); static void __page_pool_clean_page(struct page_pool *pool, struct page *page) { + dma_addr_t dma; + if (!(pool->p.flags & PP_FLAG_DMA_MAP)) return; + dma = page->dma_addr; /* DMA unmap */ - dma_unmap_page(pool->p.dev, page_private(page), - PAGE_SIZE << pool->p.order, pool->p.dma_dir); - set_page_private(page, 0); + dma_unmap_page_attrs(pool->p.dev, dma, + PAGE_SIZE << pool->p.order, pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC); + page->dma_addr = 0; } /* Return a page to the page allocator, cleaning up our state */ diff --git a/net/core/skmsg.c b/net/core/skmsg.c index e76ed8df9f13..ae6f06e45737 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -554,8 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc) struct sk_psock *psock = container_of(gc, struct sk_psock, gc); /* No sk_callback_lock since already detached. */ - if (psock->parser.enabled) - strp_done(&psock->parser.strp); + strp_done(&psock->parser.strp); cancel_work_sync(&psock->work); diff --git a/net/core/sock.c b/net/core/sock.c index 71ded4d8025c..f4b8b78535f8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -785,6 +785,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, */ val = min_t(u32, val, sysctl_wmem_max); set_sndbuf: + /* Ensure val * 2 fits into an int, to prevent max_t() + * from treating it as a negative value. + */ + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); /* Wake up sending tasks if we upped the value. */ @@ -796,6 +800,12 @@ set_sndbuf: ret = -EPERM; break; } + + /* No negative values (to prevent underflow, as val will be + * multiplied by 2). + */ + if (val < 0) + val = 0; goto set_sndbuf; case SO_RCVBUF: @@ -806,6 +816,10 @@ set_sndbuf: */ val = min_t(u32, val, sysctl_rmem_max); set_rcvbuf: + /* Ensure val * 2 fits into an int, to prevent max_t() + * from treating it as a negative value. + */ + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; /* * We double it on the way in to account for @@ -830,6 +844,12 @@ set_rcvbuf: ret = -EPERM; break; } + + /* No negative values (to prevent underflow, as val will be + * multiplied by 2). + */ + if (val < 0) + val = 0; goto set_rcvbuf; case SO_KEEPALIVE: @@ -2475,7 +2495,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) } if (sk_has_memory_pressure(sk)) { - int alloc; + u64 alloc; if (!sk_under_memory_pressure(sk)) return 1; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 6eb837a47b5c..baaaeb2b2c42 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); } @@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); } diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a1917025e155..8c431e0f3627 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -767,11 +767,10 @@ static int dsa_switch_probe(struct dsa_switch *ds) struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) { - size_t size = sizeof(struct dsa_switch) + n * sizeof(struct dsa_port); struct dsa_switch *ds; int i; - ds = devm_kzalloc(dev, size, GFP_KERNEL); + ds = devm_kzalloc(dev, struct_size(ds, ports, n), GFP_KERNEL); if (!ds) return NULL; diff --git a/net/dsa/master.c b/net/dsa/master.c index 79e97d2f2d9b..c58f33931be1 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -248,6 +248,8 @@ static void dsa_master_reset_mtu(struct net_device *dev) rtnl_unlock(); } +static struct lock_class_key dsa_master_addr_list_lock_key; + int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { int ret; @@ -261,6 +263,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) wmb(); dev->dsa_ptr = cpu_dp; + lockdep_set_class(&dev->addr_list_lock, + &dsa_master_addr_list_lock_key); ret = dsa_master_ethtool_setup(dev); if (ret) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 70395a0ae52e..2e5e7c04821b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -140,11 +140,14 @@ static int dsa_slave_close(struct net_device *dev) static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { struct net_device *master = dsa_slave_to_master(dev); - - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, + dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, + dev->flags & IFF_PROMISC ? 1 : -1); + } } static void dsa_slave_set_rx_mode(struct net_device *dev) @@ -644,7 +647,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev && !dp->pl) + if (!dev->phydev || !dp->pl) return -ENODEV; if (!ds->ops->set_mac_eee) @@ -664,7 +667,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev && !dp->pl) + if (!dev->phydev || !dp->pl) return -ENODEV; if (!ds->ops->get_mac_eee) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index da71b9e2af52..927e9c86f745 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -67,6 +67,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, pskb_trim_rcsum(skb, skb->len - len); + skb->offload_fwd_mark = true; + return skb; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1a4e9ff02762..5731670c560b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -108,6 +108,7 @@ static size_t inet_sk_attr_size(struct sock *sk, + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) @@ -287,12 +288,19 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) { + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { u32 classid = 0; #ifdef CONFIG_SOCK_CGROUP_DATA classid = sock_cgroup_classid(&sk->sk_cgrp_data); #endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) goto errout; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d757b9642d0d..be778599bfed 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -216,6 +216,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; + p->n_redirects = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first * calculation of tokens is at its maximum. */ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d1cef66820d3..ccee9411dae1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1381,12 +1381,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((t->erspan_ver == 1 || t->erspan_ver == 2) && + !t->collect_md) + o_flags |= TUNNEL_KEY; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index e26165af45cb..4b07eb8a9b18 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -215,6 +215,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, /* Change outer to look like the reply to an incoming packet */ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + target.dst.protonum = IPPROTO_ICMP; if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c index a0aa13bcabda..0a8a60c1bf9a 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c @@ -105,6 +105,8 @@ static void fast_csum(struct snmp_ctx *ctx, unsigned char offset) int snmp_version(void *context, size_t hdrlen, unsigned char tag, const void *data, size_t datalen) { + if (datalen != 1) + return -EINVAL; if (*(unsigned char *)data > 1) return -ENOTSUPP; return 1; @@ -114,8 +116,11 @@ int snmp_helper(void *context, size_t hdrlen, unsigned char tag, const void *data, size_t datalen) { struct snmp_ctx *ctx = (struct snmp_ctx *)context; - __be32 *pdata = (__be32 *)data; + __be32 *pdata; + if (datalen != 4) + return -EINVAL; + pdata = (__be32 *)data; if (*pdata == ctx->from) { pr_debug("%s: %pI4 to %pI4\n", __func__, (void *)&ctx->from, (void *)&ctx->to); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 16259ea9df54..ecc12a768191 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -887,13 +887,15 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { peer->rate_tokens = 0; + peer->n_redirects = 0; + } /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. */ - if (peer->rate_tokens >= ip_rt_redirect_number) { + if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; goto out_put_peer; } @@ -910,6 +912,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->rate_tokens; + ++peer->n_redirects; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && peer->rate_tokens == ip_rt_redirect_number) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dcb1d434f7da..da5a21050ba9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1200,7 +1200,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa == ifp) continue; - if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, + if (ifa->prefix_len != ifp->prefix_len || + !ipv6_prefix_equal(&ifa->addr, &ifp->addr, ifp->prefix_len)) continue; if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e081e69d534e..65a4f96dc462 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2098,12 +2098,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((p->erspan_ver == 1 || p->erspan_ver == 2) && + !p->collect_md) + o_flags |= TUNNEL_KEY; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8b075f0bc351..6d0b1f3e927b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -23,9 +23,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct sock *sk = sk_to_full_sk(skb->sk); unsigned int hh_len; struct dst_entry *dst; + int strict = (ipv6_addr_type(&iph->daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : - rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0, + strict ? skb_dst(skb)->dev->ifindex : 0, .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 9c914db44bec..f0ec31933c15 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -226,6 +226,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, } nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + target.dst.protonum = IPPROTO_ICMPV6; if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc066fdf7e46..87a0561136dd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2277,14 +2277,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { - bool from_set; - - rcu_read_lock(); - from_set = !!rcu_dereference(rt->from); - rcu_read_unlock(); - return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || from_set); + (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 8d0ba757a46c..9b2f272ca164 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -221,9 +221,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(msg, hdr); - genlmsg_reply(msg, info); - - return 0; + return genlmsg_reply(msg, info); nla_put_failure: rcu_read_unlock(); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 8181ee7e1e27..ee5403cbe655 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } else { ip6_flow_hdr(hdr, 0, flowlabel); hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); } hdr->nexthdr = NEXTHDR_ROUTING; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1e03305c0549..e8a1dabef803 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -546,7 +546,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info) } err = 0; - if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) + if (__in6_dev_get(skb->dev) && + !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) goto out; if (t->parms.iph.daddr == 0) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 26f1d435696a..fed6becc5daf 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -83,8 +83,7 @@ #define L2TP_SLFLAG_S 0x40000000 #define L2TP_SL_SEQ_MASK 0x00ffffff -#define L2TP_HDR_SIZE_SEQ 10 -#define L2TP_HDR_SIZE_NOSEQ 6 +#define L2TP_HDR_SIZE_MAX 14 /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 @@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr)); /* Short packet? */ - if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { l2tp_info(tunnel, L2TP_MSG_DATA, "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); @@ -884,6 +883,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) goto error; } + if (tunnel->version == L2TP_HDR_VER_3 && + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto error; + l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9c9afe94d389..b2ce90260c35 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) } #endif +static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, + unsigned char **ptr, unsigned char **optr) +{ + int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); + + if (opt_len > 0) { + int off = *ptr - *optr; + + if (!pskb_may_pull(skb, off + opt_len)) + return -1; + + if (skb->data != *optr) { + *optr = skb->data; + *ptr = skb->data + off; + } + } + + return 0; +} + #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 35f6f86d4dcc..d4c60523c549 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 237f1a4a0b0c..0ae6899edac0 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_dec_refcount(session); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index e94b1a0407af..2c4cd4183bf9 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -366,6 +366,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); + ieee80211_agg_stop_txq(sta, tid); + spin_unlock_bh(&sta->lock); ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n", diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 61c7ea9de2cc..8a49a74c0a37 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1945,9 +1945,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, int head_need, bool may_encrypt) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + bool enc_tailroom; int tail_need = 0; - if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { + hdr = (struct ieee80211_hdr *) skb->data; + enc_tailroom = may_encrypt && + (sdata->crypto_tx_tailroom_needed_cnt || + ieee80211_is_mgmt(hdr->frame_control)); + + if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); @@ -1955,8 +1962,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || - !skb_clone_writable(skb, ETH_HLEN) || - (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) + !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d0eb38b890aa..ba950ae974fc 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -5,7 +5,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -2146,6 +2146,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: break; + case NL80211_IFTYPE_ADHOC: + if (sdata->vif.bss_conf.ibss_joined) + WARN_ON(drv_join_ibss(local, sdata)); + /* fall through */ default: ieee80211_reconfig_stations(sdata); /* fall through */ diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 94f53a9b7d1a..dda8930f20e7 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -183,8 +183,8 @@ static int mpls_build_state(struct nlattr *nla, &n_labels, NULL, extack)) return -EINVAL; - newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) + - n_labels * sizeof(u32)); + newts = lwtunnel_state_alloc(struct_size(tun_encap_info, label, + n_labels)); if (!newts) return -ENOMEM; diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index cad48d07c818..8401cefd9f65 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -29,6 +29,7 @@ config IP_VS_IPV6 bool "IPv6 support for IPVS" depends on IPV6 = y || IP_VS = IPV6 select IP6_NF_IPTABLES + select NF_DEFRAG_IPV6 ---help--- Add IPv6 support to IPVS. diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e969dad66991..43bbaa32b1d6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1566,14 +1566,12 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, iph->off, "ip_vs_in: packet continues traversal as normal"); - if (iph->fragoffs) { - /* Fragment that couldn't be mapped to a conn entry - * is missing module nf_defrag_ipv6 - */ - IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + + /* Fragment couldn't be mapped to a conn entry */ + if (iph->fragoffs) IP_VS_DBG_PKT(7, af, pp, skb, iph->off, "unhandled fragment"); - } + *verdict = NF_ACCEPT; return 0; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7d6318664eb2..86afacb07e5f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -43,6 +43,7 @@ #ifdef CONFIG_IP_VS_IPV6 #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif #include <net/route.h> #include <net/sock.h> @@ -895,6 +896,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, { struct ip_vs_dest *dest; unsigned int atype, i; + int ret = 0; EnterFunction(2); @@ -905,6 +907,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atype & IPV6_ADDR_LINKLOCAL) && !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) return -EINVAL; + + ret = nf_defrag_ipv6_enable(svc->ipvs->net); + if (ret) + return ret; } else #endif { @@ -1228,6 +1234,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ret = -EINVAL; goto out_err; } + + ret = nf_defrag_ipv6_enable(ipvs->net); + if (ret) + goto out_err; } #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 815956ac5a76..08ee03407ace 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1042,6 +1042,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, } if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. + */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e92bedd09cde..5ca5ec8f3cf0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -131,6 +131,23 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_set_is_anonymous(set)) + return; + + list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + if (trans->msg_type == NFT_MSG_NEWSET && + nft_trans_set(trans) == set) { + nft_trans_set_bound(trans) = true; + break; + } + } +} + static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@ -226,18 +243,6 @@ static int nft_delchain(struct nft_ctx *ctx) return err; } -/* either expr ops provide both activate/deactivate, or neither */ -static bool nft_expr_check_ops(const struct nft_expr_ops *ops) -{ - if (!ops) - return true; - - if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate))) - return false; - - return true; -} - static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { @@ -253,14 +258,15 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, } static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule) + struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr; expr = nft_expr_first(rule); while (expr != nft_expr_last(rule) && expr->ops) { if (expr->ops->deactivate) - expr->ops->deactivate(ctx, expr); + expr->ops->deactivate(ctx, expr, phase); expr = nft_expr_next(expr); } @@ -311,7 +317,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) nft_trans_destroy(trans); return err; } - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE); return 0; } @@ -1972,9 +1978,6 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, */ int nft_register_expr(struct nft_expr_type *type) { - if (!nft_expr_check_ops(type->ops)) - return -EINVAL; - nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) list_add_tail_rcu(&type->list, &nf_tables_expressions); @@ -2122,10 +2125,6 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, err = PTR_ERR(ops); goto err1; } - if (!nft_expr_check_ops(ops)) { - err = -EINVAL; - goto err1; - } } else ops = type->ops; @@ -2554,7 +2553,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); } @@ -3760,39 +3759,30 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, bind: binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); + nft_set_trans_bind(ctx, set); + return 0; } EXPORT_SYMBOL_GPL(nf_tables_bind_set); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) -{ - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) - list_add_tail_rcu(&set->list, &ctx->table->sets); - - list_add_tail_rcu(&binding->list, &set->bindings); -} -EXPORT_SYMBOL_GPL(nf_tables_rebind_set); - void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) + struct nft_set_binding *binding, bool event) { list_del_rcu(&binding->list); - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + if (event) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, + GFP_KERNEL); + } } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) { - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) nft_set_destroy(set); - } } EXPORT_SYMBOL_GPL(nf_tables_destroy_set); @@ -6621,6 +6611,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_DELRULE); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_COMMIT); break; case NFT_MSG_NEWSET: nft_clear(net, nft_trans_set(trans)); @@ -6707,7 +6700,8 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(nft_trans_set(trans)); + if (!nft_trans_set_bound(trans)) + nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), @@ -6768,7 +6762,9 @@ static int __nf_tables_abort(struct net *net) case NFT_MSG_NEWRULE: trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_ABORT); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; @@ -6778,7 +6774,8 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - list_del_rcu(&nft_trans_set(trans)->list); + if (!nft_trans_set_bound(trans)) + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 5eb269428832..0a4bad55a8aa 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -61,6 +61,21 @@ static struct nft_compat_net *nft_compat_pernet(struct net *net) return net_generic(net, nft_compat_net_id); } +static void nft_xt_get(struct nft_xt *xt) +{ + /* refcount_inc() warns on 0 -> 1 transition, but we can't + * init the reference count to 1 in .select_ops -- we can't + * undo such an increase when another expression inside the same + * rule fails afterwards. + */ + if (xt->listcnt == 0) + refcount_set(&xt->refcnt, 1); + else + refcount_inc(&xt->refcnt); + + xt->listcnt++; +} + static bool nft_xt_put(struct nft_xt *xt) { if (refcount_dec_and_test(&xt->refcnt)) { @@ -291,7 +306,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; nft_xt = container_of(expr->ops, struct nft_xt, ops); - refcount_inc(&nft_xt->refcnt); + nft_xt_get(nft_xt); return 0; } @@ -300,6 +315,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_target *target = expr->ops->data; void *info = nft_expr_priv(expr); + struct module *me = target->me; struct xt_tgdtor_param par; par.net = ctx->net; @@ -310,7 +326,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) par.target->destroy(&par); if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(target->me); + module_put(me); } static int nft_extension_dump_info(struct sk_buff *skb, int attr, @@ -504,7 +520,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; nft_xt = container_of(expr->ops, struct nft_xt, ops); - refcount_inc(&nft_xt->refcnt); + nft_xt_get(nft_xt); return 0; } @@ -558,41 +574,16 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); } -static void nft_compat_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - struct list_head *h) -{ - struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); - - if (xt->listcnt == 0) - list_add(&xt->head, h); - - xt->listcnt++; -} - -static void nft_compat_activate_mt(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_compat_net *cn = nft_compat_pernet(ctx->net); - - nft_compat_activate(ctx, expr, &cn->nft_match_list); -} - -static void nft_compat_activate_tg(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_compat_net *cn = nft_compat_pernet(ctx->net); - - nft_compat_activate(ctx, expr, &cn->nft_target_list); -} - static void nft_compat_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); - if (--xt->listcnt == 0) - list_del_init(&xt->head); + if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) { + if (--xt->listcnt == 0) + list_del_init(&xt->head); + } } static void @@ -848,7 +839,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; - nft_match->ops.activate = nft_compat_activate_mt; nft_match->ops.deactivate = nft_compat_deactivate; nft_match->ops.dump = nft_match_dump; nft_match->ops.validate = nft_match_validate; @@ -866,7 +856,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.size = matchsize; - nft_match->listcnt = 1; + nft_match->listcnt = 0; list_add(&nft_match->head, &cn->nft_match_list); return &nft_match->ops; @@ -953,7 +943,6 @@ nft_target_select_ops(const struct nft_ctx *ctx, nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; - nft_target->ops.activate = nft_compat_activate_tg; nft_target->ops.deactivate = nft_compat_deactivate; nft_target->ops.dump = nft_target_dump; nft_target->ops.validate = nft_target_validate; @@ -964,7 +953,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, else nft_target->ops.eval = nft_target_eval_xt; - nft_target->listcnt = 1; + nft_target->listcnt = 0; list_add(&nft_target->head, &cn->nft_target_list); return &nft_target->ops; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 9658493d37d4..a8a74a16f9c4 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -234,20 +234,17 @@ err1: return err; } -static void nft_dynset_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_dynset *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_dynset_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_dynset *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_dynset_destroy(const struct nft_ctx *ctx, @@ -295,7 +292,6 @@ static const struct nft_expr_ops nft_dynset_ops = { .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, - .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, }; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 3e5ed787b1d4..5ec43124cbca 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -72,10 +72,14 @@ static void nft_immediate_activate(const struct nft_ctx *ctx, } static void nft_immediate_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + if (phase == NFT_TRANS_COMMIT) + return; + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 227b2b15a19c..14496da5141d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -121,20 +121,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return 0; } -static void nft_lookup_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_lookup *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_lookup_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_lookup *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_lookup_destroy(const struct nft_ctx *ctx, @@ -225,7 +222,6 @@ static const struct nft_expr_ops nft_lookup_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .eval = nft_lookup_eval, .init = nft_lookup_init, - .activate = nft_lookup_activate, .deactivate = nft_lookup_deactivate, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index c1f2adf198a0..79ef074c18ca 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -156,20 +156,17 @@ nla_put_failure: return -1; } -static void nft_objref_map_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_objref_map *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_objref_map_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_objref_map *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@ -186,7 +183,6 @@ static const struct nft_expr_ops nft_objref_map_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, - .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index aecadd471e1d..13e1ac333fa4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1899,7 +1899,7 @@ static int __init xt_init(void) seqcount_init(&per_cpu(xt_recseq, i)); } - xt = kmalloc_array(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); + xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); if (!xt) return -ENOMEM; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3b1a78906bc0..1cd1d83a4be0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4292,7 +4292,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; - if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) diff --git a/net/rds/bind.c b/net/rds/bind.c index 762d2c6788a3..17c9d9f0c848 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -78,10 +78,10 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, __rds_create_bind_key(key, addr, port, scope_id); rcu_read_lock(); rs = rhashtable_lookup(&bind_hash_table, key, ht_parms); - if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) - rds_sock_addref(rs); - else + if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) || + !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt))) rs = NULL; + rcu_read_unlock(); rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr, diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index eaf19ebaa964..3f7bb11f3290 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -596,6 +596,7 @@ error_requeue_call: } error_no_call: release_sock(&rx->sk); +error_trace: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return ret; @@ -604,7 +605,7 @@ wait_interrupted: wait_error: finish_wait(sk_sleep(&rx->sk), &wait); call = NULL; - goto error_no_call; + goto error_trace; } /** diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d4b8355737d8..aecf1bf233c8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -543,7 +543,7 @@ int tcf_register_action(struct tc_action_ops *act, write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { - if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { + if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) { write_unlock(&act_mod_lock); unregister_pernet_subsys(ops); return -EEXIST; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index c7633843e223..aa5c38d11a30 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -396,7 +396,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", - .type = TCA_ACT_BPF, + .id = TCA_ID_BPF, .owner = THIS_MODULE, .act = tcf_bpf_act, .dump = tcf_bpf_dump, diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 8475913f2070..5d24993cccfe 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -204,7 +204,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_connmark_ops = { .kind = "connmark", - .type = TCA_ACT_CONNMARK, + .id = TCA_ID_CONNMARK, .owner = THIS_MODULE, .act = tcf_connmark_act, .dump = tcf_connmark_dump, diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3dc25b7806d7..945fb34ae721 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -660,7 +660,7 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) static struct tc_action_ops act_csum_ops = { .kind = "csum", - .type = TCA_ACT_CSUM, + .id = TCA_ID_CSUM, .owner = THIS_MODULE, .act = tcf_csum_act, .dump = tcf_csum_dump, diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index b61c20ebb314..93da0004e9f4 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -253,7 +253,7 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) static struct tc_action_ops act_gact_ops = { .kind = "gact", - .type = TCA_ACT_GACT, + .id = TCA_ID_GACT, .owner = THIS_MODULE, .act = tcf_gact_act, .stats_update = tcf_gact_stats_update, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 30b63fa23ee2..9b1f2b3990ee 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -864,7 +864,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_ife_ops = { .kind = "ife", - .type = TCA_ACT_IFE, + .id = TCA_ID_IFE, .owner = THIS_MODULE, .act = tcf_ife_act, .dump = tcf_ife_dump, diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 8af6c11d2482..1bad190710ad 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -338,7 +338,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_ipt_ops = { .kind = "ipt", - .type = TCA_ACT_IPT, + .id = TCA_ID_IPT, .owner = THIS_MODULE, .act = tcf_ipt_act, .dump = tcf_ipt_dump, @@ -387,7 +387,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_xt_ops = { .kind = "xt", - .type = TCA_ACT_XT, + .id = TCA_ID_XT, .owner = THIS_MODULE, .act = tcf_ipt_act, .dump = tcf_ipt_dump, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index c8cf4d10c435..6692fd054617 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -400,7 +400,7 @@ static void tcf_mirred_put_dev(struct net_device *dev) static struct tc_action_ops act_mirred_ops = { .kind = "mirred", - .type = TCA_ACT_MIRRED, + .id = TCA_ID_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred_act, .stats_update = tcf_stats_update, diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index c5c1e23add77..543eab9193f1 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -304,7 +304,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_nat_ops = { .kind = "nat", - .type = TCA_ACT_NAT, + .id = TCA_ID_NAT, .owner = THIS_MODULE, .act = tcf_nat_act, .dump = tcf_nat_dump, diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 2b372a06b432..a80373878df7 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -406,7 +406,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_t t; int s; - s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key); + s = struct_size(opt, keys, p->tcfp_nkeys); /* netlink spinlocks held above us - must use ATOMIC */ opt = kzalloc(s, GFP_ATOMIC); @@ -470,7 +470,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_pedit_ops = { .kind = "pedit", - .type = TCA_ACT_PEDIT, + .id = TCA_ID_PEDIT, .owner = THIS_MODULE, .act = tcf_pedit_act, .dump = tcf_pedit_dump, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index ec8ec55e0fe8..8271a6263824 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -366,7 +366,7 @@ MODULE_LICENSE("GPL"); static struct tc_action_ops act_police_ops = { .kind = "police", - .type = TCA_ID_POLICE, + .id = TCA_ID_POLICE, .owner = THIS_MODULE, .act = tcf_police_act, .dump = tcf_police_dump, diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 1a0c682fd734..203e399e5c85 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -233,7 +233,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_sample_ops = { .kind = "sample", - .type = TCA_ACT_SAMPLE, + .id = TCA_ID_SAMPLE, .owner = THIS_MODULE, .act = tcf_sample_act, .dump = tcf_sample_dump, diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 902957beceb3..d54cb608dbaf 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -19,8 +19,6 @@ #include <net/netlink.h> #include <net/pkt_sched.h> -#define TCA_ACT_SIMP 22 - #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> @@ -197,7 +195,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_simp_ops = { .kind = "simple", - .type = TCA_ACT_SIMP, + .id = TCA_ID_SIMP, .owner = THIS_MODULE, .act = tcf_simp_act, .dump = tcf_simp_dump, diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 64dba3708fce..39f8a67ea940 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -305,7 +305,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", - .type = TCA_ACT_SKBEDIT, + .id = TCA_ID_SKBEDIT, .owner = THIS_MODULE, .act = tcf_skbedit_act, .dump = tcf_skbedit_dump, diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 59710a183bd3..7bac1d78e7a3 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -260,7 +260,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", - .type = TCA_ACT_SKBMOD, + .id = TCA_ACT_SKBMOD, .owner = THIS_MODULE, .act = tcf_skbmod_act, .dump = tcf_skbmod_dump, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 8b43fe0130f7..9104b8e36482 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -563,7 +563,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_tunnel_key_ops = { .kind = "tunnel_key", - .type = TCA_ACT_TUNNEL_KEY, + .id = TCA_ID_TUNNEL_KEY, .owner = THIS_MODULE, .act = tunnel_key_act, .dump = tunnel_key_dump, diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 93fdaf707313..ac0061599225 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -297,7 +297,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_vlan_ops = { .kind = "vlan", - .type = TCA_ACT_VLAN, + .id = TCA_ID_VLAN, .owner = THIS_MODULE, .act = tcf_vlan_act, .dump = tcf_vlan_dump, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 02cf6d2fa0e1..9ad53895e604 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -38,7 +38,6 @@ #include <net/tc_act/tc_csum.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_skbedit.h> -#include <net/tc_act/tc_mirred.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -69,7 +68,8 @@ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) } static const struct tcf_proto_ops * -tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack) +tcf_proto_lookup_ops(const char *kind, bool rtnl_held, + struct netlink_ext_ack *extack) { const struct tcf_proto_ops *ops; @@ -77,9 +77,11 @@ tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack) if (ops) return ops; #ifdef CONFIG_MODULES - rtnl_unlock(); + if (rtnl_held) + rtnl_unlock(); request_module("cls_%s", kind); - rtnl_lock(); + if (rtnl_held) + rtnl_lock(); ops = __tcf_proto_lookup_ops(kind); /* We dropped the RTNL semaphore in order to perform * the module load. So, even if we succeeded in loading @@ -160,8 +162,26 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) return TC_H_MAJ(first); } +static bool tcf_proto_is_unlocked(const char *kind) +{ + const struct tcf_proto_ops *ops; + bool ret; + + ops = tcf_proto_lookup_ops(kind, false, NULL); + /* On error return false to take rtnl lock. Proto lookup/create + * functions will perform lookup again and properly handle errors. + */ + if (IS_ERR(ops)) + return false; + + ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED); + module_put(ops->owner); + return ret; +} + static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, u32 prio, struct tcf_chain *chain, + bool rtnl_held, struct netlink_ext_ack *extack) { struct tcf_proto *tp; @@ -171,7 +191,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, if (!tp) return ERR_PTR(-ENOBUFS); - tp->ops = tcf_proto_lookup_ops(kind, extack); + tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack); if (IS_ERR(tp->ops)) { err = PTR_ERR(tp->ops); goto errout; @@ -180,6 +200,8 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, tp->protocol = protocol; tp->prio = prio; tp->chain = chain; + spin_lock_init(&tp->lock); + refcount_set(&tp->refcnt, 1); err = tp->ops->init(tp); if (err) { @@ -193,14 +215,75 @@ errout: return ERR_PTR(err); } -static void tcf_proto_destroy(struct tcf_proto *tp, +static void tcf_proto_get(struct tcf_proto *tp) +{ + refcount_inc(&tp->refcnt); +} + +static void tcf_chain_put(struct tcf_chain *chain); + +static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { - tp->ops->destroy(tp, extack); + tp->ops->destroy(tp, rtnl_held, extack); + tcf_chain_put(tp->chain); module_put(tp->ops->owner); kfree_rcu(tp, rcu); } +static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + if (refcount_dec_and_test(&tp->refcnt)) + tcf_proto_destroy(tp, rtnl_held, extack); +} + +static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg) +{ + return -1; +} + +static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held) +{ + struct tcf_walker walker = { .fn = walker_noop, }; + + if (tp->ops->walk) { + tp->ops->walk(tp, &walker, rtnl_held); + return !walker.stop; + } + return true; +} + +static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held) +{ + spin_lock(&tp->lock); + if (tcf_proto_is_empty(tp, rtnl_held)) + tp->deleting = true; + spin_unlock(&tp->lock); + return tp->deleting; +} + +static void tcf_proto_mark_delete(struct tcf_proto *tp) +{ + spin_lock(&tp->lock); + tp->deleting = true; + spin_unlock(&tp->lock); +} + +static bool tcf_proto_is_deleting(struct tcf_proto *tp) +{ + bool deleting; + + spin_lock(&tp->lock); + deleting = tp->deleting; + spin_unlock(&tp->lock); + + return deleting; +} + +#define ASSERT_BLOCK_LOCKED(block) \ + lockdep_assert_held(&(block)->lock) + struct tcf_filter_chain_list_item { struct list_head list; tcf_chain_head_change_t *chain_head_change; @@ -212,10 +295,13 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, { struct tcf_chain *chain; + ASSERT_BLOCK_LOCKED(block); + chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; list_add_tail(&chain->list, &block->chain_list); + mutex_init(&chain->filter_chain_lock); chain->block = block; chain->index = chain_index; chain->refcnt = 1; @@ -239,29 +325,59 @@ static void tcf_chain0_head_change(struct tcf_chain *chain, if (chain->index) return; + + mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) tcf_chain_head_change_item(item, tp_head); + mutex_unlock(&block->lock); } -static void tcf_chain_destroy(struct tcf_chain *chain) +/* Returns true if block can be safely freed. */ + +static bool tcf_chain_detach(struct tcf_chain *chain) { struct tcf_block *block = chain->block; + ASSERT_BLOCK_LOCKED(block); + list_del(&chain->list); if (!chain->index) block->chain0.chain = NULL; + + if (list_empty(&block->chain_list) && + refcount_read(&block->refcnt) == 0) + return true; + + return false; +} + +static void tcf_block_destroy(struct tcf_block *block) +{ + mutex_destroy(&block->lock); + kfree_rcu(block, rcu); +} + +static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) +{ + struct tcf_block *block = chain->block; + + mutex_destroy(&chain->filter_chain_lock); kfree(chain); - if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt)) - kfree_rcu(block, rcu); + if (free_block) + tcf_block_destroy(block); } static void tcf_chain_hold(struct tcf_chain *chain) { + ASSERT_BLOCK_LOCKED(chain->block); + ++chain->refcnt; } static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain) { + ASSERT_BLOCK_LOCKED(chain->block); + /* In case all the references are action references, this * chain should not be shown to the user. */ @@ -273,6 +389,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, { struct tcf_chain *chain; + ASSERT_BLOCK_LOCKED(block); + list_for_each_entry(chain, &block->chain_list, list) { if (chain->index == chain_index) return chain; @@ -287,31 +405,40 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create, bool by_act) { - struct tcf_chain *chain = tcf_chain_lookup(block, chain_index); + struct tcf_chain *chain = NULL; + bool is_first_reference; + mutex_lock(&block->lock); + chain = tcf_chain_lookup(block, chain_index); if (chain) { tcf_chain_hold(chain); } else { if (!create) - return NULL; + goto errout; chain = tcf_chain_create(block, chain_index); if (!chain) - return NULL; + goto errout; } if (by_act) ++chain->action_refcnt; + is_first_reference = chain->refcnt - chain->action_refcnt == 1; + mutex_unlock(&block->lock); /* Send notification only in case we got the first * non-action reference. Until then, the chain acts only as * a placeholder for actions pointing to it and user ought * not know about them. */ - if (chain->refcnt - chain->action_refcnt == 1 && !by_act) + if (is_first_reference && !by_act) tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false); return chain; + +errout: + mutex_unlock(&block->lock); + return chain; } static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, @@ -326,51 +453,94 @@ struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index) } EXPORT_SYMBOL(tcf_chain_get_by_act); -static void tc_chain_tmplt_del(struct tcf_chain *chain); +static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv); +static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct tcf_block *block, struct sk_buff *oskb, + u32 seq, u16 flags, bool unicast); -static void __tcf_chain_put(struct tcf_chain *chain, bool by_act) +static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, + bool explicitly_created) { + struct tcf_block *block = chain->block; + const struct tcf_proto_ops *tmplt_ops; + bool is_last, free_block = false; + unsigned int refcnt; + void *tmplt_priv; + u32 chain_index; + + mutex_lock(&block->lock); + if (explicitly_created) { + if (!chain->explicitly_created) { + mutex_unlock(&block->lock); + return; + } + chain->explicitly_created = false; + } + if (by_act) chain->action_refcnt--; - chain->refcnt--; + + /* tc_chain_notify_delete can't be called while holding block lock. + * However, when block is unlocked chain can be changed concurrently, so + * save these to temporary variables. + */ + refcnt = --chain->refcnt; + is_last = refcnt - chain->action_refcnt == 0; + tmplt_ops = chain->tmplt_ops; + tmplt_priv = chain->tmplt_priv; + chain_index = chain->index; + + if (refcnt == 0) + free_block = tcf_chain_detach(chain); + mutex_unlock(&block->lock); /* The last dropped non-action reference will trigger notification. */ - if (chain->refcnt - chain->action_refcnt == 0 && !by_act) - tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false); + if (is_last && !by_act) { + tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index, + block, NULL, 0, 0, false); + /* Last reference to chain, no need to lock. */ + chain->flushing = false; + } - if (chain->refcnt == 0) { - tc_chain_tmplt_del(chain); - tcf_chain_destroy(chain); + if (refcnt == 0) { + tc_chain_tmplt_del(tmplt_ops, tmplt_priv); + tcf_chain_destroy(chain, free_block); } } static void tcf_chain_put(struct tcf_chain *chain) { - __tcf_chain_put(chain, false); + __tcf_chain_put(chain, false, false); } void tcf_chain_put_by_act(struct tcf_chain *chain) { - __tcf_chain_put(chain, true); + __tcf_chain_put(chain, true, false); } EXPORT_SYMBOL(tcf_chain_put_by_act); static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) { - if (chain->explicitly_created) - tcf_chain_put(chain); + __tcf_chain_put(chain, false, true); } -static void tcf_chain_flush(struct tcf_chain *chain) +static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) { - struct tcf_proto *tp = rtnl_dereference(chain->filter_chain); + struct tcf_proto *tp, *tp_next; + mutex_lock(&chain->filter_chain_lock); + tp = tcf_chain_dereference(chain->filter_chain, chain); + RCU_INIT_POINTER(chain->filter_chain, NULL); tcf_chain0_head_change(chain, NULL); + chain->flushing = true; + mutex_unlock(&chain->filter_chain_lock); + while (tp) { - RCU_INIT_POINTER(chain->filter_chain, tp->next); - tcf_proto_destroy(tp, NULL); - tp = rtnl_dereference(chain->filter_chain); - tcf_chain_put(chain); + tp_next = rcu_dereference_protected(tp->next, 1); + tcf_proto_put(tp, rtnl_held, NULL); + tp = tp_next; } } @@ -692,8 +862,8 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { - struct tcf_chain *chain0 = block->chain0.chain; struct tcf_filter_chain_list_item *item; + struct tcf_chain *chain0; item = kmalloc(sizeof(*item), GFP_KERNEL); if (!item) { @@ -702,9 +872,32 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block, } item->chain_head_change = ei->chain_head_change; item->chain_head_change_priv = ei->chain_head_change_priv; - if (chain0 && chain0->filter_chain) - tcf_chain_head_change_item(item, chain0->filter_chain); - list_add(&item->list, &block->chain0.filter_chain_list); + + mutex_lock(&block->lock); + chain0 = block->chain0.chain; + if (chain0) + tcf_chain_hold(chain0); + else + list_add(&item->list, &block->chain0.filter_chain_list); + mutex_unlock(&block->lock); + + if (chain0) { + struct tcf_proto *tp_head; + + mutex_lock(&chain0->filter_chain_lock); + + tp_head = tcf_chain_dereference(chain0->filter_chain, chain0); + if (tp_head) + tcf_chain_head_change_item(item, tp_head); + + mutex_lock(&block->lock); + list_add(&item->list, &block->chain0.filter_chain_list); + mutex_unlock(&block->lock); + + mutex_unlock(&chain0->filter_chain_lock); + tcf_chain_put(chain0); + } + return 0; } @@ -712,20 +905,23 @@ static void tcf_chain0_head_change_cb_del(struct tcf_block *block, struct tcf_block_ext_info *ei) { - struct tcf_chain *chain0 = block->chain0.chain; struct tcf_filter_chain_list_item *item; + mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) { if ((!ei->chain_head_change && !ei->chain_head_change_priv) || (item->chain_head_change == ei->chain_head_change && item->chain_head_change_priv == ei->chain_head_change_priv)) { - if (chain0) + if (block->chain0.chain) tcf_chain_head_change_item(item, NULL); list_del(&item->list); + mutex_unlock(&block->lock); + kfree(item); return; } } + mutex_unlock(&block->lock); WARN_ON(1); } @@ -772,6 +968,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, NL_SET_ERR_MSG(extack, "Memory allocation for block failed"); return ERR_PTR(-ENOMEM); } + mutex_init(&block->lock); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->cb_list); INIT_LIST_HEAD(&block->owner_list); @@ -807,157 +1004,241 @@ static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) return block; } -static void tcf_block_flush_all_chains(struct tcf_block *block) +static struct tcf_chain * +__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) { - struct tcf_chain *chain; + mutex_lock(&block->lock); + if (chain) + chain = list_is_last(&chain->list, &block->chain_list) ? + NULL : list_next_entry(chain, list); + else + chain = list_first_entry_or_null(&block->chain_list, + struct tcf_chain, list); - /* Hold a refcnt for all chains, so that they don't disappear - * while we are iterating. - */ - list_for_each_entry(chain, &block->chain_list, list) + /* skip all action-only chains */ + while (chain && tcf_chain_held_by_acts_only(chain)) + chain = list_is_last(&chain->list, &block->chain_list) ? + NULL : list_next_entry(chain, list); + + if (chain) tcf_chain_hold(chain); + mutex_unlock(&block->lock); - list_for_each_entry(chain, &block->chain_list, list) - tcf_chain_flush(chain); + return chain; } -static void tcf_block_put_all_chains(struct tcf_block *block) +/* Function to be used by all clients that want to iterate over all chains on + * block. It properly obtains block->lock and takes reference to chain before + * returning it. Users of this function must be tolerant to concurrent chain + * insertion/deletion or ensure that no concurrent chain modification is + * possible. Note that all netlink dump callbacks cannot guarantee to provide + * consistent dump because rtnl lock is released each time skb is filled with + * data and sent to user-space. + */ + +struct tcf_chain * +tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) { - struct tcf_chain *chain, *tmp; + struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain); - /* At this point, all the chains should have refcnt >= 1. */ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { - tcf_chain_put_explicitly_created(chain); + if (chain) tcf_chain_put(chain); - } + + return chain_next; } +EXPORT_SYMBOL(tcf_get_next_chain); -static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, - struct tcf_block_ext_info *ei) +static struct tcf_proto * +__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) { - if (refcount_dec_and_test(&block->refcnt)) { - /* Flushing/putting all chains will cause the block to be - * deallocated when last chain is freed. However, if chain_list - * is empty, block has to be manually deallocated. After block - * reference counter reached 0, it is no longer possible to - * increment it or add new chains to block. - */ - bool free_block = list_empty(&block->chain_list); + u32 prio = 0; - if (tcf_block_shared(block)) - tcf_block_remove(block, block->net); - if (!free_block) - tcf_block_flush_all_chains(block); + ASSERT_RTNL(); + mutex_lock(&chain->filter_chain_lock); - if (q) - tcf_block_offload_unbind(block, q, ei); + if (!tp) { + tp = tcf_chain_dereference(chain->filter_chain, chain); + } else if (tcf_proto_is_deleting(tp)) { + /* 'deleting' flag is set and chain->filter_chain_lock was + * unlocked, which means next pointer could be invalid. Restart + * search. + */ + prio = tp->prio + 1; + tp = tcf_chain_dereference(chain->filter_chain, chain); - if (free_block) - kfree_rcu(block, rcu); - else - tcf_block_put_all_chains(block); - } else if (q) { - tcf_block_offload_unbind(block, q, ei); + for (; tp; tp = tcf_chain_dereference(tp->next, chain)) + if (!tp->deleting && tp->prio >= prio) + break; + } else { + tp = tcf_chain_dereference(tp->next, chain); } + + if (tp) + tcf_proto_get(tp); + + mutex_unlock(&chain->filter_chain_lock); + + return tp; +} + +/* Function to be used by all clients that want to iterate over all tp's on + * chain. Users of this function must be tolerant to concurrent tp + * insertion/deletion or ensure that no concurrent chain modification is + * possible. Note that all netlink dump callbacks cannot guarantee to provide + * consistent dump because rtnl lock is released each time skb is filled with + * data and sent to user-space. + */ + +struct tcf_proto * +tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp, + bool rtnl_held) +{ + struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp); + + if (tp) + tcf_proto_put(tp, rtnl_held, NULL); + + return tp_next; } +EXPORT_SYMBOL(tcf_get_next_proto); -static void tcf_block_refcnt_put(struct tcf_block *block) +static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held) { - __tcf_block_put(block, NULL, NULL); + struct tcf_chain *chain; + + /* Last reference to block. At this point chains cannot be added or + * removed concurrently. + */ + for (chain = tcf_get_next_chain(block, NULL); + chain; + chain = tcf_get_next_chain(block, chain)) { + tcf_chain_put_explicitly_created(chain); + tcf_chain_flush(chain, rtnl_held); + } } -/* Find tcf block. - * Set q, parent, cl when appropriate. +/* Lookup Qdisc and increments its reference counter. + * Set parent, if necessary. */ -static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, - u32 *parent, unsigned long *cl, - int ifindex, u32 block_index, - struct netlink_ext_ack *extack) +static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, + u32 *parent, int ifindex, bool rtnl_held, + struct netlink_ext_ack *extack) { - struct tcf_block *block; + const struct Qdisc_class_ops *cops; + struct net_device *dev; int err = 0; - if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_refcnt_get(net, block_index); - if (!block) { - NL_SET_ERR_MSG(extack, "Block of given index was not found"); - return ERR_PTR(-EINVAL); - } - } else { - const struct Qdisc_class_ops *cops; - struct net_device *dev; - - rcu_read_lock(); + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) + return 0; - /* Find link */ - dev = dev_get_by_index_rcu(net, ifindex); - if (!dev) { - rcu_read_unlock(); - return ERR_PTR(-ENODEV); - } + rcu_read_lock(); - /* Find qdisc */ - if (!*parent) { - *q = dev->qdisc; - *parent = (*q)->handle; - } else { - *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); - if (!*q) { - NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - err = -EINVAL; - goto errout_rcu; - } - } + /* Find link */ + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } - *q = qdisc_refcount_inc_nz(*q); + /* Find qdisc */ + if (!*parent) { + *q = dev->qdisc; + *parent = (*q)->handle; + } else { + *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); err = -EINVAL; goto errout_rcu; } + } - /* Is it classful? */ - cops = (*q)->ops->cl_ops; - if (!cops) { - NL_SET_ERR_MSG(extack, "Qdisc not classful"); - err = -EINVAL; - goto errout_rcu; - } + *q = qdisc_refcount_inc_nz(*q); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + err = -EINVAL; + goto errout_rcu; + } - if (!cops->tcf_block) { - NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); - err = -EOPNOTSUPP; - goto errout_rcu; - } + /* Is it classful? */ + cops = (*q)->ops->cl_ops; + if (!cops) { + NL_SET_ERR_MSG(extack, "Qdisc not classful"); + err = -EINVAL; + goto errout_qdisc; + } - /* At this point we know that qdisc is not noop_qdisc, - * which means that qdisc holds a reference to net_device - * and we hold a reference to qdisc, so it is safe to release - * rcu read lock. - */ - rcu_read_unlock(); + if (!cops->tcf_block) { + NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); + err = -EOPNOTSUPP; + goto errout_qdisc; + } - /* Do we search for filter, attached to class? */ - if (TC_H_MIN(*parent)) { - *cl = cops->find(*q, *parent); - if (*cl == 0) { - NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); - err = -ENOENT; - goto errout_qdisc; - } +errout_rcu: + /* At this point we know that qdisc is not noop_qdisc, + * which means that qdisc holds a reference to net_device + * and we hold a reference to qdisc, so it is safe to release + * rcu read lock. + */ + rcu_read_unlock(); + return err; + +errout_qdisc: + rcu_read_unlock(); + + if (rtnl_held) + qdisc_put(*q); + else + qdisc_put_unlocked(*q); + *q = NULL; + + return err; +} + +static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl, + int ifindex, struct netlink_ext_ack *extack) +{ + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) + return 0; + + /* Do we search for filter, attached to class? */ + if (TC_H_MIN(parent)) { + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + + *cl = cops->find(q, parent); + if (*cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); + return -ENOENT; } + } + + return 0; +} - /* And the last stroke */ - block = cops->tcf_block(*q, *cl, extack); +static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q, + unsigned long cl, int ifindex, + u32 block_index, + struct netlink_ext_ack *extack) +{ + struct tcf_block *block; + + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { + block = tcf_block_refcnt_get(net, block_index); if (!block) { - err = -EINVAL; - goto errout_qdisc; + NL_SET_ERR_MSG(extack, "Block of given index was not found"); + return ERR_PTR(-EINVAL); } + } else { + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + + block = cops->tcf_block(q, cl, extack); + if (!block) + return ERR_PTR(-EINVAL); + if (tcf_block_shared(block)) { NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); - err = -EOPNOTSUPP; - goto errout_qdisc; + return ERR_PTR(-EOPNOTSUPP); } /* Always take reference to block in order to support execution @@ -970,24 +1251,89 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, } return block; +} + +static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei, bool rtnl_held) +{ + if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) { + /* Flushing/putting all chains will cause the block to be + * deallocated when last chain is freed. However, if chain_list + * is empty, block has to be manually deallocated. After block + * reference counter reached 0, it is no longer possible to + * increment it or add new chains to block. + */ + bool free_block = list_empty(&block->chain_list); + + mutex_unlock(&block->lock); + if (tcf_block_shared(block)) + tcf_block_remove(block, block->net); + + if (q) + tcf_block_offload_unbind(block, q, ei); + + if (free_block) + tcf_block_destroy(block); + else + tcf_block_flush_all_chains(block, rtnl_held); + } else if (q) { + tcf_block_offload_unbind(block, q, ei); + } +} + +static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held) +{ + __tcf_block_put(block, NULL, NULL, rtnl_held); +} + +/* Find tcf block. + * Set q, parent, cl when appropriate. + */ + +static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, + u32 *parent, unsigned long *cl, + int ifindex, u32 block_index, + struct netlink_ext_ack *extack) +{ + struct tcf_block *block; + int err = 0; + + ASSERT_RTNL(); + + err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack); + if (err) + goto errout; + + err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack); + if (err) + goto errout_qdisc; + + block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack); + if (IS_ERR(block)) + goto errout_qdisc; + + return block; -errout_rcu: - rcu_read_unlock(); errout_qdisc: - if (*q) { + if (*q) qdisc_put(*q); - *q = NULL; - } +errout: + *q = NULL; return ERR_PTR(err); } -static void tcf_block_release(struct Qdisc *q, struct tcf_block *block) +static void tcf_block_release(struct Qdisc *q, struct tcf_block *block, + bool rtnl_held) { if (!IS_ERR_OR_NULL(block)) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, rtnl_held); - if (q) - qdisc_put(q); + if (q) { + if (rtnl_held) + qdisc_put(q); + else + qdisc_put_unlocked(q); + } } struct tcf_block_owner_item { @@ -1095,7 +1441,7 @@ err_chain0_head_change_cb_add: tcf_block_owner_del(block, q, ei->binder_type); err_block_owner_add: err_block_insert: - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); return err; } EXPORT_SYMBOL(tcf_block_get_ext); @@ -1132,7 +1478,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); - __tcf_block_put(block, q, ei); + __tcf_block_put(block, q, ei, true); } EXPORT_SYMBOL(tcf_block_put_ext); @@ -1189,13 +1535,19 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_priv, bool add, bool offload_in_use, struct netlink_ext_ack *extack) { - struct tcf_chain *chain; - struct tcf_proto *tp; + struct tcf_chain *chain, *chain_prev; + struct tcf_proto *tp, *tp_prev; int err; - list_for_each_entry(chain, &block->chain_list, list) { - for (tp = rtnl_dereference(chain->filter_chain); tp; - tp = rtnl_dereference(tp->next)) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { + for (tp = __tcf_get_next_proto(chain, NULL); tp; + tp_prev = tp, + tp = __tcf_get_next_proto(chain, tp), + tcf_proto_put(tp_prev, true, NULL)) { if (tp->ops->reoffload) { err = tp->ops->reoffload(tp, add, cb, cb_priv, extack); @@ -1212,6 +1564,8 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, return 0; err_playback_remove: + tcf_proto_put(tp, true, NULL); + tcf_chain_put(chain); tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use, extack); return err; @@ -1337,32 +1691,116 @@ struct tcf_chain_info { struct tcf_proto __rcu *next; }; -static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info) +static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain, + struct tcf_chain_info *chain_info) { - return rtnl_dereference(*chain_info->pprev); + return tcf_chain_dereference(*chain_info->pprev, chain); } -static void tcf_chain_tp_insert(struct tcf_chain *chain, - struct tcf_chain_info *chain_info, - struct tcf_proto *tp) +static int tcf_chain_tp_insert(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + struct tcf_proto *tp) { + if (chain->flushing) + return -EAGAIN; + if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); + tcf_proto_get(tp); + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); - tcf_chain_hold(chain); + + return 0; } static void tcf_chain_tp_remove(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { - struct tcf_proto *next = rtnl_dereference(chain_info->next); + struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain); + tcf_proto_mark_delete(tp); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); - tcf_chain_put(chain); +} + +static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + u32 protocol, u32 prio, + bool prio_allocate); + +/* Try to insert new proto. + * If proto with specified priority already exists, free new proto + * and return existing one. + */ + +static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, + struct tcf_proto *tp_new, + u32 protocol, u32 prio, + bool rtnl_held) +{ + struct tcf_chain_info chain_info; + struct tcf_proto *tp; + int err = 0; + + mutex_lock(&chain->filter_chain_lock); + + tp = tcf_chain_tp_find(chain, &chain_info, + protocol, prio, false); + if (!tp) + err = tcf_chain_tp_insert(chain, &chain_info, tp_new); + mutex_unlock(&chain->filter_chain_lock); + + if (tp) { + tcf_proto_destroy(tp_new, rtnl_held, NULL); + tp_new = tp; + } else if (err) { + tcf_proto_destroy(tp_new, rtnl_held, NULL); + tp_new = ERR_PTR(err); + } + + return tp_new; +} + +static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, + struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + struct tcf_chain_info chain_info; + struct tcf_proto *tp_iter; + struct tcf_proto **pprev; + struct tcf_proto *next; + + mutex_lock(&chain->filter_chain_lock); + + /* Atomically find and remove tp from chain. */ + for (pprev = &chain->filter_chain; + (tp_iter = tcf_chain_dereference(*pprev, chain)); + pprev = &tp_iter->next) { + if (tp_iter == tp) { + chain_info.pprev = pprev; + chain_info.next = tp_iter->next; + WARN_ON(tp_iter->deleting); + break; + } + } + /* Verify that tp still exists and no new filters were inserted + * concurrently. + * Mark tp for deletion if it is empty. + */ + if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) { + mutex_unlock(&chain->filter_chain_lock); + return; + } + + next = tcf_chain_dereference(chain_info.next, chain); + if (tp == chain->filter_chain) + tcf_chain0_head_change(chain, next); + RCU_INIT_POINTER(*chain_info.pprev, next); + mutex_unlock(&chain->filter_chain_lock); + + tcf_proto_put(tp, rtnl_held, extack); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, @@ -1375,7 +1813,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, /* Check the chain for existence of proto-tcf with this priority */ for (pprev = &chain->filter_chain; - (tp = rtnl_dereference(*pprev)); pprev = &tp->next) { + (tp = tcf_chain_dereference(*pprev, chain)); + pprev = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { if (prio_allocate || @@ -1388,14 +1827,20 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, } } chain_info->pprev = pprev; - chain_info->next = tp ? tp->next : NULL; + if (tp) { + chain_info->next = tp->next; + tcf_proto_get(tp); + } else { + chain_info->next = NULL; + } return tp; } static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, - u32 portid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event, + bool rtnl_held) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1423,7 +1868,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, if (!fh) { tcm->tcm_handle = 0; } else { - if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) + if (tp->ops->dump && + tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0) goto nla_put_failure; } nlh->nlmsg_len = skb_tail_pointer(skb) - b; @@ -1438,7 +1884,8 @@ nla_put_failure: static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, - u32 parent, void *fh, int event, bool unicast) + u32 parent, void *fh, int event, bool unicast, + bool rtnl_held) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -1448,7 +1895,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, return -ENOBUFS; if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, event) <= 0) { + n->nlmsg_seq, n->nlmsg_flags, event, + rtnl_held) <= 0) { kfree_skb(skb); return -EINVAL; } @@ -1464,7 +1912,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, bool unicast, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -1475,13 +1923,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, return -ENOBUFS; if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) { + n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER, + rtnl_held) <= 0) { NL_SET_ERR_MSG(extack, "Failed to build del event notification"); kfree_skb(skb); return -EINVAL; } - err = tp->ops->delete(tp, fh, last, extack); + err = tp->ops->delete(tp, fh, last, rtnl_held, extack); if (err) { kfree_skb(skb); return err; @@ -1500,14 +1949,21 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct tcf_block *block, struct Qdisc *q, u32 parent, struct nlmsghdr *n, - struct tcf_chain *chain, int event) + struct tcf_chain *chain, int event, + bool rtnl_held) { struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) + for (tp = tcf_get_next_proto(chain, NULL, rtnl_held); + tp; tp = tcf_get_next_proto(chain, tp, rtnl_held)) tfilter_notify(net, oskb, n, tp, block, - q, parent, NULL, event, false); + q, parent, NULL, event, false, rtnl_held); +} + +static void tfilter_put(struct tcf_proto *tp, void *fh) +{ + if (tp->ops->put && fh) + tp->ops->put(tp, fh); } static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1530,6 +1986,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *fh; int err; int tp_created; + bool rtnl_held = false; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1546,7 +2003,9 @@ replay: prio = TC_H_MAJ(t->tcm_info); prio_allocate = false; parent = t->tcm_parent; + tp = NULL; cl = 0; + block = NULL; if (prio == 0) { /* If no priority is provided by the user, @@ -1563,8 +2022,27 @@ replay: /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if rtnl_held was set to true on previous iteration, + * block is shared (no qdisc found), qdisc is not unlocked, classifier + * type is not specified, classifier is not unlocked. + */ + if (rtnl_held || + (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -1583,40 +2061,62 @@ replay: goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, prio_allocate); if (IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = PTR_ERR(tp); - goto errout; + goto errout_locked; } if (tp == NULL) { + struct tcf_proto *tp_new = NULL; + + if (chain->flushing) { + err = -EAGAIN; + goto errout_locked; + } + /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND] == NULL || !protocol) { NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified"); err = -EINVAL; - goto errout; + goto errout_locked; } if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; - goto errout; + goto errout_locked; } if (prio_allocate) - prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info)); + prio = tcf_auto_prio(tcf_chain_tp_prev(chain, + &chain_info)); - tp = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, prio, chain, extack); + mutex_unlock(&chain->filter_chain_lock); + tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]), + protocol, prio, chain, rtnl_held, + extack); + if (IS_ERR(tp_new)) { + err = PTR_ERR(tp_new); + goto errout_tp; + } + + tp_created = 1; + tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio, + rtnl_held); if (IS_ERR(tp)) { err = PTR_ERR(tp); - goto errout; + goto errout_tp; } - tp_created = 1; - } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { + } else { + mutex_unlock(&chain->filter_chain_lock); + } + + if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout; @@ -1631,6 +2131,7 @@ replay: goto errout; } } else if (n->nlmsg_flags & NLM_F_EXCL) { + tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Filter already exists"); err = -EEXIST; goto errout; @@ -1644,25 +2145,41 @@ replay: err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, - extack); + rtnl_held, extack); if (err == 0) { - if (tp_created) - tcf_chain_tp_insert(chain, &chain_info, tp); tfilter_notify(net, skb, n, tp, block, q, parent, fh, - RTM_NEWTFILTER, false); - } else { - if (tp_created) - tcf_proto_destroy(tp, NULL); + RTM_NEWTFILTER, false, rtnl_held); + tfilter_put(tp, fh); } errout: - if (chain) - tcf_chain_put(chain); - tcf_block_release(q, block); - if (err == -EAGAIN) + if (err && tp_created) + tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL); +errout_tp: + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, rtnl_held, NULL); + if (!tp_created) + tcf_chain_put(chain); + } + tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + + if (err == -EAGAIN) { + /* Take rtnl lock in case EAGAIN is caused by concurrent flush + * of target chain. + */ + rtnl_held = true; /* Replay the request. */ goto replay; + } return err; + +errout_locked: + mutex_unlock(&chain->filter_chain_lock); + goto errout; } static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1678,11 +2195,12 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; - struct tcf_block *block; + struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; + bool rtnl_held = false; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1703,8 +2221,27 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc + * found), qdisc is not unlocked, classifier type is not specified, + * classifier is not unlocked. + */ + if (!prio || + (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -1732,56 +2269,69 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (prio == 0) { tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER); - tcf_chain_flush(chain); + chain, RTM_DELTFILTER, rtnl_held); + tcf_chain_flush(chain, rtnl_held); err = 0; goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); if (!tp || IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = tp ? PTR_ERR(tp) : -ENOENT; - goto errout; + goto errout_locked; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; + goto errout_locked; + } else if (t->tcm_handle == 0) { + tcf_chain_tp_remove(chain, &chain_info, tp); + mutex_unlock(&chain->filter_chain_lock); + + tcf_proto_put(tp, rtnl_held, NULL); + tfilter_notify(net, skb, n, tp, block, q, parent, fh, + RTM_DELTFILTER, false, rtnl_held); + err = 0; goto errout; } + mutex_unlock(&chain->filter_chain_lock); fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { - if (t->tcm_handle == 0) { - tcf_chain_tp_remove(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, block, q, parent, fh, - RTM_DELTFILTER, false); - tcf_proto_destroy(tp, extack); - err = 0; - } else { - NL_SET_ERR_MSG(extack, "Specified filter handle not found"); - err = -ENOENT; - } + NL_SET_ERR_MSG(extack, "Specified filter handle not found"); + err = -ENOENT; } else { bool last; err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh, false, &last, - extack); + rtnl_held, extack); + if (err) goto errout; - if (last) { - tcf_chain_tp_remove(chain, &chain_info, tp); - tcf_proto_destroy(tp, extack); - } + if (last) + tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack); } errout: - if (chain) + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); - tcf_block_release(q, block); + } + tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + return err; + +errout_locked: + mutex_unlock(&chain->filter_chain_lock); + goto errout; } static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1797,11 +2347,12 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; - struct tcf_block *block; + struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; + bool rtnl_held = false; err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -1819,8 +2370,26 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not + * unlocked, classifier type is not specified, classifier is not + * unlocked. + */ + if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -1839,8 +2408,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); + mutex_unlock(&chain->filter_chain_lock); if (!tp || IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = tp ? PTR_ERR(tp) : -ENOENT; @@ -1858,15 +2429,23 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, err = -ENOENT; } else { err = tfilter_notify(net, skb, n, tp, block, q, parent, - fh, RTM_NEWTFILTER, true); + fh, RTM_NEWTFILTER, true, rtnl_held); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); } + tfilter_put(tp, fh); errout: - if (chain) + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); - tcf_block_release(q, block); + } + tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + return err; } @@ -1887,7 +2466,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWTFILTER); + RTM_NEWTFILTER, true); } static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, @@ -1897,11 +2476,15 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, struct net *net = sock_net(skb->sk); struct tcf_block *block = chain->block; struct tcmsg *tcm = nlmsg_data(cb->nlh); + struct tcf_proto *tp, *tp_prev; struct tcf_dump_args arg; - struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next), (*p_index)++) { + for (tp = __tcf_get_next_proto(chain, NULL); + tp; + tp_prev = tp, + tp = __tcf_get_next_proto(chain, tp), + tcf_proto_put(tp_prev, true, NULL), + (*p_index)++) { if (*p_index < index_start) continue; if (TC_H_MAJ(tcm->tcm_info) && @@ -1917,9 +2500,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, if (tcf_fill_node(net, skb, tp, block, q, parent, NULL, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWTFILTER) <= 0) - return false; - + RTM_NEWTFILTER, true) <= 0) + goto errout; cb->args[1] = 1; } if (!tp->ops->walk) @@ -1934,23 +2516,27 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, arg.w.skip = cb->args[1] - 1; arg.w.count = 0; arg.w.cookie = cb->args[2]; - tp->ops->walk(tp, &arg.w); + tp->ops->walk(tp, &arg.w, true); cb->args[2] = arg.w.cookie; cb->args[1] = arg.w.count + 1; if (arg.w.stop) - return false; + goto errout; } return true; + +errout: + tcf_proto_put(tp, true, NULL); + return false; } /* called with RTNL */ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; - struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); long index_start; long index; @@ -2014,19 +2600,24 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) index_start = cb->args[0]; index = 0; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { if (tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; if (!tcf_chain_dump(chain, q, parent, skb, cb, index_start, &index)) { + tcf_chain_put(chain); err = -EMSGSIZE; break; } } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); cb->args[0] = index; out: @@ -2036,8 +2627,10 @@ out: return skb->len; } -static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, - struct sk_buff *skb, struct tcf_block *block, +static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct net *net, struct sk_buff *skb, + struct tcf_block *block, u32 portid, u32 seq, u16 flags, int event) { unsigned char *b = skb_tail_pointer(skb); @@ -2046,8 +2639,8 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, struct tcmsg *tcm; void *priv; - ops = chain->tmplt_ops; - priv = chain->tmplt_priv; + ops = tmplt_ops; + priv = tmplt_priv; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) @@ -2065,7 +2658,7 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, tcm->tcm_block_index = block->index; } - if (nla_put_u32(skb, TCA_CHAIN, chain->index)) + if (nla_put_u32(skb, TCA_CHAIN, chain_index)) goto nla_put_failure; if (ops) { @@ -2096,7 +2689,8 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tc_chain_fill_node(chain, net, skb, block, portid, + if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, + chain->index, net, skb, block, portid, seq, flags, event) <= 0) { kfree_skb(skb); return -EINVAL; @@ -2108,6 +2702,31 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); } +static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct tcf_block *block, struct sk_buff *oskb, + u32 seq, u16 flags, bool unicast) +{ + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct net *net = block->net; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb, + block, portid, seq, flags, RTM_DELCHAIN) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); +} + static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, struct nlattr **tca, struct netlink_ext_ack *extack) @@ -2119,7 +2738,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, if (!tca[TCA_KIND]) return 0; - ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack); + ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack); if (IS_ERR(ops)) return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { @@ -2137,16 +2756,15 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, return 0; } -static void tc_chain_tmplt_del(struct tcf_chain *chain) +static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv) { - const struct tcf_proto_ops *ops = chain->tmplt_ops; - /* If template ops are set, no work to do for us. */ - if (!ops) + if (!tmplt_ops) return; - ops->tmplt_destroy(chain->tmplt_priv); - module_put(ops->owner); + tmplt_ops->tmplt_destroy(tmplt_priv); + module_put(tmplt_ops->owner); } /* Add/delete/get a chain */ @@ -2189,6 +2807,8 @@ replay: err = -EINVAL; goto errout_block; } + + mutex_lock(&block->lock); chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { if (chain) { @@ -2200,54 +2820,61 @@ replay: } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); err = -EEXIST; - goto errout_block; + goto errout_block_locked; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); err = -ENOENT; - goto errout_block; + goto errout_block_locked; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); err = -ENOMEM; - goto errout_block; + goto errout_block_locked; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -EINVAL; - goto errout_block; + goto errout_block_locked; } tcf_chain_hold(chain); } + if (n->nlmsg_type == RTM_NEWCHAIN) { + /* Modifying chain requires holding parent block lock. In case + * the chain was successfully added, take a reference to the + * chain. This ensures that an empty chain does not disappear at + * the end of this function. + */ + tcf_chain_hold(chain); + chain->explicitly_created = true; + } + mutex_unlock(&block->lock); + switch (n->nlmsg_type) { case RTM_NEWCHAIN: err = tc_chain_tmplt_add(chain, net, tca, extack); - if (err) + if (err) { + tcf_chain_put_explicitly_created(chain); goto errout; - /* In case the chain was successfully added, take a reference - * to the chain. This ensures that an empty chain - * does not disappear at the end of this function. - */ - tcf_chain_hold(chain); - chain->explicitly_created = true; + } + tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false); break; case RTM_DELCHAIN: tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER); + chain, RTM_DELTFILTER, true); /* Flush the chain first as the user requested chain removal. */ - tcf_chain_flush(chain); + tcf_chain_flush(chain, true); /* In case the chain was successfully deleted, put a reference * to the chain previously taken during addition. */ tcf_chain_put_explicitly_created(chain); - chain->explicitly_created = false; break; case RTM_GETCHAIN: err = tc_chain_notify(chain, skb, n->nlmsg_seq, @@ -2264,21 +2891,25 @@ replay: errout: tcf_chain_put(chain); errout_block: - tcf_block_release(q, block); + tcf_block_release(q, block, true); if (err == -EAGAIN) /* Replay the request. */ goto replay; return err; + +errout_block_locked: + mutex_unlock(&block->lock); + goto errout_block; } /* called with RTNL */ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; - struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); long index_start; long index; @@ -2342,7 +2973,11 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index_start = cb->args[0]; index = 0; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { if ((tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index)) continue; @@ -2350,19 +2985,20 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index++; continue; } - if (tcf_chain_held_by_acts_only(chain)) - continue; - err = tc_chain_fill_node(chain, net, skb, block, + err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, + chain->index, net, skb, block, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWCHAIN); - if (err <= 0) + if (err <= 0) { + tcf_chain_put(chain); break; + } index++; } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); cb->args[0] = index; out: @@ -2384,7 +3020,7 @@ EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -2394,7 +3030,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, "police", ovr, - TCA_ACT_BIND, true, extack); + TCA_ACT_BIND, rtnl_held, + extack); if (IS_ERR(act)) return PTR_ERR(act); @@ -2406,8 +3043,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - exts->actions, &attr_size, true, - extack); + exts->actions, &attr_size, + rtnl_held, extack); if (err < 0) return err; exts->nr_actions = err; @@ -2671,10 +3308,12 @@ static int __init tc_filter_init(void) if (err) goto err_rhash_setup_block_ht; - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, + RTNL_FLAG_DOIT_UNLOCKED); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, + RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, - tc_dump_tfilter, 0); + tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain, diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 4a57fec6f306..2383f449d2bc 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -107,7 +107,8 @@ static void basic_delete_filter_work(struct work_struct *work) rtnl_unlock(); } -static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void basic_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; @@ -126,7 +127,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int basic_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f = arg; @@ -153,7 +154,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); if (err < 0) return err; @@ -173,7 +174,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); @@ -247,7 +248,8 @@ errout: return err; } -static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; @@ -274,7 +276,7 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl) } static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_basic_pcnt gpf = {}; struct basic_filter *f = fh; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index a95cb240a606..062350c6621c 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -298,7 +298,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog, } static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -307,7 +307,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, return 0; } -static void cls_bpf_destroy(struct tcf_proto *tp, +static void cls_bpf_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -417,7 +417,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack); + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true, + extack); if (ret < 0) return ret; @@ -455,7 +456,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *oldprog = *arg; @@ -575,7 +577,7 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, } static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *tm) + struct sk_buff *skb, struct tcmsg *tm, bool rtnl_held) { struct cls_bpf_prog *prog = fh; struct nlattr *nest; @@ -635,7 +637,8 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl) prog->res.class = cl; } -static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 3bc01bdde165..1cef3b416094 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -78,7 +78,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work) static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, + void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; @@ -110,7 +110,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr, - extack); + true, extack); if (err < 0) goto errout; @@ -130,7 +130,7 @@ errout: return err; } -static void cls_cgroup_destroy(struct tcf_proto *tp, +static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -145,12 +145,13 @@ static void cls_cgroup_destroy(struct tcf_proto *tp, } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -166,7 +167,7 @@ skip: } static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct nlattr *nest; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2bb043cd436b..204e2edae8d5 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -391,7 +391,8 @@ static void flow_destroy_filter_work(struct work_struct *work) static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; @@ -445,7 +446,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, goto err2; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr, - extack); + true, extack); if (err < 0) goto err2; @@ -566,7 +567,7 @@ err1: } static int flow_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f = arg; @@ -590,7 +591,8 @@ static int flow_init(struct tcf_proto *tp) return 0; } -static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void flow_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; @@ -617,7 +619,7 @@ static void *flow_get(struct tcf_proto *tp, u32 handle) } static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct flow_filter *f = fh; struct nlattr *nest; @@ -677,7 +679,8 @@ nla_put_failure: return -1; } -static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index c5d1db3a3db7..640f83e7f93f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -396,7 +396,11 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); - return err; + if (skip_sw) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + return 0; } err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); @@ -465,7 +469,8 @@ static void fl_destroy_sleepable(struct work_struct *work) module_put(THIS_MODULE); } -static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct fl_flow_mask *mask, *next_mask; @@ -1272,7 +1277,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, + extack); if (err < 0) return err; @@ -1299,7 +1305,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = *arg; @@ -1385,7 +1392,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!tc_skip_hw(fnew->flags)) { err = fl_hw_replace_filter(tp, fnew, extack); if (err) - goto errout_mask; + goto errout_mask_ht; } if (!tc_in_hw(fnew->flags)) @@ -1415,6 +1422,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, kfree(mask); return 0; +errout_mask_ht: + rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); + errout_mask: fl_mask_put(head, fnew->mask, false); @@ -1432,7 +1443,7 @@ errout_mask_alloc: } static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = arg; @@ -1444,7 +1455,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, return 0; } -static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; @@ -1495,7 +1507,11 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, &f->exts); if (err) { kfree(cls_flower.rule); - return err; + if (tc_skip_sw(f->flags)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + continue; } cls_flower.classid = f->res.classid; @@ -2039,7 +2055,7 @@ nla_put_failure: } static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_fl_filter *f = fh; struct nlattr *nest; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 29eeeaf3ea44..317151bae73b 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -139,7 +139,8 @@ static void fw_delete_filter_work(struct work_struct *work) rtnl_unlock(); } -static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void fw_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; @@ -163,7 +164,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int fw_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = arg; @@ -217,7 +218,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, int err; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr, - extack); + true, extack); if (err < 0) return err; @@ -250,7 +251,8 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, - bool ovr, struct netlink_ext_ack *extack) + bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = *arg; @@ -354,7 +356,8 @@ errout: return err; } -static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct fw_head *head = rtnl_dereference(tp->root); int h; @@ -384,7 +387,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = fh; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index a1b803fd372e..a37137430e61 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -109,7 +109,8 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return 0; } -static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void mall_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); @@ -145,7 +146,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true, + extack); if (err < 0) return err; @@ -159,7 +161,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_MATCHALL_MAX + 1]; @@ -232,12 +235,13 @@ err_exts_init: } static int mall_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_mall_head *head = rtnl_dereference(tp->root); @@ -279,7 +283,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, } static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_matchall_pcnt gpf = {}; struct cls_mall_head *head = fh; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 0404aa5fa7cb..e590c3a2999d 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -276,7 +276,8 @@ static void route4_queue_work(struct route4_filter *f) tcf_queue_work(&f->rwork, route4_delete_filter_work); } -static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void route4_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; @@ -312,7 +313,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int route4_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter *f = arg; @@ -393,7 +394,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); if (err < 0) return err; @@ -468,7 +469,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -560,7 +561,8 @@ errout: return err; } -static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct route4_head *head = rtnl_dereference(tp->root); unsigned int h, h1; @@ -597,7 +599,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct route4_filter *f = fh; struct nlattr *nest; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index e9ccf7daea7d..4d3836178fa5 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -312,7 +312,8 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) __rsvp_delete_filter(f); } -static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; @@ -341,7 +342,7 @@ static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_filter *nfp, *f = arg; @@ -477,7 +478,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); struct rsvp_filter *f, *nfp; @@ -502,7 +504,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true, + extack); if (err < 0) goto errout2; @@ -654,7 +657,8 @@ errout2: return err; } -static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct rsvp_head *head = rtnl_dereference(tp->root); unsigned int h, h1; @@ -688,7 +692,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct rsvp_filter *f = fh; struct rsvp_session *s; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 9ccc93f257db..e1981628047b 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -173,7 +173,7 @@ static void tcindex_destroy_fexts_work(struct work_struct *work) } static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = arg; @@ -226,7 +226,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp, { bool last; - return tcindex_delete(tp, arg, &last, NULL); + return tcindex_delete(tp, arg, &last, false, NULL); } static void __tcindex_destroy(struct rcu_head *head) @@ -275,7 +275,7 @@ static void tcindex_free_perfect_hash(struct tcindex_data *cp) kfree(cp->perfect); } -static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) +static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp) { int i, err = 0; @@ -289,6 +289,9 @@ static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) goto errout; +#ifdef CONFIG_NET_CLS_ACT + cp->perfect[i].exts.net = net; +#endif } return 0; @@ -305,16 +308,16 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) { struct tcindex_filter_result new_filter_result, *old_r = r; - struct tcindex_filter_result cr; struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ + struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e; err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack); if (err < 0) goto errout; @@ -337,7 +340,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (p->perfect) { int i; - if (tcindex_alloc_perfect_hash(cp) < 0) + if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout; for (i = 0; i < cp->hash; i++) cp->perfect[i].res = p->perfect[i].res; @@ -348,11 +351,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcindex_filter_result_init(&new_filter_result); if (err < 0) goto errout1; - err = tcindex_filter_result_init(&cr); - if (err < 0) - goto errout1; if (old_r) - cr.res = r->res; + cr = r->res; if (tb[TCA_TCINDEX_HASH]) cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -406,7 +406,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp->perfect && !cp->h) { if (valid_perfect_hash(cp)) { - if (tcindex_alloc_perfect_hash(cp) < 0) + if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout_alloc; balloc = 1; } else { @@ -443,8 +443,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (tb[TCA_TCINDEX_CLASSID]) { - cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); - tcf_bind_filter(tp, &cr.res, base); + cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); + tcf_bind_filter(tp, &cr, base); } if (old_r && old_r != r) { @@ -456,7 +456,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } oldp = p; - r->res = cr.res; + r->res = cr; tcf_exts_change(&r->exts, &e); rcu_assign_pointer(tp->root, cp); @@ -475,6 +475,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, ; /* nothing */ rcu_assign_pointer(*fp, f); + } else { + tcf_exts_destroy(&new_filter_result.exts); } if (oldp) @@ -487,7 +489,6 @@ errout_alloc: else if (balloc == 2) kfree(cp->h); errout1: - tcf_exts_destroy(&cr.exts); tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); @@ -499,7 +500,7 @@ static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -522,7 +523,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, tca[TCA_RATE], ovr, extack); } -static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) +static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, + bool rtnl_held) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter *f, *next; @@ -558,7 +560,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } -static void tcindex_destroy(struct tcf_proto *tp, +static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); @@ -568,14 +570,14 @@ static void tcindex_destroy(struct tcf_proto *tp, walker.count = 0; walker.skip = 0; walker.fn = tcindex_destroy_element; - tcindex_walk(tp, &walker); + tcindex_walk(tp, &walker, true); call_rcu(&p->rcu, __tcindex_destroy); } static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = fh; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index dcea21004604..27d29c04dcc9 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -629,7 +629,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, return -ENOENT; } -static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); @@ -663,7 +664,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = arg; struct tc_u_common *tp_c = tp->data; @@ -726,7 +727,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack); if (err < 0) return err; @@ -858,7 +859,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, + struct nlattr **tca, void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; @@ -1123,7 +1124,8 @@ erridr: return err; } -static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -1281,7 +1283,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl) } static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_u_knode *n = fh; struct tc_u_hnode *ht_up, *ht_down; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 03e26e8d0ec9..2283924fb56d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1909,17 +1909,19 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, block = cops->tcf_block(q, cl, NULL); if (!block) return; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = tcf_get_next_chain(block, NULL); + chain; + chain = tcf_get_next_chain(block, chain)) { struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) { + for (tp = tcf_get_next_proto(chain, NULL, true); + tp; tp = tcf_get_next_proto(chain, tp, true)) { struct tcf_bind_args arg = {}; arg.w.fn = tcf_node_bind; arg.classid = clid; arg.cl = new_cl; - tp->ops->walk(tp, &arg.w); + tp->ops->walk(tp, &arg.w, true); } } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 66ba2ce2320f..38e5add14fab 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -500,7 +500,7 @@ static void dev_watchdog_down(struct net_device *dev) * netif_carrier_on - set carrier * @dev: network device * - * Device has detected that carrier. + * Device has detected acquisition of carrier. */ void netif_carrier_on(struct net_device *dev) { @@ -1366,7 +1366,11 @@ static void mini_qdisc_rcu_func(struct rcu_head *head) void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head) { - struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq); + /* Protected with chain0->filter_chain_lock. + * Can't access chain directly because tp_head can be NULL. + */ + struct mini_Qdisc *miniq_old = + rcu_dereference_protected(*miniqp->p_miniq, 1); struct mini_Qdisc *miniq; if (!tp_head) { diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 078f01a8d582..435847d98b51 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -256,6 +256,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) + nla_total_size(sizeof(struct inet_diag_meminfo)) diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 123e9f2dc226..edfcf16e704c 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; skb->csum_not_inet = 0; + gso_reset_checksum(skb, ~0); return sctp_compute_cksum(skb, skb_transport_offset(skb)); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9644bdc8e85c..a78e55a1bb9c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2027,7 +2027,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_transport *transport = NULL; struct sctp_sndrcvinfo _sinfo, *sinfo; - struct sctp_association *asoc; + struct sctp_association *asoc, *tmp; struct sctp_cmsgs cmsgs; union sctp_addr *daddr; bool new = false; @@ -2053,7 +2053,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) /* SCTP_SENDALL process */ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { - list_for_each_entry(asoc, &ep->asocs, asocs) { + list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) { err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); if (err == 0) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 80e0ae5534ec..2936ed17bf9e 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -84,6 +84,19 @@ static void fa_zero(struct flex_array *fa, size_t index, size_t count) } } +static size_t fa_index(struct flex_array *fa, void *elem, size_t count) +{ + size_t index = 0; + + while (count--) { + if (elem == flex_array_get(fa, index)) + break; + index++; + } + + return index; +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. @@ -131,8 +144,10 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, } } - for (i = outcnt; i < stream->outcnt; i++) + for (i = outcnt; i < stream->outcnt; i++) { kfree(SCTP_SO(stream, i)->ext); + SCTP_SO(stream, i)->ext = NULL; + } } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, @@ -147,6 +162,13 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, if (stream->out) { fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt)); + if (stream->out_curr) { + size_t index = fa_index(stream->out, stream->out_curr, + stream->outcnt); + + BUG_ON(index == stream->outcnt); + stream->out_curr = flex_array_get(out, index); + } fa_free(stream->out); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 48ea7669161f..46fa9f3016cc 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1523,6 +1523,11 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, smc = smc_sk(sk); lock_sock(sk); + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) || (sk->sk_state == SMC_CLOSED)) @@ -1858,7 +1863,11 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, smc = smc_sk(sk); lock_sock(sk); - + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if (sk->sk_state == SMC_INIT || sk->sk_state == SMC_LISTEN || sk->sk_state == SMC_CLOSED) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index db83332ac1c8..d0b0f4c865b4 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -21,13 +21,6 @@ /********************************** send *************************************/ -struct smc_cdc_tx_pend { - struct smc_connection *conn; /* socket connection */ - union smc_host_cursor cursor; /* tx sndbuf cursor sent */ - union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ - u16 ctrl_seq; /* conn. tx sequence # */ -}; - /* handler for send/transmission completion of a CDC msg */ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_link *link, @@ -61,12 +54,14 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend) { struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; int rc; rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, + wr_rdma_buf, (struct smc_wr_tx_pend_priv **)pend); if (!conn->alert_token_local) /* abnormal termination */ @@ -96,6 +91,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend) { + union smc_host_cursor cfed; struct smc_link *link; int rc; @@ -105,12 +101,12 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; - smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, - &conn->local_tx_ctrl, conn); + smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); - if (!rc) - smc_curs_copy(&conn->rx_curs_confirmed, - &conn->local_tx_ctrl.cons, conn); + if (!rc) { + smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; + } return rc; } @@ -121,11 +117,14 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend); if (rc) return rc; - return smc_cdc_msg_send(conn, wr_buf, pend); + spin_lock_bh(&conn->send_lock); + rc = smc_cdc_msg_send(conn, wr_buf, pend); + spin_unlock_bh(&conn->send_lock); + return rc; } int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) @@ -195,6 +194,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn) if (rc) return rc; smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn); + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; /* Calculate transmitted data and increment free send buffer space */ diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, &conn->tx_curs_sent); @@ -271,26 +271,18 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smp_mb__after_atomic(); smc->sk.sk_data_ready(&smc->sk); } else { - if (conn->local_rx_ctrl.prod_flags.write_blocked || - conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || - conn->local_rx_ctrl.prod_flags.urg_data_pending) { - if (conn->local_rx_ctrl.prod_flags.urg_data_pending) - conn->urg_state = SMC_URG_NOTYET; - /* force immediate tx of current consumer cursor, but - * under send_lock to guarantee arrival in seqno-order - */ - if (smc->sk.sk_state != SMC_INIT) - smc_tx_sndbuf_nonempty(conn); - } + if (conn->local_rx_ctrl.prod_flags.write_blocked) + smc->sk.sk_data_ready(&smc->sk); + if (conn->local_rx_ctrl.prod_flags.urg_data_pending) + conn->urg_state = SMC_URG_NOTYET; } - /* piggy backed tx info */ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ - if (diff_cons && smc_tx_prepared_sends(conn)) { + if ((diff_cons && smc_tx_prepared_sends(conn)) || + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || + conn->local_rx_ctrl.prod_flags.urg_data_pending) smc_tx_sndbuf_nonempty(conn); - /* trigger socket release if connection closed */ - smc_close_wake_tx_prepared(smc); - } + if (diff_cons && conn->urg_tx_pend && atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) { /* urg data confirmed by peer, indicate we're ready for more */ diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index e8c214b992b6..861dc24c588c 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -160,7 +160,9 @@ static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt, #endif } -/* calculate cursor difference between old and new, where old <= new */ +/* calculate cursor difference between old and new, where old <= new and + * difference cannot exceed size + */ static inline int smc_curs_diff(unsigned int size, union smc_host_cursor *old, union smc_host_cursor *new) @@ -185,28 +187,51 @@ static inline int smc_curs_comp(unsigned int size, return smc_curs_diff(size, old, new); } +/* calculate cursor difference between old and new, where old <= new and + * difference may exceed size + */ +static inline int smc_curs_diff_large(unsigned int size, + union smc_host_cursor *old, + union smc_host_cursor *new) +{ + if (old->wrap < new->wrap) + return min_t(int, + (size - old->count) + new->count + + (new->wrap - old->wrap - 1) * size, + size); + + if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */ + return min_t(int, + (size - old->count) + new->count + + (new->wrap + 0xffff - old->wrap) * size, + size); + + return max_t(int, 0, (new->count - old->count)); +} + static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer, union smc_host_cursor *local, + union smc_host_cursor *save, struct smc_connection *conn) { - union smc_host_cursor temp; - - smc_curs_copy(&temp, local, conn); - peer->count = htonl(temp.count); - peer->wrap = htons(temp.wrap); + smc_curs_copy(save, local, conn); + peer->count = htonl(save->count); + peer->wrap = htons(save->wrap); /* peer->reserved = htons(0); must be ensured by caller */ } static inline void smc_host_msg_to_cdc(struct smc_cdc_msg *peer, - struct smc_host_cdc_msg *local, - struct smc_connection *conn) + struct smc_connection *conn, + union smc_host_cursor *save) { + struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; + peer->common.type = local->common.type; peer->len = local->len; peer->seqno = htons(local->seqno); peer->token = htonl(local->token); - smc_host_cursor_to_cdc(&peer->prod, &local->prod, conn); - smc_host_cursor_to_cdc(&peer->cons, &local->cons, conn); + smc_host_cursor_to_cdc(&peer->prod, &local->prod, save, conn); + smc_host_cursor_to_cdc(&peer->cons, &local->cons, save, conn); peer->prod_flags = local->prod_flags; peer->conn_state_flags = local->conn_state_flags; } @@ -271,10 +296,16 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, smcr_cdc_msg_to_host(local, peer, conn); } -struct smc_cdc_tx_pend; +struct smc_cdc_tx_pend { + struct smc_connection *conn; /* socket connection */ + union smc_host_cursor cursor; /* tx sndbuf cursor sent */ + union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ + u16 ctrl_seq; /* conn. tx sequence # */ +}; int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 776e9dfc915d..d53fd588d1f5 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) vec.iov_len = sizeof(struct smc_clc_msg_decline); len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(struct smc_clc_msg_decline)); - if (len < sizeof(struct smc_clc_msg_decline)) + if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) len = -EPROTO; return len > 0 ? 0 : len; } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 0e60dd741698..2ad37e998509 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -345,14 +345,7 @@ static void smc_close_passive_work(struct work_struct *work) switch (sk->sk_state) { case SMC_INIT: - if (atomic_read(&conn->bytes_to_rcv) || - (rxflags->peer_done_writing && - !smc_cdc_rxed_any_close(conn))) { - sk->sk_state = SMC_APPCLOSEWAIT1; - } else { - sk->sk_state = SMC_CLOSED; - sock_put(sk); /* passive closing */ - } + sk->sk_state = SMC_APPCLOSEWAIT1; break; case SMC_ACTIVE: sk->sk_state = SMC_APPCLOSEWAIT1; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index a1a6d351ae1b..53a17cfa61af 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -127,6 +127,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!lgr) + return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); @@ -158,8 +160,6 @@ static void smc_lgr_free_work(struct work_struct *work) bool conns; spin_lock_bh(&smc_lgr_list.lock); - if (list_empty(&lgr->list)) - goto free; read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); @@ -167,8 +167,8 @@ static void smc_lgr_free_work(struct work_struct *work) spin_unlock_bh(&smc_lgr_list.lock); return; } - list_del_init(&lgr->list); /* remove from smc_lgr_list */ -free: + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); /* remove from smc_lgr_list */ spin_unlock_bh(&smc_lgr_list.lock); if (!lgr->is_smcd && !lgr->terminating) { @@ -299,13 +299,13 @@ static void smc_buf_unuse(struct smc_connection *conn, conn->sndbuf_desc->used = 0; if (conn->rmb_desc) { if (!conn->rmb_desc->regerr) { - conn->rmb_desc->used = 0; if (!lgr->is_smcd) { /* unregister rmb with peer */ smc_llc_do_delete_rkey( &lgr->lnk[SMC_SINGLE_LINK], conn->rmb_desc); } + conn->rmb_desc->used = 0; } else { /* buf registration failed, reuse not possible */ write_lock_bh(&lgr->rmbs_lock); @@ -629,6 +629,8 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); write_unlock_bh(&lgr->conns_lock); break; } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index b00287989a3d..8806d2afa6ed 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -52,6 +52,24 @@ enum smc_wr_reg_state { FAILED /* ib_wr_reg_mr response: failure */ }; +struct smc_rdma_sge { /* sges for RDMA writes */ + struct ib_sge wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE]; +}; + +#define SMC_MAX_RDMA_WRITES 2 /* max. # of RDMA writes per + * message send + */ + +struct smc_rdma_sges { /* sges per message send */ + struct smc_rdma_sge tx_rdma_sge[SMC_MAX_RDMA_WRITES]; +}; + +struct smc_rdma_wr { /* work requests per message + * send + */ + struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES]; +}; + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -64,6 +82,8 @@ struct smc_link { struct smc_wr_buf *wr_tx_bufs; /* WR send payload buffers */ struct ib_send_wr *wr_tx_ibs; /* WR send meta data */ struct ib_sge *wr_tx_sges; /* WR send gather meta data */ + struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/ + struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */ struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ /* above four vectors have wr_tx_cnt elements and use the same index */ dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e519ef29c0ff..0b244be24fe0 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -257,12 +257,20 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, smcibdev = container_of(handler, struct smc_ib_device, event_handler); switch (ibevent->event) { - case IB_EVENT_PORT_ERR: case IB_EVENT_DEVICE_FATAL: + /* terminate all ports on device */ + for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + break; + case IB_EVENT_PORT_ERR: case IB_EVENT_PORT_ACTIVE: + case IB_EVENT_GID_CHANGE: port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); + if (port_idx < SMC_MAX_PORTS) { + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + } break; default: break; @@ -289,18 +297,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { - struct smc_ib_device *smcibdev = - (struct smc_ib_device *)ibevent->device; + struct smc_link *lnk = (struct smc_link *)priv; + struct smc_ib_device *smcibdev = lnk->smcibdev; u8 port_idx; switch (ibevent->event) { - case IB_EVENT_DEVICE_FATAL: - case IB_EVENT_GID_CHANGE: - case IB_EVENT_PORT_ERR: + case IB_EVENT_QP_FATAL: case IB_EVENT_QP_ACCESS_ERR: - port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); + port_idx = ibevent->element.qp->port - 1; + if (port_idx < SMC_MAX_PORTS) { + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + } break; default: break; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index a6d3623d06f4..4fd60c522802 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -166,7 +166,8 @@ static int smc_llc_add_pending_send(struct smc_link *link, { int rc; - rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend); + rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL, + pend); if (rc < 0) return rc; BUILD_BUG_ON_MSG( diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7cb3e4f07c10..632c3109dee5 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -27,7 +27,7 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, - .len = SMC_MAX_PNETID_LEN - 1 + .len = SMC_MAX_PNETID_LEN }, [SMC_PNETID_ETHNAME] = { .type = NLA_NUL_STRING, diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index d8366ed51757..a3bff08ff8c8 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -24,10 +24,11 @@ #include "smc.h" #include "smc_wr.h" #include "smc_cdc.h" +#include "smc_close.h" #include "smc_ism.h" #include "smc_tx.h" -#define SMC_TX_WORK_DELAY HZ +#define SMC_TX_WORK_DELAY 0 #define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */ /***************************** sndbuf producer *******************************/ @@ -165,12 +166,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) conn->local_tx_ctrl.prod_flags.urg_data_pending = 1; if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) { + if (send_done) + return send_done; rc = smc_tx_wait(smc, msg->msg_flags); - if (rc) { - if (send_done) - return send_done; + if (rc) goto out_err; - } continue; } @@ -267,27 +267,23 @@ int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, - int num_sges, struct ib_sge sges[]) + int num_sges, struct ib_rdma_wr *rdma_wr) { struct smc_link_group *lgr = conn->lgr; - struct ib_rdma_wr rdma_wr; struct smc_link *link; int rc; - memset(&rdma_wr, 0, sizeof(rdma_wr)); link = &lgr->lnk[SMC_SINGLE_LINK]; - rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link); - rdma_wr.wr.sg_list = sges; - rdma_wr.wr.num_sge = num_sges; - rdma_wr.wr.opcode = IB_WR_RDMA_WRITE; - rdma_wr.remote_addr = + rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link); + rdma_wr->wr.num_sge = num_sges; + rdma_wr->remote_addr = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr + /* RMBE within RMB */ conn->tx_off + /* offset within RMBE */ peer_rmbe_offset; - rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; - rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL); + rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; + rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); if (rc) { conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; smc_lgr_terminate(lgr); @@ -314,24 +310,25 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn, /* SMC-R helper for smc_tx_rdma_writes() */ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, size_t src_off, size_t src_len, - size_t dst_off, size_t dst_len) + size_t dst_off, size_t dst_len, + struct smc_rdma_wr *wr_rdma_buf) { dma_addr_t dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; int src_len_sum = src_len, dst_len_sum = dst_len; - struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; int sent_count = src_off; int srcchunk, dstchunk; int num_sges; int rc; for (dstchunk = 0; dstchunk < 2; dstchunk++) { + struct ib_sge *sge = + wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list; + num_sges = 0; for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sges[srcchunk].addr = dma_addr + src_off; - sges[srcchunk].length = src_len; - sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; + sge[srcchunk].addr = dma_addr + src_off; + sge[srcchunk].length = src_len; num_sges++; src_off += src_len; @@ -344,7 +341,8 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, src_len = dst_len - src_len; /* remainder */ src_len_sum += src_len; } - rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); + rc = smc_tx_rdma_write(conn, dst_off, num_sges, + &wr_rdma_buf->wr_tx_rdma[dstchunk]); if (rc) return rc; if (dst_len_sum == len) @@ -403,7 +401,8 @@ static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len, /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; * usable snd_wnd as max transmit */ -static int smc_tx_rdma_writes(struct smc_connection *conn) +static int smc_tx_rdma_writes(struct smc_connection *conn, + struct smc_rdma_wr *wr_rdma_buf) { size_t len, src_len, dst_off, dst_len; /* current chunk values */ union smc_host_cursor sent, prep, prod, cons; @@ -464,7 +463,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) dst_off, dst_len); else rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, - dst_off, dst_len); + dst_off, dst_len, wr_rdma_buf); if (rc) return rc; @@ -485,31 +484,30 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags; + struct smc_rdma_wr *wr_rdma_buf; struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; int rc; - spin_lock_bh(&conn->send_lock); - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); - if (smc->sk.sk_err == ECONNABORTED) { - rc = sock_error(&smc->sk); - goto out_unlock; - } + if (smc->sk.sk_err == ECONNABORTED) + return sock_error(&smc->sk); rc = 0; if (conn->alert_token_local) /* connection healthy */ mod_delayed_work(system_wq, &conn->tx_work, SMC_TX_WORK_DELAY); } - goto out_unlock; + return rc; } + spin_lock_bh(&conn->send_lock); if (!conn->local_tx_ctrl.prod_flags.urg_data_present) { - rc = smc_tx_rdma_writes(conn); + rc = smc_tx_rdma_writes(conn, wr_rdma_buf); if (rc) { smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], (struct smc_wr_tx_pend_priv *)pend); @@ -536,7 +534,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) spin_lock_bh(&conn->send_lock); if (!pflags->urg_data_present) - rc = smc_tx_rdma_writes(conn); + rc = smc_tx_rdma_writes(conn, NULL); if (!rc) rc = smcd_cdc_msg_send(conn); @@ -557,6 +555,12 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) else rc = smcr_tx_sndbuf_nonempty(conn); + if (!rc) { + /* trigger socket release if connection is closing */ + struct smc_sock *smc = container_of(conn, struct smc_sock, + conn); + smc_close_wake_tx_prepared(smc); + } return rc; } @@ -598,7 +602,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) if (to_confirm > conn->rmbe_update_limit) { smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn); sender_free = conn->rmb_desc->len - - smc_curs_diff(conn->rmb_desc->len, &prod, &cfed); + smc_curs_diff_large(conn->rmb_desc->len, + &cfed, &prod); } if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || @@ -612,9 +617,6 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) SMC_TX_WORK_DELAY); return; } - smc_curs_copy(&conn->rx_curs_confirmed, - &conn->local_tx_ctrl.cons, conn); - conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; } if (conn->local_rx_ctrl.prod_flags.write_blocked && !atomic_read(&conn->bytes_to_rcv)) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index c2694750a6a8..253aa75dc2b6 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -160,6 +160,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) * @link: Pointer to smc_link used to later send the message. * @handler: Send completion handler function pointer. * @wr_buf: Out value returns pointer to message buffer. + * @wr_rdma_buf: Out value returns pointer to rdma work request. * @wr_pend_priv: Out value returns pointer serving as handler context. * * Return: 0 on success, or -errno on error. @@ -167,6 +168,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_wr_tx_pend_priv **wr_pend_priv) { struct smc_wr_tx_pend *wr_pend; @@ -204,6 +206,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, wr_ib = &link->wr_tx_ibs[idx]; wr_ib->wr_id = wr_id; *wr_buf = &link->wr_tx_bufs[idx]; + if (wr_rdma_buf) + *wr_rdma_buf = &link->wr_tx_rdmas[idx]; *wr_pend_priv = &wr_pend->priv; return 0; } @@ -218,10 +222,10 @@ int smc_wr_tx_put_slot(struct smc_link *link, u32 idx = pend->idx; /* clear the full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[pend->idx], 0, - sizeof(link->wr_tx_pends[pend->idx])); - memset(&link->wr_tx_bufs[pend->idx], 0, - sizeof(link->wr_tx_bufs[pend->idx])); + memset(&link->wr_tx_pends[idx], 0, + sizeof(link->wr_tx_pends[idx])); + memset(&link->wr_tx_bufs[idx], 0, + sizeof(link->wr_tx_bufs[idx])); test_and_clear_bit(idx, link->wr_tx_mask); return 1; } @@ -465,12 +469,26 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE; lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE; lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey = + lnk->roce_pd->local_dma_lkey; lnk->wr_tx_ibs[i].next = NULL; lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i]; lnk->wr_tx_ibs[i].num_sge = 1; lnk->wr_tx_ibs[i].opcode = IB_WR_SEND; lnk->wr_tx_ibs[i].send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; + lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE; + lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE; + lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list = + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge; + lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list = + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge; } for (i = 0; i < lnk->wr_rx_cnt; i++) { lnk->wr_rx_sges[i].addr = @@ -521,8 +539,12 @@ void smc_wr_free_link_mem(struct smc_link *lnk) lnk->wr_tx_mask = NULL; kfree(lnk->wr_tx_sges); lnk->wr_tx_sges = NULL; + kfree(lnk->wr_tx_rdma_sges); + lnk->wr_tx_rdma_sges = NULL; kfree(lnk->wr_rx_sges); lnk->wr_rx_sges = NULL; + kfree(lnk->wr_tx_rdmas); + lnk->wr_tx_rdmas = NULL; kfree(lnk->wr_rx_ibs); lnk->wr_rx_ibs = NULL; kfree(lnk->wr_tx_ibs); @@ -552,10 +574,20 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_rx_ibs) goto no_mem_wr_tx_ibs; + link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT, + sizeof(link->wr_tx_rdmas[0]), + GFP_KERNEL); + if (!link->wr_tx_rdmas) + goto no_mem_wr_rx_ibs; + link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT, + sizeof(link->wr_tx_rdma_sges[0]), + GFP_KERNEL); + if (!link->wr_tx_rdma_sges) + goto no_mem_wr_tx_rdmas; link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]), GFP_KERNEL); if (!link->wr_tx_sges) - goto no_mem_wr_rx_ibs; + goto no_mem_wr_tx_rdma_sges; link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, sizeof(link->wr_rx_sges[0]), GFP_KERNEL); @@ -579,6 +611,10 @@ no_mem_wr_rx_sges: kfree(link->wr_rx_sges); no_mem_wr_tx_sges: kfree(link->wr_tx_sges); +no_mem_wr_tx_rdma_sges: + kfree(link->wr_tx_rdma_sges); +no_mem_wr_tx_rdmas: + kfree(link->wr_tx_rdmas); no_mem_wr_rx_ibs: kfree(link->wr_rx_ibs); no_mem_wr_tx_ibs: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 1d85bb14fd6f..09bf32fd3959 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -85,6 +85,7 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev); int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wrs, struct smc_wr_tx_pend_priv **wr_pend_priv); int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); diff --git a/net/socket.c b/net/socket.c index d51930689b98..643a1648fcc2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -963,8 +963,7 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, unsigned long arg, - unsigned int ifreq_size) + unsigned int cmd, unsigned long arg) { int err; void __user *argp = (void __user *)arg; @@ -990,11 +989,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, } else { struct ifreq ifr; bool need_copyout; - if (copy_from_user(&ifr, argp, ifreq_size)) + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) return -EFAULT; err = dev_ioctl(net, cmd, &ifr, &need_copyout); if (!err && need_copyout) - if (copy_to_user(argp, &ifr, ifreq_size)) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; } return err; @@ -1093,8 +1092,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = open_related_ns(&net->ns, get_net_ns); break; default: - err = sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct ifreq)); + err = sock_do_ioctl(net, sock, cmd, arg); break; } return err; @@ -2802,8 +2800,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock, int err; set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); set_fs(old_fs); if (!err) err = compat_put_timeval(&ktv, up); @@ -2819,8 +2816,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock, int err; set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); set_fs(old_fs); if (!err) err = compat_put_timespec(&kts, up); @@ -3016,6 +3012,54 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, return dev_ioctl(net, cmd, &ifreq, NULL); } +static int compat_ifreq_ioctl(struct net *net, struct socket *sock, + unsigned int cmd, + struct compat_ifreq __user *uifr32) +{ + struct ifreq __user *uifr; + int err; + + /* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. + */ + + uifr = compat_alloc_user_space(sizeof(*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) + return -EFAULT; + + err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); + + if (!err) { + switch (cmd) { + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFMEM: + case SIOCGIFHWADDR: + case SIOCGIFINDEX: + case SIOCGIFADDR: + case SIOCGIFBRDADDR: + case SIOCGIFDSTADDR: + case SIOCGIFNETMASK: + case SIOCGIFPFLAGS: + case SIOCGIFTXQLEN: + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCGIFNAME: + if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) + err = -EFAULT; + break; + } + } + return err; +} + static int compat_sioc_ifmap(struct net *net, unsigned int cmd, struct compat_ifreq __user *uifr32) { @@ -3131,8 +3175,7 @@ static int routing_ioctl(struct net *net, struct socket *sock, } set_fs(KERNEL_DS); - ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r, - sizeof(struct compat_ifreq)); + ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); set_fs(old_fs); out: @@ -3232,21 +3275,22 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFTXQLEN: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCGIFNAME: case SIOCSIFNAME: case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - case SIOCSARP: - case SIOCGARP: - case SIOCDARP: - case SIOCATMARK: case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - case SIOCGIFNAME: - return sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct compat_ifreq)); + return compat_ifreq_ioctl(net, sock, cmd, argp); + + case SIOCSARP: + case SIOCGARP: + case SIOCDARP: + case SIOCATMARK: + return sock_do_ioctl(net, sock, cmd, arg); } return -ENOIOCTLCMD; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index cf51b8f9b15f..1f200119268c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -537,6 +537,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, DMA_TO_DEVICE); } +/* If the xdr_buf has more elements than the device can + * transmit in a single RDMA Send, then the reply will + * have to be copied into a bounce buffer. + */ +static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, + struct xdr_buf *xdr, + __be32 *wr_lst) +{ + int elements; + + /* xdr->head */ + elements = 1; + + /* xdr->pages */ + if (!wr_lst) { + unsigned int remaining; + unsigned long pageoff; + + pageoff = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + ++elements; + remaining -= min_t(u32, PAGE_SIZE - pageoff, + remaining); + pageoff = 0; + } + } + + /* xdr->tail */ + if (xdr->tail[0].iov_len) + ++elements; + + /* assume 1 SGE is needed for the transport header */ + return elements >= rdma->sc_max_send_sges; +} + +/* The device is not capable of sending the reply directly. + * Assemble the elements of @xdr into the transport header + * buffer. + */ +static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt, + struct xdr_buf *xdr, __be32 *wr_lst) +{ + unsigned char *dst, *tailbase; + unsigned int taillen; + + dst = ctxt->sc_xprt_buf; + dst += ctxt->sc_sges[0].length; + + memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len); + dst += xdr->head[0].iov_len; + + tailbase = xdr->tail[0].iov_base; + taillen = xdr->tail[0].iov_len; + if (wr_lst) { + u32 xdrpad; + + xdrpad = xdr_padsize(xdr->page_len); + if (taillen && xdrpad) { + tailbase += xdrpad; + taillen -= xdrpad; + } + } else { + unsigned int len, remaining; + unsigned long pageoff; + struct page **ppages; + + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + pageoff = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + len = min_t(u32, PAGE_SIZE - pageoff, remaining); + + memcpy(dst, page_address(*ppages), len); + remaining -= len; + dst += len; + pageoff = 0; + } + } + + if (taillen) + memcpy(dst, tailbase, taillen); + + ctxt->sc_sges[0].length += xdr->len; + ib_dma_sync_single_for_device(rdma->sc_pd->device, + ctxt->sc_sges[0].addr, + ctxt->sc_sges[0].length, + DMA_TO_DEVICE); + + return 0; +} + /* svc_rdma_map_reply_msg - Map the buffer holding RPC message * @rdma: controlling transport * @ctxt: send_ctxt for the Send WR @@ -559,8 +652,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, u32 xdr_pad; int ret; - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst)) + return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst); + + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_buf(rdma, ctxt, xdr->head[0].iov_base, xdr->head[0].iov_len); @@ -591,8 +686,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, while (remaining) { len = min_t(u32, PAGE_SIZE - page_off, remaining); - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++, page_off, len); if (ret < 0) @@ -606,8 +700,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, len = xdr->tail[0].iov_len; tail: if (len) { - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len); if (ret < 0) return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 924c17d46903..57f86c63a463 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -419,12 +419,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Transport header, head iovec, tail iovec */ newxprt->sc_max_send_sges = 3; /* Add one SGE per page list entry */ - newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE; - if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) { - pr_err("svcrdma: too few Send SGEs available (%d needed)\n", - newxprt->sc_max_send_sges); - goto errout; - } + newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1; + if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) + newxprt->sc_max_send_sges = dev->attrs.max_send_sge; newxprt->sc_max_req_size = svcrdma_max_req_size; newxprt->sc_max_requests = svcrdma_max_requests; newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; diff --git a/net/tipc/link.c b/net/tipc/link.c index ac306d17f8ad..341ecd796aa4 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1145,7 +1145,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, default: pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); - return false; + return true; }; } @@ -1425,6 +1425,10 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ + if (mtyp == ACTIVATE_MSG) { + msg_set_dest_session_valid(hdr, 1); + msg_set_dest_session(hdr, l->peer_session); + } msg_set_max_pkt(hdr, l->advertised_mtu); strcpy(data, l->if_name); msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); @@ -1642,6 +1646,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); break; } + + /* If this endpoint was re-created while peer was ESTABLISHING + * it doesn't know current session number. Force re-synch. + */ + if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) && + l->session != msg_dest_session(hdr)) { + if (less(l->session, msg_dest_session(hdr))) + l->session = msg_dest_session(hdr) + 1; + break; + } + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ if (mtyp == RESET_MSG || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index a0924956bb61..d7e4b8b93f9d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -360,6 +360,28 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n) msg_set_bits(m, 1, 0, 0xffff, n); } +/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch + * link peer session number + */ +static inline bool msg_dest_session_valid(struct tipc_msg *m) +{ + return msg_bits(m, 1, 16, 0x1); +} + +static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid) +{ + msg_set_bits(m, 1, 16, 0x1, valid); +} + +static inline u16 msg_dest_session(struct tipc_msg *m) +{ + return msg_bits(m, 1, 0, 0xffff); +} + +static inline void msg_set_dest_session(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 1, 0, 0xffff, n); +} /* * Word 2 diff --git a/net/tipc/node.c b/net/tipc/node.c index db2a6c3e0be9..2dc4919ab23c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -830,15 +830,16 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_node_write_lock(n); if (!tipc_link_is_establishing(l)) { __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); - if (delete) { - kfree(l); - le->link = NULL; - n->link_cnt--; - } } else { /* Defuse pending tipc_node_link_up() */ + tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); } + if (delete) { + kfree(l); + le->link = NULL; + n->link_cnt--; + } trace_tipc_node_link_down(n, true, "node link down or deleted!"); tipc_node_write_unlock(n); if (delete) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 8051a9164139..ae4784734547 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1645,10 +1645,10 @@ int tls_sw_recvmsg(struct sock *sk, do { bool retain_skb = false; - bool async = false; bool zc = false; int to_decrypt; int chunk = 0; + bool async; skb = tls_wait_data(sk, psock, flags, timeo, &err); if (!skb) { @@ -1674,18 +1674,21 @@ int tls_sw_recvmsg(struct sock *sk, tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION) zc = true; + /* Do not use async mode if record is non-data */ + if (ctx->control == TLS_RECORD_TYPE_DATA) + async = ctx->async_capable; + else + async = false; + err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &chunk, &zc, ctx->async_capable); + &chunk, &zc, async); if (err < 0 && err != -EINPROGRESS) { tls_err_abort(sk, EBADMSG); goto recv_end; } - if (err == -EINPROGRESS) { - async = true; + if (err == -EINPROGRESS) num_async++; - goto pick_next_record; - } if (!cmsg) { int cerr; @@ -1704,6 +1707,9 @@ int tls_sw_recvmsg(struct sock *sk, goto recv_end; } + if (async) + goto pick_next_record; + if (!zc) { if (rxm->full_len > len) { retain_skb = true; @@ -2215,8 +2221,12 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (sw_ctx_rx) { tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); - sw_ctx_rx->async_capable = - tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + + if (crypto_info->version == TLS_1_3_VERSION) + sw_ctx_rx->async_capable = false; + else + sw_ctx_rx->async_capable = + tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; /* Set up strparser */ memset(&cb, 0, sizeof(cb)); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 5d3cce9e8744..15eb5d3d4750 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock = virtio_vsock_get(); + if (!vsock) + return VMADDR_CID_ANY; + return vsock->guest_cid; } @@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) virtio_vsock_update_guest_cid(vsock); - ret = vsock_core_init(&virtio_transport.transport); - if (ret < 0) - goto out_vqs; - vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); @@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_unlock(&the_virtio_vsock_mutex); return 0; -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -637,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev) flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work); + /* Reset all connected sockets when the device disappear */ + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vdev->config->reset(vdev); mutex_lock(&vsock->rx_lock); @@ -669,7 +669,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev) mutex_lock(&the_virtio_vsock_mutex); the_virtio_vsock = NULL; - vsock_core_exit(); mutex_unlock(&the_virtio_vsock_mutex); vdev->config->del_vqs(vdev); @@ -702,14 +701,28 @@ static int __init virtio_vsock_init(void) virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - destroy_workqueue(virtio_vsock_workqueue); + goto out_wq; + + ret = vsock_core_init(&virtio_transport.transport); + if (ret) + goto out_vdr; + + return 0; + +out_vdr: + unregister_virtio_driver(&virtio_vsock_driver); +out_wq: + destroy_workqueue(virtio_vsock_workqueue); return ret; + } static void __exit virtio_vsock_exit(void) { + vsock_core_exit(); unregister_virtio_driver(&virtio_vsock_driver); destroy_workqueue(virtio_vsock_workqueue); } diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index c361ce782412..c3d5ab01fba7 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1651,6 +1651,10 @@ static void vmci_transport_cleanup(struct work_struct *work) static void vmci_transport_destruct(struct vsock_sock *vsk) { + /* transport can be NULL if we hit a failure at init() time */ + if (!vmci_trans(vsk)) + return; + /* Ensure that the detach callback doesn't use the sk/vsk * we are about to destruct. */ diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 882d97bdc6bf..550ac9d827fe 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -41,6 +41,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, cfg80211_sched_dfs_chan_update(rdev); } + schedule_work(&cfg80211_disconnect_work); + return err; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 623dfe5e211c..b36ad8efb5e5 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1068,6 +1068,8 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync) ASSERT_RTNL(); + flush_work(&wdev->pmsr_free_wk); + nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); list_del_rcu(&wdev->list); diff --git a/net/wireless/core.h b/net/wireless/core.h index c5d6f3418601..f6b40563dc63 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -445,6 +445,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, u32 center_freq_khz, u32 bw_khz); +extern struct work_struct cfg80211_disconnect_work; + /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable * @wiphy: the wiphy to validate against diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a3cc039b9f55..e36437abc45a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -250,7 +250,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] = NLA_POLICY_MAX(NLA_U8, 15), [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] = - NLA_POLICY_MAX(NLA_U8, 15), + NLA_POLICY_MAX(NLA_U8, 31), [NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG }, diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index de9286703280..0216ab555249 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -256,8 +256,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info) if (err) goto out_err; } else { - memcpy(req->mac_addr, nla_data(info->attrs[NL80211_ATTR_MAC]), - ETH_ALEN); + memcpy(req->mac_addr, wdev_address(wdev), ETH_ALEN); memset(req->mac_addr_mask, 0xff, ETH_ALEN); } @@ -272,6 +271,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info) req->n_peers = count; req->cookie = cfg80211_assign_cookie(rdev); + req->nl_portid = info->snd_portid; err = rdev_start_pmsr(rdev, wdev, req); if (err) @@ -530,14 +530,14 @@ free: } EXPORT_SYMBOL_GPL(cfg80211_pmsr_report); -void cfg80211_pmsr_free_wk(struct work_struct *work) +static void cfg80211_pmsr_process_abort(struct wireless_dev *wdev) { - struct wireless_dev *wdev = container_of(work, struct wireless_dev, - pmsr_free_wk); struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_pmsr_request *req, *tmp; LIST_HEAD(free_list); + lockdep_assert_held(&wdev->mtx); + spin_lock_bh(&wdev->pmsr_lock); list_for_each_entry_safe(req, tmp, &wdev->pmsr_list, list) { if (req->nl_portid) @@ -547,14 +547,22 @@ void cfg80211_pmsr_free_wk(struct work_struct *work) spin_unlock_bh(&wdev->pmsr_lock); list_for_each_entry_safe(req, tmp, &free_list, list) { - wdev_lock(wdev); rdev_abort_pmsr(rdev, wdev, req); - wdev_unlock(wdev); kfree(req); } } +void cfg80211_pmsr_free_wk(struct work_struct *work) +{ + struct wireless_dev *wdev = container_of(work, struct wireless_dev, + pmsr_free_wk); + + wdev_lock(wdev); + cfg80211_pmsr_process_abort(wdev); + wdev_unlock(wdev); +} + void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) { struct cfg80211_pmsr_request *req; @@ -568,8 +576,8 @@ void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) spin_unlock_bh(&wdev->pmsr_lock); if (found) - schedule_work(&wdev->pmsr_free_wk); - flush_work(&wdev->pmsr_free_wk); + cfg80211_pmsr_process_abort(wdev); + WARN_ON(!list_empty(&wdev->pmsr_list)); } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f741d8376a46..7d34cb884840 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -667,7 +667,7 @@ static void disconnect_work(struct work_struct *work) rtnl_unlock(); } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); +DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); /* diff --git a/net/wireless/util.c b/net/wireless/util.c index cd48cdd582c0..ec30e3732c7b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,7 +5,7 @@ * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation */ #include <linux/export.h> #include <linux/bitops.h> @@ -19,6 +19,7 @@ #include <linux/mpls.h> #include <linux/gcd.h> #include <linux/bitfield.h> +#include <linux/nospec.h> #include "core.h" #include "rdev-ops.h" @@ -715,20 +716,25 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, { unsigned int dscp; unsigned char vlan_priority; + unsigned int ret; /* skb->priority values from 256->263 are magic values to * directly indicate a specific 802.1d priority. This is used * to allow 802.1d priority to be passed directly in from VLAN * tags, etc. */ - if (skb->priority >= 256 && skb->priority <= 263) - return skb->priority - 256; + if (skb->priority >= 256 && skb->priority <= 263) { + ret = skb->priority - 256; + goto out; + } if (skb_vlan_tag_present(skb)) { vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; - if (vlan_priority > 0) - return vlan_priority; + if (vlan_priority > 0) { + ret = vlan_priority; + goto out; + } } switch (skb->protocol) { @@ -747,8 +753,9 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, if (!mpls) return 0; - return (ntohl(mpls->entry) & MPLS_LS_TC_MASK) + ret = (ntohl(mpls->entry) & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; + goto out; } case htons(ETH_P_80221): /* 802.21 is always network control traffic */ @@ -761,18 +768,24 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, unsigned int i, tmp_dscp = dscp >> 2; for (i = 0; i < qos_map->num_des; i++) { - if (tmp_dscp == qos_map->dscp_exception[i].dscp) - return qos_map->dscp_exception[i].up; + if (tmp_dscp == qos_map->dscp_exception[i].dscp) { + ret = qos_map->dscp_exception[i].up; + goto out; + } } for (i = 0; i < 8; i++) { if (tmp_dscp >= qos_map->up[i].low && - tmp_dscp <= qos_map->up[i].high) - return i; + tmp_dscp <= qos_map->up[i].high) { + ret = i; + goto out; + } } } - return dscp >> 5; + ret = dscp >> 5; +out: + return array_index_nospec(ret, IEEE80211_NUM_TIDS); } EXPORT_SYMBOL(cfg80211_classify8021d); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5121729b8b63..ec3a828672ef 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -352,17 +352,15 @@ static unsigned int x25_new_lci(struct x25_neigh *nb) unsigned int lci = 1; struct sock *sk; - read_lock_bh(&x25_list_lock); - - while ((sk = __x25_find_socket(lci, nb)) != NULL) { + while ((sk = x25_find_socket(lci, nb)) != NULL) { sock_put(sk); if (++lci == 4096) { lci = 0; break; } + cond_resched(); } - read_unlock_bh(&x25_list_lock); return lci; } diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c index 33e67bd1dc34..32234481ad7d 100644 --- a/samples/mei/mei-amt-version.c +++ b/samples/mei/mei-amt-version.c @@ -117,7 +117,7 @@ static bool mei_init(struct mei *me, const uuid_le *guid, me->verbose = verbose; - me->fd = open("/dev/mei", O_RDWR); + me->fd = open("/dev/mei0", O_RDWR); if (me->fd == -1) { mei_err(me, "Cannot establish a handle to the Intel MEI driver\n"); goto err; diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 08c88de0ffda..11975ec8d566 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -1444,7 +1444,10 @@ check: new = aa_label_merge(label, target, GFP_KERNEL); if (IS_ERR_OR_NULL(new)) { info = "failed to build target label"; - error = PTR_ERR(new); + if (!new) + error = -ENOMEM; + else + error = PTR_ERR(new); new = NULL; perms.allow = 0; goto audit; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 2c010874329f..8db1731d046a 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1599,12 +1599,14 @@ static unsigned int apparmor_ipv4_postroute(void *priv, return apparmor_ip_postroute(priv, skb, state); } +#if IS_ENABLED(CONFIG_IPV6) static unsigned int apparmor_ipv6_postroute(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return apparmor_ip_postroute(priv, skb, state); } +#endif static const struct nf_hook_ops apparmor_nf_ops[] = { { diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 40013b26f671..6c0b30391ba9 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2177,16 +2177,11 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, snd_pcm_update_hw_ptr(substream); if (!is_playback && - runtime->status->state == SNDRV_PCM_STATE_PREPARED) { - if (size >= runtime->start_threshold) { - err = snd_pcm_start(substream); - if (err < 0) - goto _end_unlock; - } else { - /* nothing to do */ - err = 0; + runtime->status->state == SNDRV_PCM_STATE_PREPARED && + size >= runtime->start_threshold) { + err = snd_pcm_start(substream); + if (err < 0) goto _end_unlock; - } } avail = snd_pcm_avail(substream); diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 9174f1b3a987..1ec706ced75c 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -115,7 +115,8 @@ static int hda_codec_driver_probe(struct device *dev) err = snd_hda_codec_build_controls(codec); if (err < 0) goto error_module; - if (codec->card->registered) { + /* only register after the bus probe finished; otherwise it's racy */ + if (!codec->bus->bus_probing && codec->card->registered) { err = snd_card_register(codec->card); if (err < 0) goto error_module; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e784130ea4e0..e5c49003e75f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2185,6 +2185,7 @@ static int azx_probe_continue(struct azx *chip) int dev = chip->dev_index; int err; + to_hda_bus(bus)->bus_probing = 1; hda->probe_continued = 1; /* bind with i915 if needed */ @@ -2269,6 +2270,7 @@ out_free: if (err < 0) hda->init_failed = 1; complete_all(&hda->probe_wait); + to_hda_bus(bus)->bus_probing = 0; return err; } diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index e5bdbc245682..29882bda7632 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -8451,8 +8451,10 @@ static void ca0132_free(struct hda_codec *codec) ca0132_exit_chip(codec); snd_hda_power_down(codec); - if (IS_ENABLED(CONFIG_PCI) && spec->mem_base) +#ifdef CONFIG_PCI + if (spec->mem_base) pci_iounmap(codec->bus->pci, spec->mem_base); +#endif kfree(spec->spec_init_verbs); kfree(codec->spec); } diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 152f54137082..a4ee7656d9ee 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -924,6 +924,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), + SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x83b3, "HP EliteBook 830 G5", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x83d3, "HP ProBook 640 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b4f472157ebd..6df758adff84 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -117,6 +117,7 @@ struct alc_spec { int codec_variant; /* flag for other variants */ unsigned int has_alc5505_dsp:1; unsigned int no_depop_delay:1; + unsigned int done_hp_init:1; /* for PLL fix */ hda_nid_t pll_nid; @@ -514,6 +515,15 @@ static void alc_auto_init_amp(struct hda_codec *codec, int type) } } +/* get a primary headphone pin if available */ +static hda_nid_t alc_get_hp_pin(struct alc_spec *spec) +{ + if (spec->gen.autocfg.hp_pins[0]) + return spec->gen.autocfg.hp_pins[0]; + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + return spec->gen.autocfg.line_out_pins[0]; + return 0; +} /* * Realtek SSID verification @@ -724,9 +734,7 @@ do_sku: * 15 : 1 --> enable the function "Mute internal speaker * when the external headphone out jack is plugged" */ - if (!spec->gen.autocfg.hp_pins[0] && - !(spec->gen.autocfg.line_out_pins[0] && - spec->gen.autocfg.line_out_type == AUTO_PIN_HP_OUT)) { + if (!alc_get_hp_pin(spec)) { hda_nid_t nid; tmp = (ass >> 11) & 0x3; /* HP to chassis */ nid = ports[tmp]; @@ -2958,7 +2966,7 @@ static void alc282_restore_default_value(struct hda_codec *codec) static void alc282_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; int coef78; @@ -2995,7 +3003,7 @@ static void alc282_init(struct hda_codec *codec) static void alc282_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; int coef78; @@ -3073,14 +3081,9 @@ static void alc283_restore_default_value(struct hda_codec *codec) static void alc283_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; - if (!spec->gen.autocfg.hp_outs) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - alc283_restore_default_value(codec); if (!hp_pin) @@ -3114,14 +3117,9 @@ static void alc283_init(struct hda_codec *codec) static void alc283_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; - if (!spec->gen.autocfg.hp_outs) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - if (!hp_pin) { alc269_shutup(codec); return; @@ -3155,7 +3153,7 @@ static void alc283_shutup(struct hda_codec *codec) static void alc256_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; if (!hp_pin) @@ -3191,7 +3189,7 @@ static void alc256_init(struct hda_codec *codec) static void alc256_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; if (!hp_pin) { @@ -3227,7 +3225,7 @@ static void alc256_shutup(struct hda_codec *codec) static void alc225_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp1_pin_sense, hp2_pin_sense; if (!hp_pin) @@ -3270,7 +3268,7 @@ static void alc225_init(struct hda_codec *codec) static void alc225_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp1_pin_sense, hp2_pin_sense; if (!hp_pin) { @@ -3314,7 +3312,7 @@ static void alc225_shutup(struct hda_codec *codec) static void alc_default_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; if (!hp_pin) @@ -3343,7 +3341,7 @@ static void alc_default_init(struct hda_codec *codec) static void alc_default_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; if (!hp_pin) { @@ -3372,6 +3370,48 @@ static void alc_default_shutup(struct hda_codec *codec) snd_hda_shutup_pins(codec); } +static void alc294_hp_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = alc_get_hp_pin(spec); + int i, val; + + if (!hp_pin) + return; + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + + msleep(100); + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + + alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */ + alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */ + + /* Wait for depop procedure finish */ + val = alc_read_coefex_idx(codec, 0x58, 0x01); + for (i = 0; i < 20 && val & 0x0080; i++) { + msleep(50); + val = alc_read_coefex_idx(codec, 0x58, 0x01); + } + /* Set HP depop to auto mode */ + alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b); + msleep(50); +} + +static void alc294_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + if (!spec->done_hp_init) { + alc294_hp_init(codec); + spec->done_hp_init = true; + } + alc_default_init(codec); +} + static void alc5505_coef_set(struct hda_codec *codec, unsigned int index_reg, unsigned int val) { @@ -4737,7 +4777,7 @@ static void alc_update_headset_mode(struct hda_codec *codec) struct alc_spec *spec = codec->spec; hda_nid_t mux_pin = spec->gen.imux_pins[spec->gen.cur_mux[0]]; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); int new_headset_mode; @@ -5016,7 +5056,7 @@ static void alc_fixup_tpt470_dock(struct hda_codec *codec, static void alc_shutup_dell_xps13(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - int hp_pin = spec->gen.autocfg.hp_pins[0]; + int hp_pin = alc_get_hp_pin(spec); /* Prevent pop noises when headphones are plugged in */ snd_hda_codec_write(codec, hp_pin, 0, @@ -5109,7 +5149,7 @@ static void alc271_hp_gate_mic_jack(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PROBE) { int mic_pin = find_ext_mic_pin(codec); - int hp_pin = spec->gen.autocfg.hp_pins[0]; + int hp_pin = alc_get_hp_pin(spec); if (snd_BUG_ON(!mic_pin || !hp_pin)) return; @@ -5591,6 +5631,7 @@ enum { ALC294_FIXUP_ASUS_HEADSET_MIC, ALC294_FIXUP_ASUS_SPK, ALC225_FIXUP_HEADSET_JACK, + ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE, }; static const struct hda_fixup alc269_fixups[] = { @@ -6537,6 +6578,15 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_jack, }, + [ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -6715,6 +6765,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), @@ -7373,37 +7424,6 @@ static void alc269_fill_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x4, 0, 1<<11); } -static void alc294_hp_init(struct hda_codec *codec) -{ - struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; - int i, val; - - if (!hp_pin) - return; - - snd_hda_codec_write(codec, hp_pin, 0, - AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); - - msleep(100); - - snd_hda_codec_write(codec, hp_pin, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); - - alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */ - alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */ - - /* Wait for depop procedure finish */ - val = alc_read_coefex_idx(codec, 0x58, 0x01); - for (i = 0; i < 20 && val & 0x0080; i++) { - msleep(50); - val = alc_read_coefex_idx(codec, 0x58, 0x01); - } - /* Set HP depop to auto mode */ - alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b); - msleep(50); -} - /* */ static int patch_alc269(struct hda_codec *codec) @@ -7529,7 +7549,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC294; spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */ - alc294_hp_init(codec); + spec->init_hook = alc294_init; break; case 0x10ec0300: spec->codec_variant = ALC269_TYPE_ALC300; @@ -7541,7 +7561,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC700; spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */ - alc294_hp_init(codec); + spec->init_hook = alc294_init; break; } diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index d00734d31e04..e5b6769b9797 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -795,6 +795,8 @@ static int hdmi_codec_probe(struct platform_device *pdev) if (hcd->spdif) hcp->daidrv[i] = hdmi_spdif_dai; + dev_set_drvdata(dev, hcp); + ret = devm_snd_soc_register_component(dev, &hdmi_driver, hcp->daidrv, dai_count); if (ret) { @@ -802,8 +804,6 @@ static int hdmi_codec_probe(struct platform_device *pdev) __func__, ret); return ret; } - - dev_set_drvdata(dev, hcp); return 0; } diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 89c43b26c379..a9b91bcfcc09 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -1778,7 +1778,9 @@ static const struct snd_soc_dapm_route rt5682_dapm_routes[] = { {"ADC Stereo1 Filter", NULL, "ADC STO1 ASRC", is_using_asrc}, {"DAC Stereo1 Filter", NULL, "DAC STO1 ASRC", is_using_asrc}, {"ADC STO1 ASRC", NULL, "AD ASRC"}, + {"ADC STO1 ASRC", NULL, "DA ASRC"}, {"ADC STO1 ASRC", NULL, "CLKDET"}, + {"DAC STO1 ASRC", NULL, "AD ASRC"}, {"DAC STO1 ASRC", NULL, "DA ASRC"}, {"DAC STO1 ASRC", NULL, "CLKDET"}, diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index d6c62aa13041..ce00fe2f6aae 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -700,6 +700,7 @@ static int i2s_hw_params(struct snd_pcm_substream *substream, { struct i2s_dai *i2s = to_info(dai); u32 mod, mask = 0, val = 0; + struct clk *rclksrc; unsigned long flags; WARN_ON(!pm_runtime_active(dai->dev)); @@ -782,6 +783,10 @@ static int i2s_hw_params(struct snd_pcm_substream *substream, i2s->frmclk = params_rate(params); + rclksrc = i2s->clk_table[CLK_I2S_RCLK_SRC]; + if (rclksrc && !IS_ERR(rclksrc)) + i2s->rclk_srcrate = clk_get_rate(rclksrc); + return 0; } @@ -886,11 +891,6 @@ static int config_setup(struct i2s_dai *i2s) return 0; if (!(i2s->quirks & QUIRK_NO_MUXPSR)) { - struct clk *rclksrc = i2s->clk_table[CLK_I2S_RCLK_SRC]; - - if (rclksrc && !IS_ERR(rclksrc)) - i2s->rclk_srcrate = clk_get_rate(rclksrc); - psr = i2s->rclk_srcrate / i2s->frmclk / rfs; writel(((psr - 1) << 8) | PSR_PSREN, i2s->addr + I2SPSR); dev_dbg(&i2s->pdev->dev, diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 59e250cc2e9d..e819e965e1db 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1526,14 +1526,14 @@ int rsnd_kctrl_new(struct rsnd_mod *mod, int ret; /* - * 1) Avoid duplicate register (ex. MIXer case) - * 2) re-register if card was rebinded + * 1) Avoid duplicate register for DVC with MIX case + * 2) Allow duplicate register for MIX + * 3) re-register if card was rebinded */ list_for_each_entry(kctrl, &card->controls, list) { struct rsnd_kctrl_cfg *c = kctrl->private_data; - if (strcmp(kctrl->id.name, name) == 0 && - c->mod == mod) + if (c == cfg) return 0; } diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 45ef295743ec..f5afab631abb 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -286,7 +286,7 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod, if (rsnd_ssi_is_multi_slave(mod, io)) return 0; - if (ssi->usrcnt > 1) { + if (ssi->usrcnt > 0) { if (ssi->rate != rate) { dev_err(dev, "SSI parent/child should use same rate\n"); return -EINVAL; diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index c5934adcfd01..c74991dd18ab 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -79,7 +79,7 @@ static int rsnd_ssiu_init(struct rsnd_mod *mod, break; case 9: for (i = 0; i < 4; i++) - rsnd_mod_write(mod, SSI_SYS_STATUS((i * 2) + 1), 0xf << (id * 4)); + rsnd_mod_write(mod, SSI_SYS_STATUS((i * 2) + 1), 0xf << 4); break; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index aae450ba4f08..50617db05c46 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -735,12 +735,17 @@ static struct snd_soc_component *soc_find_component( const struct device_node *of_node, const char *name) { struct snd_soc_component *component; + struct device_node *component_of_node; lockdep_assert_held(&client_mutex); for_each_component(component) { if (of_node) { - if (component->dev->of_node == of_node) + component_of_node = component->dev->of_node; + if (!component_of_node && component->dev->parent) + component_of_node = component->dev->parent->of_node; + + if (component_of_node == of_node) return component; } else if (name && strcmp(component->name, name) == 0) { return component; @@ -951,7 +956,7 @@ static void soc_remove_dai(struct snd_soc_dai *dai, int order) { int err; - if (!dai || !dai->probed || + if (!dai || !dai->probed || !dai->driver || dai->driver->remove_order != order) return; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 2c4c13419539..20bad755888b 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -70,12 +70,16 @@ static int dapm_up_seq[] = { [snd_soc_dapm_clock_supply] = 1, [snd_soc_dapm_supply] = 2, [snd_soc_dapm_micbias] = 3, + [snd_soc_dapm_vmid] = 3, [snd_soc_dapm_dai_link] = 2, [snd_soc_dapm_dai_in] = 4, [snd_soc_dapm_dai_out] = 4, [snd_soc_dapm_aif_in] = 4, [snd_soc_dapm_aif_out] = 4, [snd_soc_dapm_mic] = 5, + [snd_soc_dapm_siggen] = 5, + [snd_soc_dapm_input] = 5, + [snd_soc_dapm_output] = 5, [snd_soc_dapm_mux] = 6, [snd_soc_dapm_demux] = 6, [snd_soc_dapm_dac] = 7, @@ -83,11 +87,19 @@ static int dapm_up_seq[] = { [snd_soc_dapm_mixer] = 8, [snd_soc_dapm_mixer_named_ctl] = 8, [snd_soc_dapm_pga] = 9, + [snd_soc_dapm_buffer] = 9, + [snd_soc_dapm_scheduler] = 9, + [snd_soc_dapm_effect] = 9, + [snd_soc_dapm_src] = 9, + [snd_soc_dapm_asrc] = 9, + [snd_soc_dapm_encoder] = 9, + [snd_soc_dapm_decoder] = 9, [snd_soc_dapm_adc] = 10, [snd_soc_dapm_out_drv] = 11, [snd_soc_dapm_hp] = 11, [snd_soc_dapm_spk] = 11, [snd_soc_dapm_line] = 11, + [snd_soc_dapm_sink] = 11, [snd_soc_dapm_kcontrol] = 12, [snd_soc_dapm_post] = 13, }; @@ -100,13 +112,25 @@ static int dapm_down_seq[] = { [snd_soc_dapm_spk] = 3, [snd_soc_dapm_line] = 3, [snd_soc_dapm_out_drv] = 3, + [snd_soc_dapm_sink] = 3, [snd_soc_dapm_pga] = 4, + [snd_soc_dapm_buffer] = 4, + [snd_soc_dapm_scheduler] = 4, + [snd_soc_dapm_effect] = 4, + [snd_soc_dapm_src] = 4, + [snd_soc_dapm_asrc] = 4, + [snd_soc_dapm_encoder] = 4, + [snd_soc_dapm_decoder] = 4, [snd_soc_dapm_switch] = 5, [snd_soc_dapm_mixer_named_ctl] = 5, [snd_soc_dapm_mixer] = 5, [snd_soc_dapm_dac] = 6, [snd_soc_dapm_mic] = 7, + [snd_soc_dapm_siggen] = 7, + [snd_soc_dapm_input] = 7, + [snd_soc_dapm_output] = 7, [snd_soc_dapm_micbias] = 8, + [snd_soc_dapm_vmid] = 8, [snd_soc_dapm_mux] = 9, [snd_soc_dapm_demux] = 9, [snd_soc_dapm_aif_in] = 10, diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 045ef136903d..fc79ec6927e3 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -502,6 +502,7 @@ static void remove_dai(struct snd_soc_component *comp, { struct snd_soc_dai_driver *dai_drv = container_of(dobj, struct snd_soc_dai_driver, dobj); + struct snd_soc_dai *dai; if (pass != SOC_TPLG_PASS_PCM_DAI) return; @@ -509,6 +510,10 @@ static void remove_dai(struct snd_soc_component *comp, if (dobj->ops && dobj->ops->dai_unload) dobj->ops->dai_unload(comp, dobj); + list_for_each_entry(dai, &comp->dai_list, list) + if (dai->driver == dai_drv) + dai->driver = NULL; + kfree(dai_drv->name); list_del(&dobj->list); kfree(dai_drv); diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 382847154227..db114f3977e0 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -314,6 +314,9 @@ static int search_roland_implicit_fb(struct usb_device *dev, int ifnum, return 0; } +/* Setup an implicit feedback endpoint from a quirk. Returns 0 if no quirk + * applies. Returns 1 if a quirk was found. + */ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, struct usb_device *dev, struct usb_interface_descriptor *altsd, @@ -384,7 +387,7 @@ add_sync_ep: subs->data_endpoint->sync_master = subs->sync_endpoint; - return 0; + return 1; } static int set_sync_endpoint(struct snd_usb_substream *subs, @@ -423,6 +426,10 @@ static int set_sync_endpoint(struct snd_usb_substream *subs, if (err < 0) return err; + /* endpoint set by quirk */ + if (err > 0) + return 0; + if (altsd->bNumEndpoints < 2) return 0; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ebbadb3a7094..7e65fe853ee3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1492,6 +1492,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; + case USB_ID(0x10cb, 0x0103): /* The Bit Opus #3; with fp->dsd_raw */ case USB_ID(0x152a, 0x85de): /* SMSL D1 DAC */ case USB_ID(0x16d0, 0x09dd): /* Encore mDSD */ case USB_ID(0x0d8c, 0x0316): /* Hegel HD12 DSD */ @@ -1566,6 +1567,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x20b1: /* XMOS based devices */ case 0x152a: /* Thesycon devices */ case 0x25ce: /* Mytek devices */ + case 0x2ab6: /* T+A devices */ if (fp->dsd_raw) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 897483457bf0..f7261fad45c1 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -297,10 +297,8 @@ char *get_fdinfo(int fd, const char *key) snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd); fdi = fopen(path, "r"); - if (!fdi) { - p_err("can't open fdinfo: %s", strerror(errno)); + if (!fdi) return NULL; - } while ((n = getline(&line, &line_n, fdi)) > 0) { char *value; @@ -313,7 +311,6 @@ char *get_fdinfo(int fd, const char *key) value = strchr(line, '\t'); if (!value || !value[1]) { - p_err("malformed fdinfo!?"); free(line); return NULL; } @@ -326,7 +323,6 @@ char *get_fdinfo(int fd, const char *key) return line; } - p_err("key '%s' not found in fdinfo", key); free(line); fclose(fdi); return NULL; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 2160a8ef17e5..e0c650d91784 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -358,6 +358,20 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val, return argv + i; } +/* on per cpu maps we must copy the provided value on all value instances */ +static void fill_per_cpu_value(struct bpf_map_info *info, void *value) +{ + unsigned int i, n, step; + + if (!map_is_per_cpu(info->type)) + return; + + n = get_possible_cpus(); + step = round_up(info->value_size, 8); + for (i = 1; i < n; i++) + memcpy(value + i * step, value, info->value_size); +} + static int parse_elem(char **argv, struct bpf_map_info *info, void *key, void *value, __u32 key_size, __u32 value_size, __u32 *flags, __u32 **value_fd) @@ -440,6 +454,8 @@ static int parse_elem(char **argv, struct bpf_map_info *info, argv = parse_bytes(argv, "value", value, value_size); if (!argv) return -1; + + fill_per_cpu_value(info, value); } return parse_elem(argv, info, key, NULL, key_size, value_size, @@ -511,10 +527,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_uint_field(json_wtr, "owner_prog_type", prog_type); } - if (atoi(owner_jited)) - jsonw_bool_field(json_wtr, "owner_jited", true); - else - jsonw_bool_field(json_wtr, "owner_jited", false); + if (owner_jited) + jsonw_bool_field(json_wtr, "owner_jited", + !!atoi(owner_jited)); free(owner_prog_type); free(owner_jited); @@ -567,7 +582,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); char *owner_jited = get_fdinfo(fd, "owner_jited"); - printf("\n\t"); + if (owner_prog_type || owner_jited) + printf("\n\t"); if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); @@ -577,10 +593,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) else printf("owner_prog_type %d ", prog_type); } - if (atoi(owner_jited)) - printf("owner jited"); - else - printf("owner not jited"); + if (owner_jited) + printf("owner%s jited", + atoi(owner_jited) ? "" : " not"); free(owner_prog_type); free(owner_jited); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 0640e9bc0ada..33ed0806ccc0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -78,13 +78,14 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) static int prog_fd_by_tag(unsigned char *tag) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); unsigned int id = 0; int err; int fd; while (true) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + err = bpf_prog_get_next_id(id, &id); if (err) { p_err("%s", strerror(errno)); diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index 3040830d7797..84545666a09c 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -330,7 +330,7 @@ static const struct option longopts[] = { int main(int argc, char **argv) { - unsigned long long num_loops = 2; + long long num_loops = 2; unsigned long timedelay = 1000000; unsigned long buf_len = 128; diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h index fd92ce8388fc..57aaeaf8e192 100644 --- a/tools/include/uapi/asm/bitsperlong.h +++ b/tools/include/uapi/asm/bitsperlong.h @@ -15,6 +15,8 @@ #include "../../arch/ia64/include/uapi/asm/bitsperlong.h" #elif defined(__riscv) #include "../../arch/riscv/include/uapi/asm/bitsperlong.h" +#elif defined(__alpha__) +#include "../../arch/alpha/include/uapi/asm/bitsperlong.h" #else #include <asm-generic/bitsperlong.h> #endif diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index f6052e70bf40..a55cb8b10165 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -268,7 +268,7 @@ struct sockaddr_in { #define IN_MULTICAST(a) IN_CLASSD(a) #define IN_MULTICAST_NET 0xe0000000 -#define IN_BADCLASS(a) ((((long int) (a) ) == 0xffffffff) +#define IN_BADCLASS(a) (((long int) (a) ) == (long int)0xffffffff) #define IN_EXPERIMENTAL(a) IN_BADCLASS((a)) #define IN_CLASSE(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000) diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h index 6e89a5df49a4..653c4f94f76e 100644 --- a/tools/include/uapi/linux/tc_act/tc_bpf.h +++ b/tools/include/uapi/linux/tc_act/tc_bpf.h @@ -13,8 +13,6 @@ #include <linux/pkt_cls.h> -#define TCA_ACT_BPF 13 - struct tc_act_bpf { tc_gen; }; diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 095aebdc5bb7..e6150f21267d 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -19,8 +19,11 @@ C2C stands for Cache To Cache. The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows you to track down the cacheline contentions. -The tool is based on x86's load latency and precise store facility events -provided by Intel CPUs. These events provide: +On x86, the tool is based on load latency and precise store facility events +provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling +with thresholding feature. + +These events provide: - memory address of the access - type of the access (load and store details) - latency (in cycles) of the load access @@ -46,7 +49,7 @@ RECORD OPTIONS -l:: --ldlat:: - Configure mem-loads latency. + Configure mem-loads latency. (x86 only) -k:: --all-kernel:: @@ -119,11 +122,16 @@ Following perf record options are configured by default: -W,-d,--phys-data,--sample-cpu Unless specified otherwise with '-e' option, following events are monitored by -default: +default on x86: cpu/mem-loads,ldlat=30/P cpu/mem-stores/P +and following on PowerPC: + + cpu/mem-loads/ + cpu/mem-stores/ + User can pass any 'perf record' option behind '--' mark, like (to enable callchains and system wide monitoring): diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index f8d2167cf3e7..199ea0f0a6c0 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -82,7 +82,7 @@ RECORD OPTIONS Be more verbose (show counter open errors, etc) --ldlat <n>:: - Specify desired latency for loads event. + Specify desired latency for loads event. (x86 only) In addition, for report all perf report options are valid, and for record all perf record options. diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 2e6595310420..ba98bd006488 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -2,6 +2,7 @@ libperf-y += header.o libperf-y += sym-handling.o libperf-y += kvm-stat.o libperf-y += perf_regs.o +libperf-y += mem-events.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/mem-events.c b/tools/perf/arch/powerpc/util/mem-events.c new file mode 100644 index 000000000000..d08311f04e95 --- /dev/null +++ b/tools/perf/arch/powerpc/util/mem-events.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "mem-events.h" + +/* PowerPC does not support 'ldlat' parameter. */ +char *perf_mem_events__name(int i) +{ + if (i == PERF_MEM_EVENTS__LOAD) + return (char *) "cpu/mem-loads/"; + + return (char *) "cpu/mem-stores/"; +} diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index d079f36d342d..ac221f137ed2 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1681,13 +1681,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, .force_header = false, }; struct perf_evsel *ev2; - static bool init; u64 val; - if (!init) { - perf_stat__init_shadow_stats(); - init = true; - } if (!evsel->stats) perf_evlist__alloc_stats(script->session->evlist, false); if (evsel_script(evsel->leader)->gnum++ == 0) @@ -1794,7 +1789,7 @@ static void process_event(struct perf_script *script, return; } - if (PRINT_FIELD(TRACE)) { + if (PRINT_FIELD(TRACE) && sample->raw_data) { event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, fp); } @@ -2359,6 +2354,8 @@ static int __cmd_script(struct perf_script *script) signal(SIGINT, sig_handler); + perf_stat__init_shadow_stats(); + /* override event processing functions */ if (script->show_task_events) { script->tool.comm = process_comm_event; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ed4583128b9c..b36061cd1ab8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2514,19 +2514,30 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) { - struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); + bool found = false; + struct perf_evsel *evsel, *tmp; + struct parse_events_error err = { .idx = 0, }; + int ret = parse_events(evlist, "probe:vfs_getname*", &err); - if (IS_ERR(evsel)) + if (ret) return false; - if (perf_evsel__field(evsel, "pathname") == NULL) { + evlist__for_each_entry_safe(evlist, evsel, tmp) { + if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname")) + continue; + + if (perf_evsel__field(evsel, "pathname")) { + evsel->handler = trace__vfs_getname; + found = true; + continue; + } + + list_del_init(&evsel->node); + evsel->evlist = NULL; perf_evsel__delete(evsel); - return false; } - evsel->handler = trace__vfs_getname; - perf_evlist__add(evlist, evsel); - return true; + return found; } static struct perf_evsel *perf_evsel__new_pgfault(u64 config) diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index 44090a9a19f3..e952127e4fb0 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -1,6 +1,8 @@ #! /usr/bin/python # SPDX-License-Identifier: GPL-2.0 +from __future__ import print_function + import os import sys import glob @@ -8,7 +10,11 @@ import optparse import tempfile import logging import shutil -import ConfigParser + +try: + import configparser +except ImportError: + import ConfigParser as configparser def data_equal(a, b): # Allow multiple values in assignment separated by '|' @@ -100,20 +106,20 @@ class Event(dict): def equal(self, other): for t in Event.terms: log.debug(" [%s] %s %s" % (t, self[t], other[t])); - if not self.has_key(t) or not other.has_key(t): + if t not in self or t not in other: return False if not data_equal(self[t], other[t]): return False return True def optional(self): - if self.has_key('optional') and self['optional'] == '1': + if 'optional' in self and self['optional'] == '1': return True return False def diff(self, other): for t in Event.terms: - if not self.has_key(t) or not other.has_key(t): + if t not in self or t not in other: continue if not data_equal(self[t], other[t]): log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) @@ -134,7 +140,7 @@ class Event(dict): # - expected values assignments class Test(object): def __init__(self, path, options): - parser = ConfigParser.SafeConfigParser() + parser = configparser.SafeConfigParser() parser.read(path) log.warning("running '%s'" % path) @@ -193,7 +199,7 @@ class Test(object): return True def load_events(self, path, events): - parser_event = ConfigParser.SafeConfigParser() + parser_event = configparser.SafeConfigParser() parser_event.read(path) # The event record section header contains 'event' word, @@ -207,7 +213,7 @@ class Test(object): # Read parent event if there's any if (':' in section): base = section[section.index(':') + 1:] - parser_base = ConfigParser.SafeConfigParser() + parser_base = configparser.SafeConfigParser() parser_base.read(self.test_dir + '/' + base) base_items = parser_base.items('event') @@ -322,9 +328,9 @@ def run_tests(options): for f in glob.glob(options.test_dir + '/' + options.test): try: Test(f, options).run() - except Unsup, obj: + except Unsup as obj: log.warning("unsupp %s" % obj.getMsg()) - except Notest, obj: + except Notest as obj: log.warning("skipped %s" % obj.getMsg()) def setup_log(verbose): @@ -363,7 +369,7 @@ def main(): parser.add_option("-p", "--perf", action="store", type="string", dest="perf") parser.add_option("-v", "--verbose", - action="count", dest="verbose") + default=0, action="count", dest="verbose") options, args = parser.parse_args() if args: @@ -373,7 +379,7 @@ def main(): setup_log(options.verbose) if not options.test_dir: - print 'FAILED no -d option specified' + print('FAILED no -d option specified') sys.exit(-1) if not options.test: @@ -382,8 +388,8 @@ def main(): try: run_tests(options) - except Fail, obj: - print "FAILED %s" % obj.getMsg(); + except Fail as obj: + print("FAILED %s" % obj.getMsg()) sys.exit(-1) sys.exit(0) diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 5f8501c68da4..5cbba70bcdd0 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -17,7 +17,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, return -1; } - is_signed = !!(field->flags | TEP_FIELD_IS_SIGNED); + is_signed = !!(field->flags & TEP_FIELD_IS_SIGNED); if (should_be_signed && !is_signed) { pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n", evsel->name, name, is_signed, should_be_signed); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 1d00e5ec7906..82e16bf84466 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -224,20 +224,24 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct annotation_line *a, struct annotation_line *b) +static double disasm__cmp(struct annotation_line *a, struct annotation_line *b, + int percent_type) { int i; for (i = 0; i < a->data_nr; i++) { - if (a->data[i].percent == b->data[i].percent) + if (a->data[i].percent[percent_type] == b->data[i].percent[percent_type]) continue; - return a->data[i].percent < b->data[i].percent; + return a->data[i].percent[percent_type] - + b->data[i].percent[percent_type]; } return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al) +static void disasm_rb_tree__insert(struct annotate_browser *browser, + struct annotation_line *al) { + struct rb_root *root = &browser->entries; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct annotation_line *l; @@ -246,7 +250,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line parent = *p; l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(al, l)) + if (disasm__cmp(al, l, browser->opts->percent_type) < 0) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -329,7 +333,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, &pos->al); + disasm_rb_tree__insert(browser, &pos->al); } pthread_mutex_unlock(¬es->lock); diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index 89512504551b..39c0004f2886 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -160,7 +160,7 @@ getBPFObjectFromModule(llvm::Module *Module) } PM.run(*Module); - return std::move(Buffer); + return Buffer; } } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 1ccbd3342069..383674f448fc 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -134,7 +134,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (!cpu_list) return cpu_map__read_all_cpu_map(); - if (!isdigit(*cpu_list)) + /* + * must handle the case of empty cpumap to cover + * TOPOLOGY header for NUMA nodes with no CPU + * ( e.g., because of CPU hotplug) + */ + if (!isdigit(*cpu_list) && *cpu_list != '\0') goto out; while (isdigit(*cpu_list)) { @@ -181,8 +186,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else + else if (*cpu_list != '\0') cpus = cpu_map__default_new(); + else + cpus = cpu_map__dummy_new(); invalid: free(tmp_cpus); out: diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 93f74d8d3cdd..42c3e5a229d2 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -28,7 +28,7 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { static char mem_loads_name[100]; static bool mem_loads_name__init; -char *perf_mem_events__name(int i) +char * __weak perf_mem_events__name(int i) { if (i == PERF_MEM_EVENTS__LOAD) { if (!mem_loads_name__init) { diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 897589507d97..ea523d3b248f 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -391,8 +391,10 @@ void ordered_events__free(struct ordered_events *oe) * Current buffer might not have all the events allocated * yet, we need to free only allocated ones ... */ - list_del(&oe->buffer->list); - ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe); + if (oe->buffer) { + list_del(&oe->buffer->list); + ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe); + } /* ... and continue with the rest */ list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) { diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 63f758c655d5..64d1f36dee99 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -17,6 +17,8 @@ if cc == "clang": vars[var] = sub("-mcet", "", vars[var]) if not clang_has_option("-fcf-protection"): vars[var] = sub("-fcf-protection", "", vars[var]) + if not clang_has_option("-fstack-clash-protection"): + vars[var] = sub("-fstack-clash-protection", "", vars[var]) from distutils.core import setup, Extension diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 66a84d5846c8..dca7dfae69ad 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -19,6 +19,20 @@ #define EM_AARCH64 183 /* ARM 64 bit */ #endif +#ifndef ELF32_ST_VISIBILITY +#define ELF32_ST_VISIBILITY(o) ((o) & 0x03) +#endif + +/* For ELF64 the definitions are the same. */ +#ifndef ELF64_ST_VISIBILITY +#define ELF64_ST_VISIBILITY(o) ELF32_ST_VISIBILITY (o) +#endif + +/* How to extract information held in the st_other field. */ +#ifndef GELF_ST_VISIBILITY +#define GELF_ST_VISIBILITY(val) ELF64_ST_VISIBILITY (val) +#endif + typedef Elf64_Nhdr GElf_Nhdr; #ifdef HAVE_CPLUS_DEMANGLE_SUPPORT @@ -87,6 +101,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +static inline uint8_t elf_sym__visibility(const GElf_Sym *sym) +{ + return GELF_ST_VISIBILITY(sym->st_other); +} + #ifndef STT_GNU_IFUNC #define STT_GNU_IFUNC 10 #endif @@ -111,7 +130,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym) return elf_sym__type(sym) == STT_NOTYPE && sym->st_name != 0 && sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; + sym->st_shndx != SHN_ABS && + elf_sym__visibility(sym) != STV_HIDDEN && + elf_sym__visibility(sym) != STV_INTERNAL; } static bool elf_sym__filter(GElf_Sym *sym) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 1a2bd15c5b6e..400ee81a3043 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -10,6 +10,7 @@ TARGETS += drivers/dma-buf TARGETS += efivarfs TARGETS += exec TARGETS += filesystems +TARGETS += filesystems/binderfs TARGETS += firmware TARGETS += ftrace TARGETS += futex diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 197347031038..a29206ebbd13 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void) unsigned int start, end, possible_cpus = 0; char buff[128]; FILE *fp; - int n; + int len, n, i, j = 0; fp = fopen(fcpu, "r"); if (!fp) { @@ -21,17 +21,27 @@ static inline unsigned int bpf_num_possible_cpus(void) exit(1); } - while (fgets(buff, sizeof(buff), fp)) { - n = sscanf(buff, "%u-%u", &start, &end); - if (n == 0) { - printf("Failed to retrieve # possible CPUs!\n"); - exit(1); - } else if (n == 1) { - end = start; + if (!fgets(buff, sizeof(buff), fp)) { + printf("Failed to read %s!\n", fcpu); + exit(1); + } + + len = strlen(buff); + for (i = 0; i <= len; i++) { + if (buff[i] == ',' || buff[i] == '\0') { + buff[i] = '\0'; + n = sscanf(&buff[j], "%u-%u", &start, &end); + if (n <= 0) { + printf("Failed to retrieve # possible CPUs!\n"); + exit(1); + } else if (n == 1) { + end = start; + } + possible_cpus += end - start + 1; + j = i + 1; } - possible_cpus = start == 0 ? end + 1 : 0; - break; } + fclose(fp); return possible_cpus; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index bbcacba39590..02d314383a9c 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -1902,13 +1902,12 @@ static struct btf_raw_test raw_tests[] = { }, { - .descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)", + .descr = "func proto (TYPEDEF=>FUNC_PROTO)", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - BTF_CONST_ENC(4), /* [3] */ - BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [4] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [5] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [4] */ BTF_FUNC_PROTO_ARG_ENC(0, 1), BTF_FUNC_PROTO_ARG_ENC(0, 2), BTF_END_RAW, @@ -1922,8 +1921,6 @@ static struct btf_raw_test raw_tests[] = { .key_type_id = 1, .value_type_id = 1, .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid type_id", }, { diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index bab13dd025a6..0d26b5e3f966 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -37,6 +37,10 @@ prerequisite() exit $ksft_skip fi + present_cpus=`cat $SYSFS/devices/system/cpu/present` + present_max=${present_cpus##*-} + echo "present_cpus = $present_cpus present_max = $present_max" + echo -e "\t Cpus in online state: $online_cpus" offline_cpus=`cat $SYSFS/devices/system/cpu/offline` @@ -151,6 +155,8 @@ online_cpus=0 online_max=0 offline_cpus=0 offline_max=0 +present_cpus=0 +present_max=0 while getopts e:ahp: opt; do case $opt in @@ -190,9 +196,10 @@ if [ $allcpus -eq 0 ]; then online_cpu_expect_success $online_max if [[ $offline_cpus -gt 0 ]]; then - echo -e "\t offline to online to offline: cpu $offline_max" - online_cpu_expect_success $offline_max - offline_cpu_expect_success $offline_max + echo -e "\t offline to online to offline: cpu $present_max" + online_cpu_expect_success $present_max + offline_cpu_expect_success $present_max + online_cpu $present_max fi exit 0 else diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index f1922bf597b0..4d5b880b1d96 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -9,11 +9,11 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ - delta_two_masks_one_key_test bloom_simple_test \ - bloom_complex_test bloom_delta_test" + delta_two_masks_one_key_test delta_simple_rehash_test \ + bloom_simple_test bloom_complex_test bloom_delta_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh -source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh tcflags="skip_hw" @@ -494,6 +494,77 @@ delta_two_masks_one_key_test() log_test "delta two masks one key test ($tcflags)" } +delta_simple_rehash_test() +{ + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 0 + check_err $? "Failed to set ACL region rehash interval" + + tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_fail $? "Rehash trace was hit even when rehash should be disabled" + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 3000 + check_err $? "Failed to set ACL region rehash interval" + + sleep 1 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.1.0/25 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.3.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tp_record_all mlxsw:* 3 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_err $? "Rehash trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate + check_err $? "Migrate trace was not hit" + tp_record_all mlxsw:* 3 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_err $? "Rehash trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate + check_fail $? "Migrate trace was hit when no migration should happen" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after rehash" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "delta simple rehash test ($tcflags)" +} + bloom_simple_test() { # Bloom filter requires that the eRP table is used. This test diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index a0a80e1a69e8..e7ffc79561b7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -2,7 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 NUM_NETIFS=6 -source ../../../../net/forwarding/lib.sh source ../../../../net/forwarding/tc_common.sh source devlink_lib_spectrum.sh diff --git a/tools/testing/selftests/filesystems/binderfs/.gitignore b/tools/testing/selftests/filesystems/binderfs/.gitignore new file mode 100644 index 000000000000..8a5d9bf63dd4 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/.gitignore @@ -0,0 +1 @@ +binderfs_test diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile new file mode 100644 index 000000000000..58cb659b56b4 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := binderfs_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c new file mode 100644 index 000000000000..8c2ed962e1c7 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <linux/android/binder.h> +#include <linux/android/binderfs.h> +#include "../../kselftest.h" + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; +again: + ret = write(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +static void write_to_file(const char *filename, const void *buf, size_t count, + int allowed_errno) +{ + int fd, saved_errno; + ssize_t ret; + + fd = open(filename, O_WRONLY | O_CLOEXEC); + if (fd < 0) + ksft_exit_fail_msg("%s - Failed to open file %s\n", + strerror(errno), filename); + + ret = write_nointr(fd, buf, count); + if (ret < 0) { + if (allowed_errno && (errno == allowed_errno)) { + close(fd); + return; + } + + goto on_error; + } + + if ((size_t)ret != count) + goto on_error; + + close(fd); + return; + +on_error: + saved_errno = errno; + close(fd); + errno = saved_errno; + + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to write to file %s\n", + strerror(errno), filename); + + ksft_exit_fail_msg("Failed to write to file %s\n", filename); +} + +static void change_to_userns(void) +{ + int ret; + uid_t uid; + gid_t gid; + /* {g,u}id_map files only allow a max of 4096 bytes written to them */ + char idmap[4096]; + + uid = getuid(); + gid = getgid(); + + ret = unshare(CLONE_NEWUSER); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unshare user namespace\n", + strerror(errno)); + + write_to_file("/proc/self/setgroups", "deny", strlen("deny"), ENOENT); + + ret = snprintf(idmap, sizeof(idmap), "0 %d 1", uid); + if (ret < 0 || (size_t)ret >= sizeof(idmap)) + ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n", + strerror(errno)); + + write_to_file("/proc/self/uid_map", idmap, strlen(idmap), 0); + + ret = snprintf(idmap, sizeof(idmap), "0 %d 1", gid); + if (ret < 0 || (size_t)ret >= sizeof(idmap)) + ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n", + strerror(errno)); + + write_to_file("/proc/self/gid_map", idmap, strlen(idmap), 0); + + ret = setgid(0); + if (ret) + ksft_exit_fail_msg("%s - Failed to setgid(0)\n", + strerror(errno)); + + ret = setuid(0); + if (ret) + ksft_exit_fail_msg("%s - Failed to setgid(0)\n", + strerror(errno)); +} + +static void change_to_mountns(void) +{ + int ret; + + ret = unshare(CLONE_NEWNS); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unshare mount namespace\n", + strerror(errno)); + + ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to mount / as private\n", + strerror(errno)); +} + +static void rmdir_protect_errno(const char *dir) +{ + int saved_errno = errno; + (void)rmdir(dir); + errno = saved_errno; +} + +static void __do_binderfs_test(void) +{ + int fd, ret, saved_errno; + size_t len; + ssize_t wret; + bool keep = false; + struct binderfs_device device = { 0 }; + struct binder_version version = { 0 }; + + change_to_mountns(); + + ret = mkdir("/dev/binderfs", 0755); + if (ret < 0) { + if (errno != EEXIST) + ksft_exit_fail_msg( + "%s - Failed to create binderfs mountpoint\n", + strerror(errno)); + + keep = true; + } + + ret = mount(NULL, "/dev/binderfs", "binder", 0, 0); + if (ret < 0) { + if (errno != ENODEV) + ksft_exit_fail_msg("%s - Failed to mount binderfs\n", + strerror(errno)); + + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_skip( + "The Android binderfs filesystem is not available\n"); + } + + /* binderfs mount test passed */ + ksft_inc_pass_cnt(); + + memcpy(device.name, "my-binder", strlen("my-binder")); + + fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC); + if (fd < 0) + ksft_exit_fail_msg( + "%s - Failed to open binder-control device\n", + strerror(errno)); + + ret = ioctl(fd, BINDER_CTL_ADD, &device); + saved_errno = errno; + close(fd); + errno = saved_errno; + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to allocate new binder device\n", + strerror(errno)); + } + + ksft_print_msg( + "Allocated new binder device with major %d, minor %d, and name %s\n", + device.major, device.minor, device.name); + + /* binder device allocation test passed */ + ksft_inc_pass_cnt(); + + fd = open("/dev/binderfs/my-binder", O_CLOEXEC | O_RDONLY); + if (fd < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("%s - Failed to open my-binder device\n", + strerror(errno)); + } + + ret = ioctl(fd, BINDER_VERSION, &version); + saved_errno = errno; + close(fd); + errno = saved_errno; + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to open perform BINDER_VERSION request\n", + strerror(errno)); + } + + ksft_print_msg("Detected binder version: %d\n", + version.protocol_version); + + /* binder transaction with binderfs binder device passed */ + ksft_inc_pass_cnt(); + + ret = unlink("/dev/binderfs/my-binder"); + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("%s - Failed to delete binder device\n", + strerror(errno)); + } + + /* binder device removal passed */ + ksft_inc_pass_cnt(); + + ret = unlink("/dev/binderfs/binder-control"); + if (!ret) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("Managed to delete binder-control device\n"); + } else if (errno != EPERM) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to delete binder-control device but exited with unexpected error code\n", + strerror(errno)); + } + + /* binder-control device removal failed as expected */ + ksft_inc_xfail_cnt(); + +on_error: + ret = umount2("/dev/binderfs", MNT_DETACH); + keep ?: rmdir_protect_errno("/dev/binderfs"); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unmount binderfs\n", + strerror(errno)); + + /* binderfs unmount test passed */ + ksft_inc_pass_cnt(); +} + +static void binderfs_test_privileged() +{ + if (geteuid() != 0) + ksft_print_msg( + "Tests are not run as root. Skipping privileged tests\n"); + else + __do_binderfs_test(); +} + +static void binderfs_test_unprivileged() +{ + change_to_userns(); + __do_binderfs_test(); +} + +int main(int argc, char *argv[]) +{ + binderfs_test_privileged(); + binderfs_test_unprivileged(); + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/filesystems/binderfs/config b/tools/testing/selftests/filesystems/binderfs/config new file mode 100644 index 000000000000..02dd6cc9cf99 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/config @@ -0,0 +1,3 @@ +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDERFS=y +CONFIG_ANDROID_BINDER_IPC=y diff --git a/tools/testing/selftests/ir/Makefile b/tools/testing/selftests/ir/Makefile index f4ba8eb84b95..ad06489c22a5 100644 --- a/tools/testing/selftests/ir/Makefile +++ b/tools/testing/selftests/ir/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_PROGS := ir_loopback.sh TEST_GEN_PROGS_EXTENDED := ir_loopback +APIDIR := ../../../include/uapi +CFLAGS += -Wall -O2 -I$(APIDIR) include ../lib.mk diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index f8f3e90700c0..1e6d14d2825c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -21,6 +21,6 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls KSFT_KHDR_INSTALL := 1 include ../lib.mk -$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma +$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread $(OUTPUT)/tcp_inq: LDFLAGS += -lpthread diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 5cd2aed97958..da96eff72a8e 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -10,3 +10,5 @@ CONFIG_NET_CLS_FLOWER=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m CONFIG_VETH=m +CONFIG_NAMESPACES=y +CONFIG_NET_NS=y diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 47ed6cef93fb..c9ff2b47bd1c 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 1017313e41a8..59caa8f71cd8 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -1,2 +1,2 @@ CONFIG_NET_NS=y -NF_TABLES_INET=y +CONFIG_NF_TABLES_INET=y diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh new file mode 100755 index 000000000000..8ec76681605c --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -0,0 +1,762 @@ +#!/bin/bash +# +# This test is for basic NAT functionality: snat, dnat, redirect, masquerade. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 addr add 10.0.1.1/24 dev veth0 +ip -net ns0 addr add dead:1::1/64 dev veth0 + +ip -net ns0 link set veth1 up +ip -net ns0 addr add 10.0.2.1/24 dev veth1 +ip -net ns0 addr add dead:2::1/64 dev veth1 + +for i in 1 2; do + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.$i.99/24 dev eth0 + ip -net ns$i route add default via 10.0.$i.1 + ip -net ns$i addr add dead:$i::99/64 dev eth0 + ip -net ns$i route add default via dead:$i::1 +done + +bad_counter() +{ + local ns=$1 + local counter=$2 + local expect=$3 + + echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter $counter 1>&2 +} + +check_counters() +{ + ns=$1 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in "packets 1 bytes 84" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out "packets 1 bytes 84" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in6 "$expect" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out6 "$expect" + lret=1 + fi + + return $lret +} + +check_ns0_counters() +{ + local ns=$1 + local lret=0 + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in6 "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out "packets 0 bytes 0" + lret=1 + fi + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out6 "packets 0 bytes 0" + lret=1 + fi + + for dir in "in" "out" ; do + expect="packets 1 bytes 84" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir "$expect" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir6 "$expect" + lret=1 + fi + done + + return $lret +} + +reset_counters() +{ + for i in 0 1 2;do + ip netns exec ns$i nft reset counters inet > /dev/null + done +} + +test_local_dnat6() +{ + local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain output { + type nat hook output priority 0; policy accept; + ip6 daddr dead:1::99 dnat to dead:2::99 + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add ip6 dnat hook" + return $ksft_skip + fi + + # ping netns1, expect rewrite to netns2 + ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping6 failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2" + ip netns exec ns0 nft flush chain ip6 nat output + + return $lret +} + +test_local_dnat() +{ + local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain output { + type nat hook output priority 0; policy accept; + ip daddr 10.0.1.99 dnat to 10.0.2.99 + } +} +EOF + # ping netns1, expect rewrite to netns2 + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + + ip netns exec ns0 nft flush chain ip nat output + + reset_counters + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns1$dir "$expect" + lret=1 + fi + done + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 1 count in ns1 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0$dir "$expect" + lret=1 + fi + done + + # expect 0 packet in ns2 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns2$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + + return $lret +} + + +test_masquerade6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 via ipv6" + return 1 + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip6 nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + + return $lret +} + +test_masquerade() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: canot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + + return $lret +} + +test_redirect6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannnot ping ns1 from ns2 via ipv6" + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect + } +} +EOF + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip6 nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete ip6 nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + + return $lret +} + +test_redirect() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect + } +} +EOF + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP redirection for ns2" + + return $lret +} + + +# ip netns exec ns0 ping -c 1 -q 10.0.$i.99 +for i in 0 1 2; do +ip netns exec ns$i nft -f - <<EOF +table inet filter { + counter ns0in {} + counter ns1in {} + counter ns2in {} + + counter ns0out {} + counter ns1out {} + counter ns2out {} + + counter ns0in6 {} + counter ns1in6 {} + counter ns2in6 {} + + counter ns0out6 {} + counter ns1out6 {} + counter ns2out6 {} + + map nsincounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0in", + 10.0.2.1 : "ns0in", + 10.0.1.99 : "ns1in", + 10.0.2.99 : "ns2in" } + } + + map nsincounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0in6", + dead:2::1 : "ns0in6", + dead:1::99 : "ns1in6", + dead:2::99 : "ns2in6" } + } + + map nsoutcounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0out", + 10.0.2.1 : "ns0out", + 10.0.1.99: "ns1out", + 10.0.2.99: "ns2out" } + } + + map nsoutcounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0out6", + dead:2::1 : "ns0out6", + dead:1::99 : "ns1out6", + dead:2::99 : "ns2out6" } + } + + chain input { + type filter hook input priority 0; policy accept; + counter name ip saddr map @nsincounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6 + } + chain output { + type filter hook output priority 0; policy accept; + counter name ip daddr map @nsoutcounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6 + } +} +EOF +done + +sleep 3 +# test basic connectivity +for i in 1 2; do + ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s)" 1>&2 + ret=1 + fi + + ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 + ret=1 + fi + check_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + + check_ns0_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + reset_counters +done + +if [ $ret -eq 0 ];then + echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2" +fi + +reset_counters +test_local_dnat +test_local_dnat6 + +reset_counters +test_masquerade +test_masquerade6 + +reset_counters +test_redirect +test_redirect6 + +for i in 0 1 2; do ip netns del ns$i;done + +exit $ret diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile index 9050eeea5f5f..1de8bd8ccf5d 100644 --- a/tools/testing/selftests/networking/timestamping/Makefile +++ b/tools/testing/selftests/networking/timestamping/Makefile @@ -9,6 +9,3 @@ all: $(TEST_PROGS) top_srcdir = ../../../../.. KSFT_KHDR_INSTALL := 1 include ../../lib.mk - -clean: - rm -fr $(TEST_GEN_FILES) diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 82121a81681f..29bac5ef9a93 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -10,4 +10,5 @@ /proc-uptime-002 /read /self +/setns-dcache /thread-self diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 1c12c34cf85d..434d033ee067 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -14,6 +14,7 @@ TEST_GEN_PROGS += proc-uptime-001 TEST_GEN_PROGS += proc-uptime-002 TEST_GEN_PROGS += read TEST_GEN_PROGS += self +TEST_GEN_PROGS += setns-dcache TEST_GEN_PROGS += thread-self include ../lib.mk diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c new file mode 100644 index 000000000000..60ab197a73fc --- /dev/null +++ b/tools/testing/selftests/proc/setns-dcache.c @@ -0,0 +1,129 @@ +/* + * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Test that setns(CLONE_NEWNET) points to new /proc/net content even + * if old one is in dcache. + * + * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled. + */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/socket.h> + +static pid_t pid = -1; + +static void f(void) +{ + if (pid > 0) { + kill(pid, SIGTERM); + } +} + +int main(void) +{ + int fd[2]; + char _ = 0; + int nsfd; + + atexit(f); + + /* Check for priviledges and syscall availability straight away. */ + if (unshare(CLONE_NEWNET) == -1) { + if (errno == ENOSYS || errno == EPERM) { + return 4; + } + return 1; + } + /* Distinguisher between two otherwise empty net namespaces. */ + if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) { + return 1; + } + + if (pipe(fd) == -1) { + return 1; + } + + pid = fork(); + if (pid == -1) { + return 1; + } + + if (pid == 0) { + if (unshare(CLONE_NEWNET) == -1) { + return 1; + } + + if (write(fd[1], &_, 1) != 1) { + return 1; + } + + pause(); + + return 0; + } + + if (read(fd[0], &_, 1) != 1) { + return 1; + } + + { + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid); + nsfd = open(buf, O_RDONLY); + if (nsfd == -1) { + return 1; + } + } + + /* Reliably pin dentry into dcache. */ + (void)open("/proc/net/unix", O_RDONLY); + + if (setns(nsfd, CLONE_NEWNET) == -1) { + return 1; + } + + kill(pid, SIGTERM); + pid = 0; + + { + char buf[4096]; + ssize_t rv; + int fd; + + fd = open("/proc/net/unix", O_RDONLY); + if (fd == -1) { + return 1; + } + +#define S "Num RefCount Protocol Flags Type St Inode Path\n" + rv = read(fd, buf, sizeof(buf)); + + assert(rv == strlen(S)); + assert(memcmp(buf, S, strlen(S)) == 0); + } + + return 0; +} diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 496a9a8c773a..7e632b465ab4 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1608,7 +1608,16 @@ TEST_F(TRACE_poke, getpid_runs_normally) #ifdef SYSCALL_NUM_RET_SHARE_REG # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) #else -# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(val, action) +# define EXPECT_SYSCALL_RETURN(val, action) \ + do { \ + errno = 0; \ + if (val < 0) { \ + EXPECT_EQ(-1, action); \ + EXPECT_EQ(-(val), errno); \ + } else { \ + EXPECT_EQ(val, action); \ + } \ + } while (0) #endif /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for @@ -1647,7 +1656,7 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) /* Architecture-specific syscall changing routine. */ void change_syscall(struct __test_metadata *_metadata, - pid_t tracee, int syscall) + pid_t tracee, int syscall, int result) { int ret; ARCH_REGS regs; @@ -1706,7 +1715,7 @@ void change_syscall(struct __test_metadata *_metadata, #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); #else - regs.SYSCALL_RET = EPERM; + regs.SYSCALL_RET = result; #endif #ifdef HAVE_GETREGS @@ -1734,14 +1743,19 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, case 0x1002: /* change getpid to getppid. */ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, __NR_getppid); + change_syscall(_metadata, tracee, __NR_getppid, 0); break; case 0x1003: - /* skip gettid. */ + /* skip gettid with valid return code. */ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1); + change_syscall(_metadata, tracee, -1, 45000); break; case 0x1004: + /* skip openat with error. */ + EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); + change_syscall(_metadata, tracee, -1, -ESRCH); + break; + case 0x1005: /* do nothing (allow getppid) */ EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); break; @@ -1774,9 +1788,11 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, nr = get_syscall(_metadata, tracee); if (nr == __NR_getpid) - change_syscall(_metadata, tracee, __NR_getppid); + change_syscall(_metadata, tracee, __NR_getppid, 0); + if (nr == __NR_gettid) + change_syscall(_metadata, tracee, -1, 45000); if (nr == __NR_openat) - change_syscall(_metadata, tracee, -1); + change_syscall(_metadata, tracee, -1, -ESRCH); } FIXTURE_DATA(TRACE_syscall) { @@ -1793,8 +1809,10 @@ FIXTURE_SETUP(TRACE_syscall) BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; @@ -1842,15 +1860,26 @@ TEST_F(TRACE_syscall, ptrace_syscall_redirected) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -TEST_F(TRACE_syscall, ptrace_syscall_dropped) +TEST_F(TRACE_syscall, ptrace_syscall_errno) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer should skip the open syscall, resulting in ESRCH. */ + EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); +} + +TEST_F(TRACE_syscall, ptrace_syscall_faked) { /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ teardown_trace_fixture(_metadata, self->tracer); self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, true); - /* Tracer should skip the open syscall, resulting in EPERM. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat)); + /* Tracer should skip the gettid syscall, resulting fake pid. */ + EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, syscall_allowed) @@ -1883,7 +1912,21 @@ TEST_F(TRACE_syscall, syscall_redirected) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -TEST_F(TRACE_syscall, syscall_dropped) +TEST_F(TRACE_syscall, syscall_errno) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* openat has been skipped and an errno return. */ + EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); +} + +TEST_F(TRACE_syscall, syscall_faked) { long ret; @@ -1894,8 +1937,7 @@ TEST_F(TRACE_syscall, syscall_dropped) ASSERT_EQ(0, ret); /* gettid has been skipped and an altered return value stored. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid)); - EXPECT_NE(self->mytid, syscall(__NR_gettid)); + EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, skip_after_RET_TRACE) diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index c02683cfb6c9..7656c7ce79d9 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O3 -Wl,-no-as-needed -Wall -LDFLAGS += -lrt -lpthread -lm +LDLIBS += -lrt -lpthread -lm # these are all "safe" tests that don't modify # system time or require escalated privileges diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5ecea812cb6a..585845203db8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3000,8 +3000,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm, if (ops->init) ops->init(dev); + kvm_get_kvm(kvm); ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { + kvm_put_kvm(kvm); mutex_lock(&kvm->lock); list_del(&dev->vm_node); mutex_unlock(&kvm->lock); @@ -3009,7 +3011,6 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return ret; } - kvm_get_kvm(kvm); cd->fd = ret; return 0; } |