diff options
1155 files changed, 16876 insertions, 8108 deletions
@@ -384,6 +384,7 @@ Li Yang <leoyang.li@nxp.com> <leoli@freescale.com> Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org> Lior David <quic_liord@quicinc.com> <liord@codeaurora.org> Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com> +Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com> Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net> Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com> Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org> @@ -608,6 +609,7 @@ Simon Kelley <simon@thekelleys.org.uk> Sricharan Ramabadhran <quic_srichara@quicinc.com> <sricharan@codeaurora.org> Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org> Sriram R <quic_srirrama@quicinc.com> <srirrama@codeaurora.org> +Stanislav Fomichev <sdf@fomichev.me> <sdf@google.com> Stefan Wahren <wahrenst@gmx.net> <stefan.wahren@i2se.com> Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr> Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org> @@ -1214,6 +1214,10 @@ D: UDF filesystem S: (ask for current address) S: USA +N: Larry Finger +E: Larry.Finger@lwfinger.net +D: Maintainer of wireless drivers, too many to list here + N: Jürgen Fischer E: fischer@norbit.de D: Author of Adaptec AHA-152x SCSI driver @@ -3146,9 +3150,11 @@ S: Triftstra=DFe 55 S: 13353 Berlin S: Germany -N: Gustavo Pimental +N: Gustavo Pimentel E: gustavo.pimentel@synopsys.com D: PCI driver for Synopsys DesignWare +D: Synopsys DesignWare eDMA driver +D: Synopsys DesignWare xData traffic generator N: Emanuel Pirker E: epirker@edu.uni-klu.ac.at diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index aa8290a29dc8..fd4b56c0996f 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -723,40 +723,26 @@ Configuration pseudo-files: ======================= ======================================================= SecurityFlags Flags which control 
security negotiation and also packet signing. Authentication (may/must) - flags (e.g. for NTLM and/or NTLMv2) may be combined with + flags (e.g. for NTLMv2) may be combined with the signing flags. Specifying two different password hashing mechanisms (as "must use") on the other hand does not make much sense. Default flags are:: - 0x07007 - - (NTLM, NTLMv2 and packet signing allowed). The maximum - allowable flags if you want to allow mounts to servers - using weaker password hashes is 0x37037 (lanman, - plaintext, ntlm, ntlmv2, signing allowed). Some - SecurityFlags require the corresponding menuconfig - options to be enabled. Enabling plaintext - authentication currently requires also enabling - lanman authentication in the security flags - because the cifs module only supports sending - laintext passwords using the older lanman dialect - form of the session setup SMB. (e.g. for authentication - using plain text passwords, set the SecurityFlags - to 0x30030):: + 0x00C5 + + (NTLMv2 and packet signing allowed). Some SecurityFlags + may require enabling a corresponding menuconfig option. may use packet signing 0x00001 must use packet signing 0x01001 - may use NTLM (most common password hash) 0x00002 - must use NTLM 0x02002 may use NTLMv2 0x00004 must use NTLMv2 0x04004 - may use Kerberos security 0x00008 - must use Kerberos 0x08008 - may use lanman (weak) password hash 0x00010 - must use lanman password hash 0x10010 - may use plaintext passwords 0x00020 - must use plaintext passwords 0x20020 - (reserved for future packet encryption) 0x00040 + may use Kerberos security (krb5) 0x00008 + must use Kerberos 0x08008 + may use NTLMSSP 0x00080 + must use NTLMSSP 0x80080 + seal (packet encryption) 0x00040 + must seal (not implemented yet) 0x40040 cifsFYI If set to non-zero value, additional debug information will be logged to the system error log. 
This field diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b600df82669d..27ec49af1bf2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -788,25 +788,6 @@ Documentation/networking/netconsole.rst for an alternative. - <DEVNAME>:<n>.<n>[,options] - Use the specified serial port on the serial core bus. - The addressing uses DEVNAME of the physical serial port - device, followed by the serial core controller instance, - and the serial port instance. The options are the same - as documented for the ttyS addressing above. - - The mapping of the serial ports to the tty instances - can be viewed with: - - $ ls -d /sys/bus/serial-base/devices/*:*.*/tty/* - /sys/bus/serial-base/devices/00:04:0.0/tty/ttyS0 - - In the above example, the console can be addressed with - console=00:04:0.0. Note that a console addressed this - way will only get added when the related device driver - is ready. The use of an earlycon parameter in addition to - the console may be desired for console output early on. - uart[8250],io,<addr>[,options] uart[8250],mmio,<addr>[,options] uart[8250],mmio16,<addr>[,options] @@ -2192,12 +2173,6 @@ Format: 0 | 1 Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON. - init_mlocked_on_free= [MM] Fill freed userspace memory with zeroes if - it was mlock'ed and not explicitly munlock'ed - afterwards. - Format: 0 | 1 - Default set by CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON - init_pkru= [X86] Specify the default memory protection keys rights register contents for all processes. 0x55555554 by default (disallow access to all but pkey 0). 
Can diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst index 1c0ca06b6c97..8c48bcff3df9 100644 --- a/Documentation/arch/riscv/cmodx.rst +++ b/Documentation/arch/riscv/cmodx.rst @@ -62,10 +62,10 @@ cmodx.c:: printf("Value before cmodx: %d\n", value); // Call prctl before first fence.i is called inside modify_instruction - prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS); + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS); modify_instruction(); // Call prctl after final fence.i is called in process - prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_OFF, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS); + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS); value = get_value(); printf("Value after cmodx: %d\n", value); diff --git a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml index 07ccbda4a0ab..b9a9f2cf32a1 100644 --- a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml +++ b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml @@ -66,7 +66,6 @@ allOf: compatible: contains: enum: - - qcom,qdu1000-llcc - qcom,sc7180-llcc - qcom,sm6350-llcc then: @@ -104,6 +103,7 @@ allOf: compatible: contains: enum: + - qcom,qdu1000-llcc - qcom,sc8180x-llcc - qcom,sc8280xp-llcc - qcom,x1e80100-llcc diff --git a/Documentation/devicetree/bindings/dma/fsl,edma.yaml b/Documentation/devicetree/bindings/dma/fsl,edma.yaml index acfb4b2ee7a9..d54140f18d34 100644 --- a/Documentation/devicetree/bindings/dma/fsl,edma.yaml +++ b/Documentation/devicetree/bindings/dma/fsl,edma.yaml @@ -59,8 +59,8 @@ properties: - 3 dma-channels: - minItems: 1 - maxItems: 64 + minimum: 1 + maximum: 64 clocks: minItems: 1 diff --git a/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml b/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml index b1c13bab2472..b2d19cfb87ad 100644 
--- a/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml @@ -77,7 +77,7 @@ required: - clocks allOf: - - $ref: i2c-controller.yaml + - $ref: /schemas/i2c/i2c-controller.yaml# - if: properties: compatible: diff --git a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml index ab151c9db219..580003cdfff5 100644 --- a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml +++ b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml @@ -21,7 +21,7 @@ description: | google,cros-ec-spi or google,cros-ec-i2c. allOf: - - $ref: i2c-controller.yaml# + - $ref: /schemas/i2c/i2c-controller.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml index c80c880a9dab..60aaf30d68ed 100644 --- a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml +++ b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml @@ -128,7 +128,6 @@ required: - cell-index - reg - fsl,fman-ports - - ptp-timer dependencies: pcs-handle-names: diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml index 50846a2d09c8..0bf2d9f093b5 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml @@ -29,7 +29,6 @@ properties: - qcom,pm7325-gpio - qcom,pm7550ba-gpio - qcom,pm8005-gpio - - qcom,pm8008-gpio - qcom,pm8018-gpio - qcom,pm8019-gpio - qcom,pm8038-gpio @@ -126,7 +125,6 @@ allOf: compatible: contains: enum: - - qcom,pm8008-gpio - qcom,pmi8950-gpio - qcom,pmr735d-gpio then: @@ -448,7 +446,6 @@ $defs: - gpio1-gpio10 for pm7325 - gpio1-gpio8 for pm7550ba - gpio1-gpio4 for pm8005 - - gpio1-gpio2 for pm8008 - gpio1-gpio6 for pm8018 - gpio1-gpio12 for 
pm8038 - gpio1-gpio40 for pm8058 diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst index 5149ecdc53c7..d732c42526df 100644 --- a/Documentation/driver-api/cxl/memory-devices.rst +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -328,6 +328,12 @@ CXL Memory Device .. kernel-doc:: drivers/cxl/mem.c :doc: cxl mem +.. kernel-doc:: drivers/cxl/cxlmem.h + :internal: + +.. kernel-doc:: drivers/cxl/core/memdev.c + :identifiers: + CXL Port -------- .. kernel-doc:: drivers/cxl/port.c @@ -341,6 +347,15 @@ CXL Core .. kernel-doc:: drivers/cxl/cxl.h :internal: +.. kernel-doc:: drivers/cxl/core/hdm.c + :doc: cxl core hdm + +.. kernel-doc:: drivers/cxl/core/hdm.c + :identifiers: + +.. kernel-doc:: drivers/cxl/core/cdat.c + :identifiers: + .. kernel-doc:: drivers/cxl/core/port.c :doc: cxl core diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 8f5c1ee02e2f..e8e496d23e1d 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -34,6 +34,7 @@ algorithms work. seq_file sharedsubtree idmappings + iomap/index automount-support diff --git a/Documentation/filesystems/iomap/design.rst b/Documentation/filesystems/iomap/design.rst new file mode 100644 index 000000000000..f8ee3427bc1a --- /dev/null +++ b/Documentation/filesystems/iomap/design.rst @@ -0,0 +1,441 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _iomap_design: + +.. + Dumb style notes to maintain the author's sanity: + Please try to start sentences on separate lines so that + sentence changes don't bleed colors in diff. + Heading decorations are documented in sphinx.rst. + +============== +Library Design +============== + +.. contents:: Table of Contents + :local: + +Introduction +============ + +iomap is a filesystem library for handling common file operations. +The library has two layers: + + 1. A lower layer that provides an iterator over ranges of file offsets. 
+ This layer tries to obtain mappings of each file range to storage + from the filesystem, but the storage information is not necessarily + required. + + 2. An upper layer that acts upon the space mappings provided by the + lower layer iterator. + +The iteration can involve mappings of file's logical offset ranges to +physical extents, but the storage layer information is not necessarily +required, e.g. for walking cached file information. +The library exports various APIs for implementing file operations such +as: + + * Pagecache reads and writes + * Folio write faults to the pagecache + * Writeback of dirty folios + * Direct I/O reads and writes + * fsdax I/O reads, writes, loads, and stores + * FIEMAP + * lseek ``SEEK_DATA`` and ``SEEK_HOLE`` + * swapfile activation + +The origin of this library is the file I/O path that XFS once used; it +has now been extended to cover several other operations. + +Who Should Read This? +===================== + +The target audience for this document are filesystem, storage, and +pagecache programmers and code reviewers. + +If you are working on PCI, machine architectures, or device drivers, you +are most likely in the wrong place. + +How Is This Better? +=================== + +Unlike the classic Linux I/O model which breaks file I/O into small +units (generally memory pages or blocks) and looks up space mappings on +the basis of that unit, the iomap model asks the filesystem for the +largest space mappings that it can create for a given file operation and +initiates operations on that basis. +This strategy improves the filesystem's visibility into the size of the +operation being performed, which enables it to combat fragmentation with +larger space allocations when possible. +Larger space mappings improve runtime performance by amortizing the cost +of mapping function calls into the filesystem across a larger amount of +data. 
+ +At a high level, an iomap operation `looks like this +<https://lore.kernel.org/all/ZGbVaewzcCysclPt@dread.disaster.area/>`_: + +1. For each byte in the operation range... + + 1. Obtain a space mapping via ``->iomap_begin`` + + 2. For each sub-unit of work... + + 1. Revalidate the mapping and go back to (1) above, if necessary. + So far only the pagecache operations need to do this. + + 2. Do the work + + 3. Increment operation cursor + + 4. Release the mapping via ``->iomap_end``, if necessary + +Each iomap operation will be covered in more detail below. +This library was covered previously by an `LWN article +<https://lwn.net/Articles/935934/>`_ and a `KernelNewbies page +<https://kernelnewbies.org/KernelProjects/iomap>`_. + +The goal of this document is to provide a brief discussion of the +design and capabilities of iomap, followed by a more detailed catalog +of the interfaces presented by iomap. +If you change iomap, please update this design document. + +File Range Iterator +=================== + +Definitions +----------- + + * **buffer head**: Shattered remnants of the old buffer cache. + + * ``fsblock``: The block size of a file, also known as ``i_blocksize``. + + * ``i_rwsem``: The VFS ``struct inode`` rwsemaphore. + Processes hold this in shared mode to read file state and contents. + Some filesystems may allow shared mode for writes. + Processes often hold this in exclusive mode to change file state and + contents. + + * ``invalidate_lock``: The pagecache ``struct address_space`` + rwsemaphore that protects against folio insertion and removal for + filesystems that support punching out folios below EOF. + Processes wishing to insert folios must hold this lock in shared + mode to prevent removal, though concurrent insertion is allowed. + Processes wishing to remove folios must hold this lock in exclusive + mode to prevent insertions. + Concurrent removals are not allowed. 
+ + * ``dax_read_lock``: The RCU read lock that dax takes to prevent a + device pre-shutdown hook from returning before other threads have + released resources. + + * **filesystem mapping lock**: This synchronization primitive is + internal to the filesystem and must protect the file mapping data + from updates while a mapping is being sampled. + The filesystem author must determine how this coordination should + happen; it does not need to be an actual lock. + + * **iomap internal operation lock**: This is a general term for + synchronization primitives that iomap functions take while holding a + mapping. + A specific example would be taking the folio lock while reading or + writing the pagecache. + + * **pure overwrite**: A write operation that does not require any + metadata or zeroing operations to perform during either submission + or completion. + This implies that the filesystem must have already allocated space + on disk as ``IOMAP_MAPPED`` and the filesystem must not place any + constraints on IO alignment or size. + The only constraints on I/O alignment are device level (minimum I/O + size and alignment, typically sector size). + +``struct iomap`` +---------------- + +The filesystem communicates to the iomap iterator the mapping of +byte ranges of a file to byte ranges of a storage device with the +structure below: + +.. code-block:: c + + struct iomap { + u64 addr; + loff_t offset; + u64 length; + u16 type; + u16 flags; + struct block_device *bdev; + struct dax_device *dax_dev; + void *inline_data; + void *private; + const struct iomap_folio_ops *folio_ops; + u64 validity_cookie; + }; + +The fields are as follows: + + * ``offset`` and ``length`` describe the range of file offsets, in + bytes, covered by this mapping. + These fields must always be set by the filesystem. + + * ``type`` describes the type of the space mapping: + + * **IOMAP_HOLE**: No storage has been allocated. 
+ This type must never be returned in response to an ``IOMAP_WRITE`` + operation because writes must allocate and map space, and return + the mapping. + The ``addr`` field must be set to ``IOMAP_NULL_ADDR``. + iomap does not support writing (whether via pagecache or direct + I/O) to a hole. + + * **IOMAP_DELALLOC**: A promise to allocate space at a later time + ("delayed allocation"). + If the filesystem returns IOMAP_F_NEW here and the write fails, the + ``->iomap_end`` function must delete the reservation. + The ``addr`` field must be set to ``IOMAP_NULL_ADDR``. + + * **IOMAP_MAPPED**: The file range maps to specific space on the + storage device. + The device is returned in ``bdev`` or ``dax_dev``. + The device address, in bytes, is returned via ``addr``. + + * **IOMAP_UNWRITTEN**: The file range maps to specific space on the + storage device, but the space has not yet been initialized. + The device is returned in ``bdev`` or ``dax_dev``. + The device address, in bytes, is returned via ``addr``. + Reads from this type of mapping will return zeroes to the caller. + For a write or writeback operation, the ioend should update the + mapping to MAPPED. + Refer to the sections about ioends for more details. + + * **IOMAP_INLINE**: The file range maps to the memory buffer + specified by ``inline_data``. + For write operation, the ``->iomap_end`` function presumably + handles persisting the data. + The ``addr`` field must be set to ``IOMAP_NULL_ADDR``. + + * ``flags`` describe the status of the space mapping. + These flags should be set by the filesystem in ``->iomap_begin``: + + * **IOMAP_F_NEW**: The space under the mapping is newly allocated. + Areas that will not be written to must be zeroed. + If a write fails and the mapping is a space reservation, the + reservation must be deleted. + + * **IOMAP_F_DIRTY**: The inode will have uncommitted metadata needed + to access any data written. + fdatasync is required to commit these changes to persistent + storage. 
+ This needs to take into account metadata changes that *may* be made + at I/O completion, such as file size updates from direct I/O. + + * **IOMAP_F_SHARED**: The space under the mapping is shared. + Copy on write is necessary to avoid corrupting other file data. + + * **IOMAP_F_BUFFER_HEAD**: This mapping requires the use of buffer + heads for pagecache operations. + Do not add more uses of this. + + * **IOMAP_F_MERGED**: Multiple contiguous block mappings were + coalesced into this single mapping. + This is only useful for FIEMAP. + + * **IOMAP_F_XATTR**: The mapping is for extended attribute data, not + regular file data. + This is only useful for FIEMAP. + + * **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can + be set by the filesystem for its own purposes. + + These flags can be set by iomap itself during file operations. + The filesystem should supply an ``->iomap_end`` function if it needs + to observe these flags: + + * **IOMAP_F_SIZE_CHANGED**: The file size has changed as a result of + using this mapping. + + * **IOMAP_F_STALE**: The mapping was found to be stale. + iomap will call ``->iomap_end`` on this mapping and then + ``->iomap_begin`` to obtain a new mapping. + + Currently, these flags are only set by pagecache operations. + + * ``addr`` describes the device address, in bytes. + + * ``bdev`` describes the block device for this mapping. + This only needs to be set for mapped or unwritten operations. + + * ``dax_dev`` describes the DAX device for this mapping. + This only needs to be set for mapped or unwritten operations, and + only for a fsdax operation. + + * ``inline_data`` points to a memory buffer for I/O involving + ``IOMAP_INLINE`` mappings. + This value is ignored for all other mapping types. + + * ``private`` is a pointer to `filesystem-private information + <https://lore.kernel.org/all/20180619164137.13720-7-hch@lst.de/>`_. + This value will be passed unchanged to ``->iomap_end``. 
+ + * ``folio_ops`` will be covered in the section on pagecache operations. + + * ``validity_cookie`` is a magic freshness value set by the filesystem + that should be used to detect stale mappings. + For pagecache operations this is critical for correct operation + because page faults can occur, which implies that filesystem locks + should not be held between ``->iomap_begin`` and ``->iomap_end``. + Filesystems with completely static mappings need not set this value. + Only pagecache operations revalidate mappings; see the section about + ``iomap_valid`` for details. + +``struct iomap_ops`` +-------------------- + +Every iomap function requires the filesystem to pass an operations +structure to obtain a mapping and (optionally) to release the mapping: + +.. code-block:: c + + struct iomap_ops { + int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length, + unsigned flags, struct iomap *iomap, + struct iomap *srcmap); + + int (*iomap_end)(struct inode *inode, loff_t pos, loff_t length, + ssize_t written, unsigned flags, + struct iomap *iomap); + }; + +``->iomap_begin`` +~~~~~~~~~~~~~~~~~ + +iomap operations call ``->iomap_begin`` to obtain one file mapping for +the range of bytes specified by ``pos`` and ``length`` for the file +``inode``. +This mapping should be returned through the ``iomap`` pointer. +The mapping must cover at least the first byte of the supplied file +range, but it does not need to cover the entire requested range. + +Each iomap operation describes the requested operation through the +``flags`` argument. +The exact value of ``flags`` will be documented in the +operation-specific sections below. +These flags can, at least in principle, apply generally to iomap +operations: + + * ``IOMAP_DIRECT`` is set when the caller wishes to issue file I/O to + block storage. + + * ``IOMAP_DAX`` is set when the caller wishes to issue file I/O to + memory-like storage. 
+ + * ``IOMAP_NOWAIT`` is set when the caller wishes to perform a best + effort attempt to avoid any operation that would result in blocking + the submitting task. + This is similar in intent to ``O_NONBLOCK`` for network APIs - it is + intended for asynchronous applications to keep doing other work + instead of waiting for the specific unavailable filesystem resource + to become available. + Filesystems implementing ``IOMAP_NOWAIT`` semantics need to use + trylock algorithms. + They need to be able to satisfy the entire I/O request range with a + single iomap mapping. + They need to avoid reading or writing metadata synchronously. + They need to avoid blocking memory allocations. + They need to avoid waiting on transaction reservations to allow + modifications to take place. + They probably should not be allocating new space. + And so on. + If there is any doubt in the filesystem developer's mind as to + whether any specific ``IOMAP_NOWAIT`` operation may end up blocking, + then they should return ``-EAGAIN`` as early as possible rather than + start the operation and force the submitting task to block. + ``IOMAP_NOWAIT`` is often set on behalf of ``IOCB_NOWAIT`` or + ``RWF_NOWAIT``. + +If it is necessary to read existing file contents from a `different +<https://lore.kernel.org/all/20191008071527.29304-9-hch@lst.de/>`_ +device or address range on a device, the filesystem should return that +information via ``srcmap``. +Only pagecache and fsdax operations support reading from one mapping and +writing to another. + +``->iomap_end`` +~~~~~~~~~~~~~~~ + +After the operation completes, the ``->iomap_end`` function, if present, +is called to signal that iomap is finished with a mapping. +Typically, implementations will use this function to tear down any +context that were set up in ``->iomap_begin``. +For example, a write might wish to commit the reservations for the bytes +that were operated upon and unreserve any space that was not operated +upon. 
+``written`` might be zero if no bytes were touched. +``flags`` will contain the same value passed to ``->iomap_begin``. +iomap ops for reads are not likely to need to supply this function. + +Both functions should return a negative errno code on error, or zero on +success. + +Preparing for File Operations +============================= + +iomap only handles mapping and I/O. +Filesystems must still call out to the VFS to check input parameters +and file state before initiating an I/O operation. +It does not handle obtaining filesystem freeze protection, updating of +timestamps, stripping privileges, or access control. + +Locking Hierarchy +================= + +iomap requires that filesystems supply their own locking model. +There are three categories of synchronization primitives, as far as +iomap is concerned: + + * The **upper** level primitive is provided by the filesystem to + coordinate access to different iomap operations. + The exact primitive is specific to the filesystem and operation, + but is often a VFS inode, pagecache invalidation, or folio lock. + For example, a filesystem might take ``i_rwsem`` before calling + ``iomap_file_buffered_write`` and ``iomap_file_unshare`` to prevent + these two file operations from clobbering each other. + Pagecache writeback may lock a folio to prevent other threads from + accessing the folio until writeback is underway. + + * The **lower** level primitive is taken by the filesystem in the + ``->iomap_begin`` and ``->iomap_end`` functions to coordinate + access to the file space mapping information. + The fields of the iomap object should be filled out while holding + this primitive. + The upper level synchronization primitive, if any, remains held + while acquiring the lower level synchronization primitive. + For example, XFS takes ``ILOCK_EXCL`` and ext4 takes ``i_data_sem`` + while sampling mappings. + Filesystems with immutable mapping information may not require + synchronization here. 
+ + * The **operation** primitive is taken by an iomap operation to + coordinate access to its own internal data structures. + The upper level synchronization primitive, if any, remains held + while acquiring this primitive. + The lower level primitive is not held while acquiring this + primitive. + For example, pagecache write operations will obtain a file mapping, + then grab and lock a folio to copy new contents. + It may also lock an internal folio state object to update metadata. + +The exact locking requirements are specific to the filesystem; for +certain operations, some of these locks can be elided. +All further mentions of locking are *recommendations*, not mandates. +Each filesystem author must figure out the locking for themself. + +Bugs and Limitations +==================== + + * No support for fscrypt. + * No support for compression. + * No support for fsverity yet. + * Strong assumptions that IO should work the way it does on XFS. + * Does iomap *actually* work for non-regular file data? + +Patches welcome! diff --git a/Documentation/filesystems/iomap/index.rst b/Documentation/filesystems/iomap/index.rst new file mode 100644 index 000000000000..3c6a52440250 --- /dev/null +++ b/Documentation/filesystems/iomap/index.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +VFS iomap Documentation +======================= + +.. toctree:: + :maxdepth: 2 + :numbered: + + design + operations + porting diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst new file mode 100644 index 000000000000..8e6c721d2330 --- /dev/null +++ b/Documentation/filesystems/iomap/operations.rst @@ -0,0 +1,713 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _iomap_operations: + +.. + Dumb style notes to maintain the author's sanity: + Please try to start sentences on separate lines so that + sentence changes don't bleed colors in diff. + Heading decorations are documented in sphinx.rst. 
+ +========================= +Supported File Operations +========================= + +.. contents:: Table of Contents + :local: + +Below is a discussion of the high level file operations that iomap +implements. + +Buffered I/O +============ + +Buffered I/O is the default file I/O path in Linux. +File contents are cached in memory ("pagecache") to satisfy reads and +writes. +Dirty cache will be written back to disk at some point that can be +forced via ``fsync`` and variants. + +iomap implements nearly all the folio and pagecache management that +filesystems have to implement themselves under the legacy I/O model. +This means that the filesystem need not know the details of allocating, +mapping, managing uptodate and dirty state, or writeback of pagecache +folios. +Under the legacy I/O model, this was managed very inefficiently with +linked lists of buffer heads instead of the per-folio bitmaps that iomap +uses. +Unless the filesystem explicitly opts in to buffer heads, they will not +be used, which makes buffered I/O much more efficient, and the pagecache +maintainer much happier. + +``struct address_space_operations`` +----------------------------------- + +The following iomap functions can be referenced directly from the +address space operations structure: + + * ``iomap_dirty_folio`` + * ``iomap_release_folio`` + * ``iomap_invalidate_folio`` + * ``iomap_is_partially_uptodate`` + +The following address space operations can be wrapped easily: + + * ``read_folio`` + * ``readahead`` + * ``writepages`` + * ``bmap`` + * ``swap_activate`` + +``struct iomap_folio_ops`` +-------------------------- + +The ``->iomap_begin`` function for pagecache operations may set the +``struct iomap::folio_ops`` field to an ops structure to override +default behaviors of iomap: + +.. 
code-block:: c + + struct iomap_folio_ops { + struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, + unsigned len); + void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, + struct folio *folio); + bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); + }; + +iomap calls these functions: + + - ``get_folio``: Called to allocate and return an active reference to + a locked folio prior to starting a write. + If this function is not provided, iomap will call + ``iomap_get_folio``. + This could be used to `set up per-folio filesystem state + <https://lore.kernel.org/all/20190429220934.10415-5-agruenba@redhat.com/>`_ + for a write. + + - ``put_folio``: Called to unlock and put a folio after a pagecache + operation completes. + If this function is not provided, iomap will ``folio_unlock`` and + ``folio_put`` on its own. + This could be used to `commit per-folio filesystem state + <https://lore.kernel.org/all/20180619164137.13720-6-hch@lst.de/>`_ + that was set up by ``->get_folio``. + + - ``iomap_valid``: The filesystem may not hold locks between + ``->iomap_begin`` and ``->iomap_end`` because pagecache operations + can take folio locks, fault on userspace pages, initiate writeback + for memory reclamation, or engage in other time-consuming actions. + If a file's space mapping data are mutable, it is possible that the + mapping for a particular pagecache folio can `change in the time it + takes + <https://lore.kernel.org/all/20221123055812.747923-8-david@fromorbit.com/>`_ + to allocate, install, and lock that folio. + + For the pagecache, races can happen if writeback doesn't take + ``i_rwsem`` or ``invalidate_lock`` and updates mapping information. + Races can also happen if the filesystem allows concurrent writes. + For such files, the mapping *must* be revalidated after the folio + lock has been taken so that iomap can manage the folio correctly. 
+ + fsdax does not need this revalidation because there's no writeback + and no support for unwritten extents. + + Filesystems subject to this kind of race must provide a + ``->iomap_valid`` function to decide if the mapping is still valid. + If the mapping is not valid, the mapping will be sampled again. + + To support making the validity decision, the filesystem's + ``->iomap_begin`` function may set ``struct iomap::validity_cookie`` + at the same time that it populates the other iomap fields. + A simple validation cookie implementation is a sequence counter. + If the filesystem bumps the sequence counter every time it modifies + the inode's extent map, it can be placed in the ``struct + iomap::validity_cookie`` during ``->iomap_begin``. + If the value in the cookie is found to be different to the value + the filesystem holds when the mapping is passed back to + ``->iomap_valid``, then the iomap should be considered stale and the + validation failed. + +These ``struct kiocb`` flags are significant for buffered I/O with iomap: + + * ``IOCB_NOWAIT``: Turns on ``IOMAP_NOWAIT``. + +Internal per-Folio State +------------------------ + +If the fsblock size matches the size of a pagecache folio, it is assumed +that all disk I/O operations will operate on the entire folio. +The uptodate (memory contents are at least as new as what's on disk) and +dirty (memory contents are newer than what's on disk) status of the +folio are all that's needed for this case. + +If the fsblock size is less than the size of a pagecache folio, iomap +tracks the per-fsblock uptodate and dirty state itself. +This enables iomap to handle both "bs < ps" `filesystems +<https://lore.kernel.org/all/20230725122932.144426-1-ritesh.list@gmail.com/>`_ +and large folios in the pagecache. + +iomap internally tracks two state bits per fsblock: + + * ``uptodate``: iomap will try to keep folios fully up to date. + If there are read(ahead) errors, those fsblocks will not be marked + uptodate. 
+ The folio itself will be marked uptodate when all fsblocks within the + folio are uptodate. + + * ``dirty``: iomap will set the per-block dirty state when programs + write to the file. + The folio itself will be marked dirty when any fsblock within the + folio is dirty. + +iomap also tracks the amount of read and write disk IOs that are in +flight. +This structure is much lighter weight than ``struct buffer_head`` +because there is only one per folio, and the per-fsblock overhead is two +bits vs. 104 bytes. + +Filesystems wishing to turn on large folios in the pagecache should call +``mapping_set_large_folios`` when initializing the incore inode. + +Buffered Readahead and Reads +---------------------------- + +The ``iomap_readahead`` function initiates readahead to the pagecache. +The ``iomap_read_folio`` function reads one folio's worth of data into +the pagecache. +The ``flags`` argument to ``->iomap_begin`` will be set to zero. +The pagecache takes whatever locks it needs before calling the +filesystem. + +Buffered Writes +--------------- + +The ``iomap_file_buffered_write`` function writes an ``iocb`` to the +pagecache. +``IOMAP_WRITE`` or ``IOMAP_WRITE`` | ``IOMAP_NOWAIT`` will be passed as +the ``flags`` argument to ``->iomap_begin``. +Callers commonly take ``i_rwsem`` in either shared or exclusive mode +before calling this function. + +mmap Write Faults +~~~~~~~~~~~~~~~~~ + +The ``iomap_page_mkwrite`` function handles a write fault to a folio in +the pagecache. +``IOMAP_WRITE | IOMAP_FAULT`` will be passed as the ``flags`` argument +to ``->iomap_begin``. +Callers commonly take the mmap ``invalidate_lock`` in shared or +exclusive mode before calling this function. + +Buffered Write Failures +~~~~~~~~~~~~~~~~~~~~~~~ + +After a short write to the pagecache, the areas not written will not +become marked dirty. 
+The filesystem must arrange to `cancel +<https://lore.kernel.org/all/20221123055812.747923-6-david@fromorbit.com/>`_ +such `reservations +<https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/>`_ +because writeback will not consume the reservation. +The ``iomap_file_buffered_write_punch_delalloc`` function can be called from a +``->iomap_end`` function to find all the clean areas of the folios +caching a fresh (``IOMAP_F_NEW``) delalloc mapping. +It takes the ``invalidate_lock``. + +The filesystem must supply a function ``punch`` to be called for +each file range in this state. +This function must *only* remove delayed allocation reservations, in +case another thread racing with the current thread writes successfully +to the same region and triggers writeback to flush the dirty data out to +disk. + +Zeroing for File Operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Filesystems can call ``iomap_zero_range`` to perform zeroing of the +pagecache for non-truncation file operations that are not aligned to +the fsblock size. +``IOMAP_ZERO`` will be passed as the ``flags`` argument to +``->iomap_begin``. +Callers typically hold ``i_rwsem`` and ``invalidate_lock`` in exclusive +mode before calling this function. + +Unsharing Reflinked File Data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Filesystems can call ``iomap_file_unshare`` to force a file sharing +storage with another file to preemptively copy the shared data to newly +allocated storage. +``IOMAP_WRITE | IOMAP_UNSHARE`` will be passed as the ``flags`` argument +to ``->iomap_begin``. +Callers typically hold ``i_rwsem`` and ``invalidate_lock`` in exclusive +mode before calling this function. + +Truncation +---------- + +Filesystems can call ``iomap_truncate_page`` to zero the bytes in the +pagecache from EOF to the end of the fsblock during a file truncation +operation. +``truncate_setsize`` or ``truncate_pagecache`` will take care of +everything after the EOF block. 
+``IOMAP_ZERO`` will be passed as the ``flags`` argument to +``->iomap_begin``. +Callers typically hold ``i_rwsem`` and ``invalidate_lock`` in exclusive +mode before calling this function. + +Pagecache Writeback +------------------- + +Filesystems can call ``iomap_writepages`` to respond to a request to +write dirty pagecache folios to disk. +The ``mapping`` and ``wbc`` parameters should be passed unchanged. +The ``wpc`` pointer should be allocated by the filesystem and must +be initialized to zero. + +The pagecache will lock each folio before trying to schedule it for +writeback. +It does not lock ``i_rwsem`` or ``invalidate_lock``. + +The dirty bit will be cleared for all folios run through the +``->map_blocks`` machinery described below even if the writeback fails. +This is to prevent dirty folio clots when storage devices fail; an +``-EIO`` is recorded for userspace to collect via ``fsync``. + +The ``ops`` structure must be specified and is as follows: + +``struct iomap_writeback_ops`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + + struct iomap_writeback_ops { + int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, + loff_t offset, unsigned len); + int (*prepare_ioend)(struct iomap_ioend *ioend, int status); + void (*discard_folio)(struct folio *folio, loff_t pos); + }; + +The fields are as follows: + + - ``map_blocks``: Sets ``wpc->iomap`` to the space mapping of the file + range (in bytes) given by ``offset`` and ``len``. + iomap calls this function for each dirty fs block in each dirty folio, + though it will `reuse mappings + <https://lore.kernel.org/all/20231207072710.176093-15-hch@lst.de/>`_ + for runs of contiguous dirty fsblocks within a folio. + Do not return ``IOMAP_INLINE`` mappings here; the ``->iomap_end`` + function must deal with persisting written data. + Do not return ``IOMAP_DELALLOC`` mappings here; iomap currently + requires mapping to allocated space. 
+ Filesystems can skip a potentially expensive mapping lookup if the + mappings have not changed. + This revalidation must be open-coded by the filesystem; it is + unclear if ``iomap::validity_cookie`` can be reused for this + purpose. + This function must be supplied by the filesystem. + + - ``prepare_ioend``: Enables filesystems to transform the writeback + ioend or perform any other preparatory work before the writeback I/O + is submitted. + This might include pre-write space accounting updates, or installing + a custom ``->bi_end_io`` function for internal purposes, such as + deferring the ioend completion to a workqueue to run metadata update + transactions from process context. + This function is optional. + + - ``discard_folio``: iomap calls this function after ``->map_blocks`` + fails to schedule I/O for any part of a dirty folio. + The function should throw away any reservations that may have been + made for the write. + The folio will be marked clean and an ``-EIO`` recorded in the + pagecache. + Filesystems can use this callback to `remove + <https://lore.kernel.org/all/20201029163313.1766967-1-bfoster@redhat.com/>`_ + delalloc reservations to avoid having delalloc reservations for + clean pagecache. + This function is optional. + +Pagecache Writeback Completion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To handle the bookkeeping that must happen after disk I/O for writeback +completes, iomap creates chains of ``struct iomap_ioend`` objects that +wrap the ``bio`` that is used to write pagecache data to disk. +By default, iomap finishes writeback ioends by clearing the writeback +bit on the folios attached to the ``ioend``. +If the write failed, it will also set the error bits on the folios and +the address space. +This can happen in interrupt or process context, depending on the +storage device. + +Filesystems that need to update internal bookkeeping (e.g. 
unwritten +extent conversions) should provide a ``->prepare_ioend`` function to +set ``struct iomap_ioend::bio::bi_end_io`` to its own function. +This function should call ``iomap_finish_ioends`` after finishing its +own work (e.g. unwritten extent conversion). + +Some filesystems may wish to `amortize the cost of running metadata +transactions +<https://lore.kernel.org/all/20220120034733.221737-1-david@fromorbit.com/>`_ +for post-writeback updates by batching them. +They may also require transactions to run from process context, which +implies punting batches to a workqueue. +iomap ioends contain a ``list_head`` to enable batching. + +Given a batch of ioends, iomap has a few helpers to assist with +amortization: + + * ``iomap_sort_ioends``: Sort all the ioends in the list by file + offset. + + * ``iomap_ioend_try_merge``: Given an ioend that is not in any list and + a separate list of sorted ioends, merge as many of the ioends from + the head of the list into the given ioend. + ioends can only be merged if the file range and storage addresses are + contiguous; the unwritten and shared status are the same; and the + write I/O outcome is the same. + The merged ioends become their own list. + + * ``iomap_finish_ioends``: Finish an ioend that possibly has other + ioends linked to it. + +Direct I/O +========== + +In Linux, direct I/O is defined as file I/O that is issued directly to +storage, bypassing the pagecache. +The ``iomap_dio_rw`` function implements O_DIRECT (direct I/O) reads and +writes for files. + +.. code-block:: c + + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, + const struct iomap_dio_ops *dops, + unsigned int dio_flags, void *private, + size_t done_before); + +The filesystem can provide the ``dops`` parameter if it needs to perform +extra work before or after the I/O is issued to storage. +The ``done_before`` parameter tells iomap how much of the request has +already been transferred. 
+It is used to continue a request asynchronously when `part of the +request +<https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c03098d4b9ad76bca2966a8769dcfe59f7f85103>`_ +has already been completed synchronously. + +The ``done_before`` parameter should be set if writes for the ``iocb`` +have been initiated prior to the call. +The direction of the I/O is determined from the ``iocb`` passed in. + +The ``dio_flags`` argument can be set to any combination of the +following values: + + * ``IOMAP_DIO_FORCE_WAIT``: Wait for the I/O to complete even if the + kiocb is not synchronous. + + * ``IOMAP_DIO_OVERWRITE_ONLY``: Perform a pure overwrite for this range + or fail with ``-EAGAIN``. + This can be used by filesystems with complex unaligned I/O + write paths to provide an optimised fast path for unaligned writes. + If a pure overwrite can be performed, then serialisation against + other I/Os to the same filesystem block(s) is unnecessary as there is + no risk of stale data exposure or data loss. + If a pure overwrite cannot be performed, then the filesystem can + perform the serialisation steps needed to provide exclusive access + to the unaligned I/O range so that it can perform allocation and + sub-block zeroing safely. + Filesystems can use this flag to try to reduce locking contention, + but a lot of `detailed checking + <https://lore.kernel.org/linux-ext4/20230314130759.642710-1-bfoster@redhat.com/>`_ + is required to do it `correctly + <https://lore.kernel.org/linux-ext4/20230810165559.946222-1-bfoster@redhat.com/>`_. + + * ``IOMAP_DIO_PARTIAL``: If a page fault occurs, return whatever + progress has already been made. + The caller may deal with the page fault and retry the operation. + If the caller decides to retry the operation, it should pass the + accumulated return values of all previous calls as the + ``done_before`` parameter to the next call. 
+ +These ``struct kiocb`` flags are significant for direct I/O with iomap: + + * ``IOCB_NOWAIT``: Turns on ``IOMAP_NOWAIT``. + + * ``IOCB_SYNC``: Ensure that the device has persisted data to disk + before completing the call. + In the case of pure overwrites, the I/O may be issued with FUA + enabled. + + * ``IOCB_HIPRI``: Poll for I/O completion instead of waiting for an + interrupt. + Only meaningful for asynchronous I/O, and only if the entire I/O can + be issued as a single ``struct bio``. + + * ``IOCB_DIO_CALLER_COMP``: Try to run I/O completion from the caller's + process context. + See ``linux/fs.h`` for more details. + +Filesystems should call ``iomap_dio_rw`` from ``->read_iter`` and +``->write_iter``, and set ``FMODE_CAN_ODIRECT`` in the ``->open`` +function for the file. +They should not set ``->direct_IO``, which is deprecated. + +If a filesystem wishes to perform its own work before direct I/O +completion, it should call ``__iomap_dio_rw``. +If its return value is not an error pointer or a NULL pointer, the +filesystem should pass the return value to ``iomap_dio_complete`` after +finishing its internal work. + +Return Values +------------- + +``iomap_dio_rw`` can return one of the following: + + * A non-negative number of bytes transferred. + + * ``-ENOTBLK``: Fall back to buffered I/O. + iomap itself will return this value if it cannot invalidate the page + cache before issuing the I/O to storage. + The ``->iomap_begin`` or ``->iomap_end`` functions may also return + this value. + + * ``-EIOCBQUEUED``: The asynchronous direct I/O request has been + queued and will be completed separately. + + * Any of the other negative error codes. + +Direct Reads +------------ + +A direct I/O read initiates a read I/O from the storage device to the +caller's buffer. +Dirty parts of the pagecache are flushed to storage before initiating +the read io. 
+The ``flags`` value for ``->iomap_begin`` will be ``IOMAP_DIRECT`` with +any combination of the following enhancements: + + * ``IOMAP_NOWAIT``, as defined previously. + +Callers commonly hold ``i_rwsem`` in shared mode before calling this +function. + +Direct Writes +------------- + +A direct I/O write initiates a write I/O to the storage device from the +caller's buffer. +Dirty parts of the pagecache are flushed to storage before initiating +the write io. +The pagecache is invalidated both before and after the write io. +The ``flags`` value for ``->iomap_begin`` will be ``IOMAP_DIRECT | +IOMAP_WRITE`` with any combination of the following enhancements: + + * ``IOMAP_NOWAIT``, as defined previously. + + * ``IOMAP_OVERWRITE_ONLY``: Allocating blocks and zeroing partial + blocks is not allowed. + The entire file range must map to a single written or unwritten + extent. + The file I/O range must be aligned to the filesystem block size + if the mapping is unwritten and the filesystem cannot handle zeroing + the unaligned regions without exposing stale contents. + +Callers commonly hold ``i_rwsem`` in shared or exclusive mode before +calling this function. + +``struct iomap_dio_ops:`` +------------------------- +.. code-block:: c + + struct iomap_dio_ops { + void (*submit_io)(const struct iomap_iter *iter, struct bio *bio, + loff_t file_offset); + int (*end_io)(struct kiocb *iocb, ssize_t size, int error, + unsigned flags); + struct bio_set *bio_set; + }; + +The fields of this structure are as follows: + + - ``submit_io``: iomap calls this function when it has constructed a + ``struct bio`` object for the I/O requested, and wishes to submit it + to the block device. + If no function is provided, ``submit_bio`` will be called directly. + Filesystems that would like to perform additional work before (e.g. + data replication for btrfs) should implement this function. + + - ``end_io``: This is called after the ``struct bio`` completes. 
+ This function should perform post-write conversions of unwritten + extent mappings, handle write failures, etc. + The ``flags`` argument may be set to a combination of the following: + + * ``IOMAP_DIO_UNWRITTEN``: The mapping was unwritten, so the ioend + should mark the extent as written. + + * ``IOMAP_DIO_COW``: Writing to the space in the mapping required a + copy on write operation, so the ioend should switch mappings. + + - ``bio_set``: This allows the filesystem to provide a custom bio_set + for allocating direct I/O bios. + This enables filesystems to `stash additional per-bio information + <https://lore.kernel.org/all/20220505201115.937837-3-hch@lst.de/>`_ + for private use. + If this field is NULL, generic ``struct bio`` objects will be used. + +Filesystems that want to perform extra work after an I/O completion +should set a custom ``->bi_end_io`` function via ``->submit_io``. +Afterwards, the custom endio function must call +``iomap_dio_bio_end_io`` to finish the direct I/O. + +DAX I/O +======= + +Some storage devices can be directly mapped as memory. +These devices support a new access mode known as "fsdax" that allows +loads and stores through the CPU and memory controller. + +fsdax Reads +----------- + +A fsdax read performs a memcpy from storage device to the caller's +buffer. +The ``flags`` value for ``->iomap_begin`` will be ``IOMAP_DAX`` with any +combination of the following enhancements: + + * ``IOMAP_NOWAIT``, as defined previously. + +Callers commonly hold ``i_rwsem`` in shared mode before calling this +function. + +fsdax Writes +------------ + +A fsdax write initiates a memcpy to the storage device from the caller's +buffer. +The ``flags`` value for ``->iomap_begin`` will be ``IOMAP_DAX | +IOMAP_WRITE`` with any combination of the following enhancements: + + * ``IOMAP_NOWAIT``, as defined previously. + + * ``IOMAP_OVERWRITE_ONLY``: The caller requires a pure overwrite to be + performed from this mapping. 
+ This requires the filesystem extent mapping to already exist as an + ``IOMAP_MAPPED`` type and span the entire range of the write I/O + request. + If the filesystem cannot map this request in a way that allows the + iomap infrastructure to perform a pure overwrite, it must fail the + mapping operation with ``-EAGAIN``. + +Callers commonly hold ``i_rwsem`` in exclusive mode before calling this +function. + +fsdax mmap Faults +~~~~~~~~~~~~~~~~~ + +The ``dax_iomap_fault`` function handles read and write faults to fsdax +storage. +For a read fault, ``IOMAP_DAX | IOMAP_FAULT`` will be passed as the +``flags`` argument to ``->iomap_begin``. +For a write fault, ``IOMAP_DAX | IOMAP_FAULT | IOMAP_WRITE`` will be +passed as the ``flags`` argument to ``->iomap_begin``. + +Callers commonly hold the same locks as they do to call their iomap +pagecache counterparts. + +fsdax Truncation, fallocate, and Unsharing +------------------------------------------ + +For fsdax files, the following functions are provided to replace their +iomap pagecache I/O counterparts. +The ``flags`` argument to ``->iomap_begin`` is the same as for the +pagecache counterparts, with ``IOMAP_DAX`` added. + + * ``dax_file_unshare`` + * ``dax_zero_range`` + * ``dax_truncate_page`` + +Callers commonly hold the same locks as they do to call their iomap +pagecache counterparts. + +fsdax Deduplication +------------------- + +Filesystems implementing the ``FIDEDUPERANGE`` ioctl must call the +``dax_remap_file_range_prep`` function with their own iomap read ops. + +Seeking Files +============= + +iomap implements the two iterating whence modes of the ``llseek`` system +call. + +SEEK_DATA +--------- + +The ``iomap_seek_data`` function implements the SEEK_DATA "whence" value +for llseek. +``IOMAP_REPORT`` will be passed as the ``flags`` argument to +``->iomap_begin``. + +For unwritten mappings, the pagecache will be searched. 
+Regions of the pagecache with a folio mapped and uptodate fsblocks +within those folios will be reported as data areas. + +Callers commonly hold ``i_rwsem`` in shared mode before calling this +function. + +SEEK_HOLE +--------- + +The ``iomap_seek_hole`` function implements the SEEK_HOLE "whence" value +for llseek. +``IOMAP_REPORT`` will be passed as the ``flags`` argument to +``->iomap_begin``. + +For unwritten mappings, the pagecache will be searched. +Regions of the pagecache with no folio mapped, or a !uptodate fsblock +within a folio will be reported as sparse hole areas. + +Callers commonly hold ``i_rwsem`` in shared mode before calling this +function. + +Swap File Activation +==================== + +The ``iomap_swapfile_activate`` function finds all the base-page aligned +regions in a file and sets them up as swap space. +The file will be ``fsync()``'d before activation. +``IOMAP_REPORT`` will be passed as the ``flags`` argument to +``->iomap_begin``. +All mappings must be mapped or unwritten; cannot be dirty or shared, and +cannot span multiple block devices. +Callers must hold ``i_rwsem`` in exclusive mode; this is already +provided by ``swapon``. + +File Space Mapping Reporting +============================ + +iomap implements two of the file space mapping system calls. + +FS_IOC_FIEMAP +------------- + +The ``iomap_fiemap`` function exports file extent mappings to userspace +in the format specified by the ``FS_IOC_FIEMAP`` ioctl. +``IOMAP_REPORT`` will be passed as the ``flags`` argument to +``->iomap_begin``. +Callers commonly hold ``i_rwsem`` in shared mode before calling this +function. + +FIBMAP (deprecated) +------------------- + +``iomap_bmap`` implements FIBMAP. +The calling conventions are the same as for FIEMAP. +This function is only provided to maintain compatibility for filesystems +that implemented FIBMAP prior to conversion. +This ioctl is deprecated; do **not** add a FIBMAP implementation to +filesystems that do not have it. 
+Callers should probably hold ``i_rwsem`` in shared mode before calling +this function, but this is unclear. diff --git a/Documentation/filesystems/iomap/porting.rst b/Documentation/filesystems/iomap/porting.rst new file mode 100644 index 000000000000..3d49a32c0fff --- /dev/null +++ b/Documentation/filesystems/iomap/porting.rst @@ -0,0 +1,120 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _iomap_porting: + +.. + Dumb style notes to maintain the author's sanity: + Please try to start sentences on separate lines so that + sentence changes don't bleed colors in diff. + Heading decorations are documented in sphinx.rst. + +======================= +Porting Your Filesystem +======================= + +.. contents:: Table of Contents + :local: + +Why Convert? +============ + +There are several reasons to convert a filesystem to iomap: + + 1. The classic Linux I/O path is not terribly efficient. + Pagecache operations lock a single base page at a time and then call + into the filesystem to return a mapping for only that page. + Direct I/O operations build I/O requests a single file block at a + time. + This worked well enough for direct/indirect-mapped filesystems such + as ext2, but is very inefficient for extent-based filesystems such + as XFS. + + 2. Large folios are only supported via iomap; there are no plans to + convert the old buffer_head path to use them. + + 3. Direct access to storage on memory-like devices (fsdax) is only + supported via iomap. + + 4. Lower maintenance overhead for individual filesystem maintainers. + iomap handles common pagecache related operations itself, such as + allocating, instantiating, locking, and unlocking of folios. + No ->write_begin(), ->write_end() or direct_IO + address_space_operations are required to be implemented by + filesystems using iomap. + +How Do I Convert a Filesystem? 
+============================== + +First, add ``#include <linux/iomap.h>`` to your source code and add +``select FS_IOMAP`` to your filesystem's Kconfig option. +Build the kernel and run fstests with the ``-g all`` option across a wide +variety of your filesystem's supported configurations to build a +baseline of which tests pass and which ones fail. + +The recommended approach is first to implement ``->iomap_begin`` (and +``->iomap_end`` if necessary) to allow iomap to obtain a read-only +mapping of a file range. +In most cases, this is a relatively trivial conversion of the existing +``get_block()`` function for read-only mappings. +``FS_IOC_FIEMAP`` is a good first target because it is trivial to +implement support for it and then to determine that the extent map +iteration is correct from userspace. +If FIEMAP is returning the correct information, it's a good sign that +other read-only mapping operations will do the right thing. + +Next, modify the filesystem's ``get_block(create = false)`` +implementation to use the new ``->iomap_begin`` implementation to map +file space for selected read operations. +Hide behind a debugging knob the ability to switch on the iomap mapping +functions for selected call paths. +It is necessary to write some code to fill out the bufferhead-based +mapping information from the ``iomap`` structure, but the new functions +can be tested without needing to implement any iomap APIs. + +Once the read-only functions are working like this, convert each high +level file operation one by one to use iomap native APIs instead of +going through ``get_block()``. +Done one at a time, regressions should be self evident. +You *do* have a regression test baseline for fstests, right? +It is suggested to convert swap file activation, ``SEEK_DATA``, and +``SEEK_HOLE`` before tackling the I/O paths. +A likely complexity at this point will be converting the buffered read +I/O path because of bufferheads. 
+The buffered read I/O path doesn't need to be converted yet, though the +direct I/O read path should be converted in this phase. + +At this point, you should look over your ``->iomap_begin`` function. +If it switches between large blocks of code based on dispatching of the +``flags`` argument, you should consider breaking it up into +per-operation iomap ops with smaller, more cohesive functions. +XFS is a good example of this. + +The next thing to do is implement ``get_block(create == true)`` +functionality in the ``->iomap_begin``/``->iomap_end`` methods. +It is strongly recommended to create separate mapping functions and +iomap ops for write operations. +Then convert the direct I/O write path to iomap, and start running fsx +w/ DIO enabled in earnest on the filesystem. +This will flush out lots of data integrity corner case bugs that the new +write mapping implementation introduces. + +Now, convert any remaining file operations to call the iomap functions. +This will get the entire filesystem using the new mapping functions, and +they should largely be debugged and working correctly after this step. + +Most likely at this point, the buffered read and write paths will still +need to be converted. +The mapping functions should all work correctly, so all that needs to be +done is rewriting all the code that interfaces with bufferheads to +interface with iomap and folios. +It is much easier first to get regular file I/O (without any fancy +features like fscrypt, fsverity, compression, or data=journaling) +converted to use iomap. +Some of those fancy features (fscrypt and compression) aren't +implemented yet in iomap. +For unjournalled filesystems that use the pagecache for symbolic links +and directories, you might also try converting their handling to iomap. + +The rest is left as an exercise for the reader, as it will be different +for every filesystem. +If you encounter problems, email the people and lists reported by +``scripts/get_maintainer.pl`` for help. 
diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst index 9aaf6ef75eb5..317934c9e8fc 100644 --- a/Documentation/filesystems/mount_api.rst +++ b/Documentation/filesystems/mount_api.rst @@ -645,6 +645,8 @@ The members are as follows: fs_param_is_blockdev Blockdev path * Needs lookup fs_param_is_path Path * Needs lookup fs_param_is_fd File descriptor result->int_32 + fs_param_is_uid User ID (u32) result->uid + fs_param_is_gid Group ID (u32) result->gid ======================= ======================= ===================== Note that if the value is of fs_param_is_bool type, fs_parse() will try @@ -678,6 +680,8 @@ The members are as follows: fsparam_bdev() fs_param_is_blockdev fsparam_path() fs_param_is_path fsparam_fd() fs_param_is_fd + fsparam_uid() fs_param_is_uid + fsparam_gid() fs_param_is_gid ======================= =============================================== all of which take two arguments, name string and option number - for @@ -784,8 +788,9 @@ process the parameters it is given. option number (which it returns). If successful, and if the parameter type indicates the result is a - boolean, integer or enum type, the value is converted by this function and - the result stored in result->{boolean,int_32,uint_32,uint_64}. + boolean, integer, enum, uid, or gid type, the value is converted by this + function and the result stored in + result->{boolean,int_32,uint_32,uint_64,uid,gid}. If a match isn't initially made, the key is prefixed with "no" and no value is present then an attempt will be made to look up the key with the diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 7c3a565ffbef..82d142de3461 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -571,6 +571,7 @@ encoded manner. 
The codes are the following: um userfaultfd missing tracking uw userfaultfd wr-protect tracking ss shadow stack page + sl sealed == ======================================= Note that there is no guarantee that every flag and associated mnemonic will diff --git a/Documentation/i2c/i2c_bus.svg b/Documentation/i2c/i2c_bus.svg index 3170de976373..45801de4af7d 100644 --- a/Documentation/i2c/i2c_bus.svg +++ b/Documentation/i2c/i2c_bus.svg @@ -1,5 +1,6 @@ <?xml version="1.0" encoding="UTF-8" standalone="no"?> <!-- Created with Inkscape (http://www.inkscape.org/) --> +<!-- Updated to inclusive terminology by Wolfram Sang --> <svg xmlns:dc="http://purl.org/dc/elements/1.1/" @@ -1120,7 +1121,7 @@ <rect style="opacity:1;fill:#ffb9b9;fill-opacity:1;stroke:#f00000;stroke-width:2.8125;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" id="rect4424-3-2-9-7" - width="112.5" + width="134.5" height="113.75008" x="112.5" y="471.11221" @@ -1133,15 +1134,15 @@ y="521.46259" id="text4349"><tspan sodipodi:role="line" - x="167.5354" + x="178.5354" y="521.46259" style="font-size:25px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle" id="tspan1273">I2C</tspan><tspan sodipodi:role="line" - x="167.5354" + x="178.5354" y="552.71259" style="font-size:25px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle" - id="tspan1285">Master</tspan></text> + id="tspan1285">Controller</tspan></text> <rect 
style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#b9ffb9;fill-opacity:1;fill-rule:nonzero;stroke:#006400;stroke-width:2.8125;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" id="rect4424-3-2-9-7-3-3-5-3" @@ -1171,7 +1172,7 @@ x="318.59131" y="552.08752" style="font-size:25.00000191px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:1px" - id="tspan1287">Slave</tspan></text> + id="tspan1287">Target</tspan></text> <path style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.99968767;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" d="m 112.49995,677.36223 c 712.50005,0 712.50005,0 712.50005,0" @@ -1233,7 +1234,7 @@ x="468.59131" y="552.08746" style="font-size:25.00000191px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:1px" - id="tspan1287-6">Slave</tspan></text> + id="tspan1287-6">Target</tspan></text> <rect style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#b9ffb9;fill-opacity:1;fill-rule:nonzero;stroke:#006400;stroke-width:2.8125;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" id="rect4424-3-2-9-7-3-3-5-3-1" @@ -1258,7 +1259,7 @@ x="618.59131" y="552.08746" 
style="font-size:25.00000191px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:1px" - id="tspan1287-9">Slave</tspan></text> + id="tspan1287-9">Target</tspan></text> <path style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.99968743;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#DotM)" d="m 150,583.61221 v 93.75" diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index 786c618ba3be..579a1c7df200 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -3,29 +3,27 @@ Introduction to I2C and SMBus ============================= I²C (pronounce: I squared C and written I2C in the kernel documentation) is -a protocol developed by Philips. It is a slow two-wire protocol (variable -speed, up to 400 kHz), with a high speed extension (3.4 MHz). It provides +a protocol developed by Philips. It is a two-wire protocol with variable +speed (typically up to 400 kHz, high speed modes up to 5 MHz). It provides an inexpensive bus for connecting many types of devices with infrequent or -low bandwidth communications needs. I2C is widely used with embedded -systems. Some systems use variants that don't meet branding requirements, +low bandwidth communications needs. I2C is widely used with embedded +systems. Some systems use variants that don't meet branding requirements, and so are not advertised as being I2C but come under different names, e.g. TWI (Two Wire Interface), IIC. -The latest official I2C specification is the `"I2C-bus specification and user -manual" (UM10204) <https://www.nxp.com/webapp/Download?colCode=UM10204>`_ -published by NXP Semiconductors. However, you need to log-in to the site to -access the PDF. An older version of the specification (revision 6) is archived -`here <https://web.archive.org/web/20210813122132/https://www.nxp.com/docs/en/user-guide/UM10204.pdf>`_. 
+The latest official I2C specification is the `"I²C-bus specification and user +manual" (UM10204) <https://www.nxp.com/docs/en/user-guide/UM10204.pdf>`_ +published by NXP Semiconductors, version 7 as of this writing. SMBus (System Management Bus) is based on the I2C protocol, and is mostly -a subset of I2C protocols and signaling. Many I2C devices will work on an +a subset of I2C protocols and signaling. Many I2C devices will work on an SMBus, but some SMBus protocols add semantics beyond what is required to -achieve I2C branding. Modern PC mainboards rely on SMBus. The most common +achieve I2C branding. Modern PC mainboards rely on SMBus. The most common devices connected through SMBus are RAM modules configured using I2C EEPROMs, and hardware monitoring chips. Because the SMBus is mostly a subset of the generalized I2C bus, we can -use its protocols on many I2C systems. However, there are systems that don't +use its protocols on many I2C systems. However, there are systems that don't meet both SMBus and I2C electrical constraints; and others which can't implement all the common SMBus protocol semantics or messages. @@ -33,29 +31,52 @@ implement all the common SMBus protocol semantics or messages. Terminology =========== -Using the terminology from the official documentation, the I2C bus connects -one or more *master* chips and one or more *slave* chips. +The I2C bus connects one or more controller chips and one or more target chips. .. kernel-figure:: i2c_bus.svg - :alt: Simple I2C bus with one master and 3 slaves + :alt: Simple I2C bus with one controller and 3 targets Simple I2C bus -A **master** chip is a node that starts communications with slaves. In the -Linux kernel implementation it is called an **adapter** or bus. Adapter -drivers are in the ``drivers/i2c/busses/`` subdirectory. +A **controller** chip is a node that starts communications with targets. In the +Linux kernel implementation it is also called an "adapter" or "bus". 
Controller +drivers are usually in the ``drivers/i2c/busses/`` subdirectory. -An **algorithm** contains general code that can be used to implement a -whole class of I2C adapters. Each specific adapter driver either depends on -an algorithm driver in the ``drivers/i2c/algos/`` subdirectory, or includes -its own implementation. +An **algorithm** contains general code that can be used to implement a whole +class of I2C controllers. Each specific controller driver either depends on an +algorithm driver in the ``drivers/i2c/algos/`` subdirectory, or includes its +own implementation. -A **slave** chip is a node that responds to communications when addressed -by the master. In Linux it is called a **client**. Client drivers are kept -in a directory specific to the feature they provide, for example -``drivers/media/gpio/`` for GPIO expanders and ``drivers/media/i2c/`` for +A **target** chip is a node that responds to communications when addressed by a +controller. In the Linux kernel implementation it is also called a "client". +While targets are usually separate external chips, Linux can also act as a +target (needs hardware support) and respond to another controller on the bus. +This is then called a **local target**. In contrast, an external chip is called +a **remote target**. + +Target drivers are kept in a directory specific to the feature they provide, +for example ``drivers/gpio/`` for GPIO expanders and ``drivers/media/i2c/`` for video-related chips. -For the example configuration in figure, you will need a driver for your -I2C adapter, and drivers for your I2C devices (usually one driver for each -device). +For the example configuration in the figure above, you will need one driver for +the I2C controller, and drivers for your I2C targets. Usually one driver for +each target. + +Synonyms +-------- + +As mentioned above, the Linux I2C implementation historically uses the terms +"adapter" for controller and "client" for target. 
A number of data structures +have these synonyms in their name. So, when discussing implementation details, +you should be aware of these terms as well. The official wording is preferred, +though. + +Outdated terminology +-------------------- + +In earlier I2C specifications, controller was named "master" and target was +named "slave". These terms have been obsoleted with v7 of the specification and +their use is also discouraged by the Linux Kernel Code of Conduct. You may +still find them in references to documentation which has not been updated. The +general attitude, however, is to use the inclusive terms: controller and +target. Work to replace the old terminology in the Linux Kernel is on-going. diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index a1f3eb7a43e2..131863142cbb 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -128,7 +128,7 @@ executed to make module versioning work. modules_install Install the external module(s). The default location is - /lib/modules/<kernel_release>/extra/, but a prefix may + /lib/modules/<kernel_release>/updates/, but a prefix may be added with INSTALL_MOD_PATH (discussed in section 5). clean @@ -417,7 +417,7 @@ directory: And external modules are installed in: - /lib/modules/$(KERNELRELEASE)/extra/ + /lib/modules/$(KERNELRELEASE)/updates/ 5.1 INSTALL_MOD_PATH -------------------- @@ -438,10 +438,10 @@ And external modules are installed in: ------------------- External modules are by default installed to a directory under - /lib/modules/$(KERNELRELEASE)/extra/, but you may wish to + /lib/modules/$(KERNELRELEASE)/updates/, but you may wish to locate modules for a specific functionality in a separate directory. 
For this purpose, use INSTALL_MOD_DIR to specify an - alternative name to "extra.":: + alternative name to "updates.":: $ make INSTALL_MOD_DIR=gandalf -C $KDIR \ M=$PWD modules_install diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 00dc61358be8..4510e8d1adcb 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1603,7 +1603,7 @@ operations: attributes: - header reply: - attributes: &pse + attributes: - header - podl-pse-admin-state - podl-pse-admin-control @@ -1620,7 +1620,10 @@ operations: do: request: - attributes: *pse + attributes: + - header + - podl-pse-admin-control + - c33-pse-admin-control - name: rss-get doc: Get RSS params. diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml index d21234097167..6bda7a467301 100644 --- a/Documentation/netlink/specs/nfsd.yaml +++ b/Documentation/netlink/specs/nfsd.yaml @@ -123,8 +123,6 @@ operations: doc: dump pending nfsd rpc attribute-set: rpc-status dump: - pre: nfsd-nl-rpc-status-get-start - post: nfsd-nl-rpc-status-get-done reply: attributes: - xid diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst index 9232cd7da301..5d0b68f752c0 100644 --- a/Documentation/networking/devlink/devlink-region.rst +++ b/Documentation/networking/devlink/devlink-region.rst @@ -49,7 +49,7 @@ example usage $ devlink region show [ DEV/REGION ] $ devlink region del DEV/REGION snapshot SNAPSHOT_ID $ devlink region dump DEV/REGION [ snapshot SNAPSHOT_ID ] - $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length length + $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length LENGTH # Show all of the exposed regions with region sizes: $ devlink region show diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index 5926115ec0ed..8a251d71fa6e 100644 
--- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -32,6 +32,7 @@ Security-related interfaces seccomp_filter landlock lsm + mfd_noexec spec_ctrl tee diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index a141e8e65c5d..9a97030c6c8d 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -186,6 +186,7 @@ Code Seq# Include File Comments 'Q' all linux/soundcard.h 'R' 00-1F linux/random.h conflict! 'R' 01 linux/rfkill.h conflict! +'R' 20-2F linux/trace_mmap.h 'R' C0-DF net/bluetooth/rfcomm.h 'R' E0 uapi/linux/fsl_mc.h 'S' all linux/cdrom.h conflict! diff --git a/Documentation/userspace-api/mfd_noexec.rst b/Documentation/userspace-api/mfd_noexec.rst new file mode 100644 index 000000000000..7afcc480e38f --- /dev/null +++ b/Documentation/userspace-api/mfd_noexec.rst @@ -0,0 +1,86 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +Introduction of non-executable mfd +================================== +:Author: + Daniel Verkamp <dverkamp@chromium.org> + Jeff Xu <jeffxu@chromium.org> + +:Contributor: + Aleksa Sarai <cyphar@cyphar.com> + +Since Linux introduced the memfd feature, memfds have always had their +execute bit set, and the memfd_create() syscall doesn't allow setting +it differently. + +However, in a secure-by-default system, such as ChromeOS, (where all +executables should come from the rootfs, which is protected by verified +boot), this executable nature of memfd opens a door for NoExec bypass +and enables a “confused deputy attack”. E.g., in VRP bug [1]: cros_vm +process created a memfd to share the content with an external process, +however the memfd is overwritten and used for executing arbitrary code +and root escalation. [2] lists more VRP of this kind.
+ +On the other hand, executable memfd has its legit use: runc uses memfd’s +seal and executable feature to copy the contents of the binary then +execute them. For such a system, we need a solution to differentiate runc's +use of executable memfds and an attacker's [3]. + +To address those above: + - Let memfd_create() set X bit at creation time. + - Let memfd be sealed for modifying X bit when NX is set. + - Add a new pid namespace sysctl: vm.memfd_noexec to help applications in + migrating and enforcing non-executable MFD. + +User API +======== +``int memfd_create(const char *name, unsigned int flags)`` + +``MFD_NOEXEC_SEAL`` + When MFD_NOEXEC_SEAL bit is set in the ``flags``, memfd is created + with NX. F_SEAL_EXEC is set and the memfd can't be modified to + add X later. MFD_ALLOW_SEALING is also implied. + This is the most common case for the application to use memfd. + +``MFD_EXEC`` + When MFD_EXEC bit is set in the ``flags``, memfd is created with X. + +Note: + ``MFD_NOEXEC_SEAL`` implies ``MFD_ALLOW_SEALING``. In case that + an app doesn't want sealing, it can add F_SEAL_SEAL after creation. + + +Sysctl: +======== +``pid namespaced sysctl vm.memfd_noexec`` + +The new pid namespaced sysctl vm.memfd_noexec has 3 values: + + - 0: MEMFD_NOEXEC_SCOPE_EXEC + memfd_create() with neither MFD_EXEC nor MFD_NOEXEC_SEAL acts like + MFD_EXEC was set. + + - 1: MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL + memfd_create() with neither MFD_EXEC nor MFD_NOEXEC_SEAL acts like + MFD_NOEXEC_SEAL was set. + + - 2: MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED + memfd_create() without MFD_NOEXEC_SEAL will be rejected. + +The sysctl allows finer control of memfd_create for old software that +doesn't set the executable bit; for example, a container with +vm.memfd_noexec=1 means the old software will create non-executable memfd +by default while new software can create executable memfd by setting +MFD_EXEC. + +The value of vm.memfd_noexec is passed to child namespace at creation +time.
In addition, the setting is hierarchical, i.e. during memfd_create, +we will search from current ns to root ns and use the most restrictive +setting. + +[1] https://crbug.com/1305267 + +[2] https://bugs.chromium.org/p/chromium/issues/list?q=type%3Dbug-security%20memfd%20escalation&can=1 + +[3] https://lwn.net/Articles/781013/ diff --git a/Documentation/virt/hyperv/clocks.rst b/Documentation/virt/hyperv/clocks.rst index a56f4837d443..176043265803 100644 --- a/Documentation/virt/hyperv/clocks.rst +++ b/Documentation/virt/hyperv/clocks.rst @@ -62,12 +62,21 @@ shared page with scale and offset values into user space. User space code performs the same algorithm of reading the TSC and applying the scale and offset to get the constant 10 MHz clock. -Linux clockevents are based on Hyper-V synthetic timer 0. While -Hyper-V offers 4 synthetic timers for each CPU, Linux only uses -timer 0. Interrupts from stimer0 are recorded on the "HVS" line in -/proc/interrupts. Clockevents based on the virtualized PIT and -local APIC timer also work, but the Hyper-V synthetic timer is -preferred. +Linux clockevents are based on Hyper-V synthetic timer 0 (stimer0). +While Hyper-V offers 4 synthetic timers for each CPU, Linux only uses +timer 0. In older versions of Hyper-V, an interrupt from stimer0 +results in a VMBus control message that is demultiplexed by +vmbus_isr() as described in the Documentation/virt/hyperv/vmbus.rst +documentation. In newer versions of Hyper-V, stimer0 interrupts can +be mapped to an architectural interrupt, which is referred to as +"Direct Mode". Linux prefers to use Direct Mode when available. Since +x86/x64 doesn't support per-CPU interrupts, Direct Mode statically +allocates an x86 interrupt vector (HYPERV_STIMER0_VECTOR) across all CPUs +and explicitly codes it to call the stimer0 interrupt handler. Hence +interrupts from stimer0 are recorded on the "HVS" line in /proc/interrupts +rather than being associated with a Linux IRQ. 
Clockevents based on the +virtualized PIT and local APIC timer also work, but Hyper-V stimer0 +is preferred. The driver for the Hyper-V synthetic system clock and timers is drivers/clocksource/hyperv_timer.c. diff --git a/Documentation/virt/hyperv/overview.rst b/Documentation/virt/hyperv/overview.rst index cd493332c88a..77408a89d1a4 100644 --- a/Documentation/virt/hyperv/overview.rst +++ b/Documentation/virt/hyperv/overview.rst @@ -40,7 +40,7 @@ Linux guests communicate with Hyper-V in four different ways: arm64, these synthetic registers must be accessed using explicit hypercalls. -* VMbus: VMbus is a higher-level software construct that is built on +* VMBus: VMBus is a higher-level software construct that is built on the other 3 mechanisms. It is a message passing interface between the Hyper-V host and the Linux guest. It uses memory that is shared between Hyper-V and the guest, along with various signaling @@ -54,8 +54,8 @@ x86/x64 architecture only. .. _Hyper-V Top Level Functional Spec (TLFS): https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/tlfs -VMbus is not documented. This documentation provides a high-level -overview of VMbus and how it works, but the details can be discerned +VMBus is not documented. This documentation provides a high-level +overview of VMBus and how it works, but the details can be discerned only from the code. Sharing Memory @@ -74,7 +74,7 @@ follows: physical address space. How Hyper-V is told about the GPA or list of GPAs varies. In some cases, a single GPA is written to a synthetic register. In other cases, a GPA or list of GPAs is sent - in a VMbus message. + in a VMBus message. * Hyper-V translates the GPAs into "real" physical memory addresses, and creates a virtual mapping that it can use to access the memory. @@ -133,9 +133,9 @@ only the CPUs actually present in the VM, so Linux does not report any hot-add CPUs. 
A Linux guest CPU may be taken offline using the normal Linux -mechanisms, provided no VMbus channel interrupts are assigned to -the CPU. See the section on VMbus Interrupts for more details -on how VMbus channel interrupts can be re-assigned to permit +mechanisms, provided no VMBus channel interrupts are assigned to +the CPU. See the section on VMBus Interrupts for more details +on how VMBus channel interrupts can be re-assigned to permit taking a CPU offline. 32-bit and 64-bit @@ -169,14 +169,14 @@ and functionality. Hyper-V indicates feature/function availability via flags in synthetic MSRs that Hyper-V provides to the guest, and the guest code tests these flags. -VMbus has its own protocol version that is negotiated during the -initial VMbus connection from the guest to Hyper-V. This version +VMBus has its own protocol version that is negotiated during the +initial VMBus connection from the guest to Hyper-V. This version number is also output to dmesg during boot. This version number is checked in a few places in the code to determine if specific functionality is present. -Furthermore, each synthetic device on VMbus also has a protocol -version that is separate from the VMbus protocol version. Device +Furthermore, each synthetic device on VMBus also has a protocol +version that is separate from the VMBus protocol version. Device drivers for these synthetic devices typically negotiate the device protocol version, and may test that protocol version to determine if specific device functionality is present. diff --git a/Documentation/virt/hyperv/vmbus.rst b/Documentation/virt/hyperv/vmbus.rst index d2012d9022c5..1dcef6a7fda3 100644 --- a/Documentation/virt/hyperv/vmbus.rst +++ b/Documentation/virt/hyperv/vmbus.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0 -VMbus +VMBus ===== -VMbus is a software construct provided by Hyper-V to guest VMs. It +VMBus is a software construct provided by Hyper-V to guest VMs. 
It consists of a control path and common facilities used by synthetic devices that Hyper-V presents to guest VMs. The control path is used to offer synthetic devices to the guest VM and, in some cases, @@ -12,9 +12,9 @@ and the synthetic device implementation that is part of Hyper-V, and signaling primitives to allow Hyper-V and the guest to interrupt each other. -VMbus is modeled in Linux as a bus, with the expected /sys/bus/vmbus -entry in a running Linux guest. The VMbus driver (drivers/hv/vmbus_drv.c) -establishes the VMbus control path with the Hyper-V host, then +VMBus is modeled in Linux as a bus, with the expected /sys/bus/vmbus +entry in a running Linux guest. The VMBus driver (drivers/hv/vmbus_drv.c) +establishes the VMBus control path with the Hyper-V host, then registers itself as a Linux bus driver. It implements the standard bus functions for adding and removing devices to/from the bus. @@ -49,9 +49,9 @@ synthetic NIC is referred to as "netvsc" and the Linux driver for the synthetic SCSI controller is "storvsc". These drivers contain functions with names like "storvsc_connect_to_vsp". -VMbus channels +VMBus channels -------------- -An instance of a synthetic device uses VMbus channels to communicate +An instance of a synthetic device uses VMBus channels to communicate between the VSP and the VSC. Channels are bi-directional and used for passing messages. Most synthetic devices use a single channel, but the synthetic SCSI controller and synthetic NIC may use multiple @@ -73,7 +73,7 @@ write indices and some control flags, followed by the memory for the actual ring. The size of the ring is determined by the VSC in the guest and is specific to each synthetic device. The list of GPAs making up the ring is communicated to the Hyper-V host over the -VMbus control path as a GPA Descriptor List (GPADL). See function +VMBus control path as a GPA Descriptor List (GPADL). See function vmbus_establish_gpadl(). 
Each ring buffer is mapped into contiguous Linux kernel virtual @@ -102,10 +102,10 @@ resources. For Windows Server 2019 and later, this limit is approximately 1280 Mbytes. For versions prior to Windows Server 2019, the limit is approximately 384 Mbytes. -VMbus messages --------------- -All VMbus messages have a standard header that includes the message -length, the offset of the message payload, some flags, and a +VMBus channel messages +---------------------- +All messages sent in a VMBus channel have a standard header that includes +the message length, the offset of the message payload, some flags, and a transactionID. The portion of the message after the header is unique to each VSP/VSC pair. @@ -137,7 +137,7 @@ control message contains a list of GPAs that describe the data buffer. For example, the storvsc driver uses this approach to specify the data buffers to/from which disk I/O is done. -Three functions exist to send VMbus messages: +Three functions exist to send VMBus channel messages: 1. vmbus_sendpacket(): Control-only messages and messages with embedded data -- no GPAs @@ -154,20 +154,51 @@ Historically, Linux guests have trusted Hyper-V to send well-formed and valid messages, and Linux drivers for synthetic devices did not fully validate messages. With the introduction of processor technologies that fully encrypt guest memory and that allow the -guest to not trust the hypervisor (AMD SNP-SEV, Intel TDX), trusting +guest to not trust the hypervisor (AMD SEV-SNP, Intel TDX), trusting the Hyper-V host is no longer a valid assumption. The drivers for -VMbus synthetic devices are being updated to fully validate any +VMBus synthetic devices are being updated to fully validate any values read from memory that is shared with Hyper-V, which includes -messages from VMbus devices. To facilitate such validation, +messages from VMBus devices. 
To facilitate such validation, messages read by the guest from the "in" ring buffer are copied to a temporary buffer that is not shared with Hyper-V. Validation is performed in this temporary buffer without the risk of Hyper-V maliciously modifying the message after it is validated but before it is used. -VMbus interrupts +Synthetic Interrupt Controller (synic) +-------------------------------------- +Hyper-V provides each guest CPU with a synthetic interrupt controller +that is used by VMBus for host-guest communication. While each synic +defines 16 synthetic interrupts (SINT), Linux uses only one of the 16 +(VMBUS_MESSAGE_SINT). All interrupts related to communication between +the Hyper-V host and a guest CPU use that SINT. + +The SINT is mapped to a single per-CPU architectural interrupt (i.e., +an 8-bit x86/x64 interrupt vector, or an arm64 PPI INTID). Because +each CPU in the guest has a synic and may receive VMBus interrupts, +they are best modeled in Linux as per-CPU interrupts. This model works +well on arm64 where a single per-CPU Linux IRQ is allocated for +VMBUS_MESSAGE_SINT. This IRQ appears in /proc/interrupts as an IRQ labelled +"Hyper-V VMbus". Since x86/x64 lacks support for per-CPU IRQs, an x86 +interrupt vector is statically allocated (HYPERVISOR_CALLBACK_VECTOR) +across all CPUs and explicitly coded to call vmbus_isr(). In this case, +there's no Linux IRQ, and the interrupts are visible in aggregate in +/proc/interrupts on the "HYP" line. + +The synic provides the means to demultiplex the architectural interrupt into +one or more logical interrupts and route the logical interrupt to the proper +VMBus handler in Linux. This demultiplexing is done by vmbus_isr() and +related functions that access synic data structures. + +The synic is not modeled in Linux as an irq chip or irq domain, +and the demultiplexed logical interrupts are not Linux IRQs. As such, +they don't appear in /proc/interrupts or /proc/irq.
The CPU +affinity for one of these logical interrupts is controlled via an +entry under /sys/bus/vmbus as described below. + +VMBus interrupts ---------------- -VMbus provides a mechanism for the guest to interrupt the host when +VMBus provides a mechanism for the guest to interrupt the host when the guest has queued new messages in a ring buffer. The host expects that the guest will send an interrupt only when an "out" ring buffer transitions from empty to non-empty. If the guest sends @@ -176,63 +207,55 @@ unnecessary. If a guest sends an excessive number of unnecessary interrupts, the host may throttle that guest by suspending its execution for a few seconds to prevent a denial-of-service attack. -Similarly, the host will interrupt the guest when it sends a new -message on the VMbus control path, or when a VMbus channel "in" ring -buffer transitions from empty to non-empty. Each CPU in the guest -may receive VMbus interrupts, so they are best modeled as per-CPU -interrupts in Linux. This model works well on arm64 where a single -per-CPU IRQ is allocated for VMbus. Since x86/x64 lacks support for -per-CPU IRQs, an x86 interrupt vector is statically allocated (see -HYPERVISOR_CALLBACK_VECTOR) across all CPUs and explicitly coded to -call the VMbus interrupt service routine. These interrupts are -visible in /proc/interrupts on the "HYP" line. - -The guest CPU that a VMbus channel will interrupt is selected by the +Similarly, the host will interrupt the guest via the synic when +it sends a new message on the VMBus control path, or when a VMBus +channel "in" ring buffer transitions from empty to non-empty due to +the host inserting a new VMBus channel message. The control message stream +and each VMBus channel "in" ring buffer are separate logical interrupts +that are demultiplexed by vmbus_isr(). 
It demultiplexes by first checking +for channel interrupts by calling vmbus_chan_sched(), which looks at a synic +bitmap to determine which channels have pending interrupts on this CPU. +If multiple channels have pending interrupts for this CPU, they are +processed sequentially. When all channel interrupts have been processed, +vmbus_isr() checks for and processes any messages received on the VMBus +control path. + +The guest CPU that a VMBus channel will interrupt is selected by the guest when the channel is created, and the host is informed of that -selection. VMbus devices are broadly grouped into two categories: +selection. VMBus devices are broadly grouped into two categories: -1. "Slow" devices that need only one VMbus channel. The devices +1. "Slow" devices that need only one VMBus channel. The devices (such as keyboard, mouse, heartbeat, and timesync) generate - relatively few interrupts. Their VMbus channels are all + relatively few interrupts. Their VMBus channels are all assigned to interrupt the VMBUS_CONNECT_CPU, which is always CPU 0. -2. "High speed" devices that may use multiple VMbus channels for +2. "High speed" devices that may use multiple VMBus channels for higher parallelism and performance. These devices include the - synthetic SCSI controller and synthetic NIC. Their VMbus + synthetic SCSI controller and synthetic NIC. Their VMBus channels interrupts are assigned to CPUs that are spread out among the available CPUs in the VM so that interrupts on multiple channels can be processed in parallel. -The assignment of VMbus channel interrupts to CPUs is done in the +The assignment of VMBus channel interrupts to CPUs is done in the function init_vp_index(). This assignment is done outside of the normal Linux interrupt affinity mechanism, so the interrupts are neither "unmanaged" nor "managed" interrupts. 
-The CPU that a VMbus channel will interrupt can be seen in +The CPU that a VMBus channel will interrupt can be seen in /sys/bus/vmbus/devices/<deviceGUID>/ channels/<channelRelID>/cpu. When running on later versions of Hyper-V, the CPU can be changed -by writing a new value to this sysfs entry. Because the interrupt -assignment is done outside of the normal Linux affinity mechanism, -there are no entries in /proc/irq corresponding to individual -VMbus channel interrupts. +by writing a new value to this sysfs entry. Because VMBus channel +interrupts are not Linux IRQs, there are no entries in /proc/interrupts +or /proc/irq corresponding to individual VMBus channel interrupts. An online CPU in a Linux guest may not be taken offline if it has -VMbus channel interrupts assigned to it. Any such channel +VMBus channel interrupts assigned to it. Any such channel interrupts must first be manually reassigned to another CPU as described above. When no channel interrupts are assigned to the CPU, it can be taken offline. -When a guest CPU receives a VMbus interrupt from the host, the -function vmbus_isr() handles the interrupt. It first checks for -channel interrupts by calling vmbus_chan_sched(), which looks at a -bitmap setup by the host to determine which channels have pending -interrupts on this CPU. If multiple channels have pending -interrupts for this CPU, they are processed sequentially. When all -channel interrupts have been processed, vmbus_isr() checks for and -processes any message received on the VMbus control path. - -The VMbus channel interrupt handling code is designed to work +The VMBus channel interrupt handling code is designed to work correctly even if an interrupt is received on a CPU other than the CPU assigned to the channel. Specifically, the code does not use CPU-based exclusion for correctness. In normal operation, Hyper-V @@ -242,23 +265,23 @@ when Hyper-V will make the transition. 
The code must work correctly even if there is a time lag before Hyper-V starts interrupting the new CPU. See comments in target_cpu_store(). -VMbus device creation/deletion +VMBus device creation/deletion ------------------------------ Hyper-V and the Linux guest have a separate message-passing path that is used for synthetic device creation and deletion. This -path does not use a VMbus channel. See vmbus_post_msg() and +path does not use a VMBus channel. See vmbus_post_msg() and vmbus_on_msg_dpc(). The first step is for the guest to connect to the generic -Hyper-V VMbus mechanism. As part of establishing this connection, -the guest and Hyper-V agree on a VMbus protocol version they will +Hyper-V VMBus mechanism. As part of establishing this connection, +the guest and Hyper-V agree on a VMBus protocol version they will use. This negotiation allows newer Linux kernels to run on older Hyper-V versions, and vice versa. The guest then tells Hyper-V to "send offers". Hyper-V sends an offer message to the guest for each synthetic device that the VM -is configured to have. Each VMbus device type has a fixed GUID -known as the "class ID", and each VMbus device instance is also +is configured to have. Each VMBus device type has a fixed GUID +known as the "class ID", and each VMBus device instance is also identified by a GUID. The offer message from Hyper-V contains both GUIDs to uniquely (within the VM) identify the device. There is one offer message for each device instance, so a VM with @@ -275,7 +298,7 @@ type based on the class ID, and invokes the correct driver to set up the device. Driver/device matching is performed using the standard Linux mechanism. -The device driver probe function opens the primary VMbus channel to +The device driver probe function opens the primary VMBus channel to the corresponding VSP. 
It allocates guest memory for the channel ring buffers and shares the ring buffer with the Hyper-V host by giving the host a list of GPAs for the ring buffer memory. See @@ -285,7 +308,7 @@ Once the ring buffer is set up, the device driver and VSP exchange setup messages via the primary channel. These messages may include negotiating the device protocol version to be used between the Linux VSC and the VSP on the Hyper-V host. The setup messages may also -include creating additional VMbus channels, which are somewhat +include creating additional VMBus channels, which are somewhat mis-named as "sub-channels" since they are functionally equivalent to the primary channel once they are created. diff --git a/MAINTAINERS b/MAINTAINERS index ad96b9bd68ac..f6cc3af7b552 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -846,12 +846,6 @@ ALPS PS/2 TOUCHPAD DRIVER R: Pali Rohár <pali@kernel.org> F: drivers/input/mouse/alps.* -ALTERA I2C CONTROLLER DRIVER -M: Thor Thayer <thor.thayer@linux.intel.com> -S: Maintained -F: Documentation/devicetree/bindings/i2c/i2c-altera.txt -F: drivers/i2c/busses/i2c-altera.c - ALTERA MAILBOX DRIVER M: Mun Yew Tham <mun.yew.tham@intel.com> S: Maintained @@ -871,21 +865,6 @@ L: linux-gpio@vger.kernel.org S: Maintained F: drivers/gpio/gpio-altera.c -ALTERA SYSTEM MANAGER DRIVER -M: Thor Thayer <thor.thayer@linux.intel.com> -S: Maintained -F: drivers/mfd/altera-sysmgr.c -F: include/linux/mfd/altera-sysmgr.h - -ALTERA SYSTEM RESOURCE DRIVER FOR ARRIA10 DEVKIT -M: Thor Thayer <thor.thayer@linux.intel.com> -S: Maintained -F: drivers/gpio/gpio-altera-a10sr.c -F: drivers/mfd/altera-a10sr.c -F: drivers/reset/reset-a10sr.c -F: include/dt-bindings/reset/altr,rst-mgr-a10sr.h -F: include/linux/mfd/altera-a10sr.h - ALTERA TRIPLE SPEED ETHERNET DRIVER M: Joyce Ooi <joyce.ooi@intel.com> L: netdev@vger.kernel.org @@ -1044,7 +1023,7 @@ M: Joerg Roedel <joro@8bytes.org> R: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> L: iommu@lists.linux.dev S: Maintained -T: 
git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: drivers/iommu/amd/ F: include/linux/amd-iommu.h @@ -2892,7 +2871,7 @@ F: drivers/edac/altera_edac.[ch] ARM/SPREADTRUM SoC SUPPORT M: Orson Zhai <orsonzhai@gmail.com> M: Baolin Wang <baolin.wang7@gmail.com> -M: Chunyan Zhang <zhang.lyra@gmail.com> +R: Chunyan Zhang <zhang.lyra@gmail.com> S: Maintained F: arch/arm64/boot/dts/sprd N: sprd @@ -3601,10 +3580,9 @@ W: https://wireless.wiki.kernel.org/en/users/Drivers/b43 F: drivers/net/wireless/broadcom/b43/ B43LEGACY WIRELESS DRIVER -M: Larry Finger <Larry.Finger@lwfinger.net> L: linux-wireless@vger.kernel.org L: b43-dev@lists.infradead.org -S: Maintained +S: Orphan W: https://wireless.wiki.kernel.org/en/users/Drivers/b43 F: drivers/net/wireless/broadcom/b43legacy/ @@ -3980,7 +3958,7 @@ R: Song Liu <song@kernel.org> R: Yonghong Song <yonghong.song@linux.dev> R: John Fastabend <john.fastabend@gmail.com> R: KP Singh <kpsingh@kernel.org> -R: Stanislav Fomichev <sdf@google.com> +R: Stanislav Fomichev <sdf@fomichev.me> R: Hao Luo <haoluo@google.com> R: Jiri Olsa <jolsa@kernel.org> L: bpf@vger.kernel.org @@ -4083,12 +4061,13 @@ F: kernel/bpf/ringbuf.c BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF) M: KP Singh <kpsingh@kernel.org> -R: Matt Bobrowski <mattbobrowski@google.com> +M: Matt Bobrowski <mattbobrowski@google.com> L: bpf@vger.kernel.org S: Maintained F: Documentation/bpf/prog_lsm.rst F: include/linux/bpf_lsm.h F: kernel/bpf/bpf_lsm.c +F: kernel/trace/bpf_trace.c F: security/bpf/ BPF [SELFTESTS] (Test Runners & Infrastructure) @@ -5295,7 +5274,7 @@ F: drivers/infiniband/hw/usnic/ CLANG CONTROL FLOW INTEGRITY SUPPORT M: Sami Tolvanen <samitolvanen@google.com> -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> R: Nathan Chancellor <nathan@kernel.org> L: llvm@lists.linux.dev S: Supported @@ -6238,9 +6217,8 @@ S: Maintained F: drivers/usb/dwc3/ DESIGNWARE 
XDATA IP DRIVER -M: Gustavo Pimentel <gustavo.pimentel@synopsys.com> L: linux-pci@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/misc-devices/dw-xdata-pcie.rst F: drivers/misc/dw-xdata-pcie.c @@ -8211,7 +8189,7 @@ F: rust/kernel/net/phy.rs EXEC & BINFMT API, ELF R: Eric Biederman <ebiederm@xmission.com> -R: Kees Cook <keescook@chromium.org> +R: Kees Cook <kees@kernel.org> L: linux-mm@kvack.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve @@ -8482,6 +8460,7 @@ R: Darrick J. Wong <djwong@kernel.org> L: linux-xfs@vger.kernel.org L: linux-fsdevel@vger.kernel.org S: Supported +F: Documentation/filesystems/iomap/* F: fs/iomap/ F: include/linux/iomap.h @@ -8612,7 +8591,7 @@ S: Maintained F: drivers/net/ethernet/nvidia/* FORTIFY_SOURCE -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> L: linux-hardening@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening @@ -8833,6 +8812,7 @@ F: drivers/spi/spi-fsl-qspi.c FREESCALE QUICC ENGINE LIBRARY M: Qiang Zhao <qiang.zhao@nxp.com> +M: Christophe Leroy <christophe.leroy@csgroup.eu> L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/soc/fsl/qe/ @@ -8882,9 +8862,10 @@ S: Maintained F: drivers/tty/serial/ucc_uart.c FREESCALE SOC DRIVERS +M: Christophe Leroy <christophe.leroy@csgroup.eu> L: linuxppc-dev@lists.ozlabs.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Orphan +S: Maintained F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml F: Documentation/devicetree/bindings/soc/fsl/ F: drivers/soc/fsl/ @@ -9102,7 +9083,7 @@ F: include/linux/mfd/gsc.h F: include/linux/platform_data/gsc_hwmon.h GCC PLUGINS -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> L: linux-hardening@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening @@ -9236,7 +9217,7 @@ S: Maintained F: 
drivers/input/touchscreen/resistive-adc-touch.c GENERIC STRING LIBRARY -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> R: Andy Shevchenko <andy@kernel.org> L: linux-hardening@vger.kernel.org S: Supported @@ -11156,7 +11137,7 @@ M: David Woodhouse <dwmw2@infradead.org> M: Lu Baolu <baolu.lu@linux.intel.com> L: iommu@lists.linux.dev S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: drivers/iommu/intel/ INTEL IPU3 CSI-2 CIO2 DRIVER @@ -11529,7 +11510,7 @@ IOMMU DMA-API LAYER M: Robin Murphy <robin.murphy@arm.com> L: iommu@lists.linux.dev S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: drivers/iommu/dma-iommu.c F: drivers/iommu/dma-iommu.h F: drivers/iommu/iova.c @@ -11541,7 +11522,7 @@ M: Will Deacon <will@kernel.org> R: Robin Murphy <robin.murphy@arm.com> L: iommu@lists.linux.dev S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: Documentation/devicetree/bindings/iommu/ F: Documentation/userspace-api/iommu.rst F: drivers/iommu/ @@ -11950,7 +11931,7 @@ F: scripts/package/ F: usr/ KERNEL HARDENING (not covered by other areas) -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> R: Gustavo A. R. 
Silva <gustavoars@kernel.org> L: linux-hardening@vger.kernel.org S: Supported @@ -12382,7 +12363,6 @@ F: drivers/video/backlight/ktz8866.c KVM PARAVIRT (KVM/paravirt) M: Paolo Bonzini <pbonzini@redhat.com> -R: Wanpeng Li <wanpengli@tencent.com> R: Vitaly Kuznetsov <vkuznets@redhat.com> L: kvm@vger.kernel.org S: Supported @@ -12478,7 +12458,7 @@ F: drivers/scsi/53c700* LEAKING_ADDRESSES M: Tycho Andersen <tycho@tycho.pizza> -R: Kees Cook <keescook@chromium.org> +R: Kees Cook <kees@kernel.org> L: linux-hardening@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening @@ -12774,7 +12754,7 @@ F: arch/powerpc/platforms/8xx/ F: arch/powerpc/platforms/83xx/ LINUX KERNEL DUMP TEST MODULE (LKDTM) -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> S: Maintained F: drivers/misc/lkdtm/* F: tools/testing/selftests/lkdtm/* @@ -12904,7 +12884,7 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/usb/dvb-usb-v2/lmedm04* LOADPIN SECURITY MODULE -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening F: Documentation/admin-guide/LSM/LoadPin.rst @@ -14474,7 +14454,7 @@ MEMORY MAPPING M: Andrew Morton <akpm@linux-foundation.org> R: Liam R. 
Howlett <Liam.Howlett@oracle.com> R: Vlastimil Babka <vbabka@suse.cz> -R: Lorenzo Stoakes <lstoakes@gmail.com> +R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org @@ -16447,7 +16427,7 @@ F: arch/arm/boot/dts/ti/omap/am335x-nano.dts OMAP1 SUPPORT M: Aaro Koskinen <aaro.koskinen@iki.fi> M: Janusz Krzysztofik <jmkrzyszt@gmail.com> -M: Tony Lindgren <tony@atomide.com> +R: Tony Lindgren <tony@atomide.com> L: linux-omap@vger.kernel.org S: Maintained Q: http://patchwork.kernel.org/project/linux-omap/list/ @@ -16459,10 +16439,13 @@ F: include/linux/platform_data/ams-delta-fiq.h F: include/linux/platform_data/i2c-omap.h OMAP2+ SUPPORT +M: Aaro Koskinen <aaro.koskinen@iki.fi> +M: Andreas Kemnade <andreas@kemnade.info> +M: Kevin Hilman <khilman@baylibre.com> +M: Roger Quadros <rogerq@kernel.org> M: Tony Lindgren <tony@atomide.com> L: linux-omap@vger.kernel.org S: Maintained -W: http://www.muru.com/linux/omap/ W: http://linux.omap.com/ Q: http://patchwork.kernel.org/project/linux-omap/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git @@ -17532,7 +17515,6 @@ F: include/linux/peci.h PENSANDO ETHERNET DRIVERS M: Shannon Nelson <shannon.nelson@amd.com> M: Brett Creeley <brett.creeley@amd.com> -M: drivers@pensando.io L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/pensando/ionic.rst @@ -17996,7 +17978,7 @@ F: tools/testing/selftests/proc/ PROC SYSCTL M: Luis Chamberlain <mcgrof@kernel.org> -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> M: Joel Granados <j.granados@samsung.com> L: linux-kernel@vger.kernel.org L: linux-fsdevel@vger.kernel.org @@ -18052,7 +18034,7 @@ F: Documentation/devicetree/bindings/net/pse-pd/ F: drivers/net/pse-pd/ PSTORE FILESYSTEM -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> R: Tony Luck <tony.luck@intel.com> R: Guilherme G. 
Piccoli <gpiccoli@igalia.com> L: linux-hardening@vger.kernel.org @@ -18210,6 +18192,7 @@ QCOM AUDIO (ASoC) DRIVERS M: Srinivas Kandagatla <srinivas.kandagatla@linaro.org> M: Banajit Goswami <bgoswami@quicinc.com> L: alsa-devel@alsa-project.org (moderated for non-subscribers) +L: linux-arm-msm@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/soc/qcom/qcom,apr* F: Documentation/devicetree/bindings/sound/qcom,* @@ -18374,7 +18357,7 @@ M: Jeff Johnson <jjohnson@kernel.org> L: ath12k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath12k -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git F: drivers/net/wireless/ath/ath12k/ N: ath12k @@ -18384,7 +18367,7 @@ M: Jeff Johnson <jjohnson@kernel.org> L: ath10k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git F: drivers/net/wireless/ath/ath10k/ N: ath10k @@ -18395,7 +18378,7 @@ L: ath11k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k B: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k/bugreport -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git F: drivers/net/wireless/ath/ath11k/ N: ath11k @@ -18404,7 +18387,7 @@ M: Toke Høiland-Jørgensen <toke@toke.dk> L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/en/users/Drivers/ath9k -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git F: Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml F: drivers/net/wireless/ath/ath9k/ @@ -19315,7 +19298,7 @@ F: drivers/perf/riscv_pmu_legacy.c F: drivers/perf/riscv_pmu_sbi.c RISC-V 
THEAD SoC SUPPORT -M: Jisheng Zhang <jszhang@kernel.org> +M: Drew Fustini <drew@pdp7.com> M: Guo Ren <guoren@kernel.org> M: Fu Wei <wefu@redhat.com> L: linux-riscv@lists.infradead.org @@ -19509,7 +19492,6 @@ F: drivers/net/wireless/realtek/rtl818x/rtl8180/ RTL8187 WIRELESS DRIVER M: Hin-Tak Leung <hintak.leung@gmail.com> -M: Larry Finger <Larry.Finger@lwfinger.net> L: linux-wireless@vger.kernel.org S: Maintained T: git https://github.com/pkshih/rtw.git @@ -20058,7 +20040,7 @@ F: drivers/media/cec/platform/seco/seco-cec.c F: drivers/media/cec/platform/seco/seco-cec.h SECURE COMPUTING -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> R: Andy Lutomirski <luto@amacapital.net> R: Will Drewry <wad@chromium.org> S: Supported @@ -21247,7 +21229,6 @@ W: http://wiki.laptop.org/go/DCON F: drivers/staging/olpc_dcon/ STAGING - REALTEK RTL8712U DRIVERS -M: Larry Finger <Larry.Finger@lwfinger.net> M: Florian Schilhabel <florian.c.schilhabel@googlemail.com>. S: Odd Fixes F: drivers/staging/rtl8712/ @@ -22746,7 +22727,7 @@ M: Jarkko Sakkinen <jarkko@kernel.org> R: Jason Gunthorpe <jgg@ziepe.ca> L: linux-integrity@vger.kernel.org S: Maintained -W: https://gitlab.com/jarkkojs/linux-tpmdd-test +W: https://codeberg.org/jarkko/linux-tpmdd-test Q: https://patchwork.kernel.org/project/linux-integrity/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git F: Documentation/devicetree/bindings/tpm/ @@ -22972,7 +22953,7 @@ F: drivers/block/ublk_drv.c F: include/uapi/linux/ublk_cmd.h UBSAN -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> R: Marco Elver <elver@google.com> R: Andrey Konovalov <andreyknvl@gmail.com> R: Andrey Ryabinin <ryabinin.a.a@gmail.com> @@ -23863,8 +23844,8 @@ S: Maintained F: drivers/vhost/scsi.c VIRTIO I2C DRIVER -M: Conghui Chen <conghui.chen@intel.com> M: Viresh Kumar <viresh.kumar@linaro.org> +R: "Chen, Jian Jun" <jian.jun.chen@intel.com> L: linux-i2c@vger.kernel.org L: virtualization@lists.linux.dev 
S: Maintained @@ -23974,7 +23955,6 @@ VMALLOC M: Andrew Morton <akpm@linux-foundation.org> R: Uladzislau Rezki <urezki@gmail.com> R: Christoph Hellwig <hch@infradead.org> -R: Lorenzo Stoakes <lstoakes@gmail.com> L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org @@ -24810,7 +24790,7 @@ F: drivers/net/hamradio/yam* F: include/linux/yam.h YAMA SECURITY MODULE -M: Kees Cook <keescook@chromium.org> +M: Kees Cook <kees@kernel.org> S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening F: Documentation/admin-guide/LSM/Yama.rst @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi b/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi index d80440446473..05d7a462ea25 100644 --- a/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi @@ -85,7 +85,7 @@ }; }; - panel { + panel_dpi: panel { compatible = "sii,43wvf1g"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_display_power>; diff --git a/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso b/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso index c84e9b052527..151e9cee3c87 100644 --- a/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso +++ b/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso @@ -10,8 +10,6 @@ /plugin/; &{/} { - /delete-node/ panel; - hdmi: connector-hdmi { compatible = "hdmi-connector"; label = "hdmi"; @@ -82,6 +80,10 @@ }; }; +&panel_dpi { + status = "disabled"; +}; + &tve { status = "disabled"; }; diff --git a/arch/arm/boot/dts/rockchip/rk3066a.dtsi b/arch/arm/boot/dts/rockchip/rk3066a.dtsi index 30139f21de64..15cbd94d7ec0 100644 --- a/arch/arm/boot/dts/rockchip/rk3066a.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3066a.dtsi @@ -128,6 +128,7 @@ pinctrl-0 = <&hdmii2c_xfer>, <&hdmi_hpd>; power-domains = <&power RK3066_PD_VIO>; rockchip,grf = <&grf>; + #sound-dai-cells = <0>; status = 
"disabled"; ports { diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 78282ced5038..e408399d5f0e 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -14,6 +14,7 @@ #include <asm/mach/map.h> #include <asm/mmu_context.h> #include <asm/ptrace.h> +#include <asm/uaccess.h> #ifdef CONFIG_EFI void efi_init(void); @@ -25,6 +26,18 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md, boo #define arch_efi_call_virt_setup() efi_virtmap_load() #define arch_efi_call_virt_teardown() efi_virtmap_unload() +#ifdef CONFIG_CPU_TTBR0_PAN +#undef arch_efi_call_virt +#define arch_efi_call_virt(p, f, args...) ({ \ + unsigned int flags = uaccess_save_and_enable(); \ + efi_status_t res = _Generic((p)->f(args), \ + efi_status_t: (p)->f(args), \ + default: ((p)->f(args), EFI_ABORTED)); \ + uaccess_restore(flags); \ + res; \ +}) +#endif + #define ARCH_EFI_IRQ_FLAGS_MASK \ (PSR_J_BIT | PSR_E_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | \ PSR_T_BIT | MODE_MASK) diff --git a/arch/arm/mach-davinci/pm.c b/arch/arm/mach-davinci/pm.c index 8aa39db095d7..2c5155bd376b 100644 --- a/arch/arm/mach-davinci/pm.c +++ b/arch/arm/mach-davinci/pm.c @@ -61,7 +61,7 @@ static void davinci_pm_suspend(void) /* Configure sleep count in deep sleep register */ val = __raw_readl(pm_config.deepsleep_reg); - val &= ~DEEPSLEEP_SLEEPCOUNT_MASK, + val &= ~DEEPSLEEP_SLEEPCOUNT_MASK; val |= pm_config.sleepcount; __raw_writel(val, pm_config.deepsleep_reg); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5d91259ee7b5..b6e8920364de 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1649,6 +1649,7 @@ config RODATA_FULL_DEFAULT_ENABLED config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" + depends on !KCSAN help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts 
b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts index c204dd43c726..ce90327e1b2e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts @@ -191,7 +191,7 @@ compatible = "x-powers,axp803"; reg = <0x3a3>; interrupt-parent = <&r_intc>; - interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_LOW>; + interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_LOW>; x-powers,drive-vbus-en; vin1-supply = <&reg_vcc5v>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index 4768b05fd765..98544741ce17 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -6,6 +6,7 @@ #include <dt-bindings/phy/phy-imx8-pcie.h> #include <dt-bindings/pwm/pwm.h> #include "imx8mm.dtsi" +#include "imx8mm-overdrive.dtsi" / { chosen { @@ -935,7 +936,7 @@ /* Verdin GPIO_9_DSI (pulled-up as active-low) */ pinctrl_gpio_9_dsi: gpio9dsigrp { fsl,pins = - <MX8MM_IOMUXC_NAND_RE_B_GPIO3_IO15 0x146>; /* SODIMM 17 */ + <MX8MM_IOMUXC_NAND_RE_B_GPIO3_IO15 0x1c6>; /* SODIMM 17 */ }; /* Verdin GPIO_10_DSI (pulled-up as active-low) */ diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi index 43f1d45ccc96..f5115f9e8c47 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi @@ -254,7 +254,7 @@ <&clk IMX8MP_CLK_CLKOUT2>, <&clk IMX8MP_AUDIO_PLL2_OUT>; assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL2_OUT>; - assigned-clock-rates = <13000000>, <13000000>, <156000000>; + assigned-clock-rates = <13000000>, <13000000>, <208000000>; reset-gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi index dec57fad6828..e2b5e7ac3e46 100644 --- 
a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi @@ -219,7 +219,7 @@ bluetooth { compatible = "brcm,bcm4330-bt"; - shutdown-gpios = <&gpio4 16 GPIO_ACTIVE_HIGH>; + shutdown-gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts index 5c6b39c6933f..6e05361c1ffb 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts @@ -36,7 +36,7 @@ regulator-name = "SD1_SPWR"; regulator-min-microvolt = <3000000>; regulator-max-microvolt = <3000000>; - gpio = <&lsio_gpio4 19 GPIO_ACTIVE_HIGH>; + gpio = <&lsio_gpio4 7 GPIO_ACTIVE_HIGH>; enable-active-high; }; diff --git a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts index d400d85f42a9..bd98eff4d685 100644 --- a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts @@ -296,7 +296,6 @@ vmmc-supply = <&reg_usdhc2_vmmc>; bus-width = <4>; status = "okay"; - no-sdio; no-mmc; }; diff --git a/arch/arm64/boot/dts/qcom/qdu1000.dtsi b/arch/arm64/boot/dts/qcom/qdu1000.dtsi index f2a5e2e40461..f90f03fa6a24 100644 --- a/arch/arm64/boot/dts/qcom/qdu1000.dtsi +++ b/arch/arm64/boot/dts/qcom/qdu1000.dtsi @@ -1459,9 +1459,23 @@ system-cache-controller@19200000 { compatible = "qcom,qdu1000-llcc"; - reg = <0 0x19200000 0 0xd80000>, + reg = <0 0x19200000 0 0x80000>, + <0 0x19300000 0 0x80000>, + <0 0x19600000 0 0x80000>, + <0 0x19700000 0 0x80000>, + <0 0x19a00000 0 0x80000>, + <0 0x19b00000 0 0x80000>, + <0 0x19e00000 0 0x80000>, + <0 0x19f00000 0 0x80000>, <0 0x1a200000 0 0x80000>; reg-names = "llcc0_base", + "llcc1_base", + "llcc2_base", + "llcc3_base", + "llcc4_base", + "llcc5_base", + "llcc6_base", + "llcc7_base", "llcc_broadcast_base"; interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>; }; diff --git 
a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 31de73594839..1b3dc0ece54d 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -3605,7 +3605,7 @@ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>, - <GIC_PPI 12 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>; + <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>; }; pcie0: pcie@1c00000 { diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi index 067712310560..581a70c34fd2 100644 --- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi @@ -2647,11 +2647,14 @@ system-cache-controller@9200000 { compatible = "qcom,sc8180x-llcc"; - reg = <0 0x09200000 0 0x50000>, <0 0x09280000 0 0x50000>, - <0 0x09300000 0 0x50000>, <0 0x09380000 0 0x50000>, - <0 0x09600000 0 0x50000>; + reg = <0 0x09200000 0 0x58000>, <0 0x09280000 0 0x58000>, + <0 0x09300000 0 0x58000>, <0 0x09380000 0 0x58000>, + <0 0x09400000 0 0x58000>, <0 0x09480000 0 0x58000>, + <0 0x09500000 0 0x58000>, <0 0x09580000 0 0x58000>, + <0 0x09600000 0 0x58000>; reg-names = "llcc0_base", "llcc1_base", "llcc2_base", - "llcc3_base", "llcc_broadcast_base"; + "llcc3_base", "llcc4_base", "llcc5_base", + "llcc6_base", "llcc7_base", "llcc_broadcast_base"; interrupts = <GIC_SPI 582 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts index 41215567b3ae..372b35fb844f 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts @@ -977,8 +977,7 @@ reset-n-pins { pins = "gpio99"; function = "gpio"; - output-high; - drive-strength = <16>; + bias-disable; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts 
b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index e937732abede..4bf99b6b6e5f 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -655,15 +655,16 @@ status = "okay"; - /* FIXME: verify */ touchscreen@10 { - compatible = "hid-over-i2c"; + compatible = "elan,ekth5015m", "elan,ekth6915"; reg = <0x10>; - hid-descr-addr = <0x1>; interrupts-extended = <&tlmm 175 IRQ_TYPE_LEVEL_LOW>; - vdd-supply = <&vreg_misc_3p3>; - vddl-supply = <&vreg_s10b>; + reset-gpios = <&tlmm 99 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>; + no-reset-on-power-off; + + vcc33-supply = <&vreg_misc_3p3>; + vccio-supply = <&vreg_misc_3p3>; pinctrl-names = "default"; pinctrl-0 = <&ts0_default>; @@ -1496,8 +1497,8 @@ reset-n-pins { pins = "gpio99"; function = "gpio"; - output-high; - drive-strength = <16>; + drive-strength = <2>; + bias-disable; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 0549ba1fbeea..59f0a850671a 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -4623,6 +4623,8 @@ restart@c264000 { compatible = "qcom,pshold"; reg = <0 0x0c264000 0 0x4>; + /* TZ seems to block access */ + status = "reserved"; }; tsens1: thermal-sensor@c265000 { diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi index aca0a87092e4..9ed062150aaf 100644 --- a/arch/arm64/boot/dts/qcom/sm6115.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi @@ -1090,6 +1090,7 @@ power-domains = <&rpmpd SM6115_VDDCX>; operating-points-v2 = <&sdhc1_opp_table>; + iommus = <&apps_smmu 0x00c0 0x0>; interconnects = <&system_noc MASTER_SDCC_1 RPM_ALWAYS_TAG &bimc SLAVE_EBI_CH0 RPM_ALWAYS_TAG>, <&bimc MASTER_AMPSS_M0 RPM_ALWAYS_TAG diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index c5c2895b37c7..be6b1e7d07ce 100644 --- 
a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -49,6 +49,15 @@ stdout-path = "serial0:115200n8"; }; + reserved-memory { + linux,cma { + compatible = "shared-dma-pool"; + size = <0x0 0x8000000>; + reusable; + linux,cma-default; + }; + }; + sound { compatible = "qcom,x1e80100-sndcard"; model = "X1E80100-CRD"; @@ -93,7 +102,7 @@ }; codec { - sound-dai = <&wcd938x 1>, <&swr2 0>, <&lpass_txmacro 0>; + sound-dai = <&wcd938x 1>, <&swr2 1>, <&lpass_txmacro 0>; }; platform { @@ -744,7 +753,7 @@ wcd_tx: codec@0,3 { compatible = "sdw20217010d00"; reg = <0 3>; - qcom,tx-port-mapping = <1 1 2 3>; + qcom,tx-port-mapping = <2 2 3 4>; }; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index 2061fbe7b75a..8f67c393b871 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -23,6 +23,15 @@ stdout-path = "serial0:115200n8"; }; + reserved-memory { + linux,cma { + compatible = "shared-dma-pool"; + size = <0x0 0x8000000>; + reusable; + linux,cma-default; + }; + }; + vph_pwr: vph-pwr-regulator { compatible = "regulator-fixed"; diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 5f90a0b3c016..05e4d491ec18 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -2737,15 +2737,17 @@ device_type = "pci"; compatible = "qcom,pcie-x1e80100"; reg = <0 0x01bf8000 0 0x3000>, - <0 0x70000000 0 0xf1d>, - <0 0x70000f20 0 0xa8>, + <0 0x70000000 0 0xf20>, + <0 0x70000f40 0 0xa8>, <0 0x70001000 0 0x1000>, - <0 0x70100000 0 0x100000>; + <0 0x70100000 0 0x100000>, + <0 0x01bfb000 0 0x1000>; reg-names = "parf", "dbi", "elbi", "atu", - "config"; + "config", + "mhi"; #address-cells = <3>; #size-cells = <2>; ranges = <0x01000000 0 0x00000000 0 0x70200000 0 0x100000>, diff --git a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts 
b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts index b47fe02c33fb..079101cddd65 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts @@ -5,6 +5,8 @@ */ /dts-v1/; + +#include <dt-bindings/leds/common.h> #include "rk3308.dtsi" / { @@ -24,17 +26,21 @@ leds { compatible = "gpio-leds"; pinctrl-names = "default"; - pinctrl-0 = <&green_led_gio>, <&heartbeat_led_gpio>; + pinctrl-0 = <&green_led>, <&heartbeat_led>; green-led { + color = <LED_COLOR_ID_GREEN>; default-state = "on"; + function = LED_FUNCTION_POWER; gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>; label = "rockpis:green:power"; linux,default-trigger = "default-on"; }; blue-led { + color = <LED_COLOR_ID_BLUE>; default-state = "on"; + function = LED_FUNCTION_HEARTBEAT; gpios = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>; label = "rockpis:blue:user"; linux,default-trigger = "heartbeat"; @@ -126,10 +132,12 @@ }; &emmc { - bus-width = <4>; cap-mmc-highspeed; - mmc-hs200-1_8v; + cap-sd-highspeed; + no-sdio; non-removable; + pinctrl-names = "default"; + pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>; vmmc-supply = <&vcc_io>; status = "okay"; }; @@ -214,11 +222,11 @@ pinctrl-0 = <&rtc_32k>; leds { - green_led_gio: green-led-gpio { + green_led: green-led { rockchip,pins = <0 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>; }; - heartbeat_led_gpio: heartbeat-led-gpio { + heartbeat_led: heartbeat-led { rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi index 962ea893999b..c00da150a22f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi @@ -811,7 +811,7 @@ clocks = <&cru SCLK_I2S2_8CH_TX_OUT>, <&cru SCLK_I2S2_8CH_RX_OUT>, <&cru PCLK_ACODEC>; - reset-names = "codec-reset"; + reset-names = "codec"; resets = <&cru SRST_ACODEC_P>; #sound-dai-cells = <0>; status = "disabled"; diff --git 
a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts index f09d60bbe6c4..a608a219543e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts @@ -241,8 +241,8 @@ rk805: pmic@18 { compatible = "rockchip,rk805"; reg = <0x18>; - interrupt-parent = <&gpio2>; - interrupts = <6 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&gpio0>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <1>; clock-output-names = "xin32k", "rk805-clkout2"; gpio-controller; diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index 734f87db4d11..73618df7a889 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -793,6 +793,7 @@ dma-names = "tx"; pinctrl-names = "default"; pinctrl-0 = <&spdif_tx>; + #sound-dai-cells = <0>; status = "disabled"; }; @@ -804,6 +805,7 @@ clocks = <&cru SCLK_I2S_2CH>, <&cru HCLK_I2S_2CH>; dmas = <&dmac_bus 6>, <&dmac_bus 7>; dma-names = "tx", "rx"; + #sound-dai-cells = <0>; status = "disabled"; }; @@ -817,6 +819,7 @@ dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&i2s_8ch_bus>; + #sound-dai-cells = <0>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 789fd0dcc88b..3cd63d1e8f15 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -450,7 +450,7 @@ ap_i2c_audio: &i2c8 { dlg,btn-cfg = <50>; dlg,mic-det-thr = <500>; dlg,jack-ins-deb = <20>; - dlg,jack-det-rate = "32ms_64ms"; + dlg,jack-det-rate = "32_64"; dlg,jack-rem-deb = <1>; dlg,a-d-btn-thr = <0xa>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index 26322a358d91..b908ce006c26 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ 
b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -289,7 +289,7 @@ regulator-name = "vdd_gpu"; regulator-always-on; regulator-boot-on; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; regulator-ramp-delay = <6001>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts index 1a604429fb26..e74871491ef5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts @@ -444,6 +444,7 @@ &sdmmc { bus-width = <4>; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts index b4f22d95ac0e..e80caa36f8e4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts @@ -435,6 +435,7 @@ &sdmmc { bus-width = <4>; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts index b8e15b76a8a6..2e7512676b7e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts @@ -383,6 +383,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; sd-uhs-sdr104; vmmc-supply = <&vcc_3v3_s3>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi index aebe1fedd2d8..615094bb8ba3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi @@ -344,6 +344,11 @@ }; }; +&pwm0 { + pinctrl-0 = <&pwm0m1_pins>; + pinctrl-names = "default"; +}; + &saradc { vref-supply = 
<&vcc_1v8_s0>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts index 3b2ec1d0c542..074c316a9a69 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts @@ -288,9 +288,9 @@ pinctrl-0 = <&i2c7m0_xfer>; status = "okay"; - es8316: audio-codec@11 { + es8316: audio-codec@10 { compatible = "everest,es8316"; - reg = <0x11>; + reg = <0x10>; assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; assigned-clock-rates = <12288000>; clocks = <&cru I2S0_8CH_MCLKOUT>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts index 8e2a07612d17..3b9a349362db 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts @@ -366,6 +366,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; @@ -393,6 +394,7 @@ pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>, <&rk806_dvs2_null>, <&rk806_dvs3_null>; spi-max-frequency = <1000000>; + system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 57a9abe78ee4..2c7bf4da0b80 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1036,6 +1036,7 @@ CONFIG_SND_AUDIO_GRAPH_CARD2=m CONFIG_HID_MULTITOUCH=m CONFIG_I2C_HID_ACPI=m CONFIG_I2C_HID_OF=m +CONFIG_I2C_HID_OF_ELAN=m CONFIG_USB=y CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index 980d1dd8e1a3..b8a5861dc7b7 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -112,6 +112,9 @@ #define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr) +#define 
_ASM_EXTABLE_KACCESS(insn, fixup) \ + _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + #define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ __DEFINE_ASM_GPR_NUMS \ __ASM_EXTABLE_RAW(#insn, #fixup, \ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 9943ff0af4c9..1f60aa1bc750 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -170,6 +170,7 @@ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ +#define PTE_SWBITS_MASK _AT(pteval_t, (BIT(63) | GENMASK(58, 55))) #define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h new file mode 100644 index 000000000000..be5915669d23 --- /dev/null +++ b/arch/arm64/include/asm/runtime-const.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#include <asm/cacheflush.h> + +/* Sigh. 
You can still run arm64 in BE mode */ +#include <asm/byteorder.h> + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("1:\t" \ + "movz %0, #0xcdef\n\t" \ + "movk %0, #0x89ab, lsl #16\n\t" \ + "movk %0, #0x4567, lsl #32\n\t" \ + "movk %0, #0x0123, lsl #48\n\t" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + :"=r" (__ret)); \ + __ret; }) + +#define runtime_const_shift_right_32(val, sym) ({ \ + unsigned long __ret; \ + asm_inline("1:\t" \ + "lsr %w0,%w1,#12\n\t" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + :"=r" (__ret) \ + :"r" (0u+(val))); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */ +static inline void __runtime_fixup_16(__le32 *p, unsigned int val) +{ + u32 insn = le32_to_cpu(*p); + insn &= 0xffe0001f; + insn |= (val & 0xffff) << 5; + *p = cpu_to_le32(insn); +} + +static inline void __runtime_fixup_caches(void *where, unsigned int insns) +{ + unsigned long va = (unsigned long)where; + caches_clean_inval_pou(va, va + 4*insns); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + __runtime_fixup_16(p, val); + __runtime_fixup_16(p+1, val >> 16); + __runtime_fixup_16(p+2, val >> 32); + __runtime_fixup_16(p+3, val >> 48); + __runtime_fixup_caches(where, 4); +} + +/* Immediate value is 6 bits starting at bit #16 */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + u32 insn = le32_to_cpu(*p); + insn &= 0xffc0ffff; + insn |= (val & 63) << 16; + *p = cpu_to_le32(insn); + 
__runtime_fixup_caches(where, 1); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 14be5000c5a0..28f665e0975a 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -184,29 +184,40 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. */ -#define __get_mem_asm(load, reg, x, addr, err, type) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_mem_asm(load, reg, x, addr, label, type) \ + asm_goto_output( \ + "1: " load " " reg "0, [%1]\n" \ + _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \ + : "=r" (x) \ + : "r" (addr) : : label) +#else +#define __get_mem_asm(load, reg, x, addr, label, type) do { \ + int __gma_err = 0; \ asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ - : "+r" (err), "=r" (x) \ - : "r" (addr)) + : "+r" (__gma_err), "=r" (x) \ + : "r" (addr)); \ + if (__gma_err) goto label; } while (0) +#endif -#define __raw_get_mem(ldr, x, ptr, err, type) \ +#define __raw_get_mem(ldr, x, ptr, label, type) \ do { \ unsigned long __gu_val; \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), label, type); \ break; \ case 2: \ - __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), label, type); \ break; \ case 4: \ - __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr, "%w", __gu_val, (ptr), label, type); \ break; \ case 8: \ - __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr, 
"%x", __gu_val, (ptr), label, type); \ break; \ default: \ BUILD_BUG(); \ @@ -219,27 +230,34 @@ do { \ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, * we must evaluate these outside of the critical section. */ -#define __raw_get_user(x, ptr, err) \ +#define __raw_get_user(x, ptr, label) \ do { \ __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \ __typeof__(x) __rgu_val; \ __chk_user_ptr(ptr); \ - \ - uaccess_ttbr0_enable(); \ - __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \ - uaccess_ttbr0_disable(); \ - \ - (x) = __rgu_val; \ + do { \ + __label__ __rgu_failed; \ + uaccess_ttbr0_enable(); \ + __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, __rgu_failed, U); \ + uaccess_ttbr0_disable(); \ + (x) = __rgu_val; \ + break; \ + __rgu_failed: \ + uaccess_ttbr0_disable(); \ + goto label; \ + } while (0); \ } while (0) #define __get_user_error(x, ptr, err) \ do { \ + __label__ __gu_failed; \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __raw_get_user((x), __p, (err)); \ + __raw_get_user((x), __p, __gu_failed); \ } else { \ + __gu_failed: \ (x) = (__force __typeof__(x))0; (err) = -EFAULT; \ } \ } while (0) @@ -262,40 +280,42 @@ do { \ do { \ __typeof__(dst) __gkn_dst = (dst); \ __typeof__(src) __gkn_src = (src); \ - int __gkn_err = 0; \ - \ - __mte_enable_tco_async(); \ - __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ - (__force type *)(__gkn_src), __gkn_err, K); \ - __mte_disable_tco_async(); \ + do { \ + __label__ __gkn_label; \ \ - if (unlikely(__gkn_err)) \ + __mte_enable_tco_async(); \ + __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ + (__force type *)(__gkn_src), __gkn_label, K); \ + __mte_disable_tco_async(); \ + break; \ + __gkn_label: \ + __mte_disable_tco_async(); \ goto err_label; \ + } while (0); \ } while (0) -#define __put_mem_asm(store, reg, x, addr, err, type) \ - asm volatile( \ - "1: " store " " reg "1, [%2]\n" \ +#define __put_mem_asm(store, 
reg, x, addr, label, type) \ + asm goto( \ + "1: " store " " reg "0, [%1]\n" \ "2:\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \ - : "+r" (err) \ - : "rZ" (x), "r" (addr)) + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ + : : "rZ" (x), "r" (addr) : : label) -#define __raw_put_mem(str, x, ptr, err, type) \ +#define __raw_put_mem(str, x, ptr, label, type) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type); \ break; \ case 2: \ - __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type); \ break; \ case 4: \ - __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str, "%w", __pu_val, (ptr), label, type); \ break; \ case 8: \ - __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str, "%x", __pu_val, (ptr), label, type); \ break; \ default: \ BUILD_BUG(); \ @@ -307,25 +327,34 @@ do { \ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, * we must evaluate these outside of the critical section. 
*/ -#define __raw_put_user(x, ptr, err) \ +#define __raw_put_user(x, ptr, label) \ do { \ + __label__ __rpu_failed; \ __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \ __typeof__(*(ptr)) __rpu_val = (x); \ __chk_user_ptr(__rpu_ptr); \ \ - uaccess_ttbr0_enable(); \ - __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \ - uaccess_ttbr0_disable(); \ + do { \ + uaccess_ttbr0_enable(); \ + __raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U); \ + uaccess_ttbr0_disable(); \ + break; \ + __rpu_failed: \ + uaccess_ttbr0_disable(); \ + goto label; \ + } while (0); \ } while (0) #define __put_user_error(x, ptr, err) \ do { \ + __label__ __pu_failed; \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __raw_put_user((x), __p, (err)); \ + __raw_put_user((x), __p, __pu_failed); \ } else { \ + __pu_failed: \ (err) = -EFAULT; \ } \ } while (0) @@ -348,15 +377,18 @@ do { \ do { \ __typeof__(dst) __pkn_dst = (dst); \ __typeof__(src) __pkn_src = (src); \ - int __pkn_err = 0; \ \ - __mte_enable_tco_async(); \ - __raw_put_mem("str", *((type *)(__pkn_src)), \ - (__force type *)(__pkn_dst), __pkn_err, K); \ - __mte_disable_tco_async(); \ - \ - if (unlikely(__pkn_err)) \ + do { \ + __label__ __pkn_err; \ + __mte_enable_tco_async(); \ + __raw_put_mem("str", *((type *)(__pkn_src)), \ + (__force type *)(__pkn_dst), __pkn_err, K); \ + __mte_disable_tco_async(); \ + break; \ + __pkn_err: \ + __mte_disable_tco_async(); \ goto err_label; \ + } while (0); \ } while(0) extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); @@ -381,6 +413,51 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi __actu_ret; \ }) +static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr,len))) + return 0; + uaccess_ttbr0_enable(); + return 1; +} +#define user_access_begin(a,b) 
user_access_begin(a,b) +#define user_access_end() uaccess_ttbr0_disable() +#define unsafe_put_user(x, ptr, label) \ + __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U) +#define unsafe_get_user(x, ptr, label) \ + __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U) + +/* + * KCSAN uses these to save and restore ttbr state. + * We do not support KCSAN with ARM64_SW_TTBR0_PAN, so + * they are no-ops. + */ +static inline unsigned long user_access_save(void) { return 0; } +static inline void user_access_restore(unsigned long enabled) { } + +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. + */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 266b96acc014..1386e8e751f2 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -840,7 +840,7 @@ __SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SYSCALL(__NR_io_pgetevents_time64, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 
__SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 14251abee23c..824ca6987a51 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -27,20 +27,15 @@ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, } #define prep_zero_mask(a, bits, c) (bits) +#define create_zero_mask(bits) (bits) +#define find_zero(bits) (__ffs(bits) >> 3) -static inline unsigned long create_zero_mask(unsigned long bits) +static inline unsigned long zero_bytemask(unsigned long bits) { bits = (bits - 1) & ~bits; return bits >> 7; } -static inline unsigned long find_zero(unsigned long mask) -{ - return fls64(mask) >> 3; -} - -#define zero_bytemask(mask) (mask) - #else /* __AARCH64EB__ */ #include <asm-generic/word-at-a-time.h> #endif diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 4a92096db34e..712718aed5dd 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,6 +9,7 @@ #include <linux/efi.h> #include <linux/init.h> +#include <linux/kmemleak.h> #include <linux/screen_info.h> #include <linux/vmalloc.h> @@ -213,6 +214,7 @@ l: if (!p) { return -ENOMEM; } + kmemleak_not_leak(p); efi_rt_stack_top = p + THREAD_SIZE; return 0; } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index dcdcccd40891..6174671be7c1 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -582,12 +582,9 @@ subsys_initcall(register_mte_tcf_preferred_sysctl); size_t mte_probe_user_range(const char __user *uaddr, size_t size) { const char __user *end = uaddr + size; - int err = 0; char val; - __raw_get_user(val, uaddr, err); - if (err) - return size; + __raw_get_user(val, uaddr, efault); uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE); while (uaddr < end) { @@ -595,12 +592,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size) * A read is 
sufficient for mte, the caller should have probed * for the pte write permission if required. */ - __raw_get_user(val, uaddr, err); - if (err) - return end - uaddr; + __raw_get_user(val, uaddr, efault); uaddr += MTE_GRANULE_SIZE; } (void)val; return 0; + +efault: + return end - uaddr; } diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index 5fa08e13e17e..f374a3e5a5fe 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -173,7 +173,7 @@ static void __init remap_idmap_for_lpa2(void) * Don't bother with the FDT, we no longer need it after this. */ memset(init_idmap_pg_dir, 0, - (u64)init_idmap_pg_dir - (u64)init_idmap_pg_end); + (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir); create_init_idmap(init_idmap_pg_dir, mask); dsb(ishst); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index ad198262b981..7230f6e20ab8 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -53,17 +53,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, syscall_set_return_value(current, regs, 0, ret); /* - * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), - * but not enough for arm64 stack utilization comfort. To keep - * reasonable stack head room, reduce the maximum offset to 9 bits. + * This value will get limited by KSTACK_OFFSET_MAX(), which is 10 + * bits. The actual entropy will be further reduced by the compiler + * when applying stack alignment constraints: the AAPCS mandates a + * 16-byte aligned SP at function boundaries, which will remove the + * 4 low bits from any entropy chosen here. * - * The actual entropy will be further reduced by the compiler when - * applying stack alignment constraints: the AAPCS mandates a - * 16-byte (i.e. 4-bit) aligned SP at function boundaries. - * - * The resulting 5 bits of entropy is seen in SP[8:4]. + * The resulting 6 bits of entropy is seen in SP[9:4]. 
*/ - choose_random_kstack_offset(get_random_u16() & 0x1FF); + choose_random_kstack_offset(get_random_u16()); } static inline bool has_syscall_work(unsigned long flags) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 755a22d4f840..55a8e310ea12 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -264,6 +264,9 @@ SECTIONS EXIT_DATA } + RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + PERCPU_SECTION(L1_CACHE_BYTES) HYPERVISOR_PERCPU_SECTION diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 02746f9d0980..efb053af331c 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -177,6 +177,14 @@ static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len) res); } +static void ffa_rx_release(struct arm_smccc_res *res) +{ + arm_smccc_1_1_smc(FFA_RX_RELEASE, + 0, 0, + 0, 0, 0, 0, 0, + res); +} + static void do_ffa_rxtx_map(struct arm_smccc_res *res, struct kvm_cpu_context *ctxt) { @@ -543,16 +551,19 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, if (WARN_ON(offset > len || fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) { ret = FFA_RET_ABORTED; + ffa_rx_release(res); goto out_unlock; } if (len > ffa_desc_buf.len) { ret = FFA_RET_NO_MEMORY; + ffa_rx_release(res); goto out_unlock; } buf = ffa_desc_buf.buf; memcpy(buf, hyp_buffers.rx, fraglen); + ffa_rx_release(res); for (fragoff = fraglen; fragoff < len; fragoff += fraglen) { ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff); @@ -563,6 +574,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, fraglen = res->a3; memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen); + ffa_rx_release(res); } ffa_mem_reclaim(res, handle_lo, handle_hi, flags); diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 8f5b7a3e7009..7f68cf58b978 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -391,7 +391,7 @@ 
static void kvm_vgic_dist_destroy(struct kvm *kvm) if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) - vgic_v3_free_redist_region(rdreg); + vgic_v3_free_redist_region(kvm, rdreg); INIT_LIST_HEAD(&dist->rd_regions); } else { dist->vgic_cpu_base = VGIC_ADDR_UNDEF; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index a3983a631b5a..9e50928f5d7d 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -919,8 +919,19 @@ free: return ret; } -void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg) +void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg) { + struct kvm_vcpu *vcpu; + unsigned long c; + + lockdep_assert_held(&kvm->arch.config_lock); + + /* Garbage collect the region */ + kvm_for_each_vcpu(c, vcpu, kvm) { + if (vcpu->arch.vgic_cpu.rdreg == rdreg) + vcpu->arch.vgic_cpu.rdreg = NULL; + } + list_del(&rdreg->list); kfree(rdreg); } @@ -945,7 +956,7 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) mutex_lock(&kvm->arch.config_lock); rdreg = vgic_v3_rdist_region_from_index(kvm, index); - vgic_v3_free_redist_region(rdreg); + vgic_v3_free_redist_region(kvm, rdreg); mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 6106ebd5ba42..03d356a12377 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -316,7 +316,7 @@ vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg) struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, u32 index); -void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg); +void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg); bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 
c927e9312f10..353ea5dc32b8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -124,7 +124,8 @@ bool pgattr_change_is_safe(u64 old, u64 new) * The following mapping attributes may be updated in live * kernel mappings without the need for break-before-make. */ - pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; + pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG | + PTE_SWBITS_MASK; /* creating or taking down mappings is always safe */ if (!pte_valid(__pte(old)) || !pte_valid(__pte(new))) diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h index 7ff6a2466af1..e0594b6370a6 100644 --- a/arch/csky/include/uapi/asm/unistd.h +++ b/arch/csky/include/uapi/asm/unistd.h @@ -6,6 +6,7 @@ #define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYNC_FILE_RANGE2 #include <asm-generic/unistd.h> #define __NR_set_thread_area (__NR_arch_specific_syscall + 0) diff --git a/arch/csky/kernel/syscall.c b/arch/csky/kernel/syscall.c index 3d30e58a45d2..4540a271ee39 100644 --- a/arch/csky/kernel/syscall.c +++ b/arch/csky/kernel/syscall.c @@ -20,7 +20,7 @@ SYSCALL_DEFINE6(mmap2, unsigned long, prot, unsigned long, flags, unsigned long, fd, - off_t, offset) + unsigned long, offset) { if (unlikely(offset & (~PAGE_MASK >> 12))) return -EINVAL; diff --git a/arch/hexagon/include/asm/syscalls.h b/arch/hexagon/include/asm/syscalls.h new file mode 100644 index 000000000000..40f2d08bec92 --- /dev/null +++ b/arch/hexagon/include/asm/syscalls.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <asm-generic/syscalls.h> + +asmlinkage long sys_hexagon_fadvise64_64(int fd, int advice, + u32 a2, u32 a3, u32 a4, u32 a5); diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h index 432c4db1b623..21ae22306b5d 100644 --- a/arch/hexagon/include/uapi/asm/unistd.h +++ b/arch/hexagon/include/uapi/asm/unistd.h @@ -36,5 +36,6 @@ #define 
__ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYNC_FILE_RANGE2 #include <asm-generic/unistd.h> diff --git a/arch/hexagon/kernel/syscalltab.c b/arch/hexagon/kernel/syscalltab.c index 0fadd582cfc7..5d98bdc494ec 100644 --- a/arch/hexagon/kernel/syscalltab.c +++ b/arch/hexagon/kernel/syscalltab.c @@ -14,6 +14,13 @@ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), +SYSCALL_DEFINE6(hexagon_fadvise64_64, int, fd, int, advice, + SC_ARG64(offset), SC_ARG64(len)) +{ + return ksys_fadvise64_64(fd, SC_VAL64(loff_t, offset), SC_VAL64(loff_t, len), advice); +} +#define sys_fadvise64_64 sys_hexagon_fadvise64_64 + void *sys_call_table[__NR_syscalls] = { #include <asm/unistd.h> }; diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e38139c576ee..ddc042895d01 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -143,7 +143,7 @@ config LOONGARCH select HAVE_LIVEPATCH select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI - select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS + select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB && !CC_IS_CLANG select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -261,6 +261,9 @@ config AS_HAS_EXPLICIT_RELOCS config AS_HAS_FCSR_CLASS def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0) +config AS_HAS_THIN_ADD_SUB + def_bool $(cc-option,-Wa$(comma)-mthin-add-sub) + config AS_HAS_LSX_EXTENSION def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0) diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index 98d60630c3d4..8b2ce5b5d43e 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -28,6 +28,7 @@ config UNWINDER_PROLOGUE config UNWINDER_ORC bool "ORC unwinder" + depends on HAVE_OBJTOOL select OBJTOOL help This option enables the ORC (Oops Rewind Capability) unwinder for diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h index 
21447fb1efc7..d78330916bd1 100644 --- a/arch/loongarch/include/asm/hw_breakpoint.h +++ b/arch/loongarch/include/asm/hw_breakpoint.h @@ -75,6 +75,8 @@ do { \ #define CSR_MWPC_NUM 0x3f #define CTRL_PLV_ENABLE 0x1e +#define CTRL_PLV0_ENABLE 0x02 +#define CTRL_PLV3_ENABLE 0x10 #define MWPnCFG3_LoadEn 8 #define MWPnCFG3_StoreEn 9 @@ -101,7 +103,7 @@ struct perf_event; struct perf_event_attr; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type, int *offset); + int *gen_len, int *gen_type); extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); extern int hw_breakpoint_arch_parse(struct perf_event *bp, const struct perf_event_attr *attr, diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c index fc55c4de2a11..621ad7634df7 100644 --- a/arch/loongarch/kernel/hw_breakpoint.c +++ b/arch/loongarch/kernel/hw_breakpoint.c @@ -174,11 +174,21 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) static int hw_breakpoint_control(struct perf_event *bp, enum hw_breakpoint_ops ops) { - u32 ctrl; + u32 ctrl, privilege; int i, max_slots, enable; + struct pt_regs *regs; struct perf_event **slots; struct arch_hw_breakpoint *info = counter_arch_bp(bp); + if (arch_check_bp_in_kernelspace(info)) + privilege = CTRL_PLV0_ENABLE; + else + privilege = CTRL_PLV3_ENABLE; + + /* Whether bp belongs to a task. */ + if (bp->hw.target) + regs = task_pt_regs(bp->hw.target); + if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { /* Breakpoint */ slots = this_cpu_ptr(bp_on_reg); @@ -197,31 +207,38 @@ static int hw_breakpoint_control(struct perf_event *bp, switch (ops) { case HW_BREAKPOINT_INSTALL: /* Set the FWPnCFG/MWPnCFG 1~4 register. 
*/ - write_wb_reg(CSR_CFG_ADDR, i, 0, info->address); - write_wb_reg(CSR_CFG_ADDR, i, 1, info->address); - write_wb_reg(CSR_CFG_MASK, i, 0, info->mask); - write_wb_reg(CSR_CFG_MASK, i, 1, info->mask); - write_wb_reg(CSR_CFG_ASID, i, 0, 0); - write_wb_reg(CSR_CFG_ASID, i, 1, 0); if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { - write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE); + write_wb_reg(CSR_CFG_ADDR, i, 0, info->address); + write_wb_reg(CSR_CFG_MASK, i, 0, info->mask); + write_wb_reg(CSR_CFG_ASID, i, 0, 0); + write_wb_reg(CSR_CFG_CTRL, i, 0, privilege); } else { + write_wb_reg(CSR_CFG_ADDR, i, 1, info->address); + write_wb_reg(CSR_CFG_MASK, i, 1, info->mask); + write_wb_reg(CSR_CFG_ASID, i, 1, 0); ctrl = encode_ctrl_reg(info->ctrl); - write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE); + write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | privilege); } enable = csr_read64(LOONGARCH_CSR_CRMD); csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD); + if (bp->hw.target) + regs->csr_prmd |= CSR_PRMD_PWE; break; case HW_BREAKPOINT_UNINSTALL: /* Reset the FWPnCFG/MWPnCFG 1~4 register. */ - write_wb_reg(CSR_CFG_ADDR, i, 0, 0); - write_wb_reg(CSR_CFG_ADDR, i, 1, 0); - write_wb_reg(CSR_CFG_MASK, i, 0, 0); - write_wb_reg(CSR_CFG_MASK, i, 1, 0); - write_wb_reg(CSR_CFG_CTRL, i, 0, 0); - write_wb_reg(CSR_CFG_CTRL, i, 1, 0); - write_wb_reg(CSR_CFG_ASID, i, 0, 0); - write_wb_reg(CSR_CFG_ASID, i, 1, 0); + if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { + write_wb_reg(CSR_CFG_ADDR, i, 0, 0); + write_wb_reg(CSR_CFG_MASK, i, 0, 0); + write_wb_reg(CSR_CFG_CTRL, i, 0, 0); + write_wb_reg(CSR_CFG_ASID, i, 0, 0); + } else { + write_wb_reg(CSR_CFG_ADDR, i, 1, 0); + write_wb_reg(CSR_CFG_MASK, i, 1, 0); + write_wb_reg(CSR_CFG_CTRL, i, 1, 0); + write_wb_reg(CSR_CFG_ASID, i, 1, 0); + } + if (bp->hw.target) + regs->csr_prmd &= ~CSR_PRMD_PWE; break; } @@ -283,7 +300,7 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) * to generic breakpoint descriptions. 
*/ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type, int *offset) + int *gen_len, int *gen_type) { /* Type */ switch (ctrl.type) { @@ -303,11 +320,6 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, return -EINVAL; } - if (!ctrl.len) - return -EINVAL; - - *offset = __ffs(ctrl.len); - /* Len */ switch (ctrl.len) { case LOONGARCH_BREAKPOINT_LEN_1: @@ -386,21 +398,17 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, struct arch_hw_breakpoint *hw) { int ret; - u64 alignment_mask, offset; + u64 alignment_mask; /* Build the arch_hw_breakpoint. */ ret = arch_build_bp_info(bp, attr, hw); if (ret) return ret; - if (hw->ctrl.type != LOONGARCH_BREAKPOINT_EXECUTE) - alignment_mask = 0x7; - else + if (hw->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { alignment_mask = 0x3; - offset = hw->address & alignment_mask; - - hw->address &= ~alignment_mask; - hw->ctrl.len <<= offset; + hw->address &= ~alignment_mask; + } return 0; } @@ -471,12 +479,15 @@ void breakpoint_handler(struct pt_regs *regs) slots = this_cpu_ptr(bp_on_reg); for (i = 0; i < boot_cpu_data.watch_ireg_count; ++i) { - bp = slots[i]; - if (bp == NULL) - continue; - perf_bp_event(bp, regs); + if ((csr_read32(LOONGARCH_CSR_FWPS) & (0x1 << i))) { + bp = slots[i]; + if (bp == NULL) + continue; + perf_bp_event(bp, regs); + csr_write32(0x1 << i, LOONGARCH_CSR_FWPS); + update_bp_registers(regs, 0, 0); + } } - update_bp_registers(regs, 0, 0); } NOKPROBE_SYMBOL(breakpoint_handler); @@ -488,12 +499,15 @@ void watchpoint_handler(struct pt_regs *regs) slots = this_cpu_ptr(wp_on_reg); for (i = 0; i < boot_cpu_data.watch_dreg_count; ++i) { - wp = slots[i]; - if (wp == NULL) - continue; - perf_bp_event(wp, regs); + if ((csr_read32(LOONGARCH_CSR_MWPS) & (0x1 << i))) { + wp = slots[i]; + if (wp == NULL) + continue; + perf_bp_event(wp, regs); + csr_write32(0x1 << i, LOONGARCH_CSR_MWPS); + update_bp_registers(regs, 0, 1); + } } - update_bp_registers(regs, 0, 1); } 
NOKPROBE_SYMBOL(watchpoint_handler); diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index c114c5ef1332..200109de1971 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -494,28 +494,14 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, struct arch_hw_breakpoint_ctrl ctrl, struct perf_event_attr *attr) { - int err, len, type, offset; + int err, len, type; - err = arch_bp_generic_fields(ctrl, &len, &type, &offset); + err = arch_bp_generic_fields(ctrl, &len, &type); if (err) return err; - switch (note_type) { - case NT_LOONGARCH_HW_BREAK: - if ((type & HW_BREAKPOINT_X) != type) - return -EINVAL; - break; - case NT_LOONGARCH_HW_WATCH: - if ((type & HW_BREAKPOINT_RW) != type) - return -EINVAL; - break; - default: - return -EINVAL; - } - attr->bp_len = len; attr->bp_type = type; - attr->bp_addr += offset; return 0; } @@ -609,10 +595,27 @@ static int ptrace_hbp_set_ctrl(unsigned int note_type, return PTR_ERR(bp); attr = bp->attr; - decode_ctrl_reg(uctrl, &ctrl); - err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr); - if (err) - return err; + + switch (note_type) { + case NT_LOONGARCH_HW_BREAK: + ctrl.type = LOONGARCH_BREAKPOINT_EXECUTE; + ctrl.len = LOONGARCH_BREAKPOINT_LEN_4; + break; + case NT_LOONGARCH_HW_WATCH: + decode_ctrl_reg(uctrl, &ctrl); + break; + default: + return -EINVAL; + } + + if (uctrl & CTRL_PLV_ENABLE) { + err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr); + if (err) + return err; + attr.disabled = 0; + } else { + attr.disabled = 1; + } return modify_user_hw_breakpoint(bp, &attr); } @@ -643,6 +646,10 @@ static int ptrace_hbp_set_addr(unsigned int note_type, struct perf_event *bp; struct perf_event_attr attr; + /* Kernel-space address cannot be monitored by user-space */ + if ((unsigned long)addr >= XKPRANGE) + return -EINVAL; + bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx); if (IS_ERR(bp)) return PTR_ERR(bp); diff --git a/arch/loongarch/kernel/syscall.c 
b/arch/loongarch/kernel/syscall.c index b4c5acd7aa3b..8801611143ab 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -22,7 +22,7 @@ #define __SYSCALL(nr, call) [nr] = (call), SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, - prot, unsigned long, flags, unsigned long, fd, off_t, offset) + prot, unsigned long, flags, unsigned long, fd, unsigned long, offset) { if (offset & ~PAGE_MASK) return -EINVAL; diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index c86e099af5ca..a68573e091c0 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -761,7 +761,7 @@ static void kvm_handle_service(struct kvm_vcpu *vcpu) default: ret = KVM_HCALL_INVALID_CODE; break; - }; + } kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret); } diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index ed9f34da1a2a..0850b099f300 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -35,7 +35,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, - off_t, pgoff) + unsigned long, pgoff) { if (pgoff & ~PAGE_MASK) return -EINVAL; diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index ec180ab92eaa..66a8ba19c287 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -110,7 +110,8 @@ static void bcm6358_quirks(void) * RAC flush causes kernel panics on BCM6358 when booting from TP1 * because the bootloader is not initializing it properly. 
*/ - bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31)); + bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31)) || + !!BMIPS_GET_CBR(); } static void bcm6368_quirks(void) diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h index 30e86861c206..b1ee3c48e84b 100644 --- a/arch/mips/include/asm/mipsmtregs.h +++ b/arch/mips/include/asm/mipsmtregs.h @@ -322,7 +322,7 @@ static inline void ehb(void) " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ _ASM_SET_MFTC0 \ - " mftc0 $1, " #rt ", " #sel " \n" \ + " mftc0 %0, " #rt ", " #sel " \n" \ _ASM_UNSET_MFTC0 \ " .set pop \n" \ : "=r" (__res)); \ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index cc869f5d5693..953f5b7dc723 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -354,7 +354,7 @@ 412 n32 utimensat_time64 sys_utimensat 413 n32 pselect6_time64 compat_sys_pselect6_time64 414 n32 ppoll_time64 compat_sys_ppoll_time64 -416 n32 io_pgetevents_time64 sys_io_pgetevents +416 n32 io_pgetevents_time64 compat_sys_io_pgetevents_time64 417 n32 recvmmsg_time64 compat_sys_recvmmsg_time64 418 n32 mq_timedsend_time64 sys_mq_timedsend 419 n32 mq_timedreceive_time64 sys_mq_timedreceive diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 008ebe60263e..2439a2491cff 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -27,7 +27,7 @@ 17 o32 break sys_ni_syscall # 18 was sys_stat 18 o32 unused18 sys_ni_syscall -19 o32 lseek sys_lseek +19 o32 lseek sys_lseek compat_sys_lseek 20 o32 getpid sys_getpid 21 o32 mount sys_mount 22 o32 umount sys_oldumount @@ -403,7 +403,7 @@ 412 o32 utimensat_time64 sys_utimensat sys_utimensat 413 o32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 o32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 o32 io_pgetevents_time64 
sys_io_pgetevents sys_io_pgetevents +416 o32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 o32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 o32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 o32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/mips/pci/ops-rc32434.c b/arch/mips/pci/ops-rc32434.c index 874ed6df9768..34b9323bdabb 100644 --- a/arch/mips/pci/ops-rc32434.c +++ b/arch/mips/pci/ops-rc32434.c @@ -112,8 +112,8 @@ retry: * gives them time to settle */ if (where == PCI_VENDOR_ID) { - if (ret == 0xffffffff || ret == 0x00000000 || - ret == 0x0000ffff || ret == 0xffff0000) { + if (*val == 0xffffffff || *val == 0x00000000 || + *val == 0x0000ffff || *val == 0xffff0000) { if (delay > 4) return 0; delay *= 2; diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index daafeb20f993..dc9b902de8ea 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -16,6 +16,7 @@ config PARISC select ARCH_HAS_UBSAN select ARCH_HAS_PTE_SPECIAL select ARCH_NO_SG_CHAIN + select ARCH_SPLIT_ARG64 if !64BIT select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_STACKWALK diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 2a12a547b447..826c8e51b585 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -23,12 +23,3 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23, current->comm, current->pid, r20); return -ENOSYS; } - -asmlinkage long sys32_fanotify_mark(compat_int_t fanotify_fd, compat_uint_t flags, - compat_uint_t mask0, compat_uint_t mask1, compat_int_t dfd, - const char __user * pathname) -{ - return sys_fanotify_mark(fanotify_fd, flags, - ((__u64)mask1 << 32) | mask0, - dfd, pathname); -} diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index b13c21373974..66dc406b12e4 100644 --- 
a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -108,7 +108,7 @@ 95 common fchown sys_fchown 96 common getpriority sys_getpriority 97 common setpriority sys_setpriority -98 common recv sys_recv +98 common recv sys_recv compat_sys_recv 99 common statfs sys_statfs compat_sys_statfs 100 common fstatfs sys_fstatfs compat_sys_fstatfs 101 common stat64 sys_stat64 @@ -135,7 +135,7 @@ 120 common clone sys_clone_wrapper 121 common setdomainname sys_setdomainname 122 common sendfile sys_sendfile compat_sys_sendfile -123 common recvfrom sys_recvfrom +123 common recvfrom sys_recvfrom compat_sys_recvfrom 124 32 adjtimex sys_adjtimex_time32 124 64 adjtimex sys_adjtimex 125 common mprotect sys_mprotect @@ -364,7 +364,7 @@ 320 common accept4 sys_accept4 321 common prlimit64 sys_prlimit64 322 common fanotify_init sys_fanotify_init -323 common fanotify_mark sys_fanotify_mark sys32_fanotify_mark +323 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark 324 32 clock_adjtime sys_clock_adjtime32 324 64 clock_adjtime sys_clock_adjtime 325 common name_to_handle_at sys_name_to_handle_at diff --git a/arch/powerpc/crypto/.gitignore b/arch/powerpc/crypto/.gitignore index e1094f08f713..e9fe73aac8b6 100644 --- a/arch/powerpc/crypto/.gitignore +++ b/arch/powerpc/crypto/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only aesp10-ppc.S +aesp8-ppc.S ghashp10-ppc.S +ghashp8-ppc.S diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d1030bc52564..d283d281d28e 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -849,6 +849,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) { struct eeh_dev *edev; struct pci_dev *pdev; + struct pci_bus *bus = NULL; if (pe->type & EEH_PE_PHB) return pe->phb->bus; @@ -859,9 +860,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) /* Retrieve the parent PCI bus of first (top) PCI device */ edev = list_first_entry_or_null(&pe->edevs, struct 
eeh_dev, entry); + pci_lock_rescan_remove(); pdev = eeh_dev_to_pci_dev(edev); if (pdev) - return pdev->bus; + bus = pdev->bus; + pci_unlock_rescan_remove(); - return NULL; + return bus; } diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 4690c219bfa4..63432a33ec49 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -647,8 +647,9 @@ __after_prom_start: * Note: This process overwrites the OF exception vectors. */ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET) - mr. r4,r26 /* In some cases the loader may */ - beq 9f /* have already put us at zero */ + mr r4,r26 /* Load the virtual source address into r4 */ + cmpld r3,r4 /* Check if source == dest */ + beq 9f /* If so skip the copy */ li r6,0x100 /* Start offset, the first 0x100 */ /* bytes were copied earlier. */ diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 3656f1ca7a21..ebae8415dfbb 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu 
fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 85050be08a23..72b12bc10f90 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -27,6 +27,7 @@ #include <asm/paca.h> #include <asm/mmu.h> #include <asm/sections.h> /* _end */ +#include <asm/setup.h> #include <asm/smp.h> #include <asm/hw_breakpoint.h> #include <asm/svm.h> @@ -317,6 +318,16 @@ void default_machine_kexec(struct kimage *image) if (!kdump_in_progress()) kexec_prepare_cpus(); +#ifdef CONFIG_PPC_PSERIES + /* + * This must be done after other CPUs have shut down, otherwise they + * could execute the 'scv' instruction, which is not supported with + * reloc disabled (see configure_exceptions()). + */ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) + pseries_disable_reloc_on_exc(); +#endif + printk("kexec: Starting switchover sequence.\n"); /* switch to a staticly allocated stack. Based on irq stack code. 
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index b569ebaa590e..3ff3de9a52ac 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -130,14 +130,16 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, } rcu_read_unlock(); - fdput(f); - - if (!found) + if (!found) { + fdput(f); return -EINVAL; + } table_group = iommu_group_get_iommudata(grp); - if (WARN_ON(!table_group)) + if (WARN_ON(!table_group)) { + fdput(f); return -EFAULT; + } for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { struct iommu_table *tbltmp = table_group->tables[i]; @@ -158,8 +160,10 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, break; } } - if (!tbl) + if (!tbl) { + fdput(f); return -EINVAL; + } rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { @@ -170,6 +174,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, /* stit is being destroyed */ iommu_tce_table_put(tbl); rcu_read_unlock(); + fdput(f); return -ENOTTY; } /* @@ -177,6 +182,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, * its KVM reference counter and can return. 
*/ rcu_read_unlock(); + fdput(f); return 0; } rcu_read_unlock(); @@ -184,6 +190,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, stit = kzalloc(sizeof(*stit), GFP_KERNEL); if (!stit) { iommu_tce_table_put(tbl); + fdput(f); return -ENOMEM; } @@ -192,6 +199,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, list_add_rcu(&stit->next, &stt->iommu_tables); + fdput(f); return 0; } diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 096d09ed89f6..431be156ca9b 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -61,11 +61,3 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary) } else xics_kexec_teardown_cpu(secondary); } - -void pseries_machine_kexec(struct kimage *image) -{ - if (firmware_has_feature(FW_FEATURE_SET_MODE)) - pseries_disable_reloc_on_exc(); - - default_machine_kexec(image); -} diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index bba4ad192b0f..3968a6970fa8 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -38,7 +38,6 @@ static inline void smp_init_pseries(void) { } #endif extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary); -void pseries_machine_kexec(struct kimage *image); extern void pSeries_final_fixup(void); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 284a6fa04b0c..b10a25325238 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -343,8 +343,8 @@ static int alloc_dispatch_log_kmem_cache(void) { void (*ctor)(void *) = get_dtl_cache_ctor(); - dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, - DISPATCH_LOG_BYTES, 0, ctor); + dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, 0, DISPATCH_LOG_BYTES, ctor); if (!dtl_cache) { pr_warn("Failed to 
create dispatch trace log buffer cache\n"); pr_warn("Stolen time statistics will be unreliable\n"); @@ -1159,7 +1159,6 @@ define_machine(pseries) { .machine_check_exception = pSeries_machine_check_exception, .machine_check_log_err = pSeries_machine_check_log_err, #ifdef CONFIG_KEXEC_CORE - .machine_kexec = pseries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, #endif #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts index 8df4cf3656f2..a7d753b6fdfd 100644 --- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts +++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts @@ -15,6 +15,10 @@ model = "Kendryte KD233"; compatible = "canaan,kendryte-kd233", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -46,7 +50,6 @@ &fpioa { pinctrl-0 = <&jtag_pinctrl>; pinctrl-names = "default"; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>, @@ -118,6 +121,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &spi0 { @@ -125,6 +129,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "canaan,kd233-tft", "ilitek,ili9341"; diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi index f87c5164d9cf..4f5d40fa1e77 100644 --- a/arch/riscv/boot/dts/canaan/k210.dtsi +++ b/arch/riscv/boot/dts/canaan/k210.dtsi @@ -16,13 +16,6 @@ #size-cells = <1>; compatible = "canaan,kendryte-k210"; - aliases { - serial0 = &uarths0; - serial1 = &uart1; - serial2 = &uart2; - serial3 = &uart3; - }; - /* * The K210 has an sv39 MMU following the privileged specification v1.9. 
* Since this is a non-ratified draft specification, the kernel does not @@ -137,6 +130,7 @@ reg = <0x38000000 0x1000>; interrupts = <33>; clocks = <&sysclk K210_CLK_CPU>; + status = "disabled"; }; gpio0: gpio-controller@38001000 { @@ -152,6 +146,7 @@ <62>, <63>, <64>, <65>; gpio-controller; ngpios = <32>; + status = "disabled"; }; dmac0: dma-controller@50000000 { @@ -187,6 +182,7 @@ <&sysclk K210_CLK_GPIO>; clock-names = "bus", "db"; resets = <&sysrst K210_RST_GPIO>; + status = "disabled"; gpio1_0: gpio-port@0 { #gpio-cells = <2>; @@ -214,6 +210,7 @@ dsr-override; cts-override; ri-override; + status = "disabled"; }; uart2: serial@50220000 { @@ -230,6 +227,7 @@ dsr-override; cts-override; ri-override; + status = "disabled"; }; uart3: serial@50230000 { @@ -246,6 +244,7 @@ dsr-override; cts-override; ri-override; + status = "disabled"; }; spi2: spi@50240000 { @@ -259,6 +258,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ssi_clk", "pclk"; resets = <&sysrst K210_RST_SPI2>; + status = "disabled"; }; i2s0: i2s@50250000 { @@ -268,6 +268,7 @@ clocks = <&sysclk K210_CLK_I2S0>; clock-names = "i2sclk"; resets = <&sysrst K210_RST_I2S0>; + status = "disabled"; }; i2s1: i2s@50260000 { @@ -277,6 +278,7 @@ clocks = <&sysclk K210_CLK_I2S1>; clock-names = "i2sclk"; resets = <&sysrst K210_RST_I2S1>; + status = "disabled"; }; i2s2: i2s@50270000 { @@ -286,6 +288,7 @@ clocks = <&sysclk K210_CLK_I2S2>; clock-names = "i2sclk"; resets = <&sysrst K210_RST_I2S2>; + status = "disabled"; }; i2c0: i2c@50280000 { @@ -296,6 +299,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ref", "pclk"; resets = <&sysrst K210_RST_I2C0>; + status = "disabled"; }; i2c1: i2c@50290000 { @@ -306,6 +310,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ref", "pclk"; resets = <&sysrst K210_RST_I2C1>; + status = "disabled"; }; i2c2: i2c@502a0000 { @@ -316,6 +321,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ref", "pclk"; resets = <&sysrst K210_RST_I2C2>; + status = "disabled"; }; fpioa: pinmux@502b0000 { @@ -464,6 +470,7 @@ 
reset-names = "spi"; num-cs = <4>; reg-io-width = <4>; + status = "disabled"; }; spi1: spi@53000000 { @@ -479,6 +486,7 @@ reset-names = "spi"; num-cs = <4>; reg-io-width = <4>; + status = "disabled"; }; spi3: spi@54000000 { @@ -495,6 +503,7 @@ num-cs = <4>; reg-io-width = <4>; + status = "disabled"; }; }; }; diff --git a/arch/riscv/boot/dts/canaan/k210_generic.dts b/arch/riscv/boot/dts/canaan/k210_generic.dts index 396c8ca4d24d..5734cc03753b 100644 --- a/arch/riscv/boot/dts/canaan/k210_generic.dts +++ b/arch/riscv/boot/dts/canaan/k210_generic.dts @@ -15,6 +15,10 @@ model = "Kendryte K210 generic"; compatible = "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -24,7 +28,6 @@ &fpioa { pinctrl-0 = <&jtag_pins>; pinctrl-names = "default"; - status = "okay"; jtag_pins: jtag-pinmux { pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>, diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts index 6d25bf07481a..2ab376d609d2 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts @@ -17,6 +17,10 @@ compatible = "sipeed,maix-bit", "sipeed,maix-bitm", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -58,7 +62,6 @@ &fpioa { pinctrl-names = "default"; pinctrl-0 = <&jtag_pinctrl>; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>, @@ -156,6 +159,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -170,6 +174,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; @@ -199,6 +204,8 @@ }; &spi3 { + status = "okay"; + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; diff --git 
a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts index f4f4d8d5e8b8..d98e20775c07 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts @@ -17,6 +17,10 @@ compatible = "sipeed,maix-dock-m1", "sipeed,maix-dock-m1w", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -63,7 +67,6 @@ &fpioa { pinctrl-0 = <&jtag_pinctrl>; pinctrl-names = "default"; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>, @@ -159,6 +162,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -173,6 +177,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts index 0d86df47e1ed..79ecd549700a 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts @@ -16,6 +16,10 @@ model = "SiPeed MAIX GO"; compatible = "sipeed,maix-go", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -69,7 +73,6 @@ &fpioa { pinctrl-0 = <&jtag_pinctrl>; pinctrl-names = "default"; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>, @@ -167,6 +170,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -181,6 +185,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; @@ -209,6 +214,8 @@ }; &spi3 { + status = "okay"; + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; diff 
--git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts index 5c05c498e2b8..019c03ae51f6 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts @@ -15,6 +15,10 @@ model = "SiPeed MAIXDUINO"; compatible = "sipeed,maixduino", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -39,8 +43,6 @@ }; &fpioa { - status = "okay"; - uarths_pinctrl: uarths-pinmux { pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>, /* Header "0" */ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>; /* Header "1" */ @@ -132,6 +134,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -146,6 +149,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; @@ -174,6 +178,8 @@ }; &spi3 { + status = "okay"; + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; diff --git a/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts b/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts index cd013588adc0..375ff2661b6e 100644 --- a/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts +++ b/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts @@ -45,6 +45,7 @@ no-1-8-v; no-mmc; no-sdio; + disable-wp; }; &uart0 { diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi index 8ff6ea64f048..68d16717db8c 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi @@ -244,7 +244,7 @@ regulator-boot-on; regulator-always-on; regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; regulator-name = "emmc_vdd"; }; }; diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h index 06e439eeef9a..09fde95a5e8f 100644 --- 
a/arch/riscv/include/asm/insn.h +++ b/arch/riscv/include/asm/insn.h @@ -145,7 +145,7 @@ /* parts of opcode for RVF, RVD and RVQ */ #define RVFDQ_FL_FS_WIDTH_OFF 12 -#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(3, 0) +#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(2, 0) #define RVFDQ_FL_FS_WIDTH_W 2 #define RVFDQ_FL_FS_WIDTH_D 3 #define RVFDQ_LS_FS_WIDTH_Q 4 diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 87cbd86576b2..4b95c574fd04 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -120,9 +120,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); mutex_unlock(&text_mutex); - if (!mod) - local_flush_icache_range(rec->ip, rec->ip + MCOUNT_INSN_SIZE); - return out; } @@ -156,9 +153,9 @@ static int __ftrace_modify_code(void *data) } else { while (atomic_read(¶m->cpu_count) <= num_online_cpus()) cpu_relax(); - } - local_flush_icache_all(); + local_flush_icache_all(); + } return 0; } diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ed9cad20c039..3c830a6f7ef4 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -121,20 +121,12 @@ static void machine_kexec_mask_interrupts(void) for_each_irq_desc(i, desc) { struct irq_chip *chip; - int ret; chip = irq_desc_get_chip(desc); if (!chip) continue; - /* - * First try to remove the active state. If this - * fails, try to EOI the interrupt. 
- */ - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); - - if (ret && irqd_irq_inprogress(&desc->irq_data) && - chip->irq_eoi) + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) chip->irq_eoi(&desc->irq_data); if (chip->irq_mask) diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 4007563fb607..ab03732d06c4 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -89,6 +89,14 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) memset(waddr, c, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. + */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) @@ -135,6 +143,14 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) ret = copy_to_kernel_nofault(waddr, insn, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. 
+ */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) @@ -189,9 +205,6 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) ret = patch_insn_set(tp, c, len); - if (!ret) - flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len); - return ret; } NOKPROBE_SYMBOL(patch_text_set_nosync); @@ -224,9 +237,6 @@ int patch_text_nosync(void *addr, const void *insns, size_t len) ret = patch_insn_write(tp, insns, len); - if (!ret) - flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); - return ret; } NOKPROBE_SYMBOL(patch_text_nosync); @@ -253,9 +263,9 @@ static int patch_text_cb(void *data) } else { while (atomic_read(&patch->cpu_count) <= num_online_cpus()) cpu_relax(); - } - local_flush_icache_all(); + local_flush_icache_all(); + } return ret; } diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 528ec7cc9a62..10e311b2759d 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -32,6 +32,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int graph_idx = 0; int level = 0; if (regs) { @@ -68,7 +69,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, pc = regs->ra; } else { fp = frame->fp; - pc = ftrace_graph_ret_addr(current, NULL, frame->ra, + pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); if (pc == (unsigned long)ret_from_exception) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) @@ -156,7 +157,7 @@ unsigned long __get_wchan(struct task_struct *task) return pc; } -noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, +noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); diff --git 
a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 64155323cc92..d77afe05578f 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -23,7 +23,7 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, #ifdef CONFIG_64BIT SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0); } @@ -32,7 +32,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { /* * Note that the shift for mmap2 is constant (12), diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 04db1f993c47..bcf41d6e0df0 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -327,7 +327,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { - pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); + pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 48ef5fe5c08a..5a36d5538dae 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -170,11 +170,14 @@ static void kaslr_adjust_got(unsigned long offset) u64 *entry; /* - * Even without -fPIE, Clang still uses a global offset table for some - * reason. Adjust the GOT entries. + * Adjust GOT entries, except for ones for undefined weak symbols + * that resolved to zero. 
This also skips the first three reserved + * entries on s390x that are zero. */ - for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) - *entry += offset - __START_KERNEL; + for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) { + if (*entry) + *entry += offset - __START_KERNEL; + } } /* diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 8c4adece8911..f3602414a961 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -601,17 +601,16 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=m +CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y # CONFIG_FB_DEVICE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m -CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_MLX5_VFIO_PCI=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 6dd11d3b6aaa..d0d8925fdf09 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -592,17 +592,16 @@ CONFIG_WATCHDOG_CORE=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=m +CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y # CONFIG_FB_DEVICE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m -CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_MLX5_VFIO_PCI=m diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index 7f5004065e8a..35555c944630 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -54,7 +54,7 @@ static 
__always_inline void arch_exit_to_user_mode(void) static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, unsigned long ti_work) { - choose_random_kstack_offset(get_tod_clock_fast() & 0xff); + choose_random_kstack_offset(get_tod_clock_fast()); } #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 95990461888f..9281063636a7 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -427,6 +427,7 @@ struct kvm_vcpu_stat { u64 instruction_io_other; u64 instruction_lpsw; u64 instruction_lpswe; + u64 instruction_lpswey; u64 instruction_pfmf; u64 instruction_ptff; u64 instruction_sck; diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index dc2355c623d6..50cbcbbaa03d 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -38,33 +38,6 @@ #include "entry.h" -/* - * Perform the mmap() system call. Linux for S/390 isn't able to handle more - * than 5 system call parameters, so this system call uses a memory block - * for parameter passing. - */ - -struct s390_mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) -{ - struct s390_mmap_arg_struct a; - int error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -out: - return error; -} - #ifdef CONFIG_SYSVIPC /* * sys_ipc() is the de-multiplexer for the SysV IPC calls. 
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index bd0fee24ad10..01071182763e 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -418,7 +418,7 @@ 412 32 utimensat_time64 - sys_utimensat 413 32 pselect6_time64 - compat_sys_pselect6_time64 414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - sys_io_pgetevents +416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 - sys_mq_timedsend 419 32 mq_timedreceive_time64 - sys_mq_timedreceive diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 82e9631cd9ef..54b5b2565df8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -132,6 +132,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_io_other), STATS_DESC_COUNTER(VCPU, instruction_lpsw), STATS_DESC_COUNTER(VCPU, instruction_lpswe), + STATS_DESC_COUNTER(VCPU, instruction_lpswey), STATS_DESC_COUNTER(VCPU, instruction_pfmf), STATS_DESC_COUNTER(VCPU, instruction_ptff), STATS_DESC_COUNTER(VCPU, instruction_sck), diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 111eb5c74784..bf8534218af3 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -138,6 +138,21 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar) return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar) +{ + u32 base1 = vcpu->arch.sie_block->ipb >> 28; + s64 disp1; + + /* The displacement is a 20bit _SIGNED_ value */ + disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + + ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19); + + if (ar) + *ar = base1; + + return (base1 ? 
vcpu->run->s.regs.gprs[base1] : 0) + disp1; +} + static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, u64 *address1, u64 *address2, u8 *ar_b1, u8 *ar_b2) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 1be19cc9d73c..1a49b89706f8 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -797,6 +797,36 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) return 0; } +static int handle_lpswey(struct kvm_vcpu *vcpu) +{ + psw_t new_psw; + u64 addr; + int rc; + u8 ar; + + vcpu->stat.instruction_lpswey++; + + if (!test_kvm_facility(vcpu->kvm, 193)) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + addr = kvm_s390_get_base_disp_siy(vcpu, &ar); + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + return 0; +} + static int handle_stidp(struct kvm_vcpu *vcpu) { u64 stidp_data = vcpu->kvm->arch.model.cpuid; @@ -1462,6 +1492,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) case 0x61: case 0x62: return handle_ri(vcpu); + case 0x71: + return handle_lpswey(vcpu); default: return -EOPNOTSUPP; } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index abb629d7e131..7e3e767ab87d 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -55,6 +55,8 @@ unsigned long *crst_table_alloc(struct mm_struct *mm) void crst_table_free(struct mm_struct *mm, unsigned long *table) { + if (!table) + return; pagetable_free(virt_to_ptdesc(table)); } @@ -262,6 +264,8 @@ static unsigned long *base_crst_alloc(unsigned long val) static void base_crst_free(unsigned long *table) { + if (!table) + return; 
pagetable_free(virt_to_ptdesc(table)); } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index ff8f24854c64..0ef83b6ac0db 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -410,7 +410,7 @@ static void __init cpu_enable_directed_irq(void *unused) union zpci_sic_iib iib = {{0}}; union zpci_sic_iib ziib = {{0}}; - iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector; + iib.cdiib.dibv_addr = virt_to_phys(zpci_ibv[smp_processor_id()]->vector); zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib); zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib); diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index 9dca568509a5..d6f4afcb0e87 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -59,3 +59,14 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1, (u64)len0 << 32 | len1, advice); #endif } + +/* + * swap the arguments the way that libc wants them instead of + * moving flags ahead of the 64-bit nbytes argument + */ +SYSCALL_DEFINE6(sh_sync_file_range6, int, fd, SC_ARG64(offset), + SC_ARG64(nbytes), unsigned int, flags) +{ + return ksys_sync_file_range(fd, SC_VAL64(loff_t, offset), + SC_VAL64(loff_t, nbytes), flags); +} diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index bbf83a2db986..c55fd7696d40 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -321,7 +321,7 @@ 311 common set_robust_list sys_set_robust_list 312 common get_robust_list sys_get_robust_list 313 common splice sys_splice -314 common sync_file_range sys_sync_file_range +314 common sync_file_range sys_sh_sync_file_range6 315 common tee sys_tee 316 common vmsplice sys_vmsplice 317 common move_pages sys_move_pages @@ -395,6 +395,7 @@ 385 common pkey_alloc sys_pkey_alloc 386 common pkey_free sys_pkey_free 387 common rseq sys_rseq +388 common sync_file_range2 sys_sync_file_range2 # room for arch specific syscalls 393 common 
semget sys_semget 394 common semctl sys_semctl diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index a45f0f31fe51..a3d308f2043e 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -18,224 +18,3 @@ sys32_mmap2: sethi %hi(sys_mmap), %g1 jmpl %g1 + %lo(sys_mmap), %g0 sllx %o5, 12, %o5 - - .align 32 - .globl sys32_socketcall -sys32_socketcall: /* %o0=call, %o1=args */ - cmp %o0, 1 - bl,pn %xcc, do_einval - cmp %o0, 18 - bg,pn %xcc, do_einval - sub %o0, 1, %o0 - sllx %o0, 5, %o0 - sethi %hi(__socketcall_table_begin), %g2 - or %g2, %lo(__socketcall_table_begin), %g2 - jmpl %g2 + %o0, %g0 - nop -do_einval: - retl - mov -EINVAL, %o0 - - .align 32 -__socketcall_table_begin: - - /* Each entry is exactly 32 bytes. */ -do_sys_socket: /* sys_socket(int, int, int) */ -1: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_socket), %g1 -2: ldswa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_socket), %g0 -3: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */ -4: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_bind), %g1 -5: ldswa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_bind), %g0 -6: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */ -7: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_connect), %g1 -8: ldswa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_connect), %g0 -9: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_listen: /* sys_listen(int, int) */ -10: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_listen), %g1 - jmpl %g1 + %lo(sys_listen), %g0 -11: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop - nop - nop -do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */ -12: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_accept), %g1 -13: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_accept), %g0 -14: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */ -15: ldswa [%o1 + 0x0] 
%asi, %o0 - sethi %hi(sys_getsockname), %g1 -16: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_getsockname), %g0 -17: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */ -18: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_getpeername), %g1 -19: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_getpeername), %g0 -20: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */ -21: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_socketpair), %g1 -22: ldswa [%o1 + 0x8] %asi, %o2 -23: lduwa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_socketpair), %g0 -24: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop -do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */ -25: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_send), %g1 -26: lduwa [%o1 + 0x8] %asi, %o2 -27: lduwa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_send), %g0 -28: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop -do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */ -29: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_recv), %g1 -30: lduwa [%o1 + 0x8] %asi, %o2 -31: lduwa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_recv), %g0 -32: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop -do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */ -33: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_sendto), %g1 -34: lduwa [%o1 + 0x8] %asi, %o2 -35: lduwa [%o1 + 0xc] %asi, %o3 -36: lduwa [%o1 + 0x10] %asi, %o4 -37: ldswa [%o1 + 0x14] %asi, %o5 - jmpl %g1 + %lo(sys_sendto), %g0 -38: lduwa [%o1 + 0x4] %asi, %o1 -do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */ -39: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_recvfrom), %g1 -40: lduwa [%o1 + 0x8] %asi, %o2 -41: lduwa [%o1 + 0xc] %asi, %o3 -42: lduwa [%o1 + 0x10] %asi, %o4 -43: lduwa [%o1 + 0x14] %asi, %o5 - jmpl %g1 + %lo(sys_recvfrom), %g0 -44: lduwa [%o1 + 0x4] %asi, %o1 -do_sys_shutdown: /* sys_shutdown(int, int) */ -45: ldswa 
[%o1 + 0x0] %asi, %o0 - sethi %hi(sys_shutdown), %g1 - jmpl %g1 + %lo(sys_shutdown), %g0 -46: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop - nop - nop -do_sys_setsockopt: /* sys_setsockopt(int, int, int, char *, int) */ -47: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_setsockopt), %g1 -48: ldswa [%o1 + 0x8] %asi, %o2 -49: lduwa [%o1 + 0xc] %asi, %o3 -50: ldswa [%o1 + 0x10] %asi, %o4 - jmpl %g1 + %lo(sys_setsockopt), %g0 -51: ldswa [%o1 + 0x4] %asi, %o1 - nop -do_sys_getsockopt: /* sys_getsockopt(int, int, int, u32, u32) */ -52: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_getsockopt), %g1 -53: ldswa [%o1 + 0x8] %asi, %o2 -54: lduwa [%o1 + 0xc] %asi, %o3 -55: lduwa [%o1 + 0x10] %asi, %o4 - jmpl %g1 + %lo(sys_getsockopt), %g0 -56: ldswa [%o1 + 0x4] %asi, %o1 - nop -do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */ -57: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(compat_sys_sendmsg), %g1 -58: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(compat_sys_sendmsg), %g0 -59: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */ -60: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(compat_sys_recvmsg), %g1 -61: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(compat_sys_recvmsg), %g0 -62: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */ -63: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_accept4), %g1 -64: lduwa [%o1 + 0x8] %asi, %o2 -65: ldswa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_accept4), %g0 -66: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - - .section __ex_table,"a" - .align 4 - .word 1b, __retl_efault, 2b, __retl_efault - .word 3b, __retl_efault, 4b, __retl_efault - .word 5b, __retl_efault, 6b, __retl_efault - .word 7b, __retl_efault, 8b, __retl_efault - .word 9b, __retl_efault, 10b, __retl_efault - .word 11b, __retl_efault, 12b, __retl_efault - .word 13b, __retl_efault, 14b, __retl_efault - .word 15b, __retl_efault, 
16b, __retl_efault - .word 17b, __retl_efault, 18b, __retl_efault - .word 19b, __retl_efault, 20b, __retl_efault - .word 21b, __retl_efault, 22b, __retl_efault - .word 23b, __retl_efault, 24b, __retl_efault - .word 25b, __retl_efault, 26b, __retl_efault - .word 27b, __retl_efault, 28b, __retl_efault - .word 29b, __retl_efault, 30b, __retl_efault - .word 31b, __retl_efault, 32b, __retl_efault - .word 33b, __retl_efault, 34b, __retl_efault - .word 35b, __retl_efault, 36b, __retl_efault - .word 37b, __retl_efault, 38b, __retl_efault - .word 39b, __retl_efault, 40b, __retl_efault - .word 41b, __retl_efault, 42b, __retl_efault - .word 43b, __retl_efault, 44b, __retl_efault - .word 45b, __retl_efault, 46b, __retl_efault - .word 47b, __retl_efault, 48b, __retl_efault - .word 49b, __retl_efault, 50b, __retl_efault - .word 51b, __retl_efault, 52b, __retl_efault - .word 53b, __retl_efault, 54b, __retl_efault - .word 55b, __retl_efault, 56b, __retl_efault - .word 57b, __retl_efault, 58b, __retl_efault - .word 59b, __retl_efault, 60b, __retl_efault - .word 61b, __retl_efault, 62b, __retl_efault - .word 63b, __retl_efault, 64b, __retl_efault - .word 65b, __retl_efault, 66b, __retl_efault - .previous diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index ac6c281ccfe0..cfdfb3707c16 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -117,7 +117,7 @@ 90 common dup2 sys_dup2 91 32 setfsuid32 sys_setfsuid 92 common fcntl sys_fcntl compat_sys_fcntl -93 common select sys_select +93 common select sys_select compat_sys_select 94 32 setfsgid32 sys_setfsgid 95 common fsync sys_fsync 96 common setpriority sys_setpriority @@ -155,7 +155,7 @@ 123 32 fchown sys_fchown16 123 64 fchown sys_fchown 124 common fchmod sys_fchmod -125 common recvfrom sys_recvfrom +125 common recvfrom sys_recvfrom compat_sys_recvfrom 126 32 setreuid sys_setreuid16 126 64 setreuid sys_setreuid 127 32 setregid sys_setregid16 @@ 
-247,7 +247,7 @@ 204 32 readdir sys_old_readdir compat_sys_old_readdir 204 64 readdir sys_nis_syscall 205 common readahead sys_readahead compat_sys_readahead -206 common socketcall sys_socketcall sys32_socketcall +206 common socketcall sys_socketcall compat_sys_socketcall 207 common syslog sys_syslog 208 common lookup_dcookie sys_ni_syscall 209 common fadvise64 sys_fadvise64 compat_sys_fadvise64 @@ -461,7 +461,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 11c9b8efdc4c..ed0a5f2dc129 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -89,10 +89,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) cld - IBRS_ENTER - UNTRAIN_RET - CLEAR_BRANCH_HISTORY - /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -116,6 +112,16 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) jnz .Lsysenter_fix_flags .Lsysenter_flags_fixed: + /* + * CPU bugs mitigations mechanisms can call other functions. They + * should be invoked after making sure TF is cleared because + * single-step is ignored only for instructions inside the + * entry_SYSENTER_compat function. 
+ */ + IBRS_ENTER + UNTRAIN_RET + CLEAR_BRANCH_HISTORY + movq %rsp, %rdi call do_SYSENTER_32 jmp sysret32_from_system_call diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 7fd1f57ad3d3..d6ebcab1d8b2 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -420,7 +420,7 @@ 412 i386 utimensat_time64 sys_utimensat 413 i386 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 i386 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 i386 io_pgetevents_time64 sys_io_pgetevents +416 i386 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 i386 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 i386 mq_timedsend_time64 sys_mq_timedsend 419 i386 mq_timedreceive_time64 sys_mq_timedreceive diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ed2797f132ce..62cef2113ca7 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -93,10 +93,9 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, \ asm volatile(ALTERNATIVE(_lock_loc \ "call cmpxchg8b_emu", \ - _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \ - : [ptr] "+m" (*(_ptr)), \ - "+a" (o.low), "+d" (o.high) \ - : "b" (n.low), "c" (n.high), "S" (_ptr) \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + : "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ : "memory"); \ \ o.full; \ @@ -122,12 +121,11 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 \ asm volatile(ALTERNATIVE(_lock_loc \ "call cmpxchg8b_emu", \ - _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ CC_SET(e) \ : CC_OUT(e) (ret), \ - [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ - : "b" (n.low), "c" (n.high), "S" (_ptr) \ + : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ : "memory"); \ \ if (unlikely(!ret)) \ diff --git 
a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 1dc600fa3ba5..481096177500 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -401,7 +401,6 @@ extern int __init efi_memmap_alloc(unsigned int num_entries, struct efi_memory_map_data *data); extern void __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags); -#define __efi_memmap_free __efi_memmap_free extern int __init efi_memmap_install(struct efi_memory_map_data *data); extern int __init efi_memmap_split_count(efi_memory_desc_t *md, diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 7e523bb3d2d3..fb2809b20b0a 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -73,19 +73,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, #endif /* - * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), - * but not enough for x86 stack utilization comfort. To keep - * reasonable stack head room, reduce the maximum offset to 8 bits. - * - * The actual entropy will be further reduced by the compiler when - * applying stack alignment constraints (see cc_stack_align4/8 in + * This value will get limited by KSTACK_OFFSET_MAX(), which is 10 + * bits. The actual entropy will be further reduced by the compiler + * when applying stack alignment constraints (see cc_stack_align4/8 in * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32) * low bits from any entropy chosen here. * - * Therefore, final stack offset entropy will be 5 (x86_64) or - * 6 (ia32) bits. + * Therefore, final stack offset entropy will be 7 (x86_64) or + * 8 (ia32) bits. 
*/ - choose_random_kstack_offset(rdtsc() & 0xFF); + choose_random_kstack_offset(rdtsc()); } #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h new file mode 100644 index 000000000000..24e3a53ca255 --- /dev/null +++ b/arch/x86/include/asm/runtime-const.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("mov %1,%0\n1:\n" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - %c2 - .\n\t" \ + ".popsection" \ + :"=r" (__ret) \ + :"i" ((unsigned long)0x0123456789abcdefull), \ + "i" (sizeof(long))); \ + __ret; }) + +// The 'typeof' will create at _least_ a 32-bit type, but +// will happily also take a bigger type and the 'shrl' will +// clear the upper bits +#define runtime_const_shift_right_32(val, sym) ({ \ + typeof(0u+(val)) __ret = (val); \ + asm_inline("shrl $12,%k0\n1:\n" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - 1 - .\n\t" \ + ".popsection" \ + :"+r" (__ret)); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* + * The text patching is trivial - you can only do this at init time, + * when the text section hasn't been marked RO, and before the text + * has ever been executed. 
+ */ +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + *(unsigned long *)where = val; +} + +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + *(unsigned char *)where = val; +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index e8d7d4941c4c..422a47746657 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -5,45 +5,12 @@ #include <linux/bitops.h> #include <linux/wordpart.h> -/* - * This is largely generic for little-endian machines, but the - * optimal byte mask counting is probably going to be something - * that is architecture-specific. If you have a reliably fast - * bit count instruction, that might be better than the multiply - * and shift, for example. - */ struct word_at_a_time { const unsigned long one_bits, high_bits; }; #define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } -#ifdef CONFIG_64BIT - -/* - * Jan Achrenius on G+: microoptimized version of - * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" - * that works for the bytemasks without having to - * mask them first. 
- */ -static inline long count_masked_bytes(unsigned long mask) -{ - return mask*0x0001020304050608ul >> 56; -} - -#else /* 32-bit case */ - -/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ -static inline long count_masked_bytes(long mask) -{ - /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ - long a = (0x0ff0001+mask) >> 23; - /* Fix the 1 for 00 case */ - return a & mask; -} - -#endif - /* Return nonzero if it has a zero */ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) { @@ -57,6 +24,22 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, return bits; } +#ifdef CONFIG_64BIT + +/* Keep the initial has_zero() value for both bitmask and size calc */ +#define create_zero_mask(bits) (bits) + +static inline unsigned long zero_bytemask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +#define find_zero(bits) (__ffs(bits) >> 3) + +#else + +/* Create the final mask for both bytemask and size */ static inline unsigned long create_zero_mask(unsigned long bits) { bits = (bits - 1) & ~bits; @@ -66,11 +49,17 @@ static inline unsigned long create_zero_mask(unsigned long bits) /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ static inline unsigned long find_zero(unsigned long mask) { - return count_masked_bytes(mask); + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; } +#endif + /* * Load an unaligned word from kernel space. 
* diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 2345e6836593..366f496ca3ce 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -519,7 +519,8 @@ void free_rmid(u32 closid, u32 rmid) * allows architectures that ignore the closid parameter to avoid an * unnecessary check. */ - if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, + if (!resctrl_arch_mon_capable() || + idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, RESCTRL_RESERVED_RMID)) return; diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index e42faa792c07..52e1f3f0b361 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -27,25 +27,7 @@ unsigned long profile_pc(struct pt_regs *regs) { - unsigned long pc = instruction_pointer(regs); - - if (!user_mode(regs) && in_lock_functions(pc)) { -#ifdef CONFIG_FRAME_POINTER - return *(unsigned long *)(regs->bp + sizeof(long)); -#else - unsigned long *sp = (unsigned long *)regs->sp; - /* - * Return address is either directly at stack pointer - * or above a saved flags. Eflags has bits 22-31 zero, - * kernel addresses don't. - */ - if (sp[0] >> 22) - return sp[0]; - if (sp[1] >> 22) - return sp[1]; -#endif - } - return pc; + return instruction_pointer(regs); } EXPORT_SYMBOL(profile_pc); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3509afc6a672..6e73403e874f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -357,6 +357,9 @@ SECTIONS PERCPU_SECTION(INTERNODE_CACHE_BYTES) #endif + RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + . 
= ALIGN(PAGE_SIZE); /* freed after init ends here */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 296c524988f9..c95d3900fe56 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2843,7 +2843,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (sev_es_prevent_msr_access(vcpu, msr_info)) { msr_info->data = 0; - return -EINVAL; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; } switch (msr_info->index) { @@ -2998,7 +2998,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) u64 data = msr->data; if (sev_es_prevent_msr_access(vcpu, msr)) - return -EINVAL; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; switch (ecx) { case MSR_AMD64_TSC_RATIO: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c9e4281d978..0763a0f72a06 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10718,13 +10718,12 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256); + static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); + if (irqchip_split(vcpu->kvm)) kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); - else { - static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); - if (ioapic_in_kernel(vcpu->kvm)) - kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); - } + else if (ioapic_in_kernel(vcpu->kvm)) + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); if (is_guest_mode(vcpu)) vcpu->arch.load_eoi_exitmap_pending = true; diff --git a/arch/x86/platform/efi/memmap.c b/arch/x86/platform/efi/memmap.c index 4ef20b49eb5e..6ed1935504b9 100644 --- a/arch/x86/platform/efi/memmap.c +++ b/arch/x86/platform/efi/memmap.c @@ -92,12 +92,22 @@ int __init efi_memmap_alloc(unsigned int num_entries, */ int __init efi_memmap_install(struct efi_memory_map_data *data) { + unsigned long size = efi.memmap.desc_size * efi.memmap.nr_map; + unsigned long flags = efi.memmap.flags; + u64 phys = efi.memmap.phys_map; + int 
ret; + efi_memmap_unmap(); if (efi_enabled(EFI_PARAVIRT)) return 0; - return __efi_memmap_init(data); + ret = __efi_memmap_init(data); + if (ret) + return ret; + + __efi_memmap_free(phys, size, flags); + return 0; } /** diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h index 08010dbf5e09..df275d554788 100644 --- a/arch/xtensa/include/asm/current.h +++ b/arch/xtensa/include/asm/current.h @@ -19,7 +19,7 @@ struct task_struct; -static inline struct task_struct *get_current(void) +static __always_inline struct task_struct *get_current(void) { return current_thread_info()->task; } diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 326db1c1d5d8..e0dffcc43b9e 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -91,7 +91,7 @@ struct thread_info { } /* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) +static __always_inline struct thread_info *current_thread_info(void) { struct thread_info *ti; __asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t" diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c index 8907b8bf4267..c49b9f8de723 100644 --- a/drivers/acpi/acpica/exregion.c +++ b/drivers/acpi/acpica/exregion.c @@ -44,7 +44,6 @@ acpi_ex_system_memory_space_handler(u32 function, struct acpi_mem_mapping *mm = mem_info->cur_mm; u32 length; acpi_size map_length; - acpi_size page_boundary_map_length; #ifdef ACPI_MISALIGNMENT_NOT_SUPPORTED u32 remainder; #endif @@ -138,26 +137,8 @@ acpi_ex_system_memory_space_handler(u32 function, map_length = (acpi_size) ((mem_info->address + mem_info->length) - address); - /* - * If mapping the entire remaining portion of the region will cross - * a page boundary, just map up to the page boundary, do not cross. 
- * On some systems, crossing a page boundary while mapping regions - * can cause warnings if the pages have different attributes - * due to resource management. - * - * This has the added benefit of constraining a single mapping to - * one page, which is similar to the original code that used a 4k - * maximum window. - */ - page_boundary_map_length = (acpi_size) - (ACPI_ROUND_UP(address, ACPI_DEFAULT_PAGE_SIZE) - address); - if (page_boundary_map_length == 0) { - page_boundary_map_length = ACPI_DEFAULT_PAGE_SIZE; - } - - if (map_length > page_boundary_map_length) { - map_length = page_boundary_map_length; - } + if (map_length > ACPI_DEFAULT_PAGE_SIZE) + map_length = ACPI_DEFAULT_PAGE_SIZE; /* Create a new mapping starting at the address given */ diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 2a0e9fc7b74c..601b670356e5 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -302,6 +302,10 @@ void acpi_mipi_check_crs_csi2(acpi_handle handle); void acpi_mipi_scan_crs_csi2(void); void acpi_mipi_init_crs_csi2_swnodes(void); void acpi_mipi_crs_csi2_cleanup(void); +#ifdef CONFIG_X86 bool acpi_graph_ignore_port(acpi_handle handle); +#else +static inline bool acpi_graph_ignore_port(acpi_handle handle) { return false; } +#endif #endif /* _ACPI_INTERNAL_H_ */ diff --git a/drivers/acpi/mipi-disco-img.c b/drivers/acpi/mipi-disco-img.c index d05413a0672a..92b658f92dc0 100644 --- a/drivers/acpi/mipi-disco-img.c +++ b/drivers/acpi/mipi-disco-img.c @@ -725,14 +725,20 @@ void acpi_mipi_crs_csi2_cleanup(void) acpi_mipi_del_crs_csi2(csi2); } -static const struct dmi_system_id dmi_ignore_port_nodes[] = { - { - .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "XPS 9315"), - }, - }, - { } +#ifdef CONFIG_X86 +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> + +/* CPU matches for Dell generations with broken ACPI MIPI DISCO info */ +static const struct x86_cpu_id 
dell_broken_mipi_disco_cpu_gens[] = { + X86_MATCH_VFM(INTEL_TIGERLAKE, NULL), + X86_MATCH_VFM(INTEL_TIGERLAKE_L, NULL), + X86_MATCH_VFM(INTEL_ALDERLAKE, NULL), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, NULL), + X86_MATCH_VFM(INTEL_RAPTORLAKE, NULL), + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, NULL), + X86_MATCH_VFM(INTEL_RAPTORLAKE_S, NULL), + {} }; static const char *strnext(const char *s1, const char *s2) @@ -761,7 +767,10 @@ bool acpi_graph_ignore_port(acpi_handle handle) static bool dmi_tested, ignore_port; if (!dmi_tested) { - ignore_port = dmi_first_match(dmi_ignore_port_nodes); + if (dmi_name_in_vendors("Dell Inc.") && + x86_match_cpu(dell_broken_mipi_disco_cpu_gens)) + ignore_port = true; + dmi_tested = true; } @@ -794,3 +803,4 @@ out_free: kfree(orig_path); return false; } +#endif diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index bd6a7857ce05..831fa4a12159 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -16,7 +16,6 @@ #include <linux/acpi.h> #include <linux/dmi.h> #include <linux/sched.h> /* need_resched() */ -#include <linux/sort.h> #include <linux/tick.h> #include <linux/cpuidle.h> #include <linux/cpu.h> @@ -386,25 +385,24 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr, acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1); } -static int acpi_cst_latency_cmp(const void *a, const void *b) +static void acpi_cst_latency_sort(struct acpi_processor_cx *states, size_t length) { - const struct acpi_processor_cx *x = a, *y = b; + int i, j, k; - if (!(x->valid && y->valid)) - return 0; - if (x->latency > y->latency) - return 1; - if (x->latency < y->latency) - return -1; - return 0; -} -static void acpi_cst_latency_swap(void *a, void *b, int n) -{ - struct acpi_processor_cx *x = a, *y = b; + for (i = 1; i < length; i++) { + if (!states[i].valid) + continue; - if (!(x->valid && y->valid)) - return; - swap(x->latency, y->latency); + for (j = i - 1, k = i; j >= 0; j--) { + if 
(!states[j].valid) + continue; + + if (states[j].latency > states[k].latency) + swap(states[j].latency, states[k].latency); + + k = j; + } + } } static int acpi_processor_power_verify(struct acpi_processor *pr) @@ -449,10 +447,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr) if (buggy_latency) { pr_notice("FW issue: working around C-state latencies out of order\n"); - sort(&pr->power.states[1], max_cstate, - sizeof(struct acpi_processor_cx), - acpi_cst_latency_cmp, - acpi_cst_latency_swap); + acpi_cst_latency_sort(&pr->power.states[1], max_cstate); } lapic_timer_propagate_broadcast(pr); diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 07d66d2c5f0d..fc6fd583faf8 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1735,6 +1735,14 @@ static void ahci_update_initial_lpm_policy(struct ata_port *ap) if (ap->pflags & ATA_PFLAG_EXTERNAL) return; + /* If no LPM states are supported by the HBA, do not bother with LPM */ + if ((ap->host->flags & ATA_HOST_NO_PART) && + (ap->host->flags & ATA_HOST_NO_SSC) && + (ap->host->flags & ATA_HOST_NO_DEVSLP)) { + ata_port_dbg(ap, "no LPM states supported, not enabling LPM\n"); + return; + } + /* user modified policy via module param */ if (mobile_lpm_policy != -1) { policy = mobile_lpm_policy; @@ -1967,8 +1975,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) n_ports = max(ahci_nr_ports(hpriv->cap), fls(hpriv->port_map)); host = ata_host_alloc_pinfo(&pdev->dev, ppi, n_ports); - if (!host) - return -ENOMEM; + if (!host) { + rc = -ENOMEM; + goto err_rm_sysfs_file; + } host->private_data = hpriv; if (ahci_init_msi(pdev, n_ports, hpriv) < 0) { @@ -2023,11 +2033,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* initialize adapter */ rc = ahci_configure_dma_masks(pdev, hpriv); if (rc) - return rc; + goto err_rm_sysfs_file; rc = ahci_pci_reset_controller(host); if (rc) - return rc; + goto err_rm_sysfs_file; 
ahci_pci_init_controller(host); ahci_pci_print_info(host); @@ -2036,10 +2046,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc = ahci_host_activate(host, &ahci_sht); if (rc) - return rc; + goto err_rm_sysfs_file; pm_runtime_put_noidle(&pdev->dev); return 0; + +err_rm_sysfs_file: + sysfs_remove_file_from_group(&pdev->dev.kobj, + &dev_attr_remapped_nvme.attr, NULL); + return rc; } static void ahci_shutdown_one(struct pci_dev *pdev) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index e1bf8a19b3c8..74b59b78d278 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4137,8 +4137,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER BD-RW BDR-205", NULL, ATA_HORKAGE_NOLPM }, /* Crucial devices with broken LPM support */ - { "CT500BX100SSD1", NULL, ATA_HORKAGE_NOLPM }, - { "CT240BX500SSD1", NULL, ATA_HORKAGE_NOLPM }, + { "CT*0BX*00SSD1", NULL, ATA_HORKAGE_NOLPM }, /* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */ { "Crucial_CT512MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | @@ -5490,6 +5489,18 @@ struct ata_port *ata_port_alloc(struct ata_host *host) return ap; } +void ata_port_free(struct ata_port *ap) +{ + if (!ap) + return; + + kfree(ap->pmp_link); + kfree(ap->slave_link); + kfree(ap->ncq_sense_buf); + kfree(ap); +} +EXPORT_SYMBOL_GPL(ata_port_free); + static void ata_devres_release(struct device *gendev, void *res) { struct ata_host *host = dev_get_drvdata(gendev); @@ -5516,12 +5527,7 @@ static void ata_host_release(struct kref *kref) int i; for (i = 0; i < host->n_ports; i++) { - struct ata_port *ap = host->ports[i]; - - kfree(ap->pmp_link); - kfree(ap->slave_link); - kfree(ap->ncq_sense_buf); - kfree(ap); + ata_port_free(host->ports[i]); host->ports[i] = NULL; } kfree(host); @@ -5571,8 +5577,10 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports) if (!host) return NULL; - if (!devres_open_group(dev, NULL, GFP_KERNEL)) - 
goto err_free; + if (!devres_open_group(dev, NULL, GFP_KERNEL)) { + kfree(host); + return NULL; + } dr = devres_alloc(ata_devres_release, 0, GFP_KERNEL); if (!dr) @@ -5604,8 +5612,6 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports) err_out: devres_release_group(dev, NULL); - err_free: - kfree(host); return NULL; } EXPORT_SYMBOL_GPL(ata_host_alloc); @@ -5904,7 +5910,7 @@ int ata_host_register(struct ata_host *host, const struct scsi_host_template *sh * allocation time. */ for (i = host->n_ports; host->ports[i]; i++) - kfree(host->ports[i]); + ata_port_free(host->ports[i]); /* give ports names and add SCSI hosts */ for (i = 0; i < host->n_ports; i++) { diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 5b6805d87fcf..dd3c0626c72d 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -382,7 +382,7 @@ static int btintel_pcie_recv_frame(struct btintel_pcie_data *data, /* The first 4 bytes indicates the Intel PCIe specific packet type */ pdata = skb_pull_data(skb, BTINTEL_PCIE_HCI_TYPE_LEN); - if (!data) { + if (!pdata) { bt_dev_err(hdev, "Corrupted packet received"); ret = -EILSEQ; goto exit_error; diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 9d0c7e278114..9bfa9a6ad56c 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -281,7 +281,7 @@ static u8 crc8_table[CRC8_TABLE_SIZE]; /* Default configurations */ #define DEFAULT_H2C_WAKEUP_MODE WAKEUP_METHOD_BREAK -#define DEFAULT_PS_MODE PS_MODE_DISABLE +#define DEFAULT_PS_MODE PS_MODE_ENABLE #define FW_INIT_BAUDRATE HCI_NXP_PRI_BAUDRATE static struct sk_buff *nxp_drv_send_cmd(struct hci_dev *hdev, u16 opcode, diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c index 0c2f15235b4c..d90858ea2fe5 100644 --- a/drivers/bluetooth/hci_bcm4377.c +++ b/drivers/bluetooth/hci_bcm4377.c @@ -495,6 +495,10 @@ struct bcm4377_data; * extended scanning * 
broken_mws_transport_config: Set to true if the chip erroneously claims to * support MWS Transport Configuration + * broken_le_ext_adv_report_phy: Set to true if this chip stuffs flags inside + * reserved bits of Primary/Secondary_PHY inside + * LE Extended Advertising Report events which + * have to be ignored * send_calibration: Optional callback to send calibration data * send_ptb: Callback to send "PTB" regulatory/calibration data */ @@ -513,6 +517,7 @@ struct bcm4377_hw { unsigned long broken_ext_scan : 1; unsigned long broken_mws_transport_config : 1; unsigned long broken_le_coded : 1; + unsigned long broken_le_ext_adv_report_phy : 1; int (*send_calibration)(struct bcm4377_data *bcm4377); int (*send_ptb)(struct bcm4377_data *bcm4377, @@ -716,7 +721,7 @@ static void bcm4377_handle_ack(struct bcm4377_data *bcm4377, ring->events[msgid] = NULL; } - bitmap_release_region(ring->msgids, msgid, ring->n_entries); + bitmap_release_region(ring->msgids, msgid, 0); unlock: spin_unlock_irqrestore(&ring->lock, flags); @@ -2373,6 +2378,8 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id) set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks); if (bcm4377->hw->broken_le_coded) set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); + if (bcm4377->hw->broken_le_ext_adv_report_phy) + set_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, &hdev->quirks); pci_set_drvdata(pdev, bcm4377); hci_set_drvdata(hdev, bcm4377); @@ -2477,6 +2484,7 @@ static const struct bcm4377_hw bcm4377_hw_variants[] = { .clear_pciecfg_subsystem_ctrl_bit19 = true, .broken_mws_transport_config = true, .broken_le_coded = true, + .broken_le_ext_adv_report_phy = true, .send_calibration = bcm4387_send_calibration, .send_ptb = bcm4378_send_ptb, }, diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 0c9c9ee56592..9a0bc86f9aac 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2450,15 +2450,27 @@ static void qca_serdev_shutdown(struct device *dev) 
struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); struct hci_uart *hu = &qcadev->serdev_hu; struct hci_dev *hdev = hu->hdev; - struct qca_data *qca = hu->priv; const u8 ibs_wake_cmd[] = { 0xFD }; const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 }; if (qcadev->btsoc_type == QCA_QCA6390) { - if (test_bit(QCA_BT_OFF, &qca->flags) || - !test_bit(HCI_RUNNING, &hdev->flags)) + /* The purpose of sending the VSC is to reset SOC into an initial + * state and the state will ensure next hdev->setup() success. + * if HCI_QUIRK_NON_PERSISTENT_SETUP is set, it means that + * hdev->setup() can do its job regardless of SoC state, so + * don't need to send the VSC. + * if HCI_SETUP is set, it means that hdev->setup() was never + * invoked and the SOC is already in the initial state, so + * don't also need to send the VSC. + */ + if (test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks) || + hci_dev_test_flag(hdev, HCI_SETUP)) return; + /* The serdev must be in open state when control logic arrives + * here, so also fix the use-after-free issue caused by that + * the serdev is flushed or written to after it is closed.
+ */ serdev_device_write_flush(serdev); ret = serdev_device_write_buf(serdev, ibs_wake_cmd, sizeof(ibs_wake_cmd)); diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index d51fc8321d41..da32e8ed0830 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -269,8 +269,13 @@ hpet_read(struct file *file, char __user *buf, size_t count, loff_t * ppos) if (!devp->hd_ireqfreq) return -EIO; - if (count < sizeof(unsigned long)) - return -EINVAL; + if (in_compat_syscall()) { + if (count < sizeof(compat_ulong_t)) + return -EINVAL; + } else { + if (count < sizeof(unsigned long)) + return -EINVAL; + } add_wait_queue(&devp->hd_waitqueue, &wait); @@ -294,9 +299,16 @@ hpet_read(struct file *file, char __user *buf, size_t count, loff_t * ppos) schedule(); } - retval = put_user(data, (unsigned long __user *)buf); - if (!retval) - retval = sizeof(unsigned long); + if (in_compat_syscall()) { + retval = put_user(data, (compat_ulong_t __user *)buf); + if (!retval) + retval = sizeof(compat_ulong_t); + } else { + retval = put_user(data, (unsigned long __user *)buf); + if (!retval) + retval = sizeof(unsigned long); + } + out: __set_current_state(TASK_RUNNING); remove_wait_queue(&devp->hd_waitqueue, &wait); @@ -651,12 +663,24 @@ struct compat_hpet_info { unsigned short hi_timer; }; +/* 32-bit types would lead to different command codes which should be + * translated into 64-bit ones before passed to hpet_ioctl_common + */ +#define COMPAT_HPET_INFO _IOR('h', 0x03, struct compat_hpet_info) +#define COMPAT_HPET_IRQFREQ _IOW('h', 0x6, compat_ulong_t) + static long hpet_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct hpet_info info; int err; + if (cmd == COMPAT_HPET_INFO) + cmd = HPET_INFO; + + if (cmd == COMPAT_HPET_IRQFREQ) + cmd = HPET_IRQFREQ; + mutex_lock(&hpet_mutex); err = hpet_ioctl_common(file->private_data, cmd, arg, &info); mutex_unlock(&hpet_mutex); diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index 
4c695b0388f3..9bb142c75243 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -16,8 +16,8 @@ tpm-y += eventlog/common.o tpm-y += eventlog/tpm1.o tpm-y += eventlog/tpm2.o tpm-y += tpm-buf.o +tpm-y += tpm2-sessions.o -tpm-$(CONFIG_TCG_TPM2_HMAC) += tpm2-sessions.o tpm-$(CONFIG_ACPI) += tpm_ppi.o eventlog/acpi.o tpm-$(CONFIG_EFI) += eventlog/efi.o tpm-$(CONFIG_OF) += eventlog/of.o diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c index 907ac9956a78..2281d55df545 100644 --- a/drivers/char/tpm/tpm2-sessions.c +++ b/drivers/char/tpm/tpm2-sessions.c @@ -83,9 +83,6 @@ #define AES_KEY_BYTES AES_KEYSIZE_128 #define AES_KEY_BITS (AES_KEY_BYTES*8) -static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy, - u32 *handle, u8 *name); - /* * This is the structure that carries all the auth information (like * session handle, nonces, session key and auth) from use to use it is @@ -148,6 +145,7 @@ struct tpm2_auth { u8 name[AUTH_MAX_NAMES][2 + SHA512_DIGEST_SIZE]; }; +#ifdef CONFIG_TCG_TPM2_HMAC /* * Name Size based on TPM algorithm (assumes no hash bigger than 255) */ @@ -163,6 +161,226 @@ static u8 name_size(const u8 *name) return size_map[alg] + 2; } +static int tpm2_parse_read_public(char *name, struct tpm_buf *buf) +{ + struct tpm_header *head = (struct tpm_header *)buf->data; + off_t offset = TPM_HEADER_SIZE; + u32 tot_len = be32_to_cpu(head->length); + u32 val; + + /* we're starting after the header so adjust the length */ + tot_len -= TPM_HEADER_SIZE; + + /* skip public */ + val = tpm_buf_read_u16(buf, &offset); + if (val > tot_len) + return -EINVAL; + offset += val; + /* name */ + val = tpm_buf_read_u16(buf, &offset); + if (val != name_size(&buf->data[offset])) + return -EINVAL; + memcpy(name, &buf->data[offset], val); + /* forget the rest */ + return 0; +} + +static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name) +{ + struct tpm_buf buf; + int rc; + + rc = tpm_buf_init(&buf, 
TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC); + if (rc) + return rc; + + tpm_buf_append_u32(&buf, handle); + rc = tpm_transmit_cmd(chip, &buf, 0, "read public"); + if (rc == TPM2_RC_SUCCESS) + rc = tpm2_parse_read_public(name, &buf); + + tpm_buf_destroy(&buf); + + return rc; +} +#endif /* CONFIG_TCG_TPM2_HMAC */ + +/** + * tpm_buf_append_name() - add a handle area to the buffer + * @chip: the TPM chip structure + * @buf: The buffer to be appended + * @handle: The handle to be appended + * @name: The name of the handle (may be NULL) + * + * In order to compute session HMACs, we need to know the names of the + * objects pointed to by the handles. For most objects, this is simply + * the actual 4 byte handle or an empty buf (in these cases @name + * should be NULL) but for volatile objects, permanent objects and NV + * areas, the name is defined as the hash (according to the name + * algorithm which should be set to sha256) of the public area to + * which the two byte algorithm id has been appended. For these + * objects, the @name pointer should point to this. If a name is + * required but @name is NULL, then TPM2_ReadPublic() will be called + * on the handle to obtain the name. + * + * As with most tpm_buf operations, success is assumed because failure + * will be caused by an incorrect programming model and indicated by a + * kernel message. 
+ */ +void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, + u32 handle, u8 *name) +{ +#ifdef CONFIG_TCG_TPM2_HMAC + enum tpm2_mso_type mso = tpm2_handle_mso(handle); + struct tpm2_auth *auth; + int slot; +#endif + + if (!tpm2_chip_auth(chip)) { + tpm_buf_append_u32(buf, handle); + /* count the number of handles in the upper bits of flags */ + buf->handles++; + return; + } + +#ifdef CONFIG_TCG_TPM2_HMAC + slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4; + if (slot >= AUTH_MAX_NAMES) { + dev_err(&chip->dev, "TPM: too many handles\n"); + return; + } + auth = chip->auth; + WARN(auth->session != tpm_buf_length(buf), + "name added in wrong place\n"); + tpm_buf_append_u32(buf, handle); + auth->session += 4; + + if (mso == TPM2_MSO_PERSISTENT || + mso == TPM2_MSO_VOLATILE || + mso == TPM2_MSO_NVRAM) { + if (!name) + tpm2_read_public(chip, handle, auth->name[slot]); + } else { + if (name) + dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n"); + } + + auth->name_h[slot] = handle; + if (name) + memcpy(auth->name[slot], name, name_size(name)); +#endif +} +EXPORT_SYMBOL_GPL(tpm_buf_append_name); + +/** + * tpm_buf_append_hmac_session() - Append a TPM session element + * @chip: the TPM chip structure + * @buf: The buffer to be appended + * @attributes: The session attributes + * @passphrase: The session authority (NULL if none) + * @passphrase_len: The length of the session authority (0 if none) + * + * This fills in a session structure in the TPM command buffer, except + * for the HMAC which cannot be computed until the command buffer is + * complete. 
The type of session is controlled by the @attributes, + * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the + * session won't terminate after tpm_buf_check_hmac_response(), + * TPM2_SA_DECRYPT which means this buffers first parameter should be + * encrypted with a session key and TPM2_SA_ENCRYPT, which means the + * response buffer's first parameter needs to be decrypted (confusing, + * but the defines are written from the point of view of the TPM). + * + * Any session appended by this command must be finalized by calling + * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect + * and the TPM will reject the command. + * + * As with most tpm_buf operations, success is assumed because failure + * will be caused by an incorrect programming model and indicated by a + * kernel message. + */ +void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, + u8 attributes, u8 *passphrase, + int passphrase_len) +{ +#ifdef CONFIG_TCG_TPM2_HMAC + u8 nonce[SHA256_DIGEST_SIZE]; + struct tpm2_auth *auth; + u32 len; +#endif + + if (!tpm2_chip_auth(chip)) { + /* offset tells us where the sessions area begins */ + int offset = buf->handles * 4 + TPM_HEADER_SIZE; + u32 len = 9 + passphrase_len; + + if (tpm_buf_length(buf) != offset) { + /* not the first session so update the existing length */ + len += get_unaligned_be32(&buf->data[offset]); + put_unaligned_be32(len, &buf->data[offset]); + } else { + tpm_buf_append_u32(buf, len); + } + /* auth handle */ + tpm_buf_append_u32(buf, TPM2_RS_PW); + /* nonce */ + tpm_buf_append_u16(buf, 0); + /* attributes */ + tpm_buf_append_u8(buf, 0); + /* passphrase */ + tpm_buf_append_u16(buf, passphrase_len); + tpm_buf_append(buf, passphrase, passphrase_len); + return; + } + +#ifdef CONFIG_TCG_TPM2_HMAC + /* + * The Architecture Guide requires us to strip trailing zeros + * before computing the HMAC + */ + while (passphrase && passphrase_len > 0 && passphrase[passphrase_len - 1] == '\0') + 
passphrase_len--; + + auth = chip->auth; + auth->attrs = attributes; + auth->passphrase_len = passphrase_len; + if (passphrase_len) + memcpy(auth->passphrase, passphrase, passphrase_len); + + if (auth->session != tpm_buf_length(buf)) { + /* we're not the first session */ + len = get_unaligned_be32(&buf->data[auth->session]); + if (4 + len + auth->session != tpm_buf_length(buf)) { + WARN(1, "session length mismatch, cannot append"); + return; + } + + /* add our new session */ + len += 9 + 2 * SHA256_DIGEST_SIZE; + put_unaligned_be32(len, &buf->data[auth->session]); + } else { + tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE); + } + + /* random number for our nonce */ + get_random_bytes(nonce, sizeof(nonce)); + memcpy(auth->our_nonce, nonce, sizeof(nonce)); + tpm_buf_append_u32(buf, auth->handle); + /* our new nonce */ + tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); + tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); + tpm_buf_append_u8(buf, auth->attrs); + /* and put a placeholder for the hmac */ + tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); + tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); +#endif +} +EXPORT_SYMBOL_GPL(tpm_buf_append_hmac_session); + +#ifdef CONFIG_TCG_TPM2_HMAC + +static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy, + u32 *handle, u8 *name); + /* * It turns out the crypto hmac(sha256) is hard for us to consume * because it assumes a fixed key and the TPM seems to change the key @@ -344,82 +562,6 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip) } /** - * tpm_buf_append_hmac_session() - Append a TPM session element - * @chip: the TPM chip structure - * @buf: The buffer to be appended - * @attributes: The session attributes - * @passphrase: The session authority (NULL if none) - * @passphrase_len: The length of the session authority (0 if none) - * - * This fills in a session structure in the TPM command buffer, except - * for the HMAC which cannot be computed until the command buffer is - * complete. 
The type of session is controlled by the @attributes, - * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the - * session won't terminate after tpm_buf_check_hmac_response(), - * TPM2_SA_DECRYPT which means this buffers first parameter should be - * encrypted with a session key and TPM2_SA_ENCRYPT, which means the - * response buffer's first parameter needs to be decrypted (confusing, - * but the defines are written from the point of view of the TPM). - * - * Any session appended by this command must be finalized by calling - * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect - * and the TPM will reject the command. - * - * As with most tpm_buf operations, success is assumed because failure - * will be caused by an incorrect programming model and indicated by a - * kernel message. - */ -void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, - u8 attributes, u8 *passphrase, - int passphrase_len) -{ - u8 nonce[SHA256_DIGEST_SIZE]; - u32 len; - struct tpm2_auth *auth = chip->auth; - - /* - * The Architecture Guide requires us to strip trailing zeros - * before computing the HMAC - */ - while (passphrase && passphrase_len > 0 - && passphrase[passphrase_len - 1] == '\0') - passphrase_len--; - - auth->attrs = attributes; - auth->passphrase_len = passphrase_len; - if (passphrase_len) - memcpy(auth->passphrase, passphrase, passphrase_len); - - if (auth->session != tpm_buf_length(buf)) { - /* we're not the first session */ - len = get_unaligned_be32(&buf->data[auth->session]); - if (4 + len + auth->session != tpm_buf_length(buf)) { - WARN(1, "session length mismatch, cannot append"); - return; - } - - /* add our new session */ - len += 9 + 2 * SHA256_DIGEST_SIZE; - put_unaligned_be32(len, &buf->data[auth->session]); - } else { - tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE); - } - - /* random number for our nonce */ - get_random_bytes(nonce, sizeof(nonce)); - memcpy(auth->our_nonce, nonce, sizeof(nonce)); - 
tpm_buf_append_u32(buf, auth->handle); - /* our new nonce */ - tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); - tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); - tpm_buf_append_u8(buf, auth->attrs); - /* and put a placeholder for the hmac */ - tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); - tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); -} -EXPORT_SYMBOL(tpm_buf_append_hmac_session); - -/** * tpm_buf_fill_hmac_session() - finalize the session HMAC * @chip: the TPM chip structure * @buf: The buffer to be appended @@ -449,6 +591,9 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) u8 cphash[SHA256_DIGEST_SIZE]; struct sha256_state sctx; + if (!auth) + return; + /* save the command code in BE format */ auth->ordinal = head->ordinal; @@ -567,104 +712,6 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) } EXPORT_SYMBOL(tpm_buf_fill_hmac_session); -static int tpm2_parse_read_public(char *name, struct tpm_buf *buf) -{ - struct tpm_header *head = (struct tpm_header *)buf->data; - off_t offset = TPM_HEADER_SIZE; - u32 tot_len = be32_to_cpu(head->length); - u32 val; - - /* we're starting after the header so adjust the length */ - tot_len -= TPM_HEADER_SIZE; - - /* skip public */ - val = tpm_buf_read_u16(buf, &offset); - if (val > tot_len) - return -EINVAL; - offset += val; - /* name */ - val = tpm_buf_read_u16(buf, &offset); - if (val != name_size(&buf->data[offset])) - return -EINVAL; - memcpy(name, &buf->data[offset], val); - /* forget the rest */ - return 0; -} - -static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name) -{ - struct tpm_buf buf; - int rc; - - rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC); - if (rc) - return rc; - - tpm_buf_append_u32(&buf, handle); - rc = tpm_transmit_cmd(chip, &buf, 0, "read public"); - if (rc == TPM2_RC_SUCCESS) - rc = tpm2_parse_read_public(name, &buf); - - tpm_buf_destroy(&buf); - - return rc; -} - -/** - * tpm_buf_append_name() - add a handle 
area to the buffer - * @chip: the TPM chip structure - * @buf: The buffer to be appended - * @handle: The handle to be appended - * @name: The name of the handle (may be NULL) - * - * In order to compute session HMACs, we need to know the names of the - * objects pointed to by the handles. For most objects, this is simply - * the actual 4 byte handle or an empty buf (in these cases @name - * should be NULL) but for volatile objects, permanent objects and NV - * areas, the name is defined as the hash (according to the name - * algorithm which should be set to sha256) of the public area to - * which the two byte algorithm id has been appended. For these - * objects, the @name pointer should point to this. If a name is - * required but @name is NULL, then TPM2_ReadPublic() will be called - * on the handle to obtain the name. - * - * As with most tpm_buf operations, success is assumed because failure - * will be caused by an incorrect programming model and indicated by a - * kernel message. 
- */ -void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, - u32 handle, u8 *name) -{ - enum tpm2_mso_type mso = tpm2_handle_mso(handle); - struct tpm2_auth *auth = chip->auth; - int slot; - - slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE)/4; - if (slot >= AUTH_MAX_NAMES) { - dev_err(&chip->dev, "TPM: too many handles\n"); - return; - } - WARN(auth->session != tpm_buf_length(buf), - "name added in wrong place\n"); - tpm_buf_append_u32(buf, handle); - auth->session += 4; - - if (mso == TPM2_MSO_PERSISTENT || - mso == TPM2_MSO_VOLATILE || - mso == TPM2_MSO_NVRAM) { - if (!name) - tpm2_read_public(chip, handle, auth->name[slot]); - } else { - if (name) - dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n"); - } - - auth->name_h[slot] = handle; - if (name) - memcpy(auth->name[slot], name, name_size(name)); -} -EXPORT_SYMBOL(tpm_buf_append_name); - /** * tpm_buf_check_hmac_response() - check the TPM return HMAC for correctness * @chip: the TPM chip structure @@ -705,6 +752,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, u32 cc = be32_to_cpu(auth->ordinal); int parm_len, len, i, handles; + if (!auth) + return rc; + if (auth->session >= TPM_HEADER_SIZE) { WARN(1, "tpm session not filled correctly\n"); goto out; @@ -824,8 +874,13 @@ EXPORT_SYMBOL(tpm_buf_check_hmac_response); */ void tpm2_end_auth_session(struct tpm_chip *chip) { - tpm2_flush_context(chip, chip->auth->handle); - memzero_explicit(chip->auth, sizeof(*chip->auth)); + struct tpm2_auth *auth = chip->auth; + + if (!auth) + return; + + tpm2_flush_context(chip, auth->handle); + memzero_explicit(auth, sizeof(*auth)); } EXPORT_SYMBOL(tpm2_end_auth_session); @@ -907,6 +962,11 @@ int tpm2_start_auth_session(struct tpm_chip *chip) int rc; u32 null_key; + if (!auth) { + dev_warn_once(&chip->dev, "auth session is not active\n"); + return 0; + } + rc = tpm2_load_null(chip, &null_key); if (rc) goto out; @@ -1301,3 +1361,4 @@ int 
tpm2_sessions_init(struct tpm_chip *chip) return rc; } +#endif /* CONFIG_TCG_TPM2_HMAC */ diff --git a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c index ba504e19d420..62d876e150e1 100644 --- a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c +++ b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c @@ -29,6 +29,7 @@ static const struct mtk_gate mfg_clks[] = { static const struct mtk_clk_desc mfg_desc = { .clks = mfg_clks, .num_clks = ARRAY_SIZE(mfg_clks), + .need_runtime_pm = true, }; static const struct of_device_id of_match_clk_mt8183_mfg[] = { diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index bd37ab4d1a9b..ba1d1c495bc2 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -496,14 +496,16 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev, } - devm_pm_runtime_enable(&pdev->dev); - /* - * Do a pm_runtime_resume_and_get() to workaround a possible - * deadlock between clk_register() and the genpd framework. - */ - r = pm_runtime_resume_and_get(&pdev->dev); - if (r) - return r; + if (mcd->need_runtime_pm) { + devm_pm_runtime_enable(&pdev->dev); + /* + * Do a pm_runtime_resume_and_get() to workaround a possible + * deadlock between clk_register() and the genpd framework. 
+ */ + r = pm_runtime_resume_and_get(&pdev->dev); + if (r) + return r; + } /* Calculate how many clk_hw_onecell_data entries to allocate */ num_clks = mcd->num_clks + mcd->num_composite_clks; @@ -585,7 +587,8 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev, goto unregister_clks; } - pm_runtime_put(&pdev->dev); + if (mcd->need_runtime_pm) + pm_runtime_put(&pdev->dev); return r; @@ -618,7 +621,8 @@ free_base: if (mcd->shared_io && base) iounmap(base); - pm_runtime_put(&pdev->dev); + if (mcd->need_runtime_pm) + pm_runtime_put(&pdev->dev); return r; } diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h index 22096501a60a..c17fe1c2d732 100644 --- a/drivers/clk/mediatek/clk-mtk.h +++ b/drivers/clk/mediatek/clk-mtk.h @@ -237,6 +237,8 @@ struct mtk_clk_desc { int (*clk_notifier_func)(struct device *dev, struct clk *clk); unsigned int mfg_clk_idx; + + bool need_runtime_pm; }; int mtk_clk_pdev_probe(struct platform_device *pdev); diff --git a/drivers/clk/qcom/apss-ipq-pll.c b/drivers/clk/qcom/apss-ipq-pll.c index 5f7f537e4ecb..e8632db2c542 100644 --- a/drivers/clk/qcom/apss-ipq-pll.c +++ b/drivers/clk/qcom/apss-ipq-pll.c @@ -70,7 +70,6 @@ static struct clk_alpha_pll ipq_pll_stromer_plus = { static const struct alpha_pll_config ipq5018_pll_config = { .l = 0x2a, .config_ctl_val = 0x4001075b, - .config_ctl_hi_val = 0x304, .main_output_mask = BIT(0), .aux_output_mask = BIT(1), .early_output_mask = BIT(3), @@ -84,7 +83,6 @@ static const struct alpha_pll_config ipq5018_pll_config = { static const struct alpha_pll_config ipq5332_pll_config = { .l = 0x2d, .config_ctl_val = 0x4001075b, - .config_ctl_hi_val = 0x304, .main_output_mask = BIT(0), .aux_output_mask = BIT(1), .early_output_mask = BIT(3), diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index d4227909d1fe..c51647e37df8 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -2574,6 +2574,9 @@ static int 
clk_alpha_pll_stromer_plus_set_rate(struct clk_hw *hw, regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL_U(pll), a >> ALPHA_BITWIDTH); + regmap_update_bits(pll->clkr.regmap, PLL_USER_CTL(pll), + PLL_ALPHA_EN, PLL_ALPHA_EN); + regmap_write(pll->clkr.regmap, PLL_MODE(pll), PLL_BYPASSNL); /* Wait five micro seconds or more */ diff --git a/drivers/clk/qcom/gcc-ipq9574.c b/drivers/clk/qcom/gcc-ipq9574.c index 0a3f846695b8..f8b9a1e93bef 100644 --- a/drivers/clk/qcom/gcc-ipq9574.c +++ b/drivers/clk/qcom/gcc-ipq9574.c @@ -2140,9 +2140,10 @@ static struct clk_rcg2 pcnoc_bfdcd_clk_src = { static struct clk_branch gcc_crypto_axi_clk = { .halt_reg = 0x16010, + .halt_check = BRANCH_HALT_VOTED, .clkr = { - .enable_reg = 0x16010, - .enable_mask = BIT(0), + .enable_reg = 0xb004, + .enable_mask = BIT(15), .hw.init = &(const struct clk_init_data) { .name = "gcc_crypto_axi_clk", .parent_hws = (const struct clk_hw *[]) { @@ -2156,9 +2157,10 @@ static struct clk_branch gcc_crypto_axi_clk = { static struct clk_branch gcc_crypto_ahb_clk = { .halt_reg = 0x16014, + .halt_check = BRANCH_HALT_VOTED, .clkr = { - .enable_reg = 0x16014, - .enable_mask = BIT(0), + .enable_reg = 0xb004, + .enable_mask = BIT(16), .hw.init = &(const struct clk_init_data) { .name = "gcc_crypto_ahb_clk", .parent_hws = (const struct clk_hw *[]) { diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c index cf4a7b6e0b23..0559a33faf00 100644 --- a/drivers/clk/qcom/gcc-sm6350.c +++ b/drivers/clk/qcom/gcc-sm6350.c @@ -100,8 +100,8 @@ static struct clk_alpha_pll gpll6 = { .enable_mask = BIT(6), .hw.init = &(struct clk_init_data){ .name = "gpll6", - .parent_hws = (const struct clk_hw*[]){ - &gpll0.clkr.hw, + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", }, .num_parents = 1, .ops = &clk_alpha_pll_fixed_fabia_ops, @@ -124,7 +124,7 @@ static struct clk_alpha_pll_postdiv gpll6_out_even = { .clkr.hw.init = &(struct clk_init_data){ .name = "gpll6_out_even", .parent_hws = (const struct 
clk_hw*[]){ - &gpll0.clkr.hw, + &gpll6.clkr.hw, }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_fabia_ops, @@ -139,8 +139,8 @@ static struct clk_alpha_pll gpll7 = { .enable_mask = BIT(7), .hw.init = &(struct clk_init_data){ .name = "gpll7", - .parent_hws = (const struct clk_hw*[]){ - &gpll0.clkr.hw, + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", }, .num_parents = 1, .ops = &clk_alpha_pll_fixed_fabia_ops, diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c index ac0091b4ce24..be375ce0149c 100644 --- a/drivers/clk/sunxi-ng/ccu_common.c +++ b/drivers/clk/sunxi-ng/ccu_common.c @@ -132,7 +132,6 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev, for (i = 0; i < desc->hw_clks->num ; i++) { struct clk_hw *hw = desc->hw_clks->hws[i]; - struct ccu_common *common = hw_to_ccu_common(hw); const char *name; if (!hw) @@ -147,14 +146,21 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev, pr_err("Couldn't register clock %d - %s\n", i, name); goto err_clk_unreg; } + } + + for (i = 0; i < desc->num_ccu_clks; i++) { + struct ccu_common *cclk = desc->ccu_clks[i]; + + if (!cclk) + continue; - if (common->max_rate) - clk_hw_set_rate_range(hw, common->min_rate, - common->max_rate); + if (cclk->max_rate) + clk_hw_set_rate_range(&cclk->hw, cclk->min_rate, + cclk->max_rate); else - WARN(common->min_rate, + WARN(cclk->min_rate, "No max_rate, ignoring min_rate of clock %d - %s\n", - i, name); + i, clk_hw_get_name(&cclk->hw)); } ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c index 072b11fd6b32..825ae22c3ebc 100644 --- a/drivers/counter/ti-eqep.c +++ b/drivers/counter/ti-eqep.c @@ -6,6 +6,7 @@ */ #include <linux/bitops.h> +#include <linux/clk.h> #include <linux/counter.h> #include <linux/kernel.h> #include <linux/mod_devicetable.h> @@ -376,6 +377,7 @@ static int ti_eqep_probe(struct platform_device *pdev) struct 
counter_device *counter; struct ti_eqep_cnt *priv; void __iomem *base; + struct clk *clk; int err; counter = devm_counter_alloc(dev, sizeof(*priv)); @@ -415,6 +417,10 @@ static int ti_eqep_probe(struct platform_device *pdev) pm_runtime_enable(dev); pm_runtime_get_sync(dev); + clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), "failed to enable clock\n"); + err = counter_add(counter); if (err < 0) { pm_runtime_put_sync(dev); diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 37f1cdf46d29..4ac3a35dcd98 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -890,8 +890,10 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) + if (acpi_cpufreq_driver.set_boost) { set_boost(policy, acpi_cpufreq_driver.boost_enabled); + policy->boost_enabled = acpi_cpufreq_driver.boost_enabled; + } return result; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a45aac17c20f..9e5060b27864 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1431,7 +1431,8 @@ static int cpufreq_online(unsigned int cpu) } /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - policy->boost_enabled = cpufreq_boost_enabled() && policy_has_boost_freq(policy); + if (cpufreq_boost_enabled() && policy_has_boost_freq(policy)) + policy->boost_enabled = true; /* * The initialization has succeeded and the policy is online. 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 15de5e3d96fd..c31914a9876f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -355,15 +355,14 @@ static void intel_pstate_set_itmt_prio(int cpu) int ret; ret = cppc_get_perf_caps(cpu, &cppc_perf); - if (ret) - return; - /* - * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff. - * In this case we can't use CPPC.highest_perf to enable ITMT. - * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide. + * If CPPC is not available, fall back to MSR_HWP_CAPABILITIES bits [8:0]. + * + * Also, on some systems with overclocking enabled, CPPC.highest_perf is + * hardcoded to 0xff, so CPPC.highest_perf cannot be used to enable ITMT. + * Fall back to MSR_HWP_CAPABILITIES then too. */ - if (cppc_perf.highest_perf == CPPC_MAX_PERF) + if (ret || cppc_perf.highest_perf == CPPC_MAX_PERF) cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached)); /* diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 6f9266edc9f1..eac73cbfdd38 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -39,7 +39,8 @@ intel_qat-objs := adf_cfg.o \ adf_sysfs_rl.o \ qat_uclo.o \ qat_hal.o \ - qat_bl.o + qat_bl.o \ + qat_mig_dev.o intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \ adf_fw_counters.o \ @@ -56,6 +57,6 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \ intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \ adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \ adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \ - adf_gen2_pfvf.o adf_gen4_pfvf.o qat_mig_dev.o + adf_gen2_pfvf.o adf_gen4_pfvf.o intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 784843fa2a22..3df10517a327 100644 --- 
a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -52,6 +52,14 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port) struct cxl_dport *dport = NULL; int single_port_map[1]; unsigned long index; + struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); + + /* + * Capability checks are moot for passthrough decoders, support + * any and all possibilities. + */ + cxlhdm->interleave_mask = ~0U; + cxlhdm->iw_cap_mask = ~0UL; cxlsd = cxl_switch_decoder_alloc(port, 1); if (IS_ERR(cxlsd)) @@ -79,6 +87,11 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) cxlhdm->interleave_mask |= GENMASK(11, 8); if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap)) cxlhdm->interleave_mask |= GENMASK(14, 12); + cxlhdm->iw_cap_mask = BIT(1) | BIT(2) | BIT(4) | BIT(8); + if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap)) + cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12); + if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap)) + cxlhdm->iw_cap_mask |= BIT(16); } static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index e69625a8d6a1..c00f3a933164 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -62,10 +62,14 @@ static int match_nvdimm_bridge(struct device *dev, void *data) return is_cxl_nvdimm_bridge(dev); } -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd) +/** + * cxl_find_nvdimm_bridge() - find a bridge device relative to a port + * @port: any descendant port of an nvdimm-bridge associated + * root-cxl-port + */ +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port) { - struct cxl_root *cxl_root __free(put_cxl_root) = - find_cxl_root(cxlmd->endpoint); + struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port); struct device *dev; if (!cxl_root) @@ -242,18 +246,20 @@ static void cxlmd_release_nvdimm(void *_cxlmd) /** * devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an 
nvdimm + * @parent_port: parent port for the (to be added) @cxlmd endpoint port * @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations * * Return: 0 on success negative error code on failure. */ -int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd) +int devm_cxl_add_nvdimm(struct cxl_port *parent_port, + struct cxl_memdev *cxlmd) { struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_nvdimm *cxl_nvd; struct device *dev; int rc; - cxl_nvb = cxl_find_nvdimm_bridge(cxlmd); + cxl_nvb = cxl_find_nvdimm_bridge(parent_port); if (!cxl_nvb) return -ENODEV; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 3c2b6144be23..538ebd5a64fd 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1101,6 +1101,26 @@ static int cxl_port_attach_region(struct cxl_port *port, } cxld = cxl_rr->decoder; + /* + * the number of targets should not exceed the target_count + * of the decoder + */ + if (is_switch_decoder(&cxld->dev)) { + struct cxl_switch_decoder *cxlsd; + + cxlsd = to_cxl_switch_decoder(&cxld->dev); + if (cxl_rr->nr_targets > cxlsd->nr_targets) { + dev_dbg(&cxlr->dev, + "%s:%s %s add: %s:%s @ %d overflows targets: %d\n", + dev_name(port->uport_dev), dev_name(&port->dev), + dev_name(&cxld->dev), dev_name(&cxlmd->dev), + dev_name(&cxled->cxld.dev), pos, + cxlsd->nr_targets); + rc = -ENXIO; + goto out_erase; + } + } + rc = cxl_rr_ep_add(cxl_rr, cxled); if (rc) { dev_dbg(&cxlr->dev, @@ -1210,6 +1230,50 @@ static int check_last_peer(struct cxl_endpoint_decoder *cxled, return 0; } +static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig) +{ + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); + unsigned int interleave_mask; + u8 eiw; + u16 eig; + int high_pos, low_pos; + + if (!test_bit(iw, &cxlhdm->iw_cap_mask)) + return -ENXIO; + /* + * Per CXL specification r3.1(8.2.4.20.13 Decoder Protection), + * if eiw < 8: + * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 
+ eiw] + * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0] + * + * when the eiw is 0, all the bits of HPAOFFSET[51: 0] are used, the + * interleave bits are none. + * + * if eiw >= 8: + * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3 + * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0] + * + * when the eiw is 8, all the bits of HPAOFFSET[51: 0] are used, the + * interleave bits are none. + */ + ways_to_eiw(iw, &eiw); + if (eiw == 0 || eiw == 8) + return 0; + + granularity_to_eig(ig, &eig); + if (eiw > 8) + high_pos = eiw + eig - 1; + else + high_pos = eiw + eig + 7; + low_pos = eig + 8; + interleave_mask = GENMASK(high_pos, low_pos); + if (interleave_mask & ~cxlhdm->interleave_mask) + return -ENXIO; + + return 0; +} + static int cxl_port_setup_targets(struct cxl_port *port, struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled) @@ -1360,6 +1424,15 @@ static int cxl_port_setup_targets(struct cxl_port *port, return -ENXIO; } } else { + rc = check_interleave_cap(cxld, iw, ig); + if (rc) { + dev_dbg(&cxlr->dev, + "%s:%s iw: %d ig: %d is not supported\n", + dev_name(port->uport_dev), + dev_name(&port->dev), iw, ig); + return rc; + } + cxld->interleave_ways = iw; cxld->interleave_granularity = ig; cxld->hpa_range = (struct range) { @@ -1796,6 +1869,15 @@ static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_dport *dport; int rc = -ENXIO; + rc = check_interleave_cap(&cxled->cxld, p->interleave_ways, + p->interleave_granularity); + if (rc) { + dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n", + dev_name(&cxled->cxld.dev), p->interleave_ways, + p->interleave_granularity); + return rc; + } + if (cxled->mode != cxlr->mode) { dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); @@ -2688,22 +2770,33 @@ static int __cxl_dpa_to_region(struct device *dev, void *arg) { struct cxl_dpa_to_region_context *ctx = arg; struct cxl_endpoint_decoder *cxled; + struct cxl_region *cxlr; u64 dpa = ctx->dpa; 
if (!is_endpoint_decoder(dev)) return 0; cxled = to_cxl_endpoint_decoder(dev); - if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) + if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res)) return 0; if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start) return 0; - dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa, - dev_name(&cxled->cxld.region->dev)); + /* + * Stop the region search (return 1) when an endpoint mapping is + * found. The region may not be fully constructed so offering + * the cxlr in the context structure is not guaranteed. + */ + cxlr = cxled->cxld.region; + if (cxlr) + dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa, + dev_name(&cxlr->dev)); + else + dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa, + dev_name(dev)); - ctx->cxlr = cxled->cxld.region; + ctx->cxlr = cxlr; return 1; } @@ -2847,7 +2940,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr) * bridge for one device is the same for all. */ if (i == 0) { - cxl_nvb = cxl_find_nvdimm_bridge(cxlmd); + cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint); if (!cxl_nvb) return -ENODEV; cxlr->cxl_nvb = cxl_nvb; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 603c0120cff8..a6613a6f8923 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -47,6 +47,8 @@ extern const struct nvdimm_security_ops *cxl_security_ops; #define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4) #define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8) #define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9) +#define CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11) +#define CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12) #define CXL_HDM_DECODER_CTRL_OFFSET 0x4 #define CXL_HDM_DECODER_ENABLE BIT(1) #define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10) @@ -855,8 +857,8 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm_bridge(struct device *dev); -int 
devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd); -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd); +int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd); +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port); #ifdef CONFIG_CXL_REGION bool is_cxl_pmem_region(struct device *dev); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 19aba81cdf13..af8169ccdbc0 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -395,9 +395,9 @@ enum cxl_devtype { /** * struct cxl_dpa_perf - DPA performance property entry - * @dpa_range - range for DPA address - * @coord - QoS performance data (i.e. latency, bandwidth) - * @qos_class - QoS Class cookies + * @dpa_range: range for DPA address + * @coord: QoS performance data (i.e. latency, bandwidth) + * @qos_class: QoS Class cookies */ struct cxl_dpa_perf { struct range dpa_range; @@ -464,13 +464,14 @@ struct cxl_dev_state { * @active_persistent_bytes: sum of hard + soft persistent * @next_volatile_bytes: volatile capacity change pending device reset * @next_persistent_bytes: persistent capacity change pending device reset + * @ram_perf: performance data entry matched to RAM partition + * @pmem_perf: performance data entry matched to PMEM partition * @event: event log driver state * @poison: poison driver state info * @security: security driver state info * @fw: firmware upload / activation state + * @mbox_wait: RCU wait for mbox send completely * @mbox_send: @dev specific transport for transmitting mailbox commands - * @ram_perf: performance data entry matched to RAM partition - * @pmem_perf: performance data entry matched to PMEM partition * * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for * details on capacity parameters. 
@@ -851,11 +852,21 @@ static inline void cxl_mem_active_dec(void) int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd); +/** + * struct cxl_hdm - HDM Decoder registers and cached / decoded capabilities + * @regs: mapped registers, see devm_cxl_setup_hdm() + * @decoder_count: number of decoders for this port + * @target_count: for switch decoders, max downstream port targets + * @interleave_mask: interleave granularity capability, see check_interleave_cap() + * @iw_cap_mask: bitmask of supported interleave ways, see check_interleave_cap() + * @port: mapped cxl_port, see devm_cxl_setup_hdm() + */ struct cxl_hdm { struct cxl_component_regs regs; unsigned int decoder_count; unsigned int target_count; unsigned int interleave_mask; + unsigned long iw_cap_mask; struct cxl_port *port; }; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 0c79d9ce877c..2f1b49bfe162 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -152,6 +152,15 @@ static int cxl_mem_probe(struct device *dev) return -ENXIO; } + if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) { + rc = devm_cxl_add_nvdimm(parent_port, cxlmd); + if (rc) { + if (rc == -ENODEV) + dev_info(dev, "PMEM disabled by platform\n"); + return rc; + } + } + if (dport->rch) endpoint_parent = parent_port->uport_dev; else @@ -174,14 +183,6 @@ unlock: if (rc) return rc; - if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) { - rc = devm_cxl_add_nvdimm(cxlmd); - if (rc == -ENODEV) - dev_info(dev, "PMEM disabled by platform\n"); - else - return rc; - } - /* * The kernel may be operating out of CXL memory on this device, * there is no spec defined way to determine whether this device diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 002a5ec80620..9fc99cfbef08 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -394,7 +394,7 @@ config LS2X_APB_DMA config MCF_EDMA tristate "Freescale eDMA engine support, ColdFire mcf5441x SoCs" - depends on M5441x || COMPILE_TEST + depends 
on M5441x || (COMPILE_TEST && FSL_EDMA=n) select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 8dc029c86551..fc049c9c9892 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -611,11 +611,13 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry) spin_unlock(&irq_entry->list_lock); - list_for_each_entry(desc, &flist, list) { + list_for_each_entry_safe(desc, n, &flist, list) { /* * Check against the original status as ABORT is software defined * and 0xff, which DSA_COMP_STATUS_MASK can mask out. */ + list_del(&desc->list); + if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { idxd_desc_complete(desc, IDXD_COMPLETE_ABORT, true); continue; diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 9c364e92cb82..e8f45a7fded4 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -534,18 +534,6 @@ err_out: return err; } -static int ioat_register(struct ioatdma_device *ioat_dma) -{ - int err = dma_async_device_register(&ioat_dma->dma_dev); - - if (err) { - ioat_disable_interrupts(ioat_dma); - dma_pool_destroy(ioat_dma->completion_pool); - } - - return err; -} - static void ioat_dma_remove(struct ioatdma_device *ioat_dma) { struct dma_device *dma = &ioat_dma->dma_dev; @@ -1181,9 +1169,9 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); } - err = ioat_register(ioat_dma); + err = dma_async_device_register(&ioat_dma->dma_dev); if (err) - return err; + goto err_disable_interrupts; ioat_kobject_add(ioat_dma, &ioat_ktype); @@ -1192,20 +1180,29 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) /* disable relaxed ordering */ err = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &val16); - if (err) - return pcibios_err_to_errno(err); + if (err) { + err = pcibios_err_to_errno(err); + goto err_disable_interrupts; + } /* clear relaxed ordering enable */ val16 &= 
~PCI_EXP_DEVCTL_RELAX_EN; err = pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, val16); - if (err) - return pcibios_err_to_errno(err); + if (err) { + err = pcibios_err_to_errno(err); + goto err_disable_interrupts; + } if (ioat_dma->cap & IOAT_CAP_DPS) writeb(ioat_pending_level + 1, ioat_dma->reg_base + IOAT_PREFETCH_LIMIT_OFFSET); return 0; + +err_disable_interrupts: + ioat_disable_interrupts(ioat_dma); + dma_pool_destroy(ioat_dma->completion_pool); + return err; } static void ioat_shutdown(struct pci_dev *pdev) @@ -1350,6 +1347,8 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) void __iomem * const *iomap; struct device *dev = &pdev->dev; struct ioatdma_device *device; + unsigned int i; + u8 version; int err; err = pcim_enable_device(pdev); @@ -1363,6 +1362,10 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!iomap) return -ENOMEM; + version = readb(iomap[IOAT_MMIO_BAR] + IOAT_VER_OFFSET); + if (version < IOAT_VER_3_0) + return -ENODEV; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) return err; @@ -1373,17 +1376,18 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); pci_set_drvdata(pdev, device); - device->version = readb(device->reg_base + IOAT_VER_OFFSET); + device->version = version; if (device->version >= IOAT_VER_3_4) ioat_dca_enabled = 0; - if (device->version >= IOAT_VER_3_0) { - if (is_skx_ioat(pdev)) - device->version = IOAT_VER_3_2; - err = ioat3_dma_probe(device, ioat_dca_enabled); - } else - return -ENODEV; + if (is_skx_ioat(pdev)) + device->version = IOAT_VER_3_2; + + err = ioat3_dma_probe(device, ioat_dca_enabled); if (err) { + for (i = 0; i < IOAT_MAX_CHANS; i++) + kfree(device->idx[i]); + kfree(device); dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); return -ENODEV; } @@ -1445,6 +1449,7 @@ module_init(ioat_init_module); static void __exit ioat_exit_module(void) { 
pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat_sed_cache); kmem_cache_destroy(ioat_cache); } module_exit(ioat_exit_module); diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c index c9b93055dc9d..f0a399cf45b2 100644 --- a/drivers/dma/ti/k3-udma-glue.c +++ b/drivers/dma/ti/k3-udma-glue.c @@ -200,12 +200,9 @@ of_k3_udma_glue_parse_chn_by_id(struct device_node *udmax_np, struct k3_udma_glu ret = of_k3_udma_glue_parse(udmax_np, common); if (ret) - goto out_put_spec; + return ret; ret = of_k3_udma_glue_parse_chn_common(common, thread_id, tx_chn); - -out_put_spec: - of_node_put(udmax_np); return ret; } diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index e143a7330816..718842fdaf98 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -885,11 +885,11 @@ static irqreturn_t xdma_channel_isr(int irq, void *dev_id) u32 st; bool repeat_tx; + spin_lock(&xchan->vchan.lock); + if (xchan->stop_requested) complete(&xchan->last_interrupt); - spin_lock(&xchan->vchan.lock); - /* get submitted request */ vd = vchan_next_desc(&xchan->vchan); if (!vd) diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index 0d139e4de37c..8a347b938406 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -1107,9 +1107,16 @@ struct cs_dsp_coeff_parsed_coeff { int len; }; -static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, const u8 **str) +static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, unsigned int avail, + const u8 **str) { - int length; + int length, total_field_len; + + /* String fields are at least one __le32 */ + if (sizeof(__le32) > avail) { + *pos = NULL; + return 0; + } switch (bytes) { case 1: @@ -1122,10 +1129,16 @@ static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, const u8 **str) return 0; } + total_field_len = ((length + bytes) + 3) & ~0x03; + if ((unsigned int)total_field_len > avail) { + *pos = NULL; + 
return 0; + } + if (str) *str = *pos + bytes; - *pos += ((length + bytes) + 3) & ~0x03; + *pos += total_field_len; return length; } @@ -1150,71 +1163,134 @@ static int cs_dsp_coeff_parse_int(int bytes, const u8 **pos) return val; } -static inline void cs_dsp_coeff_parse_alg(struct cs_dsp *dsp, const u8 **data, - struct cs_dsp_coeff_parsed_alg *blk) +static int cs_dsp_coeff_parse_alg(struct cs_dsp *dsp, + const struct wmfw_region *region, + struct cs_dsp_coeff_parsed_alg *blk) { const struct wmfw_adsp_alg_data *raw; + unsigned int data_len = le32_to_cpu(region->len); + unsigned int pos; + const u8 *tmp; + + raw = (const struct wmfw_adsp_alg_data *)region->data; switch (dsp->fw_ver) { case 0: case 1: - raw = (const struct wmfw_adsp_alg_data *)*data; - *data = raw->data; + if (sizeof(*raw) > data_len) + return -EOVERFLOW; blk->id = le32_to_cpu(raw->id); blk->name = raw->name; - blk->name_len = strlen(raw->name); + blk->name_len = strnlen(raw->name, ARRAY_SIZE(raw->name)); blk->ncoeff = le32_to_cpu(raw->ncoeff); + + pos = sizeof(*raw); break; default: - blk->id = cs_dsp_coeff_parse_int(sizeof(raw->id), data); - blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), data, + if (sizeof(raw->id) > data_len) + return -EOVERFLOW; + + tmp = region->data; + blk->id = cs_dsp_coeff_parse_int(sizeof(raw->id), &tmp); + pos = tmp - region->data; + + tmp = &region->data[pos]; + blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp, data_len - pos, &blk->name); - cs_dsp_coeff_parse_string(sizeof(u16), data, NULL); - blk->ncoeff = cs_dsp_coeff_parse_int(sizeof(raw->ncoeff), data); + if (!tmp) + return -EOVERFLOW; + + pos = tmp - region->data; + cs_dsp_coeff_parse_string(sizeof(u16), &tmp, data_len - pos, NULL); + if (!tmp) + return -EOVERFLOW; + + pos = tmp - region->data; + if (sizeof(raw->ncoeff) > (data_len - pos)) + return -EOVERFLOW; + + blk->ncoeff = cs_dsp_coeff_parse_int(sizeof(raw->ncoeff), &tmp); + pos += sizeof(raw->ncoeff); break; } + if ((int)blk->ncoeff < 0) + return 
-EOVERFLOW; + cs_dsp_dbg(dsp, "Algorithm ID: %#x\n", blk->id); cs_dsp_dbg(dsp, "Algorithm name: %.*s\n", blk->name_len, blk->name); cs_dsp_dbg(dsp, "# of coefficient descriptors: %#x\n", blk->ncoeff); + + return pos; } -static inline void cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp, const u8 **data, - struct cs_dsp_coeff_parsed_coeff *blk) +static int cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp, + const struct wmfw_region *region, + unsigned int pos, + struct cs_dsp_coeff_parsed_coeff *blk) { const struct wmfw_adsp_coeff_data *raw; + unsigned int data_len = le32_to_cpu(region->len); + unsigned int blk_len, blk_end_pos; const u8 *tmp; - int length; + + raw = (const struct wmfw_adsp_coeff_data *)&region->data[pos]; + if (sizeof(raw->hdr) > (data_len - pos)) + return -EOVERFLOW; + + blk_len = le32_to_cpu(raw->hdr.size); + if (blk_len > S32_MAX) + return -EOVERFLOW; + + if (blk_len > (data_len - pos - sizeof(raw->hdr))) + return -EOVERFLOW; + + blk_end_pos = pos + sizeof(raw->hdr) + blk_len; + + blk->offset = le16_to_cpu(raw->hdr.offset); + blk->mem_type = le16_to_cpu(raw->hdr.type); switch (dsp->fw_ver) { case 0: case 1: - raw = (const struct wmfw_adsp_coeff_data *)*data; - *data = *data + sizeof(raw->hdr) + le32_to_cpu(raw->hdr.size); + if (sizeof(*raw) > (data_len - pos)) + return -EOVERFLOW; - blk->offset = le16_to_cpu(raw->hdr.offset); - blk->mem_type = le16_to_cpu(raw->hdr.type); blk->name = raw->name; - blk->name_len = strlen(raw->name); + blk->name_len = strnlen(raw->name, ARRAY_SIZE(raw->name)); blk->ctl_type = le16_to_cpu(raw->ctl_type); blk->flags = le16_to_cpu(raw->flags); blk->len = le32_to_cpu(raw->len); break; default: - tmp = *data; - blk->offset = cs_dsp_coeff_parse_int(sizeof(raw->hdr.offset), &tmp); - blk->mem_type = cs_dsp_coeff_parse_int(sizeof(raw->hdr.type), &tmp); - length = cs_dsp_coeff_parse_int(sizeof(raw->hdr.size), &tmp); - blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp, + pos += sizeof(raw->hdr); + tmp = &region->data[pos]; + 
blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp, data_len - pos, &blk->name); - cs_dsp_coeff_parse_string(sizeof(u8), &tmp, NULL); - cs_dsp_coeff_parse_string(sizeof(u16), &tmp, NULL); + if (!tmp) + return -EOVERFLOW; + + pos = tmp - region->data; + cs_dsp_coeff_parse_string(sizeof(u8), &tmp, data_len - pos, NULL); + if (!tmp) + return -EOVERFLOW; + + pos = tmp - region->data; + cs_dsp_coeff_parse_string(sizeof(u16), &tmp, data_len - pos, NULL); + if (!tmp) + return -EOVERFLOW; + + pos = tmp - region->data; + if (sizeof(raw->ctl_type) + sizeof(raw->flags) + sizeof(raw->len) > + (data_len - pos)) + return -EOVERFLOW; + blk->ctl_type = cs_dsp_coeff_parse_int(sizeof(raw->ctl_type), &tmp); + pos += sizeof(raw->ctl_type); blk->flags = cs_dsp_coeff_parse_int(sizeof(raw->flags), &tmp); + pos += sizeof(raw->flags); blk->len = cs_dsp_coeff_parse_int(sizeof(raw->len), &tmp); - - *data = *data + sizeof(raw->hdr) + length; break; } @@ -1224,6 +1300,8 @@ static inline void cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp, const u8 **data, cs_dsp_dbg(dsp, "\tCoefficient flags: %#x\n", blk->flags); cs_dsp_dbg(dsp, "\tALSA control type: %#x\n", blk->ctl_type); cs_dsp_dbg(dsp, "\tALSA control len: %#x\n", blk->len); + + return blk_end_pos; } static int cs_dsp_check_coeff_flags(struct cs_dsp *dsp, @@ -1247,12 +1325,16 @@ static int cs_dsp_parse_coeff(struct cs_dsp *dsp, struct cs_dsp_alg_region alg_region = {}; struct cs_dsp_coeff_parsed_alg alg_blk; struct cs_dsp_coeff_parsed_coeff coeff_blk; - const u8 *data = region->data; - int i, ret; + int i, pos, ret; + + pos = cs_dsp_coeff_parse_alg(dsp, region, &alg_blk); + if (pos < 0) + return pos; - cs_dsp_coeff_parse_alg(dsp, &data, &alg_blk); for (i = 0; i < alg_blk.ncoeff; i++) { - cs_dsp_coeff_parse_coeff(dsp, &data, &coeff_blk); + pos = cs_dsp_coeff_parse_coeff(dsp, region, pos, &coeff_blk); + if (pos < 0) + return pos; switch (coeff_blk.ctl_type) { case WMFW_CTL_TYPE_BYTES: @@ -1321,6 +1403,10 @@ static unsigned int 
cs_dsp_adsp1_parse_sizes(struct cs_dsp *dsp, const struct wmfw_adsp1_sizes *adsp1_sizes; adsp1_sizes = (void *)&firmware->data[pos]; + if (sizeof(*adsp1_sizes) > firmware->size - pos) { + cs_dsp_err(dsp, "%s: file truncated\n", file); + return 0; + } cs_dsp_dbg(dsp, "%s: %d DM, %d PM, %d ZM\n", file, le32_to_cpu(adsp1_sizes->dm), le32_to_cpu(adsp1_sizes->pm), @@ -1337,6 +1423,10 @@ static unsigned int cs_dsp_adsp2_parse_sizes(struct cs_dsp *dsp, const struct wmfw_adsp2_sizes *adsp2_sizes; adsp2_sizes = (void *)&firmware->data[pos]; + if (sizeof(*adsp2_sizes) > firmware->size - pos) { + cs_dsp_err(dsp, "%s: file truncated\n", file); + return 0; + } cs_dsp_dbg(dsp, "%s: %d XM, %d YM %d PM, %d ZM\n", file, le32_to_cpu(adsp2_sizes->xm), le32_to_cpu(adsp2_sizes->ym), @@ -1376,7 +1466,6 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, struct regmap *regmap = dsp->regmap; unsigned int pos = 0; const struct wmfw_header *header; - const struct wmfw_adsp1_sizes *adsp1_sizes; const struct wmfw_footer *footer; const struct wmfw_region *region; const struct cs_dsp_region *mem; @@ -1392,10 +1481,8 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, ret = -EINVAL; - pos = sizeof(*header) + sizeof(*adsp1_sizes) + sizeof(*footer); - if (pos >= firmware->size) { - cs_dsp_err(dsp, "%s: file too short, %zu bytes\n", - file, firmware->size); + if (sizeof(*header) >= firmware->size) { + ret = -EOVERFLOW; goto out_fw; } @@ -1423,22 +1510,36 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, pos = sizeof(*header); pos = dsp->ops->parse_sizes(dsp, file, pos, firmware); + if ((pos == 0) || (sizeof(*footer) > firmware->size - pos)) { + ret = -EOVERFLOW; + goto out_fw; + } footer = (void *)&firmware->data[pos]; pos += sizeof(*footer); if (le32_to_cpu(header->len) != pos) { - cs_dsp_err(dsp, "%s: unexpected header length %d\n", - file, le32_to_cpu(header->len)); + ret = -EOVERFLOW; goto out_fw; } 
cs_dsp_dbg(dsp, "%s: timestamp %llu\n", file, le64_to_cpu(footer->timestamp)); - while (pos < firmware->size && - sizeof(*region) < firmware->size - pos) { + while (pos < firmware->size) { + /* Is there enough data for a complete block header? */ + if (sizeof(*region) > firmware->size - pos) { + ret = -EOVERFLOW; + goto out_fw; + } + region = (void *)&(firmware->data[pos]); + + if (le32_to_cpu(region->len) > firmware->size - pos - sizeof(*region)) { + ret = -EOVERFLOW; + goto out_fw; + } + region_name = "Unknown"; reg = 0; text = NULL; @@ -1495,16 +1596,6 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, regions, le32_to_cpu(region->len), offset, region_name); - if (le32_to_cpu(region->len) > - firmware->size - pos - sizeof(*region)) { - cs_dsp_err(dsp, - "%s.%d: %s region len %d bytes exceeds file length %zu\n", - file, regions, region_name, - le32_to_cpu(region->len), firmware->size); - ret = -EINVAL; - goto out_fw; - } - if (text) { memcpy(text, region->data, le32_to_cpu(region->len)); cs_dsp_info(dsp, "%s: %s\n", file, text); @@ -1555,6 +1646,9 @@ out_fw: cs_dsp_buf_free(&buf_list); kfree(text); + if (ret == -EOVERFLOW) + cs_dsp_err(dsp, "%s: file content overflows file data\n", file); + return ret; } @@ -2122,10 +2216,20 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware pos = le32_to_cpu(hdr->len); blocks = 0; - while (pos < firmware->size && - sizeof(*blk) < firmware->size - pos) { + while (pos < firmware->size) { + /* Is there enough data for a complete block header? 
*/ + if (sizeof(*blk) > firmware->size - pos) { + ret = -EOVERFLOW; + goto out_fw; + } + blk = (void *)(&firmware->data[pos]); + if (le32_to_cpu(blk->len) > firmware->size - pos - sizeof(*blk)) { + ret = -EOVERFLOW; + goto out_fw; + } + type = le16_to_cpu(blk->type); offset = le16_to_cpu(blk->offset); version = le32_to_cpu(blk->ver) >> 8; @@ -2222,17 +2326,6 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware } if (reg) { - if (le32_to_cpu(blk->len) > - firmware->size - pos - sizeof(*blk)) { - cs_dsp_err(dsp, - "%s.%d: %s region len %d bytes exceeds file length %zu\n", - file, blocks, region_name, - le32_to_cpu(blk->len), - firmware->size); - ret = -EINVAL; - goto out_fw; - } - buf = cs_dsp_buf_alloc(blk->data, le32_to_cpu(blk->len), &buf_list); @@ -2272,6 +2365,10 @@ out_fw: regmap_async_complete(regmap); cs_dsp_buf_free(&buf_list); kfree(text); + + if (ret == -EOVERFLOW) + cs_dsp_err(dsp, "%s: file content overflows file data\n", file); + return ret; } diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 3365944f7965..34109fd86c55 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -15,10 +15,6 @@ #include <asm/early_ioremap.h> #include <asm/efi.h> -#ifndef __efi_memmap_free -#define __efi_memmap_free(phys, size, flags) do { } while (0) -#endif - /** * __efi_memmap_init - Common code for mapping the EFI memory map * @data: EFI memory map data @@ -51,11 +47,6 @@ int __init __efi_memmap_init(struct efi_memory_map_data *data) return -ENOMEM; } - if (efi.memmap.flags & (EFI_MEMMAP_MEMBLOCK | EFI_MEMMAP_SLAB)) - __efi_memmap_free(efi.memmap.phys_map, - efi.memmap.desc_size * efi.memmap.nr_map, - efi.memmap.flags); - map.phys_map = data->phys_map; map.nr_map = data->size / data->desc_size; map.map_end = map.map + data->size; diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index d9629ff87861..2328ca58bba6 100644 --- a/drivers/firmware/psci/psci.c +++ 
b/drivers/firmware/psci/psci.c @@ -497,10 +497,12 @@ int psci_cpu_suspend_enter(u32 state) static int psci_system_suspend(unsigned long unused) { + int err; phys_addr_t pa_cpu_resume = __pa_symbol(cpu_resume); - return invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND), + err = invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND), pa_cpu_resume, 0, 0); + return psci_to_linux_errno(err); } static int psci_system_suspend_enter(suspend_state_t state) diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 880ffcb50088..921f61507ae8 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -101,8 +101,10 @@ static __init struct device *sysfb_parent_dev(const struct screen_info *si) if (IS_ERR(pdev)) { return ERR_CAST(pdev); } else if (pdev) { - if (!sysfb_pci_dev_is_enabled(pdev)) + if (!sysfb_pci_dev_is_enabled(pdev)) { + pci_dev_put(pdev); return ERR_PTR(-ENODEV); + } return &pdev->dev; } @@ -137,7 +139,7 @@ static __init int sysfb_init(void) if (compatible) { pd = sysfb_create_simplefb(si, &mode, parent); if (!IS_ERR(pd)) - goto unlock_mutex; + goto put_device; } /* if the FB is incompatible, create a legacy framebuffer device */ @@ -155,7 +157,7 @@ static __init int sysfb_init(void) pd = platform_device_alloc(name, 0); if (!pd) { ret = -ENOMEM; - goto unlock_mutex; + goto put_device; } pd->dev.parent = parent; @@ -170,9 +172,11 @@ static __init int sysfb_init(void) if (ret) goto err; - goto unlock_mutex; + goto put_device; err: platform_device_put(pd); +put_device: + put_device(parent); unlock_mutex: mutex_unlock(&disable_lock); return ret; diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index bb499e362912..1d0175d6350b 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -225,6 +225,11 @@ static int davinci_gpio_probe(struct platform_device *pdev) else nirq = DIV_ROUND_UP(ngpio, 16); + if (nirq > MAX_INT_PER_BANK) { + dev_err(dev, "Too many IRQs!\n"); + return -EINVAL; + } + chips = 
devm_kzalloc(dev, sizeof(*chips), GFP_KERNEL); if (!chips) return -ENOMEM; diff --git a/drivers/gpio/gpio-graniterapids.c b/drivers/gpio/gpio-graniterapids.c index c693fe05d50f..f2e911a3d2ca 100644 --- a/drivers/gpio/gpio-graniterapids.c +++ b/drivers/gpio/gpio-graniterapids.c @@ -296,6 +296,8 @@ static int gnr_gpio_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; + raw_spin_lock_init(&priv->lock); + regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(regs)) return PTR_ERR(regs); diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c index 71e1af7c2184..d89e78f0ead3 100644 --- a/drivers/gpio/gpio-mmio.c +++ b/drivers/gpio/gpio-mmio.c @@ -619,8 +619,6 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, ret = gpiochip_get_ngpios(gc, dev); if (ret) gc->ngpio = gc->bgpio_bits; - else - gc->bgpio_bits = roundup_pow_of_two(round_up(gc->ngpio, 8)); ret = bgpio_setup_io(gc, dat, set, clr, flags); if (ret) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 77a2812f2974..732a6964748c 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -758,6 +758,8 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d) int level; if (chip->driver_data & PCA_PCAL) { + guard(mutex)(&chip->i2c_lock); + /* Enable latch on interrupt-enabled inputs */ pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask); diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 9dad67ea2597..5639abce6ec5 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -89,6 +89,10 @@ struct linehandle_state { GPIOHANDLE_REQUEST_OPEN_DRAIN | \ GPIOHANDLE_REQUEST_OPEN_SOURCE) +#define GPIOHANDLE_REQUEST_DIRECTION_FLAGS \ + (GPIOHANDLE_REQUEST_INPUT | \ + GPIOHANDLE_REQUEST_OUTPUT) + static int linehandle_validate_flags(u32 flags) { /* Return an error if an unknown flag is set */ @@ -169,21 +173,21 @@ static long linehandle_set_config(struct linehandle_state *lh, if (ret) return 
ret; + /* Lines must be reconfigured explicitly as input or output. */ + if (!(lflags & GPIOHANDLE_REQUEST_DIRECTION_FLAGS)) + return -EINVAL; + for (i = 0; i < lh->num_descs; i++) { desc = lh->descs[i]; - linehandle_flags_to_desc_flags(gcnf.flags, &desc->flags); + linehandle_flags_to_desc_flags(lflags, &desc->flags); - /* - * Lines have to be requested explicitly for input - * or output, else the line will be treated "as is". - */ if (lflags & GPIOHANDLE_REQUEST_OUTPUT) { int val = !!gcnf.default_values[i]; ret = gpiod_direction_output(desc, val); if (ret) return ret; - } else if (lflags & GPIOHANDLE_REQUEST_INPUT) { + } else { ret = gpiod_direction_input(desc); if (ret) return ret; @@ -1530,12 +1534,14 @@ static long linereq_set_config(struct linereq *lr, void __user *ip) line = &lr->lines[i]; desc = lr->lines[i].desc; flags = gpio_v2_line_config_flags(&lc, i); - gpio_v2_line_config_flags_to_desc_flags(flags, &desc->flags); - edflags = flags & GPIO_V2_LINE_EDGE_DETECTOR_FLAGS; /* - * Lines have to be requested explicitly for input - * or output, else the line will be treated "as is". + * Lines not explicitly reconfigured as input or output + * are left unchanged. 
*/ + if (!(flags & GPIO_V2_LINE_DIRECTION_FLAGS)) + continue; + gpio_v2_line_config_flags_to_desc_flags(flags, &desc->flags); + edflags = flags & GPIO_V2_LINE_EDGE_DETECTOR_FLAGS; if (flags & GPIO_V2_LINE_FLAG_OUTPUT) { int val = gpio_v2_line_config_output_value(&lc, i); @@ -1543,7 +1549,7 @@ static long linereq_set_config(struct linereq *lr, void __user *ip) ret = gpiod_direction_output(desc, val); if (ret) return ret; - } else if (flags & GPIO_V2_LINE_FLAG_INPUT) { + } else { ret = gpiod_direction_input(desc); if (ret) return ret; diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index d75f6ee37028..89d5e64cf68b 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -203,6 +203,24 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np, */ { "qi,lb60", "rb-gpios", true }, #endif +#if IS_ENABLED(CONFIG_PCI_LANTIQ) + /* + * According to the PCI specification, the RST# pin is an + * active-low signal. However, most of the device trees that + * have been widely used for a long time incorrectly describe + * reset GPIO as active-high, and were also using wrong name + * for the property. + */ + { "lantiq,pci-xway", "gpio-reset", false }, +#endif +#if IS_ENABLED(CONFIG_TOUCHSCREEN_TSC2005) + /* + * DTS for Nokia N900 incorrectly specified "active high" + * polarity for the reset line, while the chip actually + * treats it as "active low". 
+ */ + { "ti,tsc2005", "reset-gpios", false }, +#endif }; unsigned int i; @@ -504,9 +522,9 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, { "reset", "reset-n-io", "marvell,nfc-uart" }, { "reset", "reset-n-io", "mrvl,nfc-uart" }, #endif -#if !IS_ENABLED(CONFIG_PCI_LANTIQ) +#if IS_ENABLED(CONFIG_PCI_LANTIQ) /* MIPS Lantiq PCI */ - { "reset", "gpios-reset", "lantiq,pci-xway" }, + { "reset", "gpio-reset", "lantiq,pci-xway" }, #endif /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 108003bdf1e9..2e13c7c4b2b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -400,7 +400,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, mem_channel_number = vram_info->v30.channel_num; mem_channel_width = vram_info->v30.channel_width; if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); + *vram_width = mem_channel_number * 16; break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ec888fc6ead8..916b6b8cf7d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1093,6 +1093,21 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) unsigned int i; int r; + /* + * We can't use gang submit on with reserved VMIDs when the VM changes + * can't be invalidated by more than one engine at the same time. 
+ */ + if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) { + for (i = 0; i < p->gang_size; ++i) { + struct drm_sched_entity *entity = p->entities[i]; + struct drm_gpu_scheduler *sched = entity->rq->sched; + struct amdgpu_ring *ring = to_amdgpu_ring(sched); + + if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) + return -EINVAL; + } + } + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 932dc93b2e63..33f791d92ddf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5220,11 +5220,14 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) dev_info(adev->dev, "GPU mode1 reset\n"); + /* Cache the state before bus master disable. The saved config space + * values are used in other cases like restore after mode-2 reset. + */ + amdgpu_device_cache_pci_state(adev->pdev); + /* disable BM */ pci_clear_master(adev->pdev); - amdgpu_device_cache_pci_state(adev->pdev); - if (amdgpu_dpm_is_mode1_reset_supported(adev)) { dev_info(adev->dev, "GPU smu mode1 reset\n"); ret = amdgpu_dpm_mode1_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 055ba2ea4c12..662d0f28f358 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -41,8 +41,6 @@ #include <linux/dma-buf.h> #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h> -#include <linux/pm_runtime.h> -#include "amdgpu_trace.h" /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation @@ -58,42 +56,11 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - int r; if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; - r = 
pm_runtime_get_sync(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(1, __func__); - if (r < 0) - goto out; - return 0; - -out: - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(0, __func__); - return r; -} - -/** - * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation - * - * @dmabuf: DMA-buf where we remove the attachment from - * @attach: the attachment to remove - * - * Called when an attachment is removed from the DMA-buf. - */ -static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, - struct dma_buf_attachment *attach) -{ - struct drm_gem_object *obj = dmabuf->priv; - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(0, __func__); } /** @@ -267,7 +234,6 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_dma_buf_attach, - .detach = amdgpu_dma_buf_detach, .pin = amdgpu_dma_buf_pin, .unpin = amdgpu_dma_buf_unpin, .map_dma_buf = amdgpu_dma_buf_map, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 10832b470448..bc3ac73b6b8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -181,7 +181,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); pm_runtime_get_noresume(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(1, __func__); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { struct dma_fence *old; @@ -309,7 +308,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) dma_fence_put(fence); 
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(0, __func__); } while (last_seq != seq); return true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index be4629cdac04..08b9dfb65335 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -684,12 +684,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; unsigned int ndw; - signed long r; + int r; uint32_t seq; - if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready || - !down_read_trylock(&adev->reset_domain->sem)) { + /* + * A GPU reset should flush all TLBs anyway, so no need to do + * this while one is ongoing. + */ + if (!down_read_trylock(&adev->reset_domain->sem)) + return 0; + if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) { if (adev->gmc.flush_tlb_needs_extra_type_2) adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, 2, all_hub, @@ -703,43 +708,40 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); - return 0; - } + r = 0; + } else { + /* 2 dwords flush + 8 dwords fence */ + ndw = kiq->pmf->invalidate_tlbs_size + 8; - /* 2 dwords flush + 8 dwords fence */ - ndw = kiq->pmf->invalidate_tlbs_size + 8; + if (adev->gmc.flush_tlb_needs_extra_type_2) + ndw += kiq->pmf->invalidate_tlbs_size; - if (adev->gmc.flush_tlb_needs_extra_type_2) - ndw += kiq->pmf->invalidate_tlbs_size; + if (adev->gmc.flush_tlb_needs_extra_type_0) + ndw += kiq->pmf->invalidate_tlbs_size; - if (adev->gmc.flush_tlb_needs_extra_type_0) - ndw += kiq->pmf->invalidate_tlbs_size; + spin_lock(&adev->gfx.kiq[inst].ring_lock); + amdgpu_ring_alloc(ring, ndw); + if (adev->gmc.flush_tlb_needs_extra_type_2) + 
kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); - spin_lock(&adev->gfx.kiq[inst].ring_lock); - amdgpu_ring_alloc(ring, ndw); - if (adev->gmc.flush_tlb_needs_extra_type_2) - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); + if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); - if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); + goto error_unlock_reset; + } - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) { - amdgpu_ring_undo(ring); + amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq[inst].ring_lock); - goto error_unlock_reset; - } - - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq[inst].ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); - if (r < 1) { - dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); - r = -ETIME; - goto error_unlock_reset; + if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) { + dev_err(adev->dev, "timeout waiting for kiq fence\n"); + r = -ETIME; + } } - r = 0; error_unlock_reset: up_read(&adev->reset_domain->sem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 3d7fcdeaf8cf..e8f6e4dbc5a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -406,7 +406,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) { + if (amdgpu_vmid_uses_reserved(vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r 
|| !id) goto error; @@ -456,6 +456,19 @@ error: return r; } +/* + * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID + * @vm: the VM to check + * @vmhub: the VMHUB which will be used + * + * Returns: True if the VM will use a reserved VMID. + */ +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) +{ + return vm->reserved_vmid[vmhub] || + (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0))); +} + int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index fa8c42c83d5d..240fa6751260 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,6 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 4bd4602d11b1..cef9dd0a012b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -640,6 +640,20 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) } } +static bool psp_err_warn(struct psp_context *psp) +{ + struct psp_gfx_cmd_resp *cmd = psp->cmd_buf_mem; + + /* This response indicates reg list is already loaded */ + if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) && + cmd->cmd_id == GFX_CMD_ID_LOAD_IP_FW && + cmd->cmd.cmd_load_ip_fw.fw_type == GFX_FW_TYPE_REG_LIST && + cmd->resp.status == TEE_ERROR_CANCEL) + return false; + + return true; +} + static int psp_cmd_submit_buf(struct psp_context *psp, struct amdgpu_firmware_info *ucode, @@ -699,10 +713,13 @@ psp_cmd_submit_buf(struct psp_context 
*psp, dev_warn(psp->adev->dev, "failed to load ucode %s(0x%X) ", amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); - dev_warn(psp->adev->dev, - "psp gfx command %s(0x%X) failed and response status is (0x%X)\n", - psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id, - psp->cmd_buf_mem->resp.status); + if (psp_err_warn(psp)) + dev_warn( + psp->adev->dev, + "psp gfx command %s(0x%X) failed and response status is (0x%X)\n", + psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), + psp->cmd_buf_mem->cmd_id, + psp->cmd_buf_mem->resp.status); /* If any firmware (including CAP) load fails under SRIOV, it should * return failure to stop the VF from initializing. * Also return failure in case of timeout diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 7aafeb763e5d..383fce40d4dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -554,21 +554,6 @@ TRACE_EVENT(amdgpu_reset_reg_dumps, __entry->value) ); -TRACE_EVENT(amdgpu_runpm_reference_dumps, - TP_PROTO(uint32_t index, const char *func), - TP_ARGS(index, func), - TP_STRUCT__entry( - __field(uint32_t, index) - __string(func, func) - ), - TP_fast_assign( - __entry->index = index; - __assign_str(func); - ), - TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n", - __entry->index, - __get_str(func)) -); #undef AMDGPU_JOB_GET_TIMELINE_NAME #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index e30eecd02ae1..fde66225c481 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -3,6 +3,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_edid.h> #include <drm/drm_simple_kms_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_vblank.h> #include "amdgpu.h" @@ -314,7 +315,13 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, return 0; } afb = to_amdgpu_framebuffer(new_state->fb); 
- obj = new_state->fb->obj[0]; + + obj = drm_gem_fb_get_obj(new_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return -EINVAL; + } + rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); @@ -368,12 +375,19 @@ static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { struct amdgpu_bo *rbo; + struct drm_gem_object *obj; int r; if (!old_state->fb) return; - rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); + obj = drm_gem_fb_get_obj(old_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return; + } + + rbo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(rbo, false); if (unlikely(r)) { DRM_ERROR("failed to reserve rbo before unpin\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 7b16e8cca86a..f5b9f443cfdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4195,9 +4195,10 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_i static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info) { - int i, j, k, counter, xcc_id, active_cu_number = 0; - u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; + int i, j, k, prev_counter, counter, xcc_id, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0, tmp; unsigned disable_masks[4 * 4]; + bool is_symmetric_cus; if (!adev || !cu_info) return -EINVAL; @@ -4215,6 +4216,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { + is_symmetric_cus = true; for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { mask = 1; @@ -4242,6 +4244,15 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); cu_info->ao_cu_bitmap[i][j] 
= ao_bitmap; } + if (i && is_symmetric_cus && prev_counter != counter) + is_symmetric_cus = false; + prev_counter = counter; + } + if (is_symmetric_cus) { + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG); + tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_RELAUNCH_DISABLE, 1); + tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_DISPATCH_DISABLE, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG, tmp); } gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 0d1407f25005..32d4519541c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -154,18 +154,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, void *pkt, int size, int api_status_off) { - int ndw = size / 4; - signed long r; - union MESAPI__MISC *x_pkt = pkt; - struct MES_API_STATUS *api_status; + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; - unsigned long flags; - signed long timeout = 3000000; /* 3000 ms */ + struct MES_API_STATUS *api_status; + union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; - u32 fence_offset; - u64 fence_gpu_addr; - u64 *fence_ptr; + unsigned long flags; + u64 status_gpu_addr; + u32 status_offset; + u64 *status_ptr; + signed long r; int ret; if (x_pkt->header.opcode >= MES_SCH_API_MAX) @@ -177,28 +177,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ timeout = 15 * 600 * 1000; } - BUG_ON(size % 4 != 0); - ret = amdgpu_device_wb_get(adev, &fence_offset); + ret = amdgpu_device_wb_get(adev, &status_offset); if (ret) return ret; - fence_gpu_addr = - adev->wb.gpu_addr + (fence_offset * 4); - fence_ptr = 
(u64 *)&adev->wb.wb[fence_offset]; - *fence_ptr = 0; + + status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); + status_ptr = (u64 *)&adev->wb.wb[status_offset]; + *status_ptr = 0; spin_lock_irqsave(&mes->ring_lock, flags); - if (amdgpu_ring_alloc(ring, ndw)) { - spin_unlock_irqrestore(&mes->ring_lock, flags); - amdgpu_device_wb_free(adev, fence_offset); - return -ENOMEM; - } + r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); + if (r) + goto error_unlock_free; api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); - api_status->api_completion_fence_addr = fence_gpu_addr; + api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; - amdgpu_ring_write_multiple(ring, pkt, ndw); + amdgpu_ring_write_multiple(ring, pkt, size / 4); + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_status_pkt.api_status.api_completion_fence_addr = + ring->fence_drv.gpu_addr; + mes_status_pkt.api_status.api_completion_fence_value = + ++ring->fence_drv.sync_seq; + + amdgpu_ring_write_multiple(ring, &mes_status_pkt, + sizeof(mes_status_pkt) / 4); + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&mes->ring_lock, flags); @@ -206,15 +216,16 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str); + dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, + misc_op_str); else if (op_str) dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); + dev_dbg(adev->dev, "MES msg=%d was emitted\n", + x_pkt->header.opcode); - r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout); - 
amdgpu_device_wb_free(adev, fence_offset); - if (r < 1) { + r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + if (r < 1 || !*status_ptr) { if (misc_op_str) dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", @@ -229,10 +240,19 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, while (halt_if_hws_hang) schedule(); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto error_wb_free; } + amdgpu_device_wb_free(adev, status_offset); return 0; + +error_unlock_free: + spin_unlock_irqrestore(&mes->ring_lock, flags); + +error_wb_free: + amdgpu_device_wb_free(adev, status_offset); + return r; } static int convert_to_mes_queue_type(int queue_type) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 7566973ed8f5..37b5ddd6f13b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -464,8 +464,9 @@ struct psp_gfx_rb_frame #define PSP_ERR_UNKNOWN_COMMAND 0x00000100 enum tee_error_code { - TEE_SUCCESS = 0x00000000, - TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A, + TEE_SUCCESS = 0x00000000, + TEE_ERROR_CANCEL = 0xFFFF0002, + TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A, }; #endif /* _PSP_TEE_GFX_IF_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index f08a32c18694..40b28298af30 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -32,7 +32,9 @@ #include "mp/mp_14_0_2_sh_mask.h" MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -66,6 +68,9 @@ static int psp_v14_0_init_microcode(struct psp_context *psp) err = psp_init_sos_microcode(psp, ucode_prefix); if (err) return err; + err = psp_init_ta_microcode(psp, ucode_prefix); + if 
(err) + return err; break; default: BUG(); diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 5fcd4f778dc3..47b8b49da8a7 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -8,7 +8,7 @@ config DRM_AMD_DC depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64 select SND_HDA_COMPONENT if SND_HDA_CORE # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752 - select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && (!ARM64 || !CC_IS_CLANG) + select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || RISCV)) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f1d67c6f4b98..a622aca8c649 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9169,9 +9169,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) trace_amdgpu_dm_atomic_commit_tail_begin(state); - if (dm->dc->caps.ips_support && dm->dc->idle_optimizations_allowed) - dc_allow_idle_optimizations(dm->dc, false); - drm_atomic_helper_update_legacy_modeset_state(dev, state); drm_dp_mst_atomic_wait_for_dependencies(state); @@ -11184,6 +11181,49 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector, return ret; } +static void parse_edid_displayid_vrr(struct drm_connector *connector, + struct edid *edid) +{ + u8 *edid_ext = NULL; + int i; + int j = 0; + u16 min_vfreq; + u16 max_vfreq; + + if (edid == NULL || edid->extensions == 0) + return; + + /* Find DisplayID extension */ + for (i = 0; i < edid->extensions; i++) { + edid_ext = (void *)(edid + (i + 1)); + if (edid_ext[0] == DISPLAYID_EXT) + break; + } + + if (edid_ext == NULL) + return; + + while (j < EDID_LENGTH) { + /* Get dynamic video 
timing range from DisplayID if available */ + if (EDID_LENGTH - j > 13 && edid_ext[j] == 0x25 && + (edid_ext[j+1] & 0xFE) == 0 && (edid_ext[j+2] == 9)) { + min_vfreq = edid_ext[j+9]; + if (edid_ext[j+1] & 7) + max_vfreq = edid_ext[j+10] + ((edid_ext[j+11] & 3) << 8); + else + max_vfreq = edid_ext[j+10]; + + if (max_vfreq && min_vfreq) { + connector->display_info.monitor_range.max_vfreq = max_vfreq; + connector->display_info.monitor_range.min_vfreq = min_vfreq; + + return; + } + } + j++; + } +} + static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector, struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info) { @@ -11305,6 +11345,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (!adev->dm.freesync_module) goto update; + /* Some eDP panels only have the refresh rate range info in DisplayID */ + if ((connector->display_info.monitor_range.min_vfreq == 0 || + connector->display_info.monitor_range.max_vfreq == 0)) + parse_edid_displayid_vrr(connector, edid); + if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || sink->sink_signal == SIGNAL_TYPE_EDP)) { bool edid_check_required = false; @@ -11312,9 +11357,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (is_dp_capable_without_timing_msa(adev->dm.dc, amdgpu_dm_connector)) { if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) { - freesync_capable = true; amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; + if (amdgpu_dm_connector->max_vfreq - + amdgpu_dm_connector->min_vfreq > 10) + freesync_capable = true; } else { edid_check_required = edid->version > 1 || (edid->version == 1 && @@ -11440,6 +11487,12 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev) mutex_unlock(&adev->dm.dc_lock); } +static inline void amdgpu_dm_exit_ips_for_hw_access(struct dc *dc) +{ + if (dc->ctx->dmub_srv && !dc->ctx->dmub_srv->idle_exit_counter) + 
dc_exit_ips_for_hw_access(dc); +} + void dm_write_reg_func(const struct dc_context *ctx, uint32_t address, u32 value, const char *func_name) { @@ -11450,6 +11503,8 @@ void dm_write_reg_func(const struct dc_context *ctx, uint32_t address, return; } #endif + + amdgpu_dm_exit_ips_for_hw_access(ctx->dc); cgs_write_register(ctx->cgs_device, address, value); trace_amdgpu_dc_wreg(&ctx->perf_trace->write_count, address, value); } @@ -11473,6 +11528,8 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address, return 0; } + amdgpu_dm_exit_ips_for_hw_access(ctx->dc); + value = cgs_read_register(ctx->cgs_device, address); trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 6c84b0fa40f4..0782a34689a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -3364,6 +3364,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.UrgentBurstFactorLumaPre[k], &mode_lib->vba.UrgentBurstFactorChromaPre[k], &mode_lib->vba.NotUrgentLatencyHidingPre[k]); + + v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / + 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPreY[i][j][k]; } { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 60f251cf973b..beed7adbbd43 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -177,7 +177,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, .urgent_latency_vm_data_only_us = 4.0, - .dram_clock_change_latency_us = 
11.72, + .dram_clock_change_latency_us = 34.0, .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index e4f333d4fb54..a201dbb743d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -215,7 +215,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = { .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, .urgent_latency_vm_data_only_us = 4.0, - .dram_clock_change_latency_us = 11.72, + .dram_clock_change_latency_us = 34, .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index a41812598ce8..8ecc972dbffd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -234,6 +234,7 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s out->round_trip_ping_latency_dcfclk_cycles = 106; out->smn_latency_us = 2; out->dispclk_dppclk_vco_speed_mhz = 3600; + out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0; break; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 0f8b3336e26d..cbd1c1f26b7a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -294,7 +294,7 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont 
context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000; context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx] == dml_fclock_change_unsupported) + if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported) context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; else context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 5295f52e4fc8..dcced89c07b3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1439,3 +1439,75 @@ void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx, } } } + +static bool should_avoid_empty_tu(struct pipe_ctx *pipe_ctx) +{ + /* Calculate average pixel count per TU, return false if under ~2.00 to + * avoid empty TUs. This is only required for DPIA tunneling as empty TUs + * are legal to generate for native DP links. Assume TU size 64 as there + * is currently no scenario where it's reprogrammed from HW default. + * MTPs have no such limitation, so this does not affect MST use cases. 
+ */ + unsigned int pix_clk_mhz; + unsigned int symclk_mhz; + unsigned int avg_pix_per_tu_x1000; + unsigned int tu_size_bytes = 64; + struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; + struct dc_link_settings *link_settings = &pipe_ctx->link_config.dp_link_settings; + const struct dc *dc = pipe_ctx->stream->link->dc; + + if (pipe_ctx->stream->link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) + return false; + + // Not necessary for MST configurations + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) + return false; + + pix_clk_mhz = timing->pix_clk_100hz / 10000; + + // If this is true, can't block due to dynamic ODM + if (pix_clk_mhz > dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz) + return false; + + switch (link_settings->link_rate) { + case LINK_RATE_LOW: + symclk_mhz = 162; + break; + case LINK_RATE_HIGH: + symclk_mhz = 270; + break; + case LINK_RATE_HIGH2: + symclk_mhz = 540; + break; + case LINK_RATE_HIGH3: + symclk_mhz = 810; + break; + default: + // We shouldn't be tunneling any other rates, something is wrong + ASSERT(0); + return false; + } + + avg_pix_per_tu_x1000 = (1000 * pix_clk_mhz * tu_size_bytes) + / (symclk_mhz * link_settings->lane_count); + + // Add small empirically-decided margin to account for potential jitter + return (avg_pix_per_tu_x1000 < 2020); +} + +bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx) +{ + struct dc *dc = pipe_ctx->stream->ctx->dc; + + if (!is_h_timing_divisible_by_2(pipe_ctx->stream)) + return false; + + if (should_avoid_empty_tu(pipe_ctx)) + return false; + + if (dc_is_dp_signal(pipe_ctx->stream->signal) && !dc->link_srv->dp_is_128b_132b_signal(pipe_ctx) && + dc->debug.enable_dp_dig_pixel_rate_div_policy) + return true; + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h index a731c8880d60..f0ea7d1511ae 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h +++ 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h @@ -95,4 +95,6 @@ void dcn35_set_static_screen_control(struct pipe_ctx **pipe_ctx, void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max); +bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx); + #endif /* __DC_HWSS_DCN35_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index df3bf77f3fb4..199781233fd5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -158,7 +158,7 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, - .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, + .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy, .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index a01d0842bf8e..d487dfcd219b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1590,9 +1590,17 @@ static bool retrieve_link_cap(struct dc_link *link) return false; } - if (dp_is_lttpr_present(link)) + if (dp_is_lttpr_present(link)) { configure_lttpr_mode_transparent(link); + // Echo TOTAL_LTTPR_CNT back downstream + core_link_write_dpcd( + link, + DP_TOTAL_LTTPR_CNT, + &link->dpcd_caps.lttpr_caps.phy_repeater_cnt, + sizeof(link->dpcd_caps.lttpr_caps.phy_repeater_cnt)); + } + /* Read DP tunneling information. 
*/ status = dpcd_get_tunneling_device_data(link); diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h index 914f28e9f224..aee5170f5fb2 100644 --- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h +++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h @@ -177,4 +177,9 @@ enum dpcd_psr_sink_states { #define DP_SINK_PR_PIXEL_DEVIATION_PER_LINE 0x379 #define DP_SINK_PR_MAX_NUMBER_OF_DEVIATION_LINE 0x37A +/* Remove once drm_dp_helper.h is updated upstream */ +#ifndef DP_TOTAL_LTTPR_CNT +#define DP_TOTAL_LTTPR_CNT 0xF000A /* 2.1 */ +#endif + #endif /* __DAL_DPCD_DEFS_H__ */ diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 571691837200..09cbc3afd6d8 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -734,7 +734,7 @@ struct atom_gpio_pin_lut_v2_1 { struct atom_common_table_header table_header; /*the real number of this included in the structure is calcualted by using the (whole structure size - the header size)/size of atom_gpio_pin_lut */ - struct atom_gpio_pin_assignment gpio_pin[8]; + struct atom_gpio_pin_assignment gpio_pin[]; }; diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 6bb42d04b247..e8b6989a40f3 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -164,6 +164,8 @@ static void sumo_construct_vid_mapping_table(struct amdgpu_device *adev, for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) { if (table[i].ulSupportedSCLK != 0) { + if (table[i].usVoltageIndex >= SUMO_MAX_NUMBER_VOLTAGES) + continue; vid_mapping_table->entries[table[i].usVoltageIndex].vid_7bit = table[i].usVoltageID; vid_mapping_table->entries[table[i].usVoltageIndex].vid_2bit = diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 7789b313285c..e1796ecf9c05 100644 --- 
a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -324,6 +324,18 @@ static int smu_dpm_set_umsch_mm_enable(struct smu_context *smu, return ret; } +static int smu_set_mall_enable(struct smu_context *smu) +{ + int ret = 0; + + if (!smu->ppt_funcs->set_mall_enable) + return 0; + + ret = smu->ppt_funcs->set_mall_enable(smu); + + return ret; +} + /** * smu_dpm_set_power_gate - power gate/ungate the specific IP block * @@ -1791,6 +1803,7 @@ static int smu_hw_init(void *handle) smu_dpm_set_jpeg_enable(smu, true); smu_dpm_set_vpe_enable(smu, true); smu_dpm_set_umsch_mm_enable(smu, true); + smu_set_mall_enable(smu); smu_set_gfx_cgpg(smu, true); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 0917dec8efe3..64ccdb5f14ea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1395,6 +1395,11 @@ struct pptable_funcs { int (*dpm_set_umsch_mm_enable)(struct smu_context *smu, bool enable); /** + * @set_mall_enable: Init MALL power gating control. + */ + int (*set_mall_enable)(struct smu_context *smu); + + /** * @notify_rlc_state: Notify RLC power state to SMU. 
*/ int (*notify_rlc_state)(struct smu_context *smu, bool en); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h index c4dc5881d8df..e7f5ef49049f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h @@ -106,8 +106,8 @@ #define PPSMC_MSG_DisableLSdma 0x35 ///< Disable LSDMA #define PPSMC_MSG_SetSoftMaxVpe 0x36 ///< #define PPSMC_MSG_SetSoftMinVpe 0x37 ///< -#define PPSMC_MSG_AllocMALLCache 0x38 ///< Allocating MALL Cache -#define PPSMC_MSG_ReleaseMALLCache 0x39 ///< Releasing MALL Cache +#define PPSMC_MSG_MALLPowerController 0x38 ///< Set MALL control +#define PPSMC_MSG_MALLPowerState 0x39 ///< Enter/Exit MALL PG #define PPSMC_Message_Count 0x3A ///< Total number of PPSMC messages /** @}*/ diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index c48214e3dc8e..2e32b085824a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -272,7 +272,9 @@ __SMU_DUMMY_MAP(SetSoftMinVpe), \ __SMU_DUMMY_MAP(GetMetricsVersion), \ __SMU_DUMMY_MAP(EnableUCLKShadow), \ - __SMU_DUMMY_MAP(RmaDueToBadPageThreshold), + __SMU_DUMMY_MAP(RmaDueToBadPageThreshold), \ + __SMU_DUMMY_MAP(MALLPowerController), \ + __SMU_DUMMY_MAP(MALLPowerState), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index e4419e1561ef..18abfbd6d059 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -52,6 +52,19 @@ #define mmMP1_SMN_C2PMSG_90 0x029a #define mmMP1_SMN_C2PMSG_90_BASE_IDX 0 +/* MALLPowerController message arguments (Defines for the Cache mode control) */ +#define SMU_MALL_PMFW_CONTROL 0 +#define 
SMU_MALL_DRIVER_CONTROL 1 + +/* + * MALLPowerState message arguments + * (Defines for the Allocate/Release Cache mode if in driver mode) + */ +#define SMU_MALL_EXIT_PG 0 +#define SMU_MALL_ENTER_PG 1 + +#define SMU_MALL_PG_CONFIG_DEFAULT SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON + #define FEATURE_MASK(feature) (1ULL << feature) #define SMC_DPM_FEATURE ( \ FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \ @@ -66,6 +79,12 @@ FEATURE_MASK(FEATURE_GFX_DPM_BIT) | \ FEATURE_MASK(FEATURE_VPE_DPM_BIT)) +enum smu_mall_pg_config { + SMU_MALL_PG_CONFIG_PMFW_CONTROL = 0, + SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON = 1, + SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_OFF = 2, +}; + static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1), @@ -113,6 +132,8 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PowerDownUmsch, PPSMC_MSG_PowerDownUmsch, 1), MSG_MAP(SetSoftMaxVpe, PPSMC_MSG_SetSoftMaxVpe, 1), MSG_MAP(SetSoftMinVpe, PPSMC_MSG_SetSoftMinVpe, 1), + MSG_MAP(MALLPowerController, PPSMC_MSG_MALLPowerController, 1), + MSG_MAP(MALLPowerState, PPSMC_MSG_MALLPowerState, 1), }; static struct cmn2asic_mapping smu_v14_0_0_feature_mask_map[SMU_FEATURE_COUNT] = { @@ -1423,6 +1444,57 @@ static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_cl return 0; } +static int smu_v14_0_1_init_mall_power_gating(struct smu_context *smu, enum smu_mall_pg_config pg_config) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + + if (pg_config == SMU_MALL_PG_CONFIG_PMFW_CONTROL) { + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerController, + SMU_MALL_PMFW_CONTROL, NULL); + if (ret) { + dev_err(adev->dev, "Init MALL PMFW CONTROL Failure\n"); + return ret; + } + } else { + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerController, + SMU_MALL_DRIVER_CONTROL, NULL); + if (ret) { + 
dev_err(adev->dev, "Init MALL Driver CONTROL Failure\n"); + return ret; + } + + if (pg_config == SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON) { + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerState, + SMU_MALL_EXIT_PG, NULL); + if (ret) { + dev_err(adev->dev, "EXIT MALL PG Failure\n"); + return ret; + } + } else if (pg_config == SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_OFF) { + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerState, + SMU_MALL_ENTER_PG, NULL); + if (ret) { + dev_err(adev->dev, "Enter MALL PG Failure\n"); + return ret; + } + } + } + + return ret; +} + +static int smu_v14_0_common_set_mall_enable(struct smu_context *smu) +{ + enum smu_mall_pg_config pg_config = SMU_MALL_PG_CONFIG_DEFAULT; + int ret = 0; + + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) + ret = smu_v14_0_1_init_mall_power_gating(smu, pg_config); + + return ret; +} + static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .check_fw_status = smu_v14_0_check_fw_status, .check_fw_version = smu_v14_0_check_fw_version, @@ -1454,6 +1526,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable, .dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable, .get_dpm_clock_table = smu_v14_0_common_get_dpm_table, + .set_mall_enable = smu_v14_0_common_set_mall_enable, }; static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index ea271f62b214..ec0b7f3d889c 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -401,7 +401,7 @@ struct adv7511 { #ifdef CONFIG_DRM_I2C_ADV7511_CEC int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511); -void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1); +int adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1); #else static inline int 
adv7511_cec_init(struct device *dev, struct adv7511 *adv7511) { diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c index 44451a9658a3..2e9c88a2b5ed 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c @@ -119,7 +119,7 @@ static void adv7511_cec_rx(struct adv7511 *adv7511, int rx_buf) cec_received_msg(adv7511->cec_adap, &msg); } -void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1) +int adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1) { unsigned int offset = adv7511->info->reg_cec_offset; const u32 irq_tx_mask = ADV7511_INT1_CEC_TX_READY | @@ -131,16 +131,19 @@ void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1) unsigned int rx_status; int rx_order[3] = { -1, -1, -1 }; int i; + int irq_status = IRQ_NONE; - if (irq1 & irq_tx_mask) + if (irq1 & irq_tx_mask) { adv_cec_tx_raw_status(adv7511, irq1); + irq_status = IRQ_HANDLED; + } if (!(irq1 & irq_rx_mask)) - return; + return irq_status; if (regmap_read(adv7511->regmap_cec, ADV7511_REG_CEC_RX_STATUS + offset, &rx_status)) - return; + return irq_status; /* * ADV7511_REG_CEC_RX_STATUS[5:0] contains the reception order of RX @@ -172,6 +175,8 @@ void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1) adv7511_cec_rx(adv7511, rx_buf); } + + return IRQ_HANDLED; } static int adv7511_cec_adap_enable(struct cec_adapter *adap, bool enable) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 66ccb61e2a66..c8d2c4a157b2 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -469,6 +469,8 @@ static int adv7511_irq_process(struct adv7511 *adv7511, bool process_hpd) { unsigned int irq0, irq1; int ret; + int cec_status = IRQ_NONE; + int irq_status = IRQ_NONE; ret = regmap_read(adv7511->regmap, ADV7511_REG_INT(0), &irq0); if (ret < 0) @@ 
-478,29 +480,31 @@ static int adv7511_irq_process(struct adv7511 *adv7511, bool process_hpd) if (ret < 0) return ret; - /* If there is no IRQ to handle, exit indicating no IRQ data */ - if (!(irq0 & (ADV7511_INT0_HPD | ADV7511_INT0_EDID_READY)) && - !(irq1 & ADV7511_INT1_DDC_ERROR)) - return -ENODATA; - regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0); regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1); - if (process_hpd && irq0 & ADV7511_INT0_HPD && adv7511->bridge.encoder) + if (process_hpd && irq0 & ADV7511_INT0_HPD && adv7511->bridge.encoder) { schedule_work(&adv7511->hpd_work); + irq_status = IRQ_HANDLED; + } if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) { adv7511->edid_read = true; if (adv7511->i2c_main->irq) wake_up_all(&adv7511->wq); + irq_status = IRQ_HANDLED; } #ifdef CONFIG_DRM_I2C_ADV7511_CEC - adv7511_cec_irq_process(adv7511, irq1); + cec_status = adv7511_cec_irq_process(adv7511, irq1); #endif - return 0; + /* If there is no IRQ to handle, exit indicating no IRQ data */ + if (irq_status == IRQ_HANDLED || cec_status == IRQ_HANDLED) + return IRQ_HANDLED; + + return IRQ_NONE; } static irqreturn_t adv7511_irq_handler(int irq, void *devid) @@ -509,7 +513,7 @@ static irqreturn_t adv7511_irq_handler(int irq, void *devid) int ret; ret = adv7511_irq_process(adv7511, true); - return ret < 0 ? IRQ_NONE : IRQ_HANDLED; + return ret < 0 ? 
IRQ_NONE : ret; } /* ----------------------------------------------------------------------------- diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index d612133e2cf7..117237d3528b 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -524,6 +524,9 @@ struct fb_info *drm_fb_helper_alloc_info(struct drm_fb_helper *fb_helper) if (!info) return ERR_PTR(-ENOMEM); + if (!drm_leak_fbdev_smem) + info->flags |= FBINFO_HIDE_SMEM_START; + ret = fb_alloc_cmap(&info->cmap, 256, 0); if (ret) goto err_release; @@ -1860,9 +1863,6 @@ __drm_fb_helper_initial_config_and_unlock(struct drm_fb_helper *fb_helper) info = fb_helper->info; info->var.pixclock = 0; - if (!drm_leak_fbdev_smem) - info->flags |= FBINFO_HIDE_SMEM_START; - /* Need to drop locks to avoid recursive deadlock in * register_framebuffer. This is ok because the only thing left to do is * register the fbdev emulation instance in kernel_fb_helper_list. */ diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c index 6c9427bb4053..13cd754af311 100644 --- a/drivers/gpu/drm/drm_fbdev_dma.c +++ b/drivers/gpu/drm/drm_fbdev_dma.c @@ -130,7 +130,10 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, info->flags |= FBINFO_READS_FAST; /* signal caching */ info->screen_size = sizes->surface_height * fb->pitches[0]; info->screen_buffer = map.vaddr; - info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); + if (!(info->flags & FBINFO_HIDE_SMEM_START)) { + if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer))) + info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); + } info->fix.smem_len = info->screen_size; return 0; diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c index 97e579c33d84..1e200d815e1a 100644 --- a/drivers/gpu/drm/drm_fbdev_generic.c +++ b/drivers/gpu/drm/drm_fbdev_generic.c @@ -84,7 +84,8 @@ static int 
drm_fbdev_generic_helper_fb_probe(struct drm_fb_helper *fb_helper, sizes->surface_width, sizes->surface_height, sizes->surface_bpp); - format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); + format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp, + sizes->surface_depth); buffer = drm_client_framebuffer_create(client, sizes->surface_width, sizes->surface_height, format); if (IS_ERR(buffer)) diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 638ffa4444f5..714e42b05108 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -469,14 +469,12 @@ void drm_file_update_pid(struct drm_file *filp) dev = filp->minor->dev; mutex_lock(&dev->filelist_mutex); + get_pid(pid); old = rcu_replace_pointer(filp->pid, pid, 1); mutex_unlock(&dev->filelist_mutex); - if (pid != old) { - get_pid(pid); - synchronize_rcu(); - put_pid(old); - } + synchronize_rcu(); + put_pid(old); } /** diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 2166208a961d..3860a8ce1e2d 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -420,13 +420,20 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galaxy Book 10.6"), }, .driver_data = (void *)&lcd1280x1920_rightside_up, - }, { /* Valve Steam Deck */ + }, { /* Valve Steam Deck (Jupiter) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jupiter"), DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Valve Steam Deck (Galileo) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galileo"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* VIOS LTH17 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"), diff --git 
a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c index f08a6803dc18..3adc2c9ab72d 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c +++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c @@ -311,6 +311,9 @@ static int cdv_intel_lvds_get_modes(struct drm_connector *connector) if (mode_dev->panel_fixed_mode != NULL) { struct drm_display_mode *mode = drm_mode_duplicate(dev, mode_dev->panel_fixed_mode); + if (!mode) + return 0; + drm_mode_probed_add(connector, mode); return 1; } diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c index 8486de230ec9..8d1be94a443b 100644 --- a/drivers/gpu/drm/gma500/psb_intel_lvds.c +++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c @@ -504,6 +504,9 @@ static int psb_intel_lvds_get_modes(struct drm_connector *connector) if (mode_dev->panel_fixed_mode != NULL) { struct drm_display_mode *mode = drm_mode_duplicate(dev, mode_dev->panel_fixed_mode); + if (!mode) + return 0; + drm_mode_probed_add(connector, mode); return 1; } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 3c3fc53376ce..6bff169fa8d4 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2088,6 +2088,9 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, u32 ln0, ln1, pin_assignment; u8 width; + if (DISPLAY_VER(dev_priv) >= 14) + return; + if (!intel_encoder_is_tc(&dig_port->base) || intel_tc_port_in_tbt_alt_mode(dig_port)) return; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index e05e25cd4a94..5b3b6ae1e3d7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -442,6 +442,10 @@ bool intel_dp_has_bigjoiner(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + /* eDP MSO is not compatible with joiner */ + if 
(intel_dp->mso_link_count) + return false; + return DISPLAY_VER(dev_priv) >= 12 || (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A); diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 40371b8a9bbb..93bc1cc1ee7e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -298,6 +298,7 @@ void i915_vma_revoke_fence(struct i915_vma *vma) return; GEM_BUG_ON(fence->vma != vma); + i915_active_wait(&fence->active); GEM_BUG_ON(!i915_active_is_idle(&fence->active)); GEM_BUG_ON(atomic_read(&fence->pin_count)); diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 17a5cca007e2..4bd0baa2a4f5 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -250,29 +250,20 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) if (ret) goto free_drm; ret = meson_canvas_alloc(priv->canvas, &priv->canvas_id_vd1_0); - if (ret) { - meson_canvas_free(priv->canvas, priv->canvas_id_osd1); - goto free_drm; - } + if (ret) + goto free_canvas_osd1; ret = meson_canvas_alloc(priv->canvas, &priv->canvas_id_vd1_1); - if (ret) { - meson_canvas_free(priv->canvas, priv->canvas_id_osd1); - meson_canvas_free(priv->canvas, priv->canvas_id_vd1_0); - goto free_drm; - } + if (ret) + goto free_canvas_vd1_0; ret = meson_canvas_alloc(priv->canvas, &priv->canvas_id_vd1_2); - if (ret) { - meson_canvas_free(priv->canvas, priv->canvas_id_osd1); - meson_canvas_free(priv->canvas, priv->canvas_id_vd1_0); - meson_canvas_free(priv->canvas, priv->canvas_id_vd1_1); - goto free_drm; - } + if (ret) + goto free_canvas_vd1_1; priv->vsync_irq = platform_get_irq(pdev, 0); ret = drm_vblank_init(drm, 1); if (ret) - goto free_drm; + goto free_canvas_vd1_2; /* Assign limits per soc revision/package */ for (i = 0 ; i < ARRAY_SIZE(meson_drm_soc_attrs) ; ++i) { @@ -288,11 +279,11 @@ static int meson_drv_bind_master(struct device 
*dev, bool has_components) */ ret = drm_aperture_remove_framebuffers(&meson_driver); if (ret) - goto free_drm; + goto free_canvas_vd1_2; ret = drmm_mode_config_init(drm); if (ret) - goto free_drm; + goto free_canvas_vd1_2; drm->mode_config.max_width = 3840; drm->mode_config.max_height = 2160; drm->mode_config.funcs = &meson_mode_config_funcs; @@ -307,7 +298,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) if (priv->afbcd.ops) { ret = priv->afbcd.ops->init(priv); if (ret) - goto free_drm; + goto free_canvas_vd1_2; } /* Encoder Initialization */ @@ -371,6 +362,14 @@ uninstall_irq: exit_afbcd: if (priv->afbcd.ops) priv->afbcd.ops->exit(priv); +free_canvas_vd1_2: + meson_canvas_free(priv->canvas, priv->canvas_id_vd1_2); +free_canvas_vd1_1: + meson_canvas_free(priv->canvas, priv->canvas_id_vd1_1); +free_canvas_vd1_0: + meson_canvas_free(priv->canvas, priv->canvas_id_vd1_0); +free_canvas_osd1: + meson_canvas_free(priv->canvas, priv->canvas_id_osd1); free_drm: drm_dev_put(drm); diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c index 670c9739e5e1..2033214c4b78 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c +++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c @@ -209,6 +209,8 @@ static int nv17_tv_get_ld_modes(struct drm_encoder *encoder, struct drm_display_mode *mode; mode = drm_mode_duplicate(encoder->dev, tv_mode); + if (!mode) + continue; mode->clock = tv_norm->tv_enc_mode.vrefresh * mode->htotal / 1000 * @@ -258,6 +260,8 @@ static int nv17_tv_get_hd_modes(struct drm_encoder *encoder, if (modes[i].hdisplay == output_mode->hdisplay && modes[i].vdisplay == output_mode->vdisplay) { mode = drm_mode_duplicate(encoder->dev, output_mode); + if (!mode) + continue; mode->type |= DRM_MODE_TYPE_PREFERRED; } else { @@ -265,6 +269,8 @@ static int nv17_tv_get_hd_modes(struct drm_encoder *encoder, modes[i].vdisplay, 60, false, (output_mode->flags & DRM_MODE_FLAG_INTERLACE), false); + if (!mode) + 
continue; } /* CVT modes are sometimes unsuitable... */ diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 856b3ef5edb8..0c71d761d378 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1001,6 +1001,9 @@ nouveau_connector_get_modes(struct drm_connector *connector) struct drm_display_mode *mode; mode = drm_mode_duplicate(dev, nv_connector->native_mode); + if (!mode) + return 0; + drm_mode_probed_add(connector, mode); ret = 1; } diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index dcb6d0b6ced0..c8cdc8356c58 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2752,6 +2752,7 @@ static const struct display_timing koe_tx26d202vm0bwa_timing = { .vfront_porch = { 3, 5, 10 }, .vback_porch = { 2, 5, 10 }, .vsync_len = { 5, 5, 5 }, + .flags = DISPLAY_FLAGS_DE_HIGH, }; static const struct panel_desc koe_tx26d202vm0bwa = { diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index b8a84f26b3ef..b5e7b919f241 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -86,15 +86,15 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, int ret = 0; void *out_alloc; + if (!in->count) + return NULL; + /* User stride must be at least the minimum object size, otherwise it might * lack useful information. 
*/ if (in->stride < min_stride) return ERR_PTR(-EINVAL); - if (!in->count) - return NULL; - out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL); if (!out_alloc) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 79ffcbc41d78..9a0ff48f7061 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -459,6 +459,16 @@ struct panthor_queue { atomic64_t seqno; /** + * @last_fence: Fence of the last submitted job. + * + * We return this fence when we get an empty command stream. + * This way, we are guaranteed that all earlier jobs have completed + * when drm_sched_job::s_fence::finished without having to feed + * the CS ring buffer with a dummy job that only signals the fence. + */ + struct dma_fence *last_fence; + + /** * @in_flight_jobs: List containing all in-flight jobs. * * Used to keep track and signal panthor_job::done_fence when the @@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * panthor_kernel_bo_destroy(queue->ringbuf); panthor_kernel_bo_destroy(queue->iface.mem); + /* Release the last_fence we were holding, if any. */ + dma_fence_put(queue->fence_ctx.last_fence); + kfree(queue); } @@ -2784,9 +2797,6 @@ static void group_sync_upd_work(struct work_struct *work) spin_lock(&queue->fence_ctx.lock); list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { - if (!job->call_info.size) - continue; - if (syncobj->seqno < job->done_fence->seqno) break; @@ -2865,11 +2875,14 @@ queue_run_job(struct drm_sched_job *sched_job) static_assert(sizeof(call_instrs) % 64 == 0, "call_instrs is not aligned on a cacheline"); - /* Stream size is zero, nothing to do => return a NULL fence and let - * drm_sched signal the parent. 
+ /* Stream size is zero, nothing to do except making sure all previously + * submitted jobs are done before we signal the + * drm_sched_job::s_fence::finished fence. */ - if (!job->call_info.size) - return NULL; + if (!job->call_info.size) { + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); + return dma_fence_get(job->done_fence); + } ret = pm_runtime_resume_and_get(ptdev->base.dev); if (drm_WARN_ON(&ptdev->base, ret)) @@ -2928,6 +2941,10 @@ queue_run_job(struct drm_sched_job *sched_job) } } + /* Update the last fence. */ + dma_fence_put(queue->fence_ctx.last_fence); + queue->fence_ctx.last_fence = dma_fence_get(job->done_fence); + done_fence = dma_fence_get(job->done_fence); out_unlock: @@ -3378,10 +3395,15 @@ panthor_job_create(struct panthor_file *pfile, goto err_put_job; } - job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); - if (!job->done_fence) { - ret = -ENOMEM; - goto err_put_job; + /* Empty command streams don't need a fence, they'll pick the one from + * the previously submitted job. 
+ */ + if (job->call_info.size) { + job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); + if (!job->done_fence) { + ret = -ENOMEM; + goto err_put_job; + } } ret = drm_sched_job_init(&job->base, diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 2ef201a072f1..e66a230331ee 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -642,7 +642,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev, if (r) goto error_unlock; - if (bo_va->it.start) + if (bo_va->it.start && bo_va->bo) r = radeon_vm_bo_update(rdev, bo_va, bo_va->bo->tbo.resource); error_unlock: diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c index 21d27e6235f3..b11f7c5bbcbe 100644 --- a/drivers/gpu/drm/radeon/sumo_dpm.c +++ b/drivers/gpu/drm/radeon/sumo_dpm.c @@ -1619,6 +1619,8 @@ void sumo_construct_vid_mapping_table(struct radeon_device *rdev, for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) { if (table[i].ulSupportedSCLK != 0) { + if (table[i].usVoltageIndex >= SUMO_MAX_NUMBER_VOLTAGES) + continue; vid_mapping_table->entries[table[i].usVoltageIndex].vid_7bit = table[i].usVoltageID; vid_mapping_table->entries[table[i].usVoltageIndex].vid_2bit = diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6396dece0db1..2427be8bc97f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -346,6 +346,7 @@ static void ttm_bo_release(struct kref *kref) if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) || (want_init_on_free() && (bo->ttm != NULL)) || + bo->type == ttm_bo_type_sg || !dma_resv_trylock(bo->base.resv)) { /* The BO is not idle, resurrect it for delayed destroy */ ttm_bo_flush_all_fences(bo); diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig index faddae3d6ac2..6f1ac940cbae 100644 --- a/drivers/gpu/drm/vmwgfx/Kconfig +++ b/drivers/gpu/drm/vmwgfx/Kconfig @@ -2,7 +2,7 @@ 
config DRM_VMWGFX tristate "DRM driver for VMware Virtual GPU" depends on DRM && PCI && MMU - depends on X86 || ARM64 + depends on (X86 && HYPERVISOR_GUEST) || ARM64 select DRM_TTM select DRM_TTM_HELPER select MAPPING_DIRTY_HELPERS diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index d46f87a039f2..b3d3c065dd9d 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -159,12 +159,16 @@ void intel_hdcp_gsc_fini(struct xe_device *xe) { struct intel_hdcp_gsc_message *hdcp_message = xe->display.hdcp.hdcp_message; + struct i915_hdcp_arbiter *arb = xe->display.hdcp.arbiter; - if (!hdcp_message) - return; + if (hdcp_message) { + xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo); + kfree(hdcp_message); + xe->display.hdcp.hdcp_message = NULL; + } - xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo); - kfree(hdcp_message); + kfree(arb); + xe->display.hdcp.arbiter = NULL; } static int xe_gsc_send_sync(struct xe_device *xe, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index bc1f794e3e61..b6f3a43d637f 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -317,7 +317,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, struct xe_device *xe = xe_bo_device(bo); struct xe_ttm_tt *tt; unsigned long extra_pages; - enum ttm_caching caching; + enum ttm_caching caching = ttm_cached; int err; tt = kzalloc(sizeof(*tt), GFP_KERNEL); @@ -331,26 +331,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), PAGE_SIZE); - switch (bo->cpu_caching) { - case DRM_XE_GEM_CPU_CACHING_WC: - caching = ttm_write_combined; - break; - default: - caching = ttm_cached; - break; - } - - WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching); - /* - * Display scanout is always non-coherent with the CPU cache. 
- * - * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and - * require a CPU:WC mapping. + * DGFX system memory is always WB / ttm_cached, since + * other caching modes are only supported on x86. DGFX + * GPU system memory accesses are always coherent with the + * CPU. */ - if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || - (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE)) - caching = ttm_write_combined; + if (!IS_DGFX(xe)) { + switch (bo->cpu_caching) { + case DRM_XE_GEM_CPU_CACHING_WC: + caching = ttm_write_combined; + break; + default: + caching = ttm_cached; + break; + } + + WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching); + + /* + * Display scanout is always non-coherent with the CPU cache. + * + * For Xe_LPG and beyond, PPGTT PTE lookups are also + * non-coherent and require a CPU:WC mapping. + */ + if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || + (xe->info.graphics_verx100 >= 1270 && + bo->flags & XE_BO_FLAG_PAGETABLE)) + caching = ttm_write_combined; + } err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); if (err) { diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 86422e113d39..10450f1fbbde 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -66,7 +66,8 @@ struct xe_bo { /** * @cpu_caching: CPU caching mode. Currently only used for userspace - * objects. + * objects. Exceptions are system memory on DGFX, which is always + * WB. 
*/ u16 cpu_caching; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 577bd7043740..0443e07880a0 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -342,7 +342,7 @@ static void init_steering_oaddrm(struct xe_gt *gt) else gt->steering[OADDRM].group_target = 1; - gt->steering[DSS].instance_target = 0; /* unused */ + gt->steering[OADDRM].instance_target = 0; /* unused */ } static void init_steering_sqidi_psmi(struct xe_gt *gt) @@ -357,8 +357,8 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) static void init_steering_inst0(struct xe_gt *gt) { - gt->steering[DSS].group_target = 0; /* unused */ - gt->steering[DSS].instance_target = 0; /* unused */ + gt->steering[INSTANCE0].group_target = 0; /* unused */ + gt->steering[INSTANCE0].instance_target = 0; /* unused */ } static const struct { diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 240e7a4bbff1..5faca4fc2fef 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -631,8 +631,6 @@ int xe_guc_enable_communication(struct xe_guc *guc) struct xe_device *xe = guc_to_xe(guc); int err; - guc_enable_irq(guc); - if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) { struct xe_gt *gt = guc_to_gt(guc); struct xe_tile *tile = gt_to_tile(gt); @@ -640,6 +638,8 @@ int xe_guc_enable_communication(struct xe_guc *guc) err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc); if (err) return err; + } else { + guc_enable_irq(guc); } xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK, diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 65e5a3f4c340..198f5c2189cb 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1334,7 +1334,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, GFP_KERNEL, true, 0); if (IS_ERR(sa_bo)) { err = PTR_ERR(sa_bo); - goto err; + goto err_bb; } ppgtt_ofs = NUM_KERNEL_PDE + @@ -1385,7 +1385,7 @@ xe_migrate_update_pgtables(struct xe_migrate 
*m, update_idx); if (IS_ERR(job)) { err = PTR_ERR(job); - goto err_bb; + goto err_sa; } /* Wait on BO move */ @@ -1434,12 +1434,12 @@ xe_migrate_update_pgtables(struct xe_migrate *m, err_job: xe_sched_job_put(job); +err_sa: + drm_suballoc_free(sa_bo, NULL); err_bb: if (!q) mutex_unlock(&m->job_mutex); xe_bb_free(bb, NULL); -err: - drm_suballoc_free(sa_bo, NULL); return ERR_PTR(err); } diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index a8ad728354cb..e0d676c74f14 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -45,8 +45,8 @@ int hv_init(void) * This involves a hypercall. */ int hv_post_message(union hv_connection_id connection_id, - enum hv_message_type message_type, - void *payload, size_t payload_size) + enum hv_message_type message_type, + void *payload, size_t payload_size) { struct hv_input_post_message *aligned_msg; unsigned long flags; @@ -86,7 +86,7 @@ int hv_post_message(union hv_connection_id connection_id, status = HV_STATUS_INVALID_PARAMETER; } else { status = hv_do_hypercall(HVCALL_POST_MESSAGE, - aligned_msg, NULL); + aligned_msg, NULL); } local_irq_restore(flags); @@ -111,7 +111,7 @@ int hv_synic_alloc(void) hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask), GFP_KERNEL); - if (hv_context.hv_numa_map == NULL) { + if (!hv_context.hv_numa_map) { pr_err("Unable to allocate NUMA map\n"); goto err; } @@ -120,11 +120,11 @@ int hv_synic_alloc(void) hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); tasklet_init(&hv_cpu->msg_dpc, - vmbus_on_msg_dpc, (unsigned long) hv_cpu); + vmbus_on_msg_dpc, (unsigned long)hv_cpu); if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) { hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC); - if (hv_cpu->post_msg_page == NULL) { + if (!hv_cpu->post_msg_page) { pr_err("Unable to allocate post msg page\n"); goto err; } @@ -147,14 +147,14 @@ int hv_synic_alloc(void) if (!ms_hyperv.paravisor_present && !hv_root_partition) { hv_cpu->synic_message_page = (void *)get_zeroed_page(GFP_ATOMIC); - 
if (hv_cpu->synic_message_page == NULL) { + if (!hv_cpu->synic_message_page) { pr_err("Unable to allocate SYNIC message page\n"); goto err; } hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC); - if (hv_cpu->synic_event_page == NULL) { + if (!hv_cpu->synic_event_page) { pr_err("Unable to allocate SYNIC event page\n"); free_page((unsigned long)hv_cpu->synic_message_page); @@ -203,14 +203,13 @@ err: return ret; } - void hv_synic_free(void) { int cpu, ret; for_each_present_cpu(cpu) { - struct hv_per_cpu_context *hv_cpu - = per_cpu_ptr(hv_context.cpu_context, cpu); + struct hv_per_cpu_context *hv_cpu = + per_cpu_ptr(hv_context.cpu_context, cpu); /* It's better to leak the page if the encryption fails. */ if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) { @@ -262,8 +261,8 @@ void hv_synic_free(void) */ void hv_synic_enable_regs(unsigned int cpu) { - struct hv_per_cpu_context *hv_cpu - = per_cpu_ptr(hv_context.cpu_context, cpu); + struct hv_per_cpu_context *hv_cpu = + per_cpu_ptr(hv_context.cpu_context, cpu); union hv_synic_simp simp; union hv_synic_siefp siefp; union hv_synic_sint shared_sint; @@ -277,8 +276,8 @@ void hv_synic_enable_regs(unsigned int cpu) /* Mask out vTOM bit. ioremap_cache() maps decrypted */ u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; - hv_cpu->synic_message_page - = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); + hv_cpu->synic_message_page = + (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); if (!hv_cpu->synic_message_page) pr_err("Fail to map synic message page.\n"); } else { @@ -296,8 +295,8 @@ void hv_synic_enable_regs(unsigned int cpu) /* Mask out vTOM bit. 
ioremap_cache() maps decrypted */ u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; - hv_cpu->synic_event_page - = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); + hv_cpu->synic_event_page = + (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); if (!hv_cpu->synic_event_page) pr_err("Fail to map synic event page.\n"); } else { @@ -348,8 +347,8 @@ int hv_synic_init(unsigned int cpu) */ void hv_synic_disable_regs(unsigned int cpu) { - struct hv_per_cpu_context *hv_cpu - = per_cpu_ptr(hv_context.cpu_context, cpu); + struct hv_per_cpu_context *hv_cpu = + per_cpu_ptr(hv_context.cpu_context, cpu); union hv_synic_sint shared_sint; union hv_synic_simp simp; union hv_synic_siefp siefp; diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index e000fa3b9f97..0e7427c2baf5 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -25,6 +25,7 @@ #include <linux/notifier.h> #include <linux/percpu_counter.h> #include <linux/page_reporting.h> +#include <linux/sizes.h> #include <linux/hyperv.h> #include <asm/hyperv-tlfs.h> @@ -41,8 +42,6 @@ * Begin protocol definitions. */ - - /* * Protocol versions. The low word is the minor version, the high word the major * version. @@ -71,8 +70,6 @@ enum { DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10 }; - - /* * Message Types */ @@ -101,7 +98,6 @@ enum dm_message_type { DM_VERSION_1_MAX = 12 }; - /* * Structures defining the dynamic memory management * protocol. @@ -115,7 +111,6 @@ union dm_version { __u32 version; } __packed; - union dm_caps { struct { __u64 balloon:1; @@ -148,8 +143,6 @@ union dm_mem_page_range { __u64 page_range; } __packed; - - /* * The header for all dynamic memory messages: * @@ -174,7 +167,6 @@ struct dm_message { __u8 data[]; /* enclosed message */ } __packed; - /* * Specific message types supporting the dynamic memory protocol. 
*/ @@ -271,7 +263,6 @@ struct dm_status { __u32 io_diff; } __packed; - /* * Message to ask the guest to allocate memory - balloon up message. * This message is sent from the host to the guest. The guest may not be @@ -286,14 +277,13 @@ struct dm_balloon { __u32 reservedz; } __packed; - /* * Balloon response message; this message is sent from the guest * to the host in response to the balloon message. * * reservedz: Reserved; must be set to zero. * more_pages: If FALSE, this is the last message of the transaction. - * if TRUE there will atleast one more message from the guest. + * if TRUE there will be at least one more message from the guest. * * range_count: The number of ranges in the range array. * @@ -314,7 +304,7 @@ struct dm_balloon_response { * to the guest to give guest more memory. * * more_pages: If FALSE, this is the last message of the transaction. - * if TRUE there will atleast one more message from the guest. + * if TRUE there will be at least one more message from the guest. * * reservedz: Reserved; must be set to zero. * @@ -342,7 +332,6 @@ struct dm_unballoon_response { struct dm_header hdr; } __packed; - /* * Hot add request message. Message sent from the host to the guest. * @@ -390,7 +379,6 @@ enum dm_info_type { MAX_INFO_TYPE }; - /* * Header for the information message. */ @@ -425,11 +413,11 @@ struct dm_info_msg { * The range start_pfn : end_pfn specifies the range * that the host has asked us to hot add. The range * start_pfn : ha_end_pfn specifies the range that we have - * currently hot added. We hot add in multiples of 128M - * chunks; it is possible that we may not be able to bring - * online all the pages in the region. The range + * currently hot added. We hot add in chunks equal to the + * memory block size; it is possible that we may not be able + * to bring online all the pages in the region. The range * covered_start_pfn:covered_end_pfn defines the pages that can - * be brough online. + * be brought online. 
*/ struct hv_hotadd_state { @@ -480,10 +468,10 @@ static unsigned long last_post_time; static int hv_hypercall_multi_failure; -module_param(hot_add, bool, (S_IRUGO | S_IWUSR)); +module_param(hot_add, bool, 0644); MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add"); -module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR)); +module_param(pressure_report_delay, uint, 0644); MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure"); static atomic_t trans_id = ATOMIC_INIT(0); @@ -502,11 +490,13 @@ enum hv_dm_state { DM_INIT_ERROR }; - static __u8 recv_buffer[HV_HYP_PAGE_SIZE]; static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE]; + +static unsigned long ha_pages_in_chunk; +#define HA_BYTES_IN_CHUNK (ha_pages_in_chunk << PAGE_SHIFT) + #define PAGES_IN_2M (2 * 1024 * 1024 / PAGE_SIZE) -#define HA_CHUNK (128 * 1024 * 1024 / PAGE_SIZE) struct hv_dynmem_device { struct hv_device *dev; @@ -595,12 +585,12 @@ static inline bool has_pfn_is_backed(struct hv_hotadd_state *has, struct hv_hotadd_gap *gap; /* The page is not backed. */ - if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn)) + if (pfn < has->covered_start_pfn || pfn >= has->covered_end_pfn) return false; /* Check for gaps. 
*/ list_for_each_entry(gap, &has->gap_list, list) { - if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn)) + if (pfn >= gap->start_pfn && pfn < gap->end_pfn) return false; } @@ -724,28 +714,21 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, unsigned long processed_pfn; unsigned long total_pfn = pfn_count; - for (i = 0; i < (size/HA_CHUNK); i++) { - start_pfn = start + (i * HA_CHUNK); + for (i = 0; i < (size/ha_pages_in_chunk); i++) { + start_pfn = start + (i * ha_pages_in_chunk); scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { - has->ha_end_pfn += HA_CHUNK; - - if (total_pfn > HA_CHUNK) { - processed_pfn = HA_CHUNK; - total_pfn -= HA_CHUNK; - } else { - processed_pfn = total_pfn; - total_pfn = 0; - } - - has->covered_end_pfn += processed_pfn; + has->ha_end_pfn += ha_pages_in_chunk; + processed_pfn = umin(total_pfn, ha_pages_in_chunk); + total_pfn -= processed_pfn; + has->covered_end_pfn += processed_pfn; } reinit_completion(&dm_device.ol_waitevent); nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn)); ret = add_memory(nid, PFN_PHYS((start_pfn)), - (HA_CHUNK << PAGE_SHIFT), MHP_MERGE_RESOURCE); + HA_BYTES_IN_CHUNK, MHP_MERGE_RESOURCE); if (ret) { pr_err("hot_add memory failed error is %d\n", ret); @@ -760,7 +743,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, do_hot_add = false; } scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { - has->ha_end_pfn -= HA_CHUNK; + has->ha_end_pfn -= ha_pages_in_chunk; has->covered_end_pfn -= processed_pfn; } break; @@ -787,8 +770,8 @@ static void hv_online_page(struct page *pg, unsigned int order) guard(spinlock_irqsave)(&dm_device.ha_lock); list_for_each_entry(has, &dm_device.ha_region_list, list) { /* The page belongs to a different HAS. 
*/ - if ((pfn < has->start_pfn) || - (pfn + (1UL << order) > has->end_pfn)) + if (pfn < has->start_pfn || + (pfn + (1UL << order) > has->end_pfn)) continue; hv_bring_pgs_online(has, pfn, 1UL << order); @@ -800,7 +783,7 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) { struct hv_hotadd_state *has; struct hv_hotadd_gap *gap; - unsigned long residual, new_inc; + unsigned long residual; int ret = 0; guard(spinlock_irqsave)(&dm_device.ha_lock); @@ -836,15 +819,9 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) * our current limit; extend it. */ if ((start_pfn + pfn_cnt) > has->end_pfn) { + /* Extend the region by multiples of ha_pages_in_chunk */ residual = (start_pfn + pfn_cnt - has->end_pfn); - /* - * Extend the region by multiples of HA_CHUNK. - */ - new_inc = (residual / HA_CHUNK) * HA_CHUNK; - if (residual % HA_CHUNK) - new_inc += HA_CHUNK; - - has->end_pfn += new_inc; + has->end_pfn += ALIGN(residual, ha_pages_in_chunk); } ret = 1; @@ -855,7 +832,7 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) } static unsigned long handle_pg_range(unsigned long pg_start, - unsigned long pg_count) + unsigned long pg_count) { unsigned long start_pfn = pg_start; unsigned long pfn_cnt = pg_count; @@ -866,7 +843,7 @@ static unsigned long handle_pg_range(unsigned long pg_start, unsigned long res = 0, flags; pr_debug("Hot adding %lu pages starting at pfn 0x%lx.\n", pg_count, - pg_start); + pg_start); spin_lock_irqsave(&dm_device.ha_lock, flags); list_for_each_entry(has, &dm_device.ha_region_list, list) { @@ -902,22 +879,19 @@ static unsigned long handle_pg_range(unsigned long pg_start, if (start_pfn > has->start_pfn && online_section_nr(pfn_to_section_nr(start_pfn))) hv_bring_pgs_online(has, start_pfn, pgs_ol); - } - if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) { + if (has->ha_end_pfn < has->end_pfn && pfn_cnt > 0) { /* * We have some residual hot add range * that needs to be hot added; hot add * it 
now. Hot add a multiple of - * HA_CHUNK that fully covers the pages + * ha_pages_in_chunk that fully covers the pages * we have. */ size = (has->end_pfn - has->ha_end_pfn); if (pfn_cnt <= size) { - size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK); - if (pfn_cnt % HA_CHUNK) - size += HA_CHUNK; + size = ALIGN(pfn_cnt, ha_pages_in_chunk); } else { pfn_cnt = size; } @@ -1010,10 +984,7 @@ static void hot_add_req(struct work_struct *dummy) rg_start = dm->ha_wrk.ha_region_range.finfo.start_page; rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt; - if ((rg_start == 0) && (!dm->host_specified_ha_region)) { - unsigned long region_size; - unsigned long region_start; - + if (rg_start == 0 && !dm->host_specified_ha_region) { /* * The host has not specified the hot-add region. * Based on the hot-add page range being specified, @@ -1021,19 +992,13 @@ static void hot_add_req(struct work_struct *dummy) * that need to be hot-added while ensuring the alignment * and size requirements of Linux as it relates to hot-add. 
*/ - region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK; - if (pfn_cnt % HA_CHUNK) - region_size += HA_CHUNK; - - region_start = (pg_start / HA_CHUNK) * HA_CHUNK; - - rg_start = region_start; - rg_sz = region_size; + rg_start = ALIGN_DOWN(pg_start, ha_pages_in_chunk); + rg_sz = ALIGN(pfn_cnt, ha_pages_in_chunk); } if (do_hot_add) resp.page_count = process_hot_add(pg_start, pfn_cnt, - rg_start, rg_sz); + rg_start, rg_sz); dm->num_pages_added += resp.page_count; #endif @@ -1211,11 +1176,10 @@ static void post_status(struct hv_dynmem_device *dm) sizeof(struct dm_status), (unsigned long)NULL, VM_PKT_DATA_INBAND, 0); - } static void free_balloon_pages(struct hv_dynmem_device *dm, - union dm_mem_page_range *range_array) + union dm_mem_page_range *range_array) { int num_pages = range_array->finfo.page_cnt; __u64 start_frame = range_array->finfo.start_page; @@ -1231,8 +1195,6 @@ static void free_balloon_pages(struct hv_dynmem_device *dm, } } - - static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, unsigned int num_pages, struct dm_balloon_response *bl_resp, @@ -1278,7 +1240,6 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, page_to_pfn(pg); bl_resp->range_array[i].finfo.page_cnt = alloc_unit; bl_resp->hdr.size += sizeof(union dm_mem_page_range); - } return i * alloc_unit; @@ -1332,7 +1293,7 @@ static void balloon_up(struct work_struct *dummy) if (num_ballooned == 0 || num_ballooned == num_pages) { pr_debug("Ballooned %u out of %u requested pages.\n", - num_pages, dm_device.balloon_wrk.num_pages); + num_pages, dm_device.balloon_wrk.num_pages); bl_resp->more_pages = 0; done = true; @@ -1366,16 +1327,15 @@ static void balloon_up(struct work_struct *dummy) for (i = 0; i < bl_resp->range_count; i++) free_balloon_pages(&dm_device, - &bl_resp->range_array[i]); + &bl_resp->range_array[i]); done = true; } } - } static void balloon_down(struct hv_dynmem_device *dm, - struct dm_unballoon_request *req) + struct dm_unballoon_request *req) { union 
dm_mem_page_range *range_array = req->range_array; int range_count = req->range_count; @@ -1389,7 +1349,7 @@ static void balloon_down(struct hv_dynmem_device *dm, } pr_debug("Freed %u ballooned pages.\n", - prev_pages_ballooned - dm->num_pages_ballooned); + prev_pages_ballooned - dm->num_pages_ballooned); if (req->more_pages == 1) return; @@ -1414,8 +1374,7 @@ static int dm_thread_func(void *dm_dev) struct hv_dynmem_device *dm = dm_dev; while (!kthread_should_stop()) { - wait_for_completion_interruptible_timeout( - &dm_device.config_event, 1*HZ); + wait_for_completion_interruptible_timeout(&dm_device.config_event, 1 * HZ); /* * The host expects us to post information on the memory * pressure every second. @@ -1439,9 +1398,8 @@ static int dm_thread_func(void *dm_dev) return 0; } - static void version_resp(struct hv_dynmem_device *dm, - struct dm_version_response *vresp) + struct dm_version_response *vresp) { struct dm_version_request version_req; int ret; @@ -1502,7 +1460,7 @@ version_error: } static void cap_resp(struct hv_dynmem_device *dm, - struct dm_capabilities_resp_msg *cap_resp) + struct dm_capabilities_resp_msg *cap_resp) { if (!cap_resp->is_accepted) { pr_err("Capabilities not accepted by host\n"); @@ -1535,7 +1493,7 @@ static void balloon_onchannelcallback(void *context) switch (dm_hdr->type) { case DM_VERSION_RESPONSE: version_resp(dm, - (struct dm_version_response *)dm_msg); + (struct dm_version_response *)dm_msg); break; case DM_CAPABILITIES_RESPONSE: @@ -1565,7 +1523,7 @@ static void balloon_onchannelcallback(void *context) dm->state = DM_BALLOON_DOWN; balloon_down(dm, - (struct dm_unballoon_request *)recv_buffer); + (struct dm_unballoon_request *)recv_buffer); break; case DM_MEM_HOT_ADD_REQUEST: @@ -1603,17 +1561,15 @@ static void balloon_onchannelcallback(void *context) default: pr_warn_ratelimited("Unhandled message: type: %d\n", dm_hdr->type); - } } - } #define HV_LARGE_REPORTING_ORDER 9 #define HV_LARGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << \ 
HV_LARGE_REPORTING_ORDER) static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, - struct scatterlist *sgl, unsigned int nents) + struct scatterlist *sgl, unsigned int nents) { unsigned long flags; struct hv_memory_hint *hint; @@ -1648,7 +1604,7 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, */ /* page reporting for pages 2MB or higher */ - if (order >= HV_LARGE_REPORTING_ORDER ) { + if (order >= HV_LARGE_REPORTING_ORDER) { range->page.largepage = 1; range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; range->base_large_pfn = page_to_hvpfn( @@ -1662,23 +1618,21 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, range->page.additional_pages = (sg->length / HV_HYP_PAGE_SIZE) - 1; } - } status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0, hint, NULL); local_irq_restore(flags); if (!hv_result_success(status)) { - pr_err("Cold memory discard hypercall failed with status %llx\n", - status); + status); if (hv_hypercall_multi_failure > 0) hv_hypercall_multi_failure++; if (hv_result(status) == HV_STATUS_INVALID_PARAMETER) { pr_err("Underlying Hyper-V does not support order less than 9. 
Hypercall failed\n"); pr_err("Defaulting to page_reporting_order %d\n", - pageblock_order); + pageblock_order); page_reporting_order = pageblock_order; hv_hypercall_multi_failure++; return -EINVAL; @@ -1712,7 +1666,7 @@ static void enable_page_reporting(void) pr_err("Failed to enable cold memory discard: %d\n", ret); } else { pr_info("Cold memory discard hint enabled with order %d\n", - page_reporting_order); + page_reporting_order); } } @@ -1795,7 +1749,7 @@ static int balloon_connect_vsp(struct hv_device *dev) if (ret) goto out; - t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); + t = wait_for_completion_timeout(&dm_device.host_event, 5 * HZ); if (t == 0) { ret = -ETIMEDOUT; goto out; @@ -1831,10 +1785,13 @@ static int balloon_connect_vsp(struct hv_device *dev) cap_msg.caps.cap_bits.hot_add = hot_add_enabled(); /* - * Specify our alignment requirements as it relates - * memory hot-add. Specify 128MB alignment. + * Specify our alignment requirements for memory hot-add. The value is + * the log base 2 of the number of megabytes in a chunk. For example, + * with 256 MiB chunks, the value is 8. The number of MiB in a chunk + * must be a power of 2. 
*/ - cap_msg.caps.cap_bits.hot_add_alignment = 7; + cap_msg.caps.cap_bits.hot_add_alignment = + ilog2(HA_BYTES_IN_CHUNK / SZ_1M); /* * Currently the host does not use these @@ -1850,7 +1807,7 @@ static int balloon_connect_vsp(struct hv_device *dev) if (ret) goto out; - t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); + t = wait_for_completion_timeout(&dm_device.host_event, 5 * HZ); if (t == 0) { ret = -ETIMEDOUT; goto out; @@ -1891,8 +1848,8 @@ static int hv_balloon_debug_show(struct seq_file *f, void *offset) char *sname; seq_printf(f, "%-22s: %u.%u\n", "host_version", - DYNMEM_MAJOR_VERSION(dm->version), - DYNMEM_MINOR_VERSION(dm->version)); + DYNMEM_MAJOR_VERSION(dm->version), + DYNMEM_MINOR_VERSION(dm->version)); seq_printf(f, "%-22s:", "capabilities"); if (ballooning_enabled()) @@ -1941,10 +1898,10 @@ static int hv_balloon_debug_show(struct seq_file *f, void *offset) seq_printf(f, "%-22s: %u\n", "pages_ballooned", dm->num_pages_ballooned); seq_printf(f, "%-22s: %lu\n", "total_pages_committed", - get_pages_committed(dm)); + get_pages_committed(dm)); seq_printf(f, "%-22s: %llu\n", "max_dynamic_page_count", - dm->max_dynamic_page_count); + dm->max_dynamic_page_count); return 0; } @@ -1954,7 +1911,7 @@ DEFINE_SHOW_ATTRIBUTE(hv_balloon_debug); static void hv_balloon_debugfs_init(struct hv_dynmem_device *b) { debugfs_create_file("hv-balloon", 0444, NULL, b, - &hv_balloon_debug_fops); + &hv_balloon_debug_fops); } static void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) @@ -1984,8 +1941,23 @@ static int balloon_probe(struct hv_device *dev, hot_add = false; #ifdef CONFIG_MEMORY_HOTPLUG + /* + * Hot-add must operate in chunks that are of size equal to the + * memory block size because that's what the core add_memory() + * interface requires. The Hyper-V interface requires that the memory + * block size be a power of 2, which is guaranteed by the check in + * memory_dev_init(). 
+ */ + ha_pages_in_chunk = memory_block_size_bytes() / PAGE_SIZE; do_hot_add = hot_add; #else + /* + * Without MEMORY_HOTPLUG, the guest returns a failure status for all + * hot add requests from Hyper-V, and the chunk size is used only to + * specify alignment to Hyper-V as required by the host/guest protocol. + * Somewhat arbitrarily, use 128 MiB. + */ + ha_pages_in_chunk = SZ_128M / PAGE_SIZE; do_hot_add = false; #endif dm_device.dev = dev; @@ -2097,7 +2069,6 @@ static int balloon_suspend(struct hv_device *hv_dev) tasklet_enable(&hv_dev->channel->callback_event); return 0; - } static int balloon_resume(struct hv_device *dev) @@ -2156,7 +2127,6 @@ static struct hv_driver balloon_drv = { static int __init init_balloon_drv(void) { - return vmbus_driver_register(&balloon_drv); } diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index 9388823bb0bb..44710267d669 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -135,7 +135,7 @@ config I2C_SLAVE_EEPROM Documentation/i2c/slave-eeprom-backend.rst for further details. config I2C_SLAVE_TESTUNIT - tristate "I2C eeprom testunit driver" + tristate "I2C testunit driver" help This backend can be used to trigger test cases for I2C bus masters which require a remote device with certain capabilities, e.g. 
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 3d65934f5eb4..78d0561339e5 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -29,8 +29,7 @@ obj-$(CONFIG_I2C_SIS630) += i2c-sis630.o obj-$(CONFIG_I2C_SIS96X) += i2c-sis96x.o obj-$(CONFIG_I2C_VIA) += i2c-via.o obj-$(CONFIG_I2C_VIAPRO) += i2c-viapro.o -i2c-zhaoxin-objs := i2c-viai2c-zhaoxin.o i2c-viai2c-common.o -obj-$(CONFIG_I2C_ZHAOXIN) += i2c-zhaoxin.o +obj-$(CONFIG_I2C_ZHAOXIN) += i2c-viai2c-zhaoxin.o i2c-viai2c-common.o # Mac SMBus host controller drivers obj-$(CONFIG_I2C_HYDRA) += i2c-hydra.o @@ -120,8 +119,7 @@ obj-$(CONFIG_I2C_TEGRA_BPMP) += i2c-tegra-bpmp.o obj-$(CONFIG_I2C_UNIPHIER) += i2c-uniphier.o obj-$(CONFIG_I2C_UNIPHIER_F) += i2c-uniphier-f.o obj-$(CONFIG_I2C_VERSATILE) += i2c-versatile.o -i2c-wmt-objs := i2c-viai2c-wmt.o i2c-viai2c-common.o -obj-$(CONFIG_I2C_WMT) += i2c-wmt.o +obj-$(CONFIG_I2C_WMT) += i2c-viai2c-wmt.o i2c-viai2c-common.o i2c-octeon-objs := i2c-octeon-core.o i2c-octeon-platdrv.o obj-$(CONFIG_I2C_OCTEON) += i2c-octeon.o i2c-thunderx-objs := i2c-octeon-core.o i2c-thunderx-pcidrv.o diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index 56a4dabf5a38..4ad670a80a63 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -431,8 +431,8 @@ static int ocores_init(struct device *dev, struct ocores_i2c *i2c) oc_setreg(i2c, OCI2C_PREHIGH, prescale >> 8); /* Init the device */ - oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_IACK); oc_setreg(i2c, OCI2C_CONTROL, ctrl | OCI2C_CTRL_EN); + oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_IACK); return 0; } diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index a12525b3186b..f448505d5468 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -15,7 +15,6 @@ #include <linux/ioport.h> #include <linux/delay.h> #include <linux/i2c.h> -#include <linux/timer.h> #include <linux/completion.h> #include 
<linux/platform_device.h> #include <linux/io.h> @@ -32,7 +31,6 @@ struct i2c_pnx_mif { int ret; /* Return value */ int mode; /* Interface mode */ struct completion complete; /* I/O completion */ - struct timer_list timer; /* Timeout */ u8 * buf; /* Data buffer */ int len; /* Length of data buffer */ int order; /* RX Bytes to order via TX */ @@ -117,24 +115,6 @@ static inline int wait_reset(struct i2c_pnx_algo_data *data) return (timeout <= 0); } -static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data) -{ - struct timer_list *timer = &alg_data->mif.timer; - unsigned long expires = msecs_to_jiffies(alg_data->timeout); - - if (expires <= 1) - expires = 2; - - del_timer_sync(timer); - - dev_dbg(&alg_data->adapter.dev, "Timer armed at %lu plus %lu jiffies.\n", - jiffies, expires); - - timer->expires = jiffies + expires; - - add_timer(timer); -} - /** * i2c_pnx_start - start a device * @slave_addr: slave address @@ -259,8 +239,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data) ~(mcntrl_afie | mcntrl_naie | mcntrl_drmie), I2C_REG_CTL(alg_data)); - del_timer_sync(&alg_data->mif.timer); - dev_dbg(&alg_data->adapter.dev, "%s(): Waking up xfer routine.\n", __func__); @@ -276,8 +254,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data) ~(mcntrl_afie | mcntrl_naie | mcntrl_drmie), I2C_REG_CTL(alg_data)); - /* Stop timer. */ - del_timer_sync(&alg_data->mif.timer); dev_dbg(&alg_data->adapter.dev, "%s(): Waking up xfer routine after zero-xfer.\n", __func__); @@ -364,8 +340,6 @@ static int i2c_pnx_master_rcv(struct i2c_pnx_algo_data *alg_data) mcntrl_drmie | mcntrl_daie); iowrite32(ctl, I2C_REG_CTL(alg_data)); - /* Kill timer. */ - del_timer_sync(&alg_data->mif.timer); complete(&alg_data->mif.complete); } } @@ -400,8 +374,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id) mcntrl_drmie); iowrite32(ctl, I2C_REG_CTL(alg_data)); - /* Stop timer, to prevent timeout. 
*/ - del_timer_sync(&alg_data->mif.timer); complete(&alg_data->mif.complete); } else if (stat & mstatus_nai) { /* Slave did not acknowledge, generate a STOP */ @@ -419,8 +391,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id) /* Our return value. */ alg_data->mif.ret = -EIO; - /* Stop timer, to prevent timeout. */ - del_timer_sync(&alg_data->mif.timer); complete(&alg_data->mif.complete); } else { /* @@ -453,9 +423,8 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void i2c_pnx_timeout(struct timer_list *t) +static void i2c_pnx_timeout(struct i2c_pnx_algo_data *alg_data) { - struct i2c_pnx_algo_data *alg_data = from_timer(alg_data, t, mif.timer); u32 ctl; dev_err(&alg_data->adapter.dev, @@ -472,7 +441,6 @@ static void i2c_pnx_timeout(struct timer_list *t) iowrite32(ctl, I2C_REG_CTL(alg_data)); wait_reset(alg_data); alg_data->mif.ret = -EIO; - complete(&alg_data->mif.complete); } static inline void bus_reset_if_active(struct i2c_pnx_algo_data *alg_data) @@ -514,6 +482,7 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) struct i2c_msg *pmsg; int rc = 0, completed = 0, i; struct i2c_pnx_algo_data *alg_data = adap->algo_data; + unsigned long time_left; u32 stat; dev_dbg(&alg_data->adapter.dev, @@ -548,7 +517,6 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) dev_dbg(&alg_data->adapter.dev, "%s(): mode %d, %d bytes\n", __func__, alg_data->mif.mode, alg_data->mif.len); - i2c_pnx_arm_timer(alg_data); /* initialize the completion var */ init_completion(&alg_data->mif.complete); @@ -564,7 +532,10 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) break; /* Wait for completion */ - wait_for_completion(&alg_data->mif.complete); + time_left = wait_for_completion_timeout(&alg_data->mif.complete, + alg_data->timeout); + if (time_left == 0) + i2c_pnx_timeout(alg_data); if (!(rc = alg_data->mif.ret)) completed++; @@ -653,7 +624,10 @@ static int 
i2c_pnx_probe(struct platform_device *pdev) alg_data->adapter.algo_data = alg_data; alg_data->adapter.nr = pdev->id; - alg_data->timeout = I2C_PNX_TIMEOUT_DEFAULT; + alg_data->timeout = msecs_to_jiffies(I2C_PNX_TIMEOUT_DEFAULT); + if (alg_data->timeout <= 1) + alg_data->timeout = 2; + #ifdef CONFIG_OF alg_data->adapter.dev.of_node = of_node_get(pdev->dev.of_node); if (pdev->dev.of_node) { @@ -673,8 +647,6 @@ static int i2c_pnx_probe(struct platform_device *pdev) if (IS_ERR(alg_data->clk)) return PTR_ERR(alg_data->clk); - timer_setup(&alg_data->mif.timer, i2c_pnx_timeout, 0); - snprintf(alg_data->adapter.name, sizeof(alg_data->adapter.name), "%s", pdev->name); diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 828aa2ea0fe4..185a5d60f101 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -257,6 +257,14 @@ static void rcar_i2c_init(struct rcar_i2c_priv *priv) } } +static void rcar_i2c_reset_slave(struct rcar_i2c_priv *priv) +{ + rcar_i2c_write(priv, ICSIER, 0); + rcar_i2c_write(priv, ICSSR, 0); + rcar_i2c_write(priv, ICSCR, SDBS); + rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ +} + static int rcar_i2c_bus_barrier(struct rcar_i2c_priv *priv) { int ret; @@ -875,6 +883,10 @@ static int rcar_i2c_do_reset(struct rcar_i2c_priv *priv) { int ret; + /* Don't reset if a slave instance is currently running */ + if (priv->slave) + return -EISCONN; + ret = reset_control_reset(priv->rstc); if (ret) return ret; @@ -903,10 +915,10 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap, /* Gen3+ needs a reset. 
That also allows RXDMA once */ if (priv->devtype >= I2C_RCAR_GEN3) { - priv->flags &= ~ID_P_NO_RXDMA; ret = rcar_i2c_do_reset(priv); if (ret) goto out; + priv->flags &= ~ID_P_NO_RXDMA; } rcar_i2c_init(priv); @@ -1033,11 +1045,8 @@ static int rcar_unreg_slave(struct i2c_client *slave) /* ensure no irq is running before clearing ptr */ disable_irq(priv->irq); - rcar_i2c_write(priv, ICSIER, 0); - rcar_i2c_write(priv, ICSSR, 0); + rcar_i2c_reset_slave(priv); enable_irq(priv->irq); - rcar_i2c_write(priv, ICSCR, SDBS); - rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ priv->slave = NULL; @@ -1152,7 +1161,9 @@ static int rcar_i2c_probe(struct platform_device *pdev) goto out_pm_disable; } - rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ + /* Bring hardware to known state */ + rcar_i2c_init(priv); + rcar_i2c_reset_slave(priv); if (priv->devtype < I2C_RCAR_GEN3) { irqflags |= IRQF_NO_THREAD; @@ -1168,6 +1179,7 @@ static int rcar_i2c_probe(struct platform_device *pdev) if (of_property_read_bool(dev->of_node, "smbus")) priv->flags |= ID_P_HOST_NOTIFY; + /* R-Car Gen3+ needs a reset before every transfer */ if (priv->devtype >= I2C_RCAR_GEN3) { priv->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); if (IS_ERR(priv->rstc)) { @@ -1178,6 +1190,9 @@ static int rcar_i2c_probe(struct platform_device *pdev) ret = reset_control_status(priv->rstc); if (ret < 0) goto out_pm_put; + + /* hard reset disturbs HostNotify local target, so disable it */ + priv->flags &= ~ID_P_HOST_NOTIFY; } ret = platform_get_irq(pdev, 0); diff --git a/drivers/i2c/busses/i2c-viai2c-common.c b/drivers/i2c/busses/i2c-viai2c-common.c index 1844d13f1f79..162b31306cba 100644 --- a/drivers/i2c/busses/i2c-viai2c-common.c +++ b/drivers/i2c/busses/i2c-viai2c-common.c @@ -17,6 +17,7 @@ int viai2c_wait_bus_not_busy(struct viai2c *i2c) return 0; } +EXPORT_SYMBOL_GPL(viai2c_wait_bus_not_busy); static int viai2c_write(struct viai2c *i2c, struct i2c_msg *pmsg, int 
last) { @@ -121,6 +122,7 @@ int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) return (ret < 0) ? ret : i; } +EXPORT_SYMBOL_GPL(viai2c_xfer); /* * Main process of the byte mode xfer @@ -130,7 +132,7 @@ int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) * 0: there is still data that needs to be transferred * -EIO: error occurred */ -static int viai2c_irq_xfer(struct viai2c *i2c) +int viai2c_irq_xfer(struct viai2c *i2c) { u16 val; struct i2c_msg *msg = i2c->msg; @@ -171,51 +173,11 @@ static int viai2c_irq_xfer(struct viai2c *i2c) return i2c->xfered_len == msg->len; } - -int __weak viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq) -{ - return 0; -} - -static irqreturn_t viai2c_isr(int irq, void *data) -{ - struct viai2c *i2c = data; - u8 status; - - /* save the status and write-clear it */ - status = readw(i2c->base + VIAI2C_REG_ISR); - if (!status && i2c->platform == VIAI2C_PLAT_ZHAOXIN) - return IRQ_NONE; - - writew(status, i2c->base + VIAI2C_REG_ISR); - - i2c->ret = 0; - if (status & VIAI2C_ISR_NACK_ADDR) - i2c->ret = -EIO; - - if (i2c->platform == VIAI2C_PLAT_WMT && (status & VIAI2C_ISR_SCL_TIMEOUT)) - i2c->ret = -ETIMEDOUT; - - if (!i2c->ret) { - if (i2c->mode == VIAI2C_BYTE_MODE) - i2c->ret = viai2c_irq_xfer(i2c); - else - i2c->ret = viai2c_fifo_irq_xfer(i2c, true); - } - - /* All the data has been successfully transferred or error occurred */ - if (i2c->ret) - complete(&i2c->complete); - - return IRQ_HANDLED; -} +EXPORT_SYMBOL_GPL(viai2c_irq_xfer); int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat) { - int err; - int irq_flags; struct viai2c *i2c; - struct device_node *np = pdev->dev.of_node; i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL); if (!i2c) @@ -225,28 +187,8 @@ int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat) if (IS_ERR(i2c->base)) return PTR_ERR(i2c->base); - if (plat == VIAI2C_PLAT_WMT) { - irq_flags = 0; - i2c->irq = 
irq_of_parse_and_map(np, 0); - if (!i2c->irq) - return -EINVAL; - } else if (plat == VIAI2C_PLAT_ZHAOXIN) { - irq_flags = IRQF_SHARED; - i2c->irq = platform_get_irq(pdev, 0); - if (i2c->irq < 0) - return i2c->irq; - } else { - return dev_err_probe(&pdev->dev, -EINVAL, "wrong platform type\n"); - } - i2c->platform = plat; - err = devm_request_irq(&pdev->dev, i2c->irq, viai2c_isr, - irq_flags, pdev->name, i2c); - if (err) - return dev_err_probe(&pdev->dev, err, - "failed to request irq %i\n", i2c->irq); - i2c->dev = &pdev->dev; init_completion(&i2c->complete); platform_set_drvdata(pdev, i2c); @@ -254,3 +196,8 @@ int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat) *pi2c = i2c; return 0; } +EXPORT_SYMBOL_GPL(viai2c_init); + +MODULE_DESCRIPTION("Via/Wondermedia/Zhaoxin I2C master-mode bus adapter"); +MODULE_AUTHOR("Tony Prisk <linux@prisktech.co.nz>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-viai2c-common.h b/drivers/i2c/busses/i2c-viai2c-common.h index 81e827c54434..00f17733223c 100644 --- a/drivers/i2c/busses/i2c-viai2c-common.h +++ b/drivers/i2c/busses/i2c-viai2c-common.h @@ -80,6 +80,6 @@ struct viai2c { int viai2c_wait_bus_not_busy(struct viai2c *i2c); int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num); int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat); -int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq); +int viai2c_irq_xfer(struct viai2c *i2c); #endif diff --git a/drivers/i2c/busses/i2c-viai2c-wmt.c b/drivers/i2c/busses/i2c-viai2c-wmt.c index e1988f946026..420fd10fe3aa 100644 --- a/drivers/i2c/busses/i2c-viai2c-wmt.c +++ b/drivers/i2c/busses/i2c-viai2c-wmt.c @@ -72,6 +72,32 @@ static int wmt_i2c_reset_hardware(struct viai2c *i2c) return 0; } +static irqreturn_t wmt_i2c_isr(int irq, void *data) +{ + struct viai2c *i2c = data; + u8 status; + + /* save the status and write-clear it */ + status = readw(i2c->base + VIAI2C_REG_ISR); + writew(status, i2c->base + 
VIAI2C_REG_ISR); + + i2c->ret = 0; + if (status & VIAI2C_ISR_NACK_ADDR) + i2c->ret = -EIO; + + if (status & VIAI2C_ISR_SCL_TIMEOUT) + i2c->ret = -ETIMEDOUT; + + if (!i2c->ret) + i2c->ret = viai2c_irq_xfer(i2c); + + /* All the data has been successfully transferred or error occurred */ + if (i2c->ret) + complete(&i2c->complete); + + return IRQ_HANDLED; +} + static int wmt_i2c_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -84,6 +110,16 @@ static int wmt_i2c_probe(struct platform_device *pdev) if (err) return err; + i2c->irq = platform_get_irq(pdev, 0); + if (i2c->irq < 0) + return i2c->irq; + + err = devm_request_irq(&pdev->dev, i2c->irq, wmt_i2c_isr, + 0, pdev->name, i2c); + if (err) + return dev_err_probe(&pdev->dev, err, + "failed to request irq %i\n", i2c->irq); + i2c->clk = of_clk_get(np, 0); if (IS_ERR(i2c->clk)) { dev_err(&pdev->dev, "unable to request clock\n"); diff --git a/drivers/i2c/busses/i2c-viai2c-zhaoxin.c b/drivers/i2c/busses/i2c-viai2c-zhaoxin.c index 7e3ac2a3e1fd..ab3e44e147e9 100644 --- a/drivers/i2c/busses/i2c-viai2c-zhaoxin.c +++ b/drivers/i2c/busses/i2c-viai2c-zhaoxin.c @@ -49,8 +49,7 @@ struct viai2c_zhaoxin { u16 xfer_len; }; -/* 'irq == true' means in interrupt context */ -int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq) +static int viai2c_fifo_xfer(struct viai2c *i2c) { u16 i; u8 tmp; @@ -59,17 +58,6 @@ int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq) bool read = !!(msg->flags & I2C_M_RD); struct viai2c_zhaoxin *priv = i2c->pltfm_priv; - if (irq) { - /* get the received data */ - if (read) - for (i = 0; i < priv->xfer_len; i++) - msg->buf[i2c->xfered_len + i] = ioread8(base + ZXI2C_REG_HRDR); - - i2c->xfered_len += priv->xfer_len; - if (i2c->xfered_len == msg->len) - return 1; - } - /* reset fifo buffer */ tmp = ioread8(base + ZXI2C_REG_HCR); iowrite8(tmp | ZXI2C_HCR_RST_FIFO, base + ZXI2C_REG_HCR); @@ -92,18 +80,59 @@ int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq) iowrite8(tmp, 
base + VIAI2C_REG_CR); } - if (irq) { - /* continue transmission */ - tmp = ioread8(base + VIAI2C_REG_CR); - iowrite8(tmp |= VIAI2C_CR_CPU_RDY, base + VIAI2C_REG_CR); + u16 tcr_val = i2c->tcr; + + /* start transmission */ + tcr_val |= read ? VIAI2C_TCR_READ : 0; + writew(tcr_val | msg->addr, base + VIAI2C_REG_TCR); + + return 0; +} + +static int viai2c_fifo_irq_xfer(struct viai2c *i2c) +{ + u16 i; + u8 tmp; + struct i2c_msg *msg = i2c->msg; + void __iomem *base = i2c->base; + bool read = !!(msg->flags & I2C_M_RD); + struct viai2c_zhaoxin *priv = i2c->pltfm_priv; + + /* get the received data */ + if (read) + for (i = 0; i < priv->xfer_len; i++) + msg->buf[i2c->xfered_len + i] = ioread8(base + ZXI2C_REG_HRDR); + + i2c->xfered_len += priv->xfer_len; + if (i2c->xfered_len == msg->len) + return 1; + + /* reset fifo buffer */ + tmp = ioread8(base + ZXI2C_REG_HCR); + iowrite8(tmp | ZXI2C_HCR_RST_FIFO, base + ZXI2C_REG_HCR); + + /* set xfer len */ + priv->xfer_len = min_t(u16, msg->len - i2c->xfered_len, ZXI2C_FIFO_SIZE); + if (read) { + iowrite8(priv->xfer_len - 1, base + ZXI2C_REG_HRLR); } else { - u16 tcr_val = i2c->tcr; + iowrite8(priv->xfer_len - 1, base + ZXI2C_REG_HTLR); + /* set write data */ + for (i = 0; i < priv->xfer_len; i++) + iowrite8(msg->buf[i2c->xfered_len + i], base + ZXI2C_REG_HTDR); + } - /* start transmission */ - tcr_val |= read ? VIAI2C_TCR_READ : 0; - writew(tcr_val | msg->addr, base + VIAI2C_REG_TCR); + /* prepare to stop transmission */ + if (priv->hrv && msg->len == (i2c->xfered_len + priv->xfer_len)) { + tmp = ioread8(base + VIAI2C_REG_CR); + tmp |= read ? 
VIAI2C_CR_RX_END : VIAI2C_CR_TX_END; + iowrite8(tmp, base + VIAI2C_REG_CR); } + /* continue transmission */ + tmp = ioread8(base + VIAI2C_REG_CR); + iowrite8(tmp |= VIAI2C_CR_CPU_RDY, base + VIAI2C_REG_CR); + return 0; } @@ -135,7 +164,7 @@ static int zxi2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int priv->xfer_len = 0; i2c->xfered_len = 0; - viai2c_fifo_irq_xfer(i2c, 0); + viai2c_fifo_xfer(i2c); if (!wait_for_completion_timeout(&i2c->complete, VIAI2C_TIMEOUT)) return -ETIMEDOUT; @@ -228,6 +257,36 @@ static void zxi2c_get_bus_speed(struct viai2c *i2c) dev_info(i2c->dev, "speed mode is %s\n", i2c_freq_mode_string(params[0])); } +static irqreturn_t zxi2c_isr(int irq, void *data) +{ + struct viai2c *i2c = data; + u8 status; + + /* save the status and write-clear it */ + status = readw(i2c->base + VIAI2C_REG_ISR); + if (!status) + return IRQ_NONE; + + writew(status, i2c->base + VIAI2C_REG_ISR); + + i2c->ret = 0; + if (status & VIAI2C_ISR_NACK_ADDR) + i2c->ret = -EIO; + + if (!i2c->ret) { + if (i2c->mode == VIAI2C_BYTE_MODE) + i2c->ret = viai2c_irq_xfer(i2c); + else + i2c->ret = viai2c_fifo_irq_xfer(i2c); + } + + /* All the data has been successfully transferred or error occurred */ + if (i2c->ret) + complete(&i2c->complete); + + return IRQ_HANDLED; +} + static int zxi2c_probe(struct platform_device *pdev) { int error; @@ -239,6 +298,16 @@ static int zxi2c_probe(struct platform_device *pdev) if (error) return error; + i2c->irq = platform_get_irq(pdev, 0); + if (i2c->irq < 0) + return i2c->irq; + + error = devm_request_irq(&pdev->dev, i2c->irq, zxi2c_isr, + IRQF_SHARED, pdev->name, i2c); + if (error) + return dev_err_probe(&pdev->dev, error, + "failed to request irq %i\n", i2c->irq); + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index db0d1ac82910..7e7b15440832 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ 
-1067,6 +1067,7 @@ EXPORT_SYMBOL(i2c_find_device_by_fwnode); static const struct i2c_device_id dummy_id[] = { { "dummy", 0 }, + { "smbus_host_notify", 0 }, { }, }; diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c index a49642bbae4b..23a11e4e9256 100644 --- a/drivers/i2c/i2c-slave-testunit.c +++ b/drivers/i2c/i2c-slave-testunit.c @@ -118,9 +118,19 @@ static int i2c_slave_testunit_slave_cb(struct i2c_client *client, queue_delayed_work(system_long_wq, &tu->worker, msecs_to_jiffies(10 * tu->regs[TU_REG_DELAY])); } - fallthrough; + + /* + * Reset reg_idx to avoid that work gets queued again in case of + * STOP after a following read message. But do not clear TU regs + * here because we still need them in the workqueue! + */ + tu->reg_idx = 0; + break; case I2C_SLAVE_WRITE_REQUESTED: + if (test_bit(TU_FLAG_IN_PROCESS, &tu->flags)) + return -EBUSY; + memset(tu->regs, 0, TU_NUM_REGS); tu->reg_idx = 0; break; diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index c2da5066e9a7..80b57d3ee3a7 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -330,6 +330,8 @@ config DMARD10 config FXLS8962AF tristate depends on I2C || !I2C # cannot be built-in for modular I2C + select IIO_BUFFER + select IIO_KFIFO_BUF config FXLS8962AF_I2C tristate "NXP FXLS8962AF/FXLS8964AF Accelerometer I2C Driver" diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 353a97f9c086..13ea8a1073d2 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -157,6 +157,8 @@ static int ad7266_read_raw(struct iio_dev *indio_dev, ret = ad7266_read_single(st, val, chan->address); iio_device_release_direct_mode(indio_dev); + if (ret < 0) + return ret; *val = (*val >> 2) & 0xfff; if (chan->scan_type.sign == 's') *val = sign_extend32(*val, diff --git a/drivers/iio/adc/xilinx-ams.c b/drivers/iio/adc/xilinx-ams.c index f0b71a1220e0..f52abf759260 100644 --- a/drivers/iio/adc/xilinx-ams.c +++ b/drivers/iio/adc/xilinx-ams.c 
@@ -414,8 +414,12 @@ static void ams_enable_channel_sequence(struct iio_dev *indio_dev) /* Run calibration of PS & PL as part of the sequence */ scan_mask = BIT(0) | BIT(AMS_PS_SEQ_MAX); - for (i = 0; i < indio_dev->num_channels; i++) - scan_mask |= BIT_ULL(indio_dev->channels[i].scan_index); + for (i = 0; i < indio_dev->num_channels; i++) { + const struct iio_chan_spec *chan = &indio_dev->channels[i]; + + if (chan->scan_index < AMS_CTRL_SEQ_BASE) + scan_mask |= BIT_ULL(chan->scan_index); + } if (ams->ps_base) { /* put sysmon in a soft reset to change the sequence */ diff --git a/drivers/iio/chemical/bme680.h b/drivers/iio/chemical/bme680.h index 4edc5d21cb9f..f959252a4fe6 100644 --- a/drivers/iio/chemical/bme680.h +++ b/drivers/iio/chemical/bme680.h @@ -54,7 +54,9 @@ #define BME680_NB_CONV_MASK GENMASK(3, 0) #define BME680_REG_MEAS_STAT_0 0x1D +#define BME680_NEW_DATA_BIT BIT(7) #define BME680_GAS_MEAS_BIT BIT(6) +#define BME680_MEAS_BIT BIT(5) /* Calibration Parameters */ #define BME680_T2_LSB_REG 0x8A diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index ef5e0e46fd34..500f56834b01 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -10,6 +10,7 @@ */ #include <linux/acpi.h> #include <linux/bitfield.h> +#include <linux/delay.h> #include <linux/device.h> #include <linux/module.h> #include <linux/log2.h> @@ -38,7 +39,7 @@ struct bme680_calib { s8 par_h3; s8 par_h4; s8 par_h5; - s8 par_h6; + u8 par_h6; s8 par_h7; s8 par_gh1; s16 par_gh2; @@ -342,10 +343,10 @@ static s16 bme680_compensate_temp(struct bme680_data *data, if (!calib->par_t2) bme680_read_calib(data, calib); - var1 = (adc_temp >> 3) - (calib->par_t1 << 1); + var1 = (adc_temp >> 3) - ((s32)calib->par_t1 << 1); var2 = (var1 * calib->par_t2) >> 11; var3 = ((var1 >> 1) * (var1 >> 1)) >> 12; - var3 = (var3 * (calib->par_t3 << 4)) >> 14; + var3 = (var3 * ((s32)calib->par_t3 << 4)) >> 14; data->t_fine = var2 + var3; calc_temp = 
(data->t_fine * 5 + 128) >> 8; @@ -368,9 +369,9 @@ static u32 bme680_compensate_press(struct bme680_data *data, var1 = (data->t_fine >> 1) - 64000; var2 = ((((var1 >> 2) * (var1 >> 2)) >> 11) * calib->par_p6) >> 2; var2 = var2 + (var1 * calib->par_p5 << 1); - var2 = (var2 >> 2) + (calib->par_p4 << 16); + var2 = (var2 >> 2) + ((s32)calib->par_p4 << 16); var1 = (((((var1 >> 2) * (var1 >> 2)) >> 13) * - (calib->par_p3 << 5)) >> 3) + + ((s32)calib->par_p3 << 5)) >> 3) + ((calib->par_p2 * var1) >> 1); var1 = var1 >> 18; var1 = ((32768 + var1) * calib->par_p1) >> 15; @@ -388,7 +389,7 @@ static u32 bme680_compensate_press(struct bme680_data *data, var3 = ((press_comp >> 8) * (press_comp >> 8) * (press_comp >> 8) * calib->par_p10) >> 17; - press_comp += (var1 + var2 + var3 + (calib->par_p7 << 7)) >> 4; + press_comp += (var1 + var2 + var3 + ((s32)calib->par_p7 << 7)) >> 4; return press_comp; } @@ -414,7 +415,7 @@ static u32 bme680_compensate_humid(struct bme680_data *data, (((temp_scaled * ((temp_scaled * calib->par_h5) / 100)) >> 6) / 100) + (1 << 14))) >> 10; var3 = var1 * var2; - var4 = calib->par_h6 << 7; + var4 = (s32)calib->par_h6 << 7; var4 = (var4 + ((temp_scaled * calib->par_h7) / 100)) >> 4; var5 = ((var3 >> 14) * (var3 >> 14)) >> 10; var6 = (var4 * var5) >> 1; @@ -532,6 +533,43 @@ static u8 bme680_oversampling_to_reg(u8 val) return ilog2(val) + 1; } +/* + * Taken from Bosch BME680 API: + * https://github.com/boschsensortec/BME68x_SensorAPI/blob/v4.4.8/bme68x.c#L490 + */ +static int bme680_wait_for_eoc(struct bme680_data *data) +{ + struct device *dev = regmap_get_device(data->regmap); + unsigned int check; + int ret; + /* + * (Sum of oversampling ratios * time per oversampling) + + * TPH measurement + gas measurement + wait transition from forced mode + * + heater duration + */ + int wait_eoc_us = ((data->oversampling_temp + data->oversampling_press + + data->oversampling_humid) * 1936) + (477 * 4) + + (477 * 5) + 1000 + (data->heater_dur * 1000); + + 
usleep_range(wait_eoc_us, wait_eoc_us + 100); + + ret = regmap_read(data->regmap, BME680_REG_MEAS_STAT_0, &check); + if (ret) { + dev_err(dev, "failed to read measurement status register.\n"); + return ret; + } + if (check & BME680_MEAS_BIT) { + dev_err(dev, "Device measurement cycle incomplete.\n"); + return -EBUSY; + } + if (!(check & BME680_NEW_DATA_BIT)) { + dev_err(dev, "No new data available from the device.\n"); + return -ENODATA; + } + + return 0; +} + static int bme680_chip_config(struct bme680_data *data) { struct device *dev = regmap_get_device(data->regmap); @@ -622,6 +660,10 @@ static int bme680_read_temp(struct bme680_data *data, int *val) if (ret < 0) return ret; + ret = bme680_wait_for_eoc(data); + if (ret) + return ret; + ret = regmap_bulk_read(data->regmap, BME680_REG_TEMP_MSB, &tmp, 3); if (ret < 0) { @@ -678,7 +720,7 @@ static int bme680_read_press(struct bme680_data *data, } *val = bme680_compensate_press(data, adc_press); - *val2 = 100; + *val2 = 1000; return IIO_VAL_FRACTIONAL; } @@ -738,6 +780,10 @@ static int bme680_read_gas(struct bme680_data *data, if (ret < 0) return ret; + ret = bme680_wait_for_eoc(data); + if (ret) + return ret; + ret = regmap_read(data->regmap, BME680_REG_MEAS_STAT_0, &check); if (check & BME680_GAS_MEAS_BIT) { dev_err(dev, "gas measurement incomplete\n"); diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig index 3c2bf620f00f..ee0d9798d8b4 100644 --- a/drivers/iio/dac/Kconfig +++ b/drivers/iio/dac/Kconfig @@ -133,7 +133,7 @@ config AD5624R_SPI config AD9739A tristate "Analog Devices AD9739A RF DAC spi driver" - depends on SPI || COMPILE_TEST + depends on SPI select REGMAP_SPI select IIO_BACKEND help diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c index cdc4789213ba..a82dcc3da421 100644 --- a/drivers/iio/humidity/hdc3020.c +++ b/drivers/iio/humidity/hdc3020.c @@ -19,6 +19,7 @@ #include <linux/i2c.h> #include <linux/init.h> #include <linux/interrupt.h> +#include <linux/math64.h> 
#include <linux/module.h> #include <linux/mutex.h> #include <linux/pm.h> @@ -66,8 +67,10 @@ #define HDC3020_CRC8_POLYNOMIAL 0x31 -#define HDC3020_MIN_TEMP -40 -#define HDC3020_MAX_TEMP 125 +#define HDC3020_MIN_TEMP_MICRO -39872968 +#define HDC3020_MAX_TEMP_MICRO 124875639 +#define HDC3020_MAX_TEMP_HYST_MICRO 164748607 +#define HDC3020_MAX_HUM_MICRO 99220264 struct hdc3020_data { struct i2c_client *client; @@ -368,6 +371,105 @@ static int hdc3020_write_raw(struct iio_dev *indio_dev, return -EINVAL; } +static int hdc3020_thresh_get_temp(u16 thresh) +{ + int temp; + + /* + * Get the temperature threshold from 9 LSBs, shift them to get + * the truncated temperature threshold representation and + * calculate the threshold according to the formula in the + * datasheet. Result is degree celsius scaled by 65535. + */ + temp = FIELD_GET(HDC3020_THRESH_TEMP_MASK, thresh) << + HDC3020_THRESH_TEMP_TRUNC_SHIFT; + + return -2949075 + (175 * temp); +} + +static int hdc3020_thresh_get_hum(u16 thresh) +{ + int hum; + + /* + * Get the humidity threshold from 7 MSBs, shift them to get the + * truncated humidity threshold representation and calculate the + * threshold according to the formula in the datasheet. Result is + * percent scaled by 65535. + */ + hum = FIELD_GET(HDC3020_THRESH_HUM_MASK, thresh) << + HDC3020_THRESH_HUM_TRUNC_SHIFT; + + return hum * 100; +} + +static u16 hdc3020_thresh_set_temp(int s_temp, u16 curr_thresh) +{ + u64 temp; + u16 thresh; + + /* + * Calculate temperature threshold, shift it down to get the + * truncated threshold representation in the 9LSBs while keeping + * the current humidity threshold in the 7 MSBs. 
+ */ + temp = (u64)(s_temp + 45000000) * 65535ULL; + temp = div_u64(temp, 1000000 * 175) >> HDC3020_THRESH_TEMP_TRUNC_SHIFT; + thresh = FIELD_PREP(HDC3020_THRESH_TEMP_MASK, temp); + thresh |= (FIELD_GET(HDC3020_THRESH_HUM_MASK, curr_thresh) << + HDC3020_THRESH_HUM_TRUNC_SHIFT); + + return thresh; +} + +static u16 hdc3020_thresh_set_hum(int s_hum, u16 curr_thresh) +{ + u64 hum; + u16 thresh; + + /* + * Calculate humidity threshold, shift it down and up to get the + * truncated threshold representation in the 7MSBs while keeping + * the current temperature threshold in the 9 LSBs. + */ + hum = (u64)(s_hum) * 65535ULL; + hum = div_u64(hum, 1000000 * 100) >> HDC3020_THRESH_HUM_TRUNC_SHIFT; + thresh = FIELD_PREP(HDC3020_THRESH_HUM_MASK, hum); + thresh |= FIELD_GET(HDC3020_THRESH_TEMP_MASK, curr_thresh); + + return thresh; +} + +static +int hdc3020_thresh_clr(s64 s_thresh, s64 s_hyst, enum iio_event_direction dir) +{ + s64 s_clr; + + /* + * Include directions when calculation the clear value, + * since hysteresis is unsigned by definition and the + * clear value is an absolute value which is signed. 
+ */ + if (dir == IIO_EV_DIR_RISING) + s_clr = s_thresh - s_hyst; + else + s_clr = s_thresh + s_hyst; + + /* Divide by 65535 to get units of micro */ + return div_s64(s_clr, 65535); +} + +static int _hdc3020_write_thresh(struct hdc3020_data *data, u16 reg, u16 val) +{ + u8 buf[5]; + + put_unaligned_be16(reg, buf); + put_unaligned_be16(val, buf + 2); + buf[4] = crc8(hdc3020_crc8_table, buf + 2, 2, CRC8_INIT_VALUE); + + return hdc3020_write_bytes(data, buf, 5); +} + static int hdc3020_write_thresh(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, enum iio_event_type type, @@ -376,67 +478,126 @@ static int hdc3020_write_thresh(struct iio_dev *indio_dev, int val, int val2) { struct hdc3020_data *data = iio_priv(indio_dev); - u8 buf[5]; - u64 tmp; - u16 reg; - int ret; - - /* Supported temperature range is from –40 to 125 degree celsius */ - if (val < HDC3020_MIN_TEMP || val > HDC3020_MAX_TEMP) - return -EINVAL; - - /* Select threshold register */ - if (info == IIO_EV_INFO_VALUE) { - if (dir == IIO_EV_DIR_RISING) - reg = HDC3020_S_T_RH_THRESH_HIGH; - else - reg = HDC3020_S_T_RH_THRESH_LOW; + u16 reg, reg_val, reg_thresh_rd, reg_clr_rd, reg_thresh_wr, reg_clr_wr; + s64 s_thresh, s_hyst, s_clr; + int s_val, thresh, clr, ret; + + /* Select threshold registers */ + if (dir == IIO_EV_DIR_RISING) { + reg_thresh_rd = HDC3020_R_T_RH_THRESH_HIGH; + reg_thresh_wr = HDC3020_S_T_RH_THRESH_HIGH; + reg_clr_rd = HDC3020_R_T_RH_THRESH_HIGH_CLR; + reg_clr_wr = HDC3020_S_T_RH_THRESH_HIGH_CLR; } else { - if (dir == IIO_EV_DIR_RISING) - reg = HDC3020_S_T_RH_THRESH_HIGH_CLR; - else - reg = HDC3020_S_T_RH_THRESH_LOW_CLR; + reg_thresh_rd = HDC3020_R_T_RH_THRESH_LOW; + reg_thresh_wr = HDC3020_S_T_RH_THRESH_LOW; + reg_clr_rd = HDC3020_R_T_RH_THRESH_LOW_CLR; + reg_clr_wr = HDC3020_S_T_RH_THRESH_LOW_CLR; } guard(mutex)(&data->lock); - ret = hdc3020_read_be16(data, reg); + ret = hdc3020_read_be16(data, reg_thresh_rd); + if (ret < 0) + return ret; + + thresh = ret; + ret = 
hdc3020_read_be16(data, reg_clr_rd); if (ret < 0) return ret; + clr = ret; + /* Scale value to include decimal part into calculations */ + s_val = (val < 0) ? (val * 1000000 - val2) : (val * 1000000 + val2); switch (chan->type) { case IIO_TEMP: - /* - * Calculate temperature threshold, shift it down to get the - * truncated threshold representation in the 9LSBs while keeping - * the current humidity threshold in the 7 MSBs. - */ - tmp = ((u64)(((val + 45) * MICRO) + val2)) * 65535ULL; - tmp = div_u64(tmp, MICRO * 175); - val = tmp >> HDC3020_THRESH_TEMP_TRUNC_SHIFT; - val = FIELD_PREP(HDC3020_THRESH_TEMP_MASK, val); - val |= (FIELD_GET(HDC3020_THRESH_HUM_MASK, ret) << - HDC3020_THRESH_HUM_TRUNC_SHIFT); + switch (info) { + case IIO_EV_INFO_VALUE: + s_val = max(s_val, HDC3020_MIN_TEMP_MICRO); + s_val = min(s_val, HDC3020_MAX_TEMP_MICRO); + reg = reg_thresh_wr; + reg_val = hdc3020_thresh_set_temp(s_val, thresh); + ret = _hdc3020_write_thresh(data, reg, reg_val); + if (ret < 0) + return ret; + + /* Calculate old hysteresis */ + s_thresh = (s64)hdc3020_thresh_get_temp(thresh) * 1000000; + s_clr = (s64)hdc3020_thresh_get_temp(clr) * 1000000; + s_hyst = div_s64(abs(s_thresh - s_clr), 65535); + /* Set new threshold */ + thresh = reg_val; + /* Set old hysteresis */ + s_val = s_hyst; + fallthrough; + case IIO_EV_INFO_HYSTERESIS: + /* + * Function hdc3020_thresh_get_temp returns temperature + * in degree celsius scaled by 65535. Scale by 1000000 + * to be able to subtract scaled hysteresis value. + */ + s_thresh = (s64)hdc3020_thresh_get_temp(thresh) * 1000000; + /* + * Units of s_val are in micro degree celsius, scale by + * 65535 to get same units as s_thresh. 
+ */ + s_val = min(abs(s_val), HDC3020_MAX_TEMP_HYST_MICRO); + s_hyst = (s64)s_val * 65535; + s_clr = hdc3020_thresh_clr(s_thresh, s_hyst, dir); + s_clr = max(s_clr, HDC3020_MIN_TEMP_MICRO); + s_clr = min(s_clr, HDC3020_MAX_TEMP_MICRO); + reg = reg_clr_wr; + reg_val = hdc3020_thresh_set_temp(s_clr, clr); + break; + default: + return -EOPNOTSUPP; + } break; case IIO_HUMIDITYRELATIVE: - /* - * Calculate humidity threshold, shift it down and up to get the - * truncated threshold representation in the 7MSBs while keeping - * the current temperature threshold in the 9 LSBs. - */ - tmp = ((u64)((val * MICRO) + val2)) * 65535ULL; - tmp = div_u64(tmp, MICRO * 100); - val = tmp >> HDC3020_THRESH_HUM_TRUNC_SHIFT; - val = FIELD_PREP(HDC3020_THRESH_HUM_MASK, val); - val |= FIELD_GET(HDC3020_THRESH_TEMP_MASK, ret); + s_val = (s_val < 0) ? 0 : min(s_val, HDC3020_MAX_HUM_MICRO); + switch (info) { + case IIO_EV_INFO_VALUE: + reg = reg_thresh_wr; + reg_val = hdc3020_thresh_set_hum(s_val, thresh); + ret = _hdc3020_write_thresh(data, reg, reg_val); + if (ret < 0) + return ret; + + /* Calculate old hysteresis */ + s_thresh = (s64)hdc3020_thresh_get_hum(thresh) * 1000000; + s_clr = (s64)hdc3020_thresh_get_hum(clr) * 1000000; + s_hyst = div_s64(abs(s_thresh - s_clr), 65535); + /* Set new threshold */ + thresh = reg_val; + /* Try to set old hysteresis */ + s_val = min(abs(s_hyst), HDC3020_MAX_HUM_MICRO); + fallthrough; + case IIO_EV_INFO_HYSTERESIS: + /* + * Function hdc3020_thresh_get_hum returns relative + * humidity in percent scaled by 65535. Scale by 1000000 + * to be able to subtract scaled hysteresis value. + */ + s_thresh = (s64)hdc3020_thresh_get_hum(thresh) * 1000000; + /* + * Units of s_val are in micro percent, scale by 65535 + * to get same units as s_thresh. 
+ */ + s_hyst = (s64)s_val * 65535; + s_clr = hdc3020_thresh_clr(s_thresh, s_hyst, dir); + s_clr = max(s_clr, 0); + s_clr = min(s_clr, HDC3020_MAX_HUM_MICRO); + reg = reg_clr_wr; + reg_val = hdc3020_thresh_set_hum(s_clr, clr); + break; + default: + return -EOPNOTSUPP; + } break; default: return -EOPNOTSUPP; } - put_unaligned_be16(reg, buf); - put_unaligned_be16(val, buf + 2); - buf[4] = crc8(hdc3020_crc8_table, buf + 2, 2, CRC8_INIT_VALUE); - return hdc3020_write_bytes(data, buf, 5); + return _hdc3020_write_thresh(data, reg, reg_val); } static int hdc3020_read_thresh(struct iio_dev *indio_dev, @@ -447,48 +608,60 @@ static int hdc3020_read_thresh(struct iio_dev *indio_dev, int *val, int *val2) { struct hdc3020_data *data = iio_priv(indio_dev); - u16 reg; - int ret; + u16 reg_thresh, reg_clr; + int thresh, clr, ret; - /* Select threshold register */ - if (info == IIO_EV_INFO_VALUE) { - if (dir == IIO_EV_DIR_RISING) - reg = HDC3020_R_T_RH_THRESH_HIGH; - else - reg = HDC3020_R_T_RH_THRESH_LOW; + /* Select threshold registers */ + if (dir == IIO_EV_DIR_RISING) { + reg_thresh = HDC3020_R_T_RH_THRESH_HIGH; + reg_clr = HDC3020_R_T_RH_THRESH_HIGH_CLR; } else { - if (dir == IIO_EV_DIR_RISING) - reg = HDC3020_R_T_RH_THRESH_HIGH_CLR; - else - reg = HDC3020_R_T_RH_THRESH_LOW_CLR; + reg_thresh = HDC3020_R_T_RH_THRESH_LOW; + reg_clr = HDC3020_R_T_RH_THRESH_LOW_CLR; } guard(mutex)(&data->lock); - ret = hdc3020_read_be16(data, reg); + ret = hdc3020_read_be16(data, reg_thresh); if (ret < 0) return ret; switch (chan->type) { case IIO_TEMP: - /* - * Get the temperature threshold from 9 LSBs, shift them to get - * the truncated temperature threshold representation and - * calculate the threshold according to the formula in the - * datasheet. 
- */ - *val = FIELD_GET(HDC3020_THRESH_TEMP_MASK, ret); - *val = *val << HDC3020_THRESH_TEMP_TRUNC_SHIFT; - *val = -2949075 + (175 * (*val)); + thresh = hdc3020_thresh_get_temp(ret); + switch (info) { + case IIO_EV_INFO_VALUE: + *val = thresh; + break; + case IIO_EV_INFO_HYSTERESIS: + ret = hdc3020_read_be16(data, reg_clr); + if (ret < 0) + return ret; + + clr = hdc3020_thresh_get_temp(ret); + *val = abs(thresh - clr); + break; + default: + return -EOPNOTSUPP; + } *val2 = 65535; return IIO_VAL_FRACTIONAL; case IIO_HUMIDITYRELATIVE: - /* - * Get the humidity threshold from 7 MSBs, shift them to get the - * truncated humidity threshold representation and calculate the - * threshold according to the formula in the datasheet. - */ - *val = FIELD_GET(HDC3020_THRESH_HUM_MASK, ret); - *val = (*val << HDC3020_THRESH_HUM_TRUNC_SHIFT) * 100; + thresh = hdc3020_thresh_get_hum(ret); + switch (info) { + case IIO_EV_INFO_VALUE: + *val = thresh; + break; + case IIO_EV_INFO_HYSTERESIS: + ret = hdc3020_read_be16(data, reg_clr); + if (ret < 0) + return ret; + + clr = hdc3020_thresh_get_hum(ret); + *val = abs(thresh - clr); + break; + default: + return -EOPNOTSUPP; + } *val2 = 65535; return IIO_VAL_FRACTIONAL; default: diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index 16de57846bd9..2e84776f4fbd 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -315,7 +315,7 @@ int iio_trigger_attach_poll_func(struct iio_trigger *trig, * this is the case if the IIO device and the trigger device share the * same parent device. 
*/ - if (iio_validate_own_trigger(pf->indio_dev, trig)) + if (!iio_validate_own_trigger(pf->indio_dev, trig)) trig->attached_own_device = true; return ret; diff --git a/drivers/iio/light/apds9306.c b/drivers/iio/light/apds9306.c index d6627b3e6000..66a063ea3db4 100644 --- a/drivers/iio/light/apds9306.c +++ b/drivers/iio/light/apds9306.c @@ -583,8 +583,8 @@ static int apds9306_intg_time_set(struct apds9306_data *data, int val2) return ret; intg_old = iio_gts_find_int_time_by_sel(&data->gts, intg_time_idx); - if (ret < 0) - return ret; + if (intg_old < 0) + return intg_old; if (intg_old == val2) return 0; diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 9dca451ed522..6974922e5609 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -107,8 +107,6 @@ struct bnxt_re_gsi_context { struct bnxt_re_sqp_entries *sqp_tbl; }; -#define BNXT_RE_MIN_MSIX 2 -#define BNXT_RE_MAX_MSIX 9 #define BNXT_RE_AEQ_IDX 0 #define BNXT_RE_NQ_IDX 1 #define BNXT_RE_GEN_P5_MAX_VF 64 @@ -168,7 +166,7 @@ struct bnxt_re_dev { struct bnxt_qplib_rcfw rcfw; /* NQ */ - struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX]; + struct bnxt_qplib_nq nq[BNXT_MAX_ROCE_MSIX]; /* Device Resources */ struct bnxt_qplib_dev_attr dev_attr; diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c index 4f13423ecdbd..887b09dd86e7 100644 --- a/drivers/infiniband/hw/mana/mr.c +++ b/drivers/infiniband/hw/mana/mr.c @@ -112,6 +112,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x", start, iova, length, access_flags); + access_flags &= ~IB_ACCESS_OPTIONAL; if (access_flags & ~VALID_MR_FLAGS) return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2366c46eebc8..43660c831b22 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ 
-3759,10 +3759,10 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; return 0; -err: - mlx5r_macsec_dealloc_gids(dev); err_mp: mlx5_ib_cleanup_multiport_master(dev); +err: + mlx5r_macsec_dealloc_gids(dev); return err; } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ecc111ed5d86..d3c1f63791a2 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -246,6 +246,7 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (ent->rb_key.access_mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats); MLX5_SET(mkc, mkc, translations_octword_size, get_mkc_octo_size(ent->rb_key.access_mode, @@ -641,10 +642,8 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, new = &((*new)->rb_left); if (cmp < 0) new = &((*new)->rb_right); - if (cmp == 0) { - mutex_unlock(&cache->rb_lock); + if (cmp == 0) return -EEXIST; - } } /* Add new node and rebalance tree. 
*/ @@ -719,6 +718,8 @@ static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, } mr->mmkey.cache_ent = ent; mr->mmkey.type = MLX5_MKEY_MR; + mr->mmkey.rb_key = ent->rb_key; + mr->mmkey.cacheable = true; init_waitqueue_head(&mr->mmkey.wait); return mr; } @@ -1169,7 +1170,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->ibmr.pd = pd; mr->umem = umem; mr->page_shift = order_base_2(page_size); - mr->mmkey.cacheable = true; set_mr_fields(dev, mr, umem->length, access_flags, iova); return mr; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index a056ea835da5..84be0c3d5699 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -199,17 +199,20 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, int err; struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + __u32 max_sge_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq) / + sizeof(struct mlx5_wqe_data_seg); if (init_attr->srq_type != IB_SRQT_BASIC && init_attr->srq_type != IB_SRQT_XRC && init_attr->srq_type != IB_SRQT_TM) return -EOPNOTSUPP; - /* Sanity check SRQ size before proceeding */ - if (init_attr->attr.max_wr >= max_srq_wqes) { - mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", - init_attr->attr.max_wr, - max_srq_wqes); + /* Sanity check SRQ and sge size before proceeding */ + if (init_attr->attr.max_wr >= max_srq_wqes || + init_attr->attr.max_sge > max_sge_sz) { + mlx5_ib_dbg(dev, "max_wr %d,wr_cap %d,max_sge %d, sge_cap:%d\n", + init_attr->attr.max_wr, max_srq_wqes, + init_attr->attr.max_sge, max_sge_sz); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c6a7fa3054fa..6596a85723c9 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -344,6 +344,19 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, * receive buffer later. 
For rmda operations additional * length checks are performed in check_rkey. */ + if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) { + unsigned int payload = payload_size(pkt); + unsigned int recv_buffer_len = 0; + int i; + + for (i = 0; i < qp->resp.wqe->dma.num_sge; i++) + recv_buffer_len += qp->resp.wqe->dma.sge[i].length; + if (payload + 40 > recv_buffer_len) { + rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n"); + return RESPST_ERR_LENGTH; + } + } + if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))) { unsigned int mtu = qp->mtu; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index c7d4d8ab5a09..de6238ee4379 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -812,7 +812,7 @@ static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe, int i; for (i = 0; i < ibwr->num_sge; i++, sge++) { - memcpy(p, ib_virt_dma_to_page(sge->addr), sge->length); + memcpy(p, ib_virt_dma_to_ptr(sge->addr), sge->length); p += sge->length; } } diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 70f0654c58b6..2b8370ecf42a 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -209,6 +209,7 @@ static const struct xpad_device { { 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 }, { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE }, + { 0x0b05, 0x1abb, "ASUS ROG RAIKIRI PRO", 0, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 4e38229404b4..b4723ea395eb 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1477,15 +1477,46 @@ static 
void elantech_disconnect(struct psmouse *psmouse) } /* + * Some hw_version 4 models fail to properly activate absolute mode on + * resume without going through disable/enable cycle. + */ +static const struct dmi_system_id elantech_needs_reenable[] = { +#if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + /* Lenovo N24 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "81AF"), + }, + }, +#endif + { } +}; + +/* * Put the touchpad back into absolute mode when reconnecting */ static int elantech_reconnect(struct psmouse *psmouse) { + int err; + psmouse_reset(psmouse); if (elantech_detect(psmouse, 0)) return -1; + if (dmi_check_system(elantech_needs_reenable)) { + err = ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_DISABLE); + if (err) + psmouse_warn(psmouse, "failed to deactivate mouse on %s: %d\n", + psmouse->ps2dev.serio->phys, err); + + err = ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_ENABLE); + if (err) + psmouse_warn(psmouse, "failed to reactivate mouse on %s: %d\n", + psmouse->ps2dev.serio->phys, err); + } + if (elantech_set_absolute_mode(psmouse)) { psmouse_err(psmouse, "failed to put touchpad back into absolute mode.\n"); diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index dfc6c581873b..5b50475ec414 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -76,7 +76,7 @@ static inline void i8042_write_command(int val) #define SERIO_QUIRK_PROBE_DEFER BIT(5) #define SERIO_QUIRK_RESET_ALWAYS BIT(6) #define SERIO_QUIRK_RESET_NEVER BIT(7) -#define SERIO_QUIRK_DIECT BIT(8) +#define SERIO_QUIRK_DIRECT BIT(8) #define SERIO_QUIRK_DUMBKBD BIT(9) #define SERIO_QUIRK_NOLOOP BIT(10) #define SERIO_QUIRK_NOTIMEOUT BIT(11) @@ -1332,6 +1332,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + /* + * The 
Ayaneo Kun is a handheld device where some the buttons + * are handled by an AT keyboard. The keyboard is usually + * detected as raw, but sometimes, usually after a cold boot, + * it is detected as translated. Make sure that the keyboard + * is always in raw mode. + */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AYANEO"), + DMI_MATCH(DMI_BOARD_NAME, "KUN"), + }, + .driver_data = (void *)(SERIO_QUIRK_DIRECT) + }, { } }; @@ -1655,7 +1669,7 @@ static void __init i8042_check_quirks(void) if (quirks & SERIO_QUIRK_RESET_NEVER) i8042_reset = I8042_RESET_NEVER; } - if (quirks & SERIO_QUIRK_DIECT) + if (quirks & SERIO_QUIRK_DIRECT) i8042_direct = true; if (quirks & SERIO_QUIRK_DUMBKBD) i8042_dumbkbd = true; diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index d2bbb436a77d..4d13db13b9e5 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1111,6 +1111,16 @@ static const struct of_device_id ads7846_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ads7846_dt_ids); +static const struct spi_device_id ads7846_spi_ids[] = { + { "tsc2046", 7846 }, + { "ads7843", 7843 }, + { "ads7845", 7845 }, + { "ads7846", 7846 }, + { "ads7873", 7873 }, + { }, +}; +MODULE_DEVICE_TABLE(spi, ads7846_spi_ids); + static const struct ads7846_platform_data *ads7846_get_props(struct device *dev) { struct ads7846_platform_data *pdata; @@ -1386,10 +1396,10 @@ static struct spi_driver ads7846_driver = { }, .probe = ads7846_probe, .remove = ads7846_remove, + .id_table = ads7846_spi_ids, }; module_spi_driver(ads7846_driver); MODULE_DESCRIPTION("ADS7846 TouchScreen Driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("spi:ads7846"); diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 31ffdc2a93f3..79bdb2b10949 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -261,8 +261,8 @@ static int ili251x_read_touch_data(struct i2c_client *client, u8 *data) if 
(!error && data[0] == 2) { error = i2c_master_recv(client, data + ILI251X_DATA_SIZE1, ILI251X_DATA_SIZE2); - if (error >= 0 && error != ILI251X_DATA_SIZE2) - error = -EIO; + if (error >= 0) + error = error == ILI251X_DATA_SIZE2 ? 0 : -EIO; } return error; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 161248067776..c89d85b54a1a 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2743,6 +2743,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); + iommu_enable_gt(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index c2703599bb16..b19e8c0f48fa 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2061,6 +2061,12 @@ static void do_detach(struct iommu_dev_data *dev_data) struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + /* Clear DTE and flush the entry */ + amd_iommu_dev_update_dte(dev_data, false); + + /* Flush IOTLB and wait for the flushes to finish */ + amd_iommu_domain_flush_all(domain); + /* Clear GCR3 table */ if (pdom_is_sva_capable(domain)) destroy_gcr3_table(dev_data, domain); @@ -2069,12 +2075,6 @@ static void do_detach(struct iommu_dev_data *dev_data) dev_data->domain = NULL; list_del(&dev_data->list); - /* Clear DTE and flush the entry */ - amd_iommu_dev_update_dte(dev_data, false); - - /* Flush IOTLB and wait for the flushes to finish */ - amd_iommu_domain_flush_all(domain); - /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; domain->dev_cnt -= 1; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2e9811bf2a4e..fd11a080380c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2114,12 +2114,6 @@ 
static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) return ret; - ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); - if (ret) { - domain_detach_iommu(domain, iommu); - return ret; - } - info->domain = domain; spin_lock_irqsave(&domain->lock, flags); list_add(&info->link, &domain->devices); @@ -2137,15 +2131,21 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, else ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID); - if (ret) { - device_block_translation(dev); - return ret; - } + if (ret) + goto out_block_translation; if (sm_supported(info->iommu) || !domain_type_is_si(info->domain)) iommu_enable_pci_caps(info); + ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); + if (ret) + goto out_block_translation; + return 0; + +out_block_translation: + device_block_translation(dev); + return ret; } /** diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index c7ddebf312ad..b1f2080be2be 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -15,6 +15,7 @@ #include <linux/irqchip/chained_irq.h> #include <linux/kernel.h> #include <linux/syscore_ops.h> +#include <asm/numa.h> #define EIOINTC_REG_NODEMAP 0x14a0 #define EIOINTC_REG_IPMAP 0x14c0 @@ -339,7 +340,7 @@ static int __init pch_msi_parse_madt(union acpi_subtable_headers *header, int node; if (cpu_has_flatmode) - node = cpu_to_node(eiointc_priv[nr_pics - 1]->node * CORES_PER_EIO_NODE); + node = early_cpu_to_node(eiointc_priv[nr_pics - 1]->node * CORES_PER_EIO_NODE); else node = eiointc_priv[nr_pics - 1]->node; @@ -431,7 +432,7 @@ int __init eiointc_acpi_init(struct irq_domain *parent, goto out_free_handle; if (cpu_has_flatmode) - node = cpu_to_node(acpi_eiointc->node * CORES_PER_EIO_NODE); + node = early_cpu_to_node(acpi_eiointc->node * CORES_PER_EIO_NODE); else node = acpi_eiointc->node; acpi_set_vec_parent(node, priv->eiointc_domain, pch_group); diff --git 
a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index e4b33aed1c97..7c4fe7ab4b83 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -28,7 +28,7 @@ #define LIOINTC_INTC_CHIP_START 0x20 -#define LIOINTC_REG_INTC_STATUS (LIOINTC_INTC_CHIP_START + 0x20) +#define LIOINTC_REG_INTC_STATUS(core) (LIOINTC_INTC_CHIP_START + 0x20 + (core) * 8) #define LIOINTC_REG_INTC_EN_STATUS (LIOINTC_INTC_CHIP_START + 0x04) #define LIOINTC_REG_INTC_ENABLE (LIOINTC_INTC_CHIP_START + 0x08) #define LIOINTC_REG_INTC_DISABLE (LIOINTC_INTC_CHIP_START + 0x0c) @@ -217,7 +217,7 @@ static int liointc_init(phys_addr_t addr, unsigned long size, int revision, goto out_free_priv; for (i = 0; i < LIOINTC_NUM_CORES; i++) - priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS; + priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS(i); for (i = 0; i < LIOINTC_NUM_PARENT; i++) priv->handler[i].parent_int_map = parent_int_map[i]; diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index b423bec6458b..9d51f72a9d66 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -945,7 +945,7 @@ static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits) * The value is used by dm-thin to determine whether to pass down discards. The block layer * splits large discards on this boundary when this is set. 
*/ - limits->max_discard_sectors = + limits->max_hw_discard_sectors = (vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK); /* diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys-video.c b/drivers/media/pci/intel/ipu6/ipu6-isys-video.c index c8a33e1e910c..06090cc0a476 100644 --- a/drivers/media/pci/intel/ipu6/ipu6-isys-video.c +++ b/drivers/media/pci/intel/ipu6/ipu6-isys-video.c @@ -943,7 +943,7 @@ ipu6_isys_query_stream_by_source(struct ipu6_isys *isys, int source, u8 vc) return NULL; if (source < 0) { - dev_err(&stream->isys->adev->auxdev.dev, + dev_err(&isys->adev->auxdev.dev, "query stream with invalid port number\n"); return NULL; } diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys.c b/drivers/media/pci/intel/ipu6/ipu6-isys.c index 8b9b77719bb1..c4aff2e2009b 100644 --- a/drivers/media/pci/intel/ipu6/ipu6-isys.c +++ b/drivers/media/pci/intel/ipu6/ipu6-isys.c @@ -799,7 +799,7 @@ static int isys_register_devices(struct ipu6_isys *isys) isys->v4l2_dev.mdev = &isys->media_dev; isys->v4l2_dev.ctrl_handler = NULL; - ret = v4l2_device_register(&pdev->dev, &isys->v4l2_dev); + ret = v4l2_device_register(dev, &isys->v4l2_dev); if (ret < 0) goto out_media_device_unregister; diff --git a/drivers/media/pci/intel/ivsc/Kconfig b/drivers/media/pci/intel/ivsc/Kconfig index 407a800c81bc..a7d9607ecdc6 100644 --- a/drivers/media/pci/intel/ivsc/Kconfig +++ b/drivers/media/pci/intel/ivsc/Kconfig @@ -4,6 +4,7 @@ config INTEL_VSC tristate "Intel Visual Sensing Controller" depends on INTEL_MEI && ACPI && VIDEO_DEV + depends on IPU_BRIDGE || !IPU_BRIDGE select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API select V4L2_FWNODE diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index f2c0f144c0fc..dacd3c96c9f5 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -210,6 +210,7 @@ static const struct regmap_access_table axp313a_volatile_table = { static const struct regmap_range axp717_writeable_ranges[] = { regmap_reg_range(AXP717_IRQ0_EN, AXP717_IRQ4_EN), + 
regmap_reg_range(AXP717_IRQ0_STATE, AXP717_IRQ4_STATE), regmap_reg_range(AXP717_DCDC_OUTPUT_CONTROL, AXP717_CPUSLDO_CONTROL), }; diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 4c67e2c5a82e..a7a2bcedb37e 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1238,6 +1238,7 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl, struct fastrpc_phy_page pages[1]; char *name; int err; + bool scm_done = false; struct { int pgid; u32 namelen; @@ -1289,6 +1290,7 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl, fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err); goto err_map; } + scm_done = true; } } @@ -1320,10 +1322,11 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl, goto err_invoke; kfree(args); + kfree(name); return 0; err_invoke: - if (fl->cctx->vmcount) { + if (fl->cctx->vmcount && scm_done) { u64 src_perms = 0; struct qcom_scm_vmperm dst_perms; u32 i; @@ -1693,16 +1696,20 @@ static int fastrpc_get_info_from_dsp(struct fastrpc_user *fl, uint32_t *dsp_attr { struct fastrpc_invoke_args args[2] = { 0 }; - /* Capability filled in userspace */ + /* + * Capability filled in userspace. This carries the information + * about the remoteproc support which is fetched from the remoteproc + * sysfs node by userspace. 
+ */ dsp_attr_buf[0] = 0; + dsp_attr_buf_len -= 1; args[0].ptr = (u64)(uintptr_t)&dsp_attr_buf_len; args[0].length = sizeof(dsp_attr_buf_len); args[0].fd = -1; args[1].ptr = (u64)(uintptr_t)&dsp_attr_buf[1]; - args[1].length = dsp_attr_buf_len; + args[1].length = dsp_attr_buf_len * sizeof(u32); args[1].fd = -1; - fl->pd = USER_PD; return fastrpc_internal_invoke(fl, true, FASTRPC_DSP_UTILITIES_HANDLE, FASTRPC_SCALARS(0, 1, 1), args); @@ -1730,7 +1737,7 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap, if (!dsp_attributes) return -ENOMEM; - err = fastrpc_get_info_from_dsp(fl, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN); + err = fastrpc_get_info_from_dsp(fl, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES); if (err == DSP_UNSUPPORTED_API) { dev_info(&cctx->rpdev->dev, "Warning: DSP capabilities not supported on domain: %d\n", domain); @@ -1783,7 +1790,7 @@ static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp) if (err) return err; - if (copy_to_user(argp, &cap.capability, sizeof(cap.capability))) + if (copy_to_user(argp, &cap, sizeof(cap))) return -EFAULT; return 0; @@ -2080,6 +2087,16 @@ err_invoke: return err; } +static int is_attach_rejected(struct fastrpc_user *fl) +{ + /* Check if the device node is non-secure */ + if (!fl->is_secure_dev) { + dev_dbg(&fl->cctx->rpdev->dev, "untrusted app trying to attach to privileged DSP PD\n"); + return -EACCES; + } + return 0; +} + static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2092,13 +2109,19 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, err = fastrpc_invoke(fl, argp); break; case FASTRPC_IOCTL_INIT_ATTACH: - err = fastrpc_init_attach(fl, ROOT_PD); + err = is_attach_rejected(fl); + if (!err) + err = fastrpc_init_attach(fl, ROOT_PD); break; case FASTRPC_IOCTL_INIT_ATTACH_SNS: - err = fastrpc_init_attach(fl, SENSORS_PD); + err = is_attach_rejected(fl); + if (!err) + err = fastrpc_init_attach(fl, 
SENSORS_PD); break; case FASTRPC_IOCTL_INIT_CREATE_STATIC: - err = fastrpc_init_create_static_process(fl, argp); + err = is_attach_rejected(fl); + if (!err) + err = fastrpc_init_create_static_process(fl, argp); break; case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c index 16695cb5e69c..7c3d8bedf90b 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c @@ -153,7 +153,6 @@ static int pci1xxxx_eeprom_read(void *priv_t, unsigned int off, buf[byte] = readl(rb + MMAP_EEPROM_OFFSET(EEPROM_DATA_REG)); } - ret = byte; error: release_sys_lock(priv); return ret; @@ -197,7 +196,6 @@ static int pci1xxxx_eeprom_write(void *priv_t, unsigned int off, goto error; } } - ret = byte; error: release_sys_lock(priv); return ret; @@ -258,7 +256,6 @@ static int pci1xxxx_otp_read(void *priv_t, unsigned int off, buf[byte] = readl(rb + MMAP_OTP_OFFSET(OTP_RD_DATA_OFFSET)); } - ret = byte; error: release_sys_lock(priv); return ret; @@ -315,7 +312,6 @@ static int pci1xxxx_otp_write(void *priv_t, unsigned int off, goto error; } } - ret = byte; error: release_sys_lock(priv); return ret; diff --git a/drivers/misc/mei/platform-vsc.c b/drivers/misc/mei/platform-vsc.c index 1ec65d87488a..d02f6e881139 100644 --- a/drivers/misc/mei/platform-vsc.c +++ b/drivers/misc/mei/platform-vsc.c @@ -28,8 +28,8 @@ #define MEI_VSC_MAX_MSG_SIZE 512 -#define MEI_VSC_POLL_DELAY_US (50 * USEC_PER_MSEC) -#define MEI_VSC_POLL_TIMEOUT_US (200 * USEC_PER_MSEC) +#define MEI_VSC_POLL_DELAY_US (100 * USEC_PER_MSEC) +#define MEI_VSC_POLL_TIMEOUT_US (400 * USEC_PER_MSEC) #define mei_dev_to_vsc_hw(dev) ((struct mei_vsc_hw *)((dev)->hw)) diff --git a/drivers/misc/mei/vsc-fw-loader.c b/drivers/misc/mei/vsc-fw-loader.c index 596a9d695dfc..084d0205f97d 100644 --- a/drivers/misc/mei/vsc-fw-loader.c +++ 
b/drivers/misc/mei/vsc-fw-loader.c @@ -204,7 +204,7 @@ struct vsc_img_frag { /** * struct vsc_fw_loader - represent vsc firmware loader - * @dev: device used to request fimware + * @dev: device used to request firmware * @tp: transport layer used with the firmware loader * @csi: CSI image * @ace: ACE image diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index e6a98dba8a73..1618cca9a731 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -331,12 +331,12 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) return ret; } - ret = vsc_tp_dev_xfer(tp, tp->tx_buf, tp->rx_buf, len); + ret = vsc_tp_dev_xfer(tp, tp->tx_buf, ibuf ? tp->rx_buf : NULL, len); if (ret) return ret; if (ibuf) - cpu_to_be32_array(ibuf, tp->rx_buf, words); + be32_to_cpu_array(ibuf, tp->rx_buf, words); return ret; } @@ -568,6 +568,19 @@ static void vsc_tp_remove(struct spi_device *spi) free_irq(spi->irq, tp); } +static void vsc_tp_shutdown(struct spi_device *spi) +{ + struct vsc_tp *tp = spi_get_drvdata(spi); + + platform_device_unregister(tp->pdev); + + mutex_destroy(&tp->mutex); + + vsc_tp_reset(tp); + + free_irq(spi->irq, tp); +} + static const struct acpi_device_id vsc_tp_acpi_ids[] = { { "INTC1009" }, /* Raptor Lake */ { "INTC1058" }, /* Tiger Lake */ @@ -580,6 +593,7 @@ MODULE_DEVICE_TABLE(acpi, vsc_tp_acpi_ids); static struct spi_driver vsc_tp_driver = { .probe = vsc_tp_probe, .remove = vsc_tp_remove, + .shutdown = vsc_tp_shutdown, .driver = { .name = "vsc-tp", .acpi_match_table = vsc_tp_acpi_ids, diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index d7427894e0bc..c302eb380e42 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -224,6 +224,9 @@ static void davinci_fifo_data_trans(struct mmc_davinci_host *host, } p = sgm->addr; + if (n > sgm->length) + n = sgm->length; + /* NOTE: we never transfer more than rw_threshold bytes * to/from the fifo here; there's no I/O 
overlap. * This also assumes that access width( i.e. ACCWD) is 4 bytes diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c index 9a5f75163aca..8ede4ce93271 100644 --- a/drivers/mmc/host/moxart-mmc.c +++ b/drivers/mmc/host/moxart-mmc.c @@ -131,10 +131,12 @@ struct moxart_host { struct dma_async_tx_descriptor *tx_desc; struct mmc_host *mmc; struct mmc_request *mrq; + struct scatterlist *cur_sg; struct completion dma_complete; struct completion pio_complete; - struct sg_mapping_iter sg_miter; + u32 num_sg; + u32 data_remain; u32 data_len; u32 fifo_width; u32 timeout; @@ -146,6 +148,35 @@ struct moxart_host { bool is_removed; }; +static inline void moxart_init_sg(struct moxart_host *host, + struct mmc_data *data) +{ + host->cur_sg = data->sg; + host->num_sg = data->sg_len; + host->data_remain = host->cur_sg->length; + + if (host->data_remain > host->data_len) + host->data_remain = host->data_len; +} + +static inline int moxart_next_sg(struct moxart_host *host) +{ + int remain; + struct mmc_data *data = host->mrq->cmd->data; + + host->cur_sg++; + host->num_sg--; + + if (host->num_sg > 0) { + host->data_remain = host->cur_sg->length; + remain = host->data_len - data->bytes_xfered; + if (remain > 0 && remain < host->data_remain) + host->data_remain = remain; + } + + return host->num_sg; +} + static int moxart_wait_for_status(struct moxart_host *host, u32 mask, u32 *status) { @@ -278,29 +309,14 @@ static void moxart_transfer_dma(struct mmc_data *data, struct moxart_host *host) static void moxart_transfer_pio(struct moxart_host *host) { - struct sg_mapping_iter *sgm = &host->sg_miter; struct mmc_data *data = host->mrq->cmd->data; u32 *sgp, len = 0, remain, status; if (host->data_len == data->bytes_xfered) return; - /* - * By updating sgm->consumes this will get a proper pointer into the - * buffer at any time. 
- */ - if (!sg_miter_next(sgm)) { - /* This shold not happen */ - dev_err(mmc_dev(host->mmc), "ran out of scatterlist prematurely\n"); - data->error = -EINVAL; - complete(&host->pio_complete); - return; - } - sgp = sgm->addr; - remain = sgm->length; - if (remain > host->data_len) - remain = host->data_len; - sgm->consumed = 0; + sgp = sg_virt(host->cur_sg); + remain = host->data_remain; if (data->flags & MMC_DATA_WRITE) { while (remain > 0) { @@ -315,7 +331,6 @@ static void moxart_transfer_pio(struct moxart_host *host) sgp++; len += 4; } - sgm->consumed += len; remain -= len; } @@ -332,22 +347,22 @@ static void moxart_transfer_pio(struct moxart_host *host) sgp++; len += 4; } - sgm->consumed += len; remain -= len; } } - data->bytes_xfered += sgm->consumed; - if (host->data_len == data->bytes_xfered) { + data->bytes_xfered += host->data_remain - remain; + host->data_remain = remain; + + if (host->data_len != data->bytes_xfered) + moxart_next_sg(host); + else complete(&host->pio_complete); - return; - } } static void moxart_prepare_data(struct moxart_host *host) { struct mmc_data *data = host->mrq->cmd->data; - unsigned int flags = SG_MITER_ATOMIC; /* Used from IRQ */ u32 datactrl; int blksz_bits; @@ -358,19 +373,15 @@ static void moxart_prepare_data(struct moxart_host *host) blksz_bits = ffs(data->blksz) - 1; BUG_ON(1 << blksz_bits != data->blksz); + moxart_init_sg(host, data); + datactrl = DCR_DATA_EN | (blksz_bits & DCR_BLK_SIZE); - if (data->flags & MMC_DATA_WRITE) { - flags |= SG_MITER_FROM_SG; + if (data->flags & MMC_DATA_WRITE) datactrl |= DCR_DATA_WRITE; - } else { - flags |= SG_MITER_TO_SG; - } if (moxart_use_dma(host)) datactrl |= DCR_DMA_EN; - else - sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags); writel(DCR_DATA_FIFO_RESET, host->base + REG_DATA_CONTROL); writel(MASK_DATA | FIFO_URUN | FIFO_ORUN, host->base + REG_CLEAR); @@ -443,9 +454,6 @@ static void moxart_request(struct mmc_host *mmc, struct mmc_request *mrq) } request_done: - if 
(!moxart_use_dma(host)) - sg_miter_stop(&host->sg_miter); - spin_unlock_irqrestore(&host->lock, flags); mmc_request_done(host->mmc, mrq); } diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index 9053526fa212..150fb477b7cc 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -24,6 +24,7 @@ #define BRCMSTB_MATCH_FLAGS_NO_64BIT BIT(0) #define BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT BIT(1) #define BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE BIT(2) +#define BRCMSTB_MATCH_FLAGS_USE_CARD_BUSY BIT(4) #define BRCMSTB_PRIV_FLAGS_HAS_CQE BIT(0) #define BRCMSTB_PRIV_FLAGS_GATE_CLOCK BIT(1) @@ -384,6 +385,9 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) if (match_priv->flags & BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT) host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + if (!(match_priv->flags & BRCMSTB_MATCH_FLAGS_USE_CARD_BUSY)) + host->mmc_host_ops.card_busy = NULL; + /* Change the base clock frequency if the DT property exists */ if (device_property_read_u32(&pdev->dev, "clock-frequency", &priv->base_freq_hz) != 0) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index ef89ec382bfe..23e6ba70144c 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1326,7 +1326,7 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on) ret = pci_read_config_byte(chip->pdev, 0xAE, &scratch); if (ret) - return ret; + goto fail; /* * Turn PMOS on [bit 0], set over current detection to 2.4 V @@ -1337,7 +1337,10 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on) else scratch &= ~0x47; - return pci_write_config_byte(chip->pdev, 0xAE, scratch); + ret = pci_write_config_byte(chip->pdev, 0xAE, scratch); + +fail: + return pcibios_err_to_errno(ret); } static int jmicron_probe(struct sdhci_pci_chip *chip) @@ -2202,7 +2205,7 @@ static int sdhci_pci_probe(struct pci_dev *pdev, ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &slots); if (ret) - return ret; 
+ return pcibios_err_to_errno(ret); slots = PCI_SLOT_INFO_SLOTS(slots) + 1; dev_dbg(&pdev->dev, "found %d slot(s)\n", slots); @@ -2211,7 +2214,7 @@ static int sdhci_pci_probe(struct pci_dev *pdev, ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &first_bar); if (ret) - return ret; + return pcibios_err_to_errno(ret); first_bar &= PCI_SLOT_INFO_FIRST_BAR_MASK; diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index d4a02184784a..058bef1c7e41 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -823,7 +823,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch &= 0x7f; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); @@ -834,7 +834,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_CLKREQ, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x20; pci_write_config_byte(chip->pdev, O2_SD_CLKREQ, scratch); @@ -843,7 +843,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) */ ret = pci_read_config_byte(chip->pdev, O2_SD_CAPS, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x01; pci_write_config_byte(chip->pdev, O2_SD_CAPS, scratch); pci_write_config_byte(chip->pdev, O2_SD_CAPS, 0x73); @@ -856,7 +856,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_INF_MOD, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x08; pci_write_config_byte(chip->pdev, O2_SD_INF_MOD, scratch); @@ -864,7 +864,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x80; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; @@ -875,7 +875,7 @@ static int sdhci_pci_o2_probe(struct 
sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch &= 0x7f; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); @@ -886,7 +886,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) O2_SD_FUNC_REG0, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 = ((scratch_32 & 0xFF000000) >> 24); /* Check Whether subId is 0x11 or 0x12 */ @@ -898,7 +898,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) O2_SD_FUNC_REG4, &scratch_32); if (ret) - return ret; + goto read_fail; /* Enable Base Clk setting change */ scratch_32 |= O2_SD_FREG4_ENABLE_CLK_SET; @@ -921,7 +921,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_dword(chip->pdev, O2_SD_CLK_SETTING, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 &= ~(0xFF00); scratch_32 |= 0x07E0C800; @@ -931,14 +931,14 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_dword(chip->pdev, O2_SD_CLKREQ, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 |= 0x3; pci_write_config_dword(chip->pdev, O2_SD_CLKREQ, scratch_32); ret = pci_read_config_dword(chip->pdev, O2_SD_PLL_SETTING, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 &= ~(0x1F3F070E); scratch_32 |= 0x18270106; @@ -949,7 +949,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_dword(chip->pdev, O2_SD_CAP_REG2, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 &= ~(0xE0); pci_write_config_dword(chip->pdev, O2_SD_CAP_REG2, scratch_32); @@ -961,7 +961,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x80; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; @@ -971,7 +971,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = 
pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch &= 0x7f; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); @@ -979,7 +979,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_dword(chip->pdev, O2_SD_PLL_SETTING, &scratch_32); if (ret) - return ret; + goto read_fail; if ((scratch_32 & 0xff000000) == 0x01000000) { scratch_32 &= 0x0000FFFF; @@ -998,7 +998,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) O2_SD_FUNC_REG4, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 |= (1 << 22); pci_write_config_dword(chip->pdev, O2_SD_FUNC_REG4, scratch_32); @@ -1017,7 +1017,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x80; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; @@ -1028,7 +1028,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) /* UnLock WP */ ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch &= 0x7f; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); @@ -1057,13 +1057,16 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) /* Lock WP */ ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x80; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; } return 0; + +read_fail: + return pcibios_err_to_errno(ret); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 746f4cf7ab03..fbf7a91bed35 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2515,26 +2515,29 @@ EXPORT_SYMBOL_GPL(sdhci_get_cd_nogpio); static int sdhci_check_ro(struct sdhci_host *host) { - unsigned long flags; + bool allow_invert = false; int is_readonly; - spin_lock_irqsave(&host->lock, flags); 
- - if (host->flags & SDHCI_DEVICE_DEAD) + if (host->flags & SDHCI_DEVICE_DEAD) { is_readonly = 0; - else if (host->ops->get_ro) + } else if (host->ops->get_ro) { is_readonly = host->ops->get_ro(host); - else if (mmc_can_gpio_ro(host->mmc)) + } else if (mmc_can_gpio_ro(host->mmc)) { is_readonly = mmc_gpio_get_ro(host->mmc); - else + /* Do not invert twice */ + allow_invert = !(host->mmc->caps2 & MMC_CAP2_RO_ACTIVE_HIGH); + } else { is_readonly = !(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_WRITE_PROTECT); + allow_invert = true; + } - spin_unlock_irqrestore(&host->lock, flags); + if (is_readonly >= 0 && + allow_invert && + (host->quirks & SDHCI_QUIRK_INVERTED_WRITE_PROTECT)) + is_readonly = !is_readonly; - /* This quirk needs to be replaced by a callback-function later */ - return host->quirks & SDHCI_QUIRK_INVERTED_WRITE_PROTECT ? - !is_readonly : is_readonly; + return is_readonly; } #define SAMPLE_COUNT 5 @@ -4724,6 +4727,21 @@ int sdhci_setup_host(struct sdhci_host *host) if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) { host->max_adma = 65532; /* 32-bit alignment */ mmc->max_seg_size = 65535; + /* + * sdhci_adma_table_pre() expects to define 1 DMA + * descriptor per segment, so the maximum segment size + * is set accordingly. SDHCI allows up to 64KiB per DMA + * descriptor (16-bit field), but some controllers do + * not support "zero means 65536" reducing the maximum + * for them to 65535. That is a problem if PAGE_SIZE is + * 64KiB because the block layer does not support + * max_seg_size < PAGE_SIZE, however + * sdhci_adma_table_pre() has a workaround to handle + * that case, and split the descriptor. Refer also + * comment in sdhci_adma_table_pre(). 
+ */ + if (mmc->max_seg_size < PAGE_SIZE) + mmc->max_seg_size = PAGE_SIZE; } else { mmc->max_seg_size = 65536; } diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index d7dbbd469b89..53e16d39af4b 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -1093,28 +1093,32 @@ static int nand_fill_column_cycles(struct nand_chip *chip, u8 *addrs, unsigned int offset_in_page) { struct mtd_info *mtd = nand_to_mtd(chip); + bool ident_stage = !mtd->writesize; - /* Make sure the offset is less than the actual page size. */ - if (offset_in_page > mtd->writesize + mtd->oobsize) - return -EINVAL; + /* Bypass all checks during NAND identification */ + if (likely(!ident_stage)) { + /* Make sure the offset is less than the actual page size. */ + if (offset_in_page > mtd->writesize + mtd->oobsize) + return -EINVAL; - /* - * On small page NANDs, there's a dedicated command to access the OOB - * area, and the column address is relative to the start of the OOB - * area, not the start of the page. Asjust the address accordingly. - */ - if (mtd->writesize <= 512 && offset_in_page >= mtd->writesize) - offset_in_page -= mtd->writesize; + /* + * On small page NANDs, there's a dedicated command to access the OOB + * area, and the column address is relative to the start of the OOB + * area, not the start of the page. Asjust the address accordingly. + */ + if (mtd->writesize <= 512 && offset_in_page >= mtd->writesize) + offset_in_page -= mtd->writesize; - /* - * The offset in page is expressed in bytes, if the NAND bus is 16-bit - * wide, then it must be divided by 2. - */ - if (chip->options & NAND_BUSWIDTH_16) { - if (WARN_ON(offset_in_page % 2)) - return -EINVAL; + /* + * The offset in page is expressed in bytes, if the NAND bus is 16-bit + * wide, then it must be divided by 2. 
+ */ + if (chip->options & NAND_BUSWIDTH_16) { + if (WARN_ON(offset_in_page % 2)) + return -EINVAL; - offset_in_page /= 2; + offset_in_page /= 2; + } } addrs[0] = offset_in_page; @@ -1123,7 +1127,7 @@ static int nand_fill_column_cycles(struct nand_chip *chip, u8 *addrs, * Small page NANDs use 1 cycle for the columns, while large page NANDs * need 2 */ - if (mtd->writesize <= 512) + if (!ident_stage && mtd->writesize <= 512) return 1; addrs[1] = offset_in_page >> 8; @@ -1436,16 +1440,19 @@ int nand_change_read_column_op(struct nand_chip *chip, unsigned int len, bool force_8bit) { struct mtd_info *mtd = nand_to_mtd(chip); + bool ident_stage = !mtd->writesize; if (len && !buf) return -EINVAL; - if (offset_in_page + len > mtd->writesize + mtd->oobsize) - return -EINVAL; + if (!ident_stage) { + if (offset_in_page + len > mtd->writesize + mtd->oobsize) + return -EINVAL; - /* Small page NANDs do not support column change. */ - if (mtd->writesize <= 512) - return -ENOTSUPP; + /* Small page NANDs do not support column change. 
*/ + if (mtd->writesize <= 512) + return -ENOTSUPP; + } if (nand_has_exec_op(chip)) { const struct nand_interface_config *conf = @@ -2173,7 +2180,7 @@ EXPORT_SYMBOL_GPL(nand_reset_op); int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, bool force_8bit, bool check_only) { - if (!len || !buf) + if (!len || (!check_only && !buf)) return -EINVAL; if (nand_has_exec_op(chip)) { @@ -6301,6 +6308,7 @@ static const struct nand_ops rawnand_ops = { static int nand_scan_tail(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); + struct nand_device *base = &chip->base; struct nand_ecc_ctrl *ecc = &chip->ecc; int ret, i; @@ -6445,9 +6453,13 @@ static int nand_scan_tail(struct nand_chip *chip) if (!ecc->write_oob_raw) ecc->write_oob_raw = ecc->write_oob; - /* propagate ecc info to mtd_info */ + /* Propagate ECC info to the generic NAND and MTD layers */ mtd->ecc_strength = ecc->strength; + if (!base->ecc.ctx.conf.strength) + base->ecc.ctx.conf.strength = ecc->strength; mtd->ecc_step_size = ecc->size; + if (!base->ecc.ctx.conf.step_size) + base->ecc.ctx.conf.step_size = ecc->size; /* * Set the number of read / write steps for one page depending on ECC @@ -6455,6 +6467,8 @@ static int nand_scan_tail(struct nand_chip *chip) */ if (!ecc->steps) ecc->steps = mtd->writesize / ecc->size; + if (!base->ecc.ctx.nsteps) + base->ecc.ctx.nsteps = ecc->steps; if (ecc->steps * ecc->size != mtd->writesize) { WARN(1, "Invalid ECC parameters\n"); ret = -EINVAL; diff --git a/drivers/mtd/nand/raw/rockchip-nand-controller.c b/drivers/mtd/nand/raw/rockchip-nand-controller.c index 7baaef69d70a..55580447633b 100644 --- a/drivers/mtd/nand/raw/rockchip-nand-controller.c +++ b/drivers/mtd/nand/raw/rockchip-nand-controller.c @@ -420,13 +420,13 @@ static int rk_nfc_setup_interface(struct nand_chip *chip, int target, u32 rate, tc2rw, trwpw, trw2c; u32 temp; - if (target < 0) - return 0; - timings = nand_get_sdr_timings(conf); if (IS_ERR(timings)) return -EOPNOTSUPP; + if 
(target < 0) + return 0; + if (IS_ERR(nfc->nfc_clk)) rate = clk_get_rate(nfc->ahb_clk); else diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3c3fcce4acd4..d19aabf5d4fb 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5773,6 +5773,9 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, if (real_dev) { ret = ethtool_get_ts_info_by_layer(real_dev, info); } else { + info->phc_index = -1; + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; /* Check if all slaves support software tx timestamping */ rcu_read_lock(); bond_for_each_slave_rcu(bond, slave, iter) { diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 0cacd7027e35..bc80fb6397dc 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1214,9 +1214,9 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond, __be32 target; if (newval->string) { - if (!in4_pton(newval->string+1, -1, (u8 *)&target, -1, NULL)) { - netdev_err(bond->dev, "invalid ARP target %pI4 specified\n", - &target); + if (strlen(newval->string) < 1 || + !in4_pton(newval->string + 1, -1, (u8 *)&target, -1, NULL)) { + netdev_err(bond->dev, "invalid ARP target specified\n"); return ret; } if (newval->string[0] == '+') diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 1d9057dc44f2..bf1589aef1fc 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1618,11 +1618,20 @@ static int mcp251xfd_open(struct net_device *ndev) clear_bit(MCP251XFD_FLAGS_DOWN, priv->flags); can_rx_offload_enable(&priv->offload); + priv->wq = alloc_ordered_workqueue("%s-mcp251xfd_wq", + WQ_FREEZABLE | WQ_MEM_RECLAIM, + dev_name(&spi->dev)); + if (!priv->wq) { + err = -ENOMEM; + goto out_can_rx_offload_disable; + } + INIT_WORK(&priv->tx_work, 
mcp251xfd_tx_obj_write_sync); + err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq, IRQF_SHARED | IRQF_ONESHOT, dev_name(&spi->dev), priv); if (err) - goto out_can_rx_offload_disable; + goto out_destroy_workqueue; err = mcp251xfd_chip_interrupts_enable(priv); if (err) @@ -1634,6 +1643,8 @@ static int mcp251xfd_open(struct net_device *ndev) out_free_irq: free_irq(spi->irq, priv); + out_destroy_workqueue: + destroy_workqueue(priv->wq); out_can_rx_offload_disable: can_rx_offload_disable(&priv->offload); set_bit(MCP251XFD_FLAGS_DOWN, priv->flags); @@ -1661,6 +1672,7 @@ static int mcp251xfd_stop(struct net_device *ndev) hrtimer_cancel(&priv->tx_irq_timer); mcp251xfd_chip_interrupts_disable(priv); free_irq(ndev->irq, priv); + destroy_workqueue(priv->wq); can_rx_offload_disable(&priv->offload); mcp251xfd_timestamp_stop(priv); mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED); diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c index 160528d3cc26..b1de8052a45c 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c @@ -131,6 +131,39 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, tx_obj->xfer[0].len = len; } +static void mcp251xfd_tx_failure_drop(const struct mcp251xfd_priv *priv, + struct mcp251xfd_tx_ring *tx_ring, + int err) +{ + struct net_device *ndev = priv->ndev; + struct net_device_stats *stats = &ndev->stats; + unsigned int frame_len = 0; + u8 tx_head; + + tx_ring->head--; + stats->tx_dropped++; + tx_head = mcp251xfd_get_tx_head(tx_ring); + can_free_echo_skb(ndev, tx_head, &frame_len); + netdev_completed_queue(ndev, 1, frame_len); + netif_wake_queue(ndev); + + if (net_ratelimit()) + netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err); +} + +void mcp251xfd_tx_obj_write_sync(struct work_struct *work) +{ + struct mcp251xfd_priv *priv = container_of(work, struct mcp251xfd_priv, + tx_work); + struct mcp251xfd_tx_obj *tx_obj = 
priv->tx_work_obj; + struct mcp251xfd_tx_ring *tx_ring = priv->tx; + int err; + + err = spi_sync(priv->spi, &tx_obj->msg); + if (err) + mcp251xfd_tx_failure_drop(priv, tx_ring, err); +} + static int mcp251xfd_tx_obj_write(const struct mcp251xfd_priv *priv, struct mcp251xfd_tx_obj *tx_obj) { @@ -162,6 +195,11 @@ static bool mcp251xfd_tx_busy(const struct mcp251xfd_priv *priv, return false; } +static bool mcp251xfd_work_busy(struct work_struct *work) +{ + return work_busy(work); +} + netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -175,7 +213,8 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; - if (mcp251xfd_tx_busy(priv, tx_ring)) + if (mcp251xfd_tx_busy(priv, tx_ring) || + mcp251xfd_work_busy(&priv->tx_work)) return NETDEV_TX_BUSY; tx_obj = mcp251xfd_get_tx_obj_next(tx_ring); @@ -193,13 +232,13 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, netdev_sent_queue(priv->ndev, frame_len); err = mcp251xfd_tx_obj_write(priv, tx_obj); - if (err) - goto out_err; - - return NETDEV_TX_OK; - - out_err: - netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err); + if (err == -EBUSY) { + netif_stop_queue(ndev); + priv->tx_work_obj = tx_obj; + queue_work(priv->wq, &priv->tx_work); + } else if (err) { + mcp251xfd_tx_failure_drop(priv, tx_ring, err); + } return NETDEV_TX_OK; } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index 24510b3b8020..b35bfebd23f2 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -633,6 +633,10 @@ struct mcp251xfd_priv { struct mcp251xfd_rx_ring *rx[MCP251XFD_FIFO_RX_NUM]; struct mcp251xfd_tx_ring tx[MCP251XFD_FIFO_TX_NUM]; + struct workqueue_struct *wq; + struct work_struct tx_work; + struct mcp251xfd_tx_obj *tx_work_obj; + DECLARE_BITMAP(flags, __MCP251XFD_FLAGS_SIZE__); u8 rx_ring_num; @@ -952,6 +956,7 @@ void 
mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv, void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv); void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv); +void mcp251xfd_tx_obj_write_sync(struct work_struct *work); netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, struct net_device *ndev); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 8faf8a462c05..024169461cad 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -125,6 +125,7 @@ static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_liste static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leafimx = { .quirks = 0, + .family = KVASER_LEAF, .ops = &kvaser_usb_leaf_dev_ops, }; @@ -294,7 +295,7 @@ int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd, } usb_free_urb(urb); - return 0; + return err; } int kvaser_usb_can_rx_over_error(struct net_device *netdev) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 02f07b870f10..268949939636 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1047,31 +1047,31 @@ static int lan9303_get_sset_count(struct dsa_switch *ds, int port, int sset) return ARRAY_SIZE(lan9303_mib); } -static int lan9303_phy_read(struct dsa_switch *ds, int phy, int regnum) +static int lan9303_phy_read(struct dsa_switch *ds, int port, int regnum) { struct lan9303 *chip = ds->priv; int phy_base = chip->phy_addr_base; - if (phy == phy_base) + if (port == 0) return lan9303_virt_phy_reg_read(chip, regnum); - if (phy > phy_base + 2) + if (port > 2) return -ENODEV; - return chip->ops->phy_read(chip, phy, regnum); + return chip->ops->phy_read(chip, phy_base + port, regnum); } -static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum, +static int lan9303_phy_write(struct dsa_switch *ds, int port, int regnum, u16 
val) { struct lan9303 *chip = ds->priv; int phy_base = chip->phy_addr_base; - if (phy == phy_base) + if (port == 0) return lan9303_virt_phy_reg_write(chip, regnum, val); - if (phy > phy_base + 2) + if (port > 2) return -ENODEV; - return chip->ops->phy_write(chip, phy, regnum, val); + return chip->ops->phy_write(chip, phy_base + port, regnum, val); } static int lan9303_port_enable(struct dsa_switch *ds, int port, @@ -1099,7 +1099,7 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port) vlan_vid_del(dsa_port_to_conduit(dp), htons(ETH_P_8021Q), port); lan9303_disable_processing_port(chip, port); - lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN); + lan9303_phy_write(ds, port, MII_BMCR, BMCR_PDOWN); } static int lan9303_port_bridge_join(struct dsa_switch *ds, int port, @@ -1374,8 +1374,6 @@ static const struct dsa_switch_ops lan9303_switch_ops = { static int lan9303_register_switch(struct lan9303 *chip) { - int base; - chip->ds = devm_kzalloc(chip->dev, sizeof(*chip->ds), GFP_KERNEL); if (!chip->ds) return -ENOMEM; @@ -1385,8 +1383,7 @@ static int lan9303_register_switch(struct lan9303 *chip) chip->ds->priv = chip; chip->ds->ops = &lan9303_switch_ops; chip->ds->phylink_mac_ops = &lan9303_phylink_mac_ops; - base = chip->phy_addr_base; - chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1 + base, base); + chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1, 0); return dsa_register_switch(chip->ds); } diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index f8ad7833f5d9..425e20daf1e9 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -355,10 +355,8 @@ int ksz9477_reset_switch(struct ksz_device *dev) SPI_AUTO_EDGE_DETECTION, 0); /* default configuration */ - ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8); - data8 = SW_AGING_ENABLE | SW_LINK_AUTO_AGING | - SW_SRC_ADDR_FILTER | SW_FLUSH_STP_TABLE | SW_FLUSH_MSTP_TABLE; - ksz_write8(dev, REG_SW_LUE_CTRL_1, 
data8); + ksz_write8(dev, REG_SW_LUE_CTRL_1, + SW_AGING_ENABLE | SW_LINK_AUTO_AGING | SW_SRC_ADDR_FILTER); /* disable interrupts */ ksz_write32(dev, REG_SW_INT_MASK__4, SWITCH_INT_MASK); @@ -429,6 +427,57 @@ void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze) mutex_unlock(&p->mib.cnt_mutex); } +int ksz9477_errata_monitor(struct ksz_device *dev, int port, + u64 tx_late_col) +{ + u32 pmavbc; + u8 status; + u16 pqm; + int ret; + + ret = ksz_pread8(dev, port, REG_PORT_STATUS_0, &status); + if (ret) + return ret; + if (!(FIELD_GET(PORT_INTF_SPEED_MASK, status) == PORT_INTF_SPEED_NONE) && + !(status & PORT_INTF_FULL_DUPLEX)) { + /* Errata DS80000754 recommends monitoring potential faults in + * half-duplex mode. The switch might not be able to communicate anymore + * in these states. + * If you see this message, please read the errata-sheet for more information: + * https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9477S-Errata-DS80000754.pdf + * To workaround this issue, half-duplex mode should be avoided. + * A software reset could be implemented to recover from this state. 
+ */ + dev_warn_once(dev->dev, + "Half-duplex detected on port %d, transmission halt may occur\n", + port); + if (tx_late_col != 0) { + /* Transmission halt with late collisions */ + dev_crit_once(dev->dev, + "TX late collisions detected, transmission may be halted on port %d\n", + port); + } + ret = ksz_read8(dev, REG_SW_LUE_CTRL_0, &status); + if (ret) + return ret; + if (status & SW_VLAN_ENABLE) { + ret = ksz_pread16(dev, port, REG_PORT_QM_TX_CNT_0__4, &pqm); + if (ret) + return ret; + ret = ksz_read32(dev, REG_PMAVBC, &pmavbc); + if (ret) + return ret; + if ((FIELD_GET(PMAVBC_MASK, pmavbc) <= PMAVBC_MIN) || + (FIELD_GET(PORT_QM_TX_CNT_M, pqm) >= PORT_QM_TX_CNT_MAX)) { + /* Transmission halt with Half-Duplex and VLAN */ + dev_crit_once(dev->dev, + "resources out of limits, transmission may be halted\n"); + } + } + } + return ret; +} + void ksz9477_port_init_cnt(struct ksz_device *dev, int port) { struct ksz_port_mib *mib = &dev->ports[port].mib; @@ -1299,6 +1348,10 @@ int ksz9477_setup(struct dsa_switch *ds) /* Enable REG_SW_MTU__2 reg by setting SW_JUMBO_PACKET */ ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_JUMBO_PACKET, true); + /* Use collision based back pressure mode. 
*/ + ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_BACK_PRESSURE, + SW_BACK_PRESSURE_COLLISION); + /* Now we can configure default MTU value */ ret = regmap_update_bits(ksz_regmap_16(dev), REG_SW_MTU__2, REG_SW_MTU_MASK, VLAN_ETH_FRAME_LEN + ETH_FCS_LEN); diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h index ce1e656b800b..239a281da10b 100644 --- a/drivers/net/dsa/microchip/ksz9477.h +++ b/drivers/net/dsa/microchip/ksz9477.h @@ -36,6 +36,8 @@ int ksz9477_port_mirror_add(struct ksz_device *dev, int port, bool ingress, struct netlink_ext_ack *extack); void ksz9477_port_mirror_del(struct ksz_device *dev, int port, struct dsa_mall_mirror_tc_entry *mirror); +int ksz9477_errata_monitor(struct ksz_device *dev, int port, + u64 tx_late_col); void ksz9477_get_caps(struct ksz_device *dev, int port, struct phylink_config *config); int ksz9477_fdb_dump(struct ksz_device *dev, int port, diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index f3a205ee483f..d5354c600ea1 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -247,6 +247,7 @@ #define REG_SW_MAC_CTRL_1 0x0331 #define SW_BACK_PRESSURE BIT(5) +#define SW_BACK_PRESSURE_COLLISION 0 #define FAIR_FLOW_CTRL BIT(4) #define NO_EXC_COLLISION_DROP BIT(3) #define SW_JUMBO_PACKET BIT(2) @@ -842,8 +843,8 @@ #define REG_PORT_STATUS_0 0x0030 -#define PORT_INTF_SPEED_M 0x3 -#define PORT_INTF_SPEED_S 3 +#define PORT_INTF_SPEED_MASK GENMASK(4, 3) +#define PORT_INTF_SPEED_NONE GENMASK(1, 0) #define PORT_INTF_FULL_DUPLEX BIT(2) #define PORT_TX_FLOW_CTRL BIT(1) #define PORT_RX_FLOW_CTRL BIT(0) @@ -1167,6 +1168,11 @@ #define PORT_RMII_CLK_SEL BIT(7) #define PORT_MII_SEL_EDGE BIT(5) +#define REG_PMAVBC 0x03AC + +#define PMAVBC_MASK GENMASK(26, 16) +#define PMAVBC_MIN 0x580 + /* 4 - MAC */ #define REG_PORT_MAC_CTRL_0 0x0400 @@ -1494,6 +1500,7 @@ #define PORT_QM_TX_CNT_USED_S 0 #define PORT_QM_TX_CNT_M (BIT(11) - 1) +#define 
PORT_QM_TX_CNT_MAX 0x200 #define REG_PORT_QM_TX_CNT_1__4 0x0A14 diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 2818e24e2a51..0580b2fee21c 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1382,6 +1382,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .tc_cbs_supported = true, .ops = &ksz9477_dev_ops, .phylink_mac_ops = &ksz9477_phylink_mac_ops, + .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, @@ -1416,6 +1417,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_ipms = 8, .ops = &ksz9477_dev_ops, .phylink_mac_ops = &ksz9477_phylink_mac_ops, + .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, @@ -1450,6 +1452,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_ipms = 8, .ops = &ksz9477_dev_ops, .phylink_mac_ops = &ksz9477_phylink_mac_ops, + .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, @@ -1540,6 +1543,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .tc_cbs_supported = true, .ops = &ksz9477_dev_ops, .phylink_mac_ops = &ksz9477_phylink_mac_ops, + .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, @@ -1820,6 +1824,7 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port) struct rtnl_link_stats64 *stats; struct ksz_stats_raw *raw; struct ksz_port_mib *mib; + int ret; mib = &dev->ports[port].mib; stats = &mib->stats64; @@ -1861,6 +1866,12 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port) pstats->rx_pause_frames = raw->rx_pause; spin_unlock(&mib->stats64_lock); + + if (dev->info->phy_errata_9477) { + ret = ksz9477_errata_monitor(dev, port, raw->tx_late_col); + if (ret) + dev_err(dev->dev, "Failed to monitor 
transmission halt\n"); + } } void ksz88xx_r_mib_stats64(struct ksz_device *dev, int port) @@ -2185,7 +2196,7 @@ static void ksz_irq_bus_sync_unlock(struct irq_data *d) struct ksz_device *dev = kirq->dev; int ret; - ret = ksz_write32(dev, kirq->reg_mask, kirq->masked); + ret = ksz_write8(dev, kirq->reg_mask, kirq->masked); if (ret) dev_err(dev->dev, "failed to change IRQ mask\n"); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index c784fd23a993..ee7db46e469d 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -66,6 +66,7 @@ struct ksz_chip_data { bool tc_cbs_supported; const struct ksz_dev_ops *ops; const struct phylink_mac_ops *phylink_mac_ops; + bool phy_errata_9477; bool ksz87xx_eee_link_erratum; const struct ksz_mib_names *mib_names; int mib_cnt; diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index a806dadc4196..20c6529ec135 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -1380,6 +1380,7 @@ static int bcmasp_probe(struct platform_device *pdev) dev_err(dev, "Cannot create eth interface %d\n", i); bcmasp_remove_intfs(priv); of_node_put(intf_node); + ret = -ENOMEM; goto of_put_exit; } list_add_tail(&intf->list, &priv->intfs); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index e2a4e1088b7f..9580ab83d387 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1262,7 +1262,7 @@ enum { struct bnx2x_fw_stats_req { struct stats_query_header hdr; - struct stats_query_entry query[FP_SB_MAX_E1x+ + struct stats_query_entry query[FP_SB_MAX_E2 + BNX2X_FIRST_QUEUE_QUERY_IDX]; }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c437ca1c0fd3..43952689bfb0 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -732,9 +732,6 @@ tx_done: return NETDEV_TX_OK; tx_dma_error: - if (BNXT_TX_PTP_IS_SET(lflags)) - atomic_inc(&bp->ptp_cfg->tx_avail); - last_frag = i; /* start back at beginning and unmap skb */ @@ -756,6 +753,8 @@ tx_dma_error: tx_free: dev_kfree_skb_any(skb); tx_kick_pending: + if (BNXT_TX_PTP_IS_SET(lflags)) + atomic_inc(&bp->ptp_cfg->tx_avail); if (txr->kick_pending) bnxt_txr_db_kick(bp, txr, txr->tx_prod); txr->tx_buf_ring[txr->tx_prod].skb = NULL; @@ -6147,6 +6146,24 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp) return max_ring; } +u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp) +{ + u16 i, tbl_size, max_ring = 0; + struct bnxt_rss_ctx *rss_ctx; + + if (!BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + return 0; + + tbl_size = bnxt_get_rxfh_indir_size(bp->dev); + + list_for_each_entry(rss_ctx, &bp->rss_ctx_list, list) { + for (i = 0; i < tbl_size; i++) + max_ring = max(max_ring, rss_ctx->rss_indir_tbl[i]); + } + + return max_ring; +} + int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings) { if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { @@ -8996,6 +9013,7 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); #endif } + bp->tso_max_segs = le16_to_cpu(resp->max_tso_segs); hwrm_func_qcaps_exit: hwrm_req_drop(bp, req); @@ -12669,7 +12687,11 @@ bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx) if (!BNXT_NEW_RM(bp)) return true; - if (hwr.vnic == bp->hw_resc.resv_vnics && + /* Do not reduce VNIC and RSS ctx reservations. There is a FW + * issue that will mess up the default VNIC if we reduce the + * reservations. 
+ */ + if (hwr.vnic <= bp->hw_resc.resv_vnics && hwr.rss_ctx <= bp->hw_resc.resv_rsscos_ctxs) return true; @@ -15363,6 +15385,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->priv_flags |= IFF_UNICAST_FLT; netif_set_tso_max_size(dev, GSO_MAX_SIZE); + if (bp->tso_max_segs) + netif_set_tso_max_segs(dev, bp->tso_max_segs); dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_RX_SG; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index bbc7edccd5a4..6b10a09ee1af 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2318,6 +2318,7 @@ struct bnxt { u8 rss_hash_key_updated:1; u16 max_mtu; + u16 tso_max_segs; u8 max_tc; u8 max_lltc; /* lossless TCs */ struct bnxt_queue_info q_info[BNXT_MAX_QUEUE]; @@ -2775,6 +2776,7 @@ int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic, void bnxt_fill_ipv6_mask(__be32 mask[4]); int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); +u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp); int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings); int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic); int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 8763f8a01457..79c09c1cdf93 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -961,6 +961,12 @@ static int bnxt_set_channels(struct net_device *dev, return rc; } + if (req_rx_rings < bp->rx_nr_rings && + req_rx_rings <= bnxt_get_max_rss_ctx_ring(bp)) { + netdev_warn(dev, "Can't deactivate rings used by RSS contexts\n"); + return -EINVAL; + } + if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) 
!= bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) && netif_is_rxfh_configured(dev)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 06ea86c80be1..f219709f9563 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -2,7 +2,7 @@ * * Copyright (c) 2014-2016 Broadcom Corporation * Copyright (c) 2014-2018 Broadcom Limited - * Copyright (c) 2018-2023 Broadcom Inc. + * Copyright (c) 2018-2024 Broadcom Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -500,7 +500,11 @@ struct cmd_nums { #define HWRM_TFC_IF_TBL_GET 0x399UL #define HWRM_TFC_TBL_SCOPE_CONFIG_GET 0x39aUL #define HWRM_TFC_RESC_USAGE_QUERY 0x39bUL + #define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x39cUL + #define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x39dUL + #define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x39eUL #define HWRM_SV 0x400UL + #define HWRM_DBG_LOG_BUFFER_FLUSH 0xff0fUL #define HWRM_DBG_READ_DIRECT 0xff10UL #define HWRM_DBG_READ_INDIRECT 0xff11UL #define HWRM_DBG_WRITE_DIRECT 0xff12UL @@ -609,8 +613,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 39 -#define HWRM_VERSION_STR "1.10.3.39" +#define HWRM_VERSION_RSVD 44 +#define HWRM_VERSION_STR "1.10.3.44" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -664,6 +668,7 @@ struct hwrm_ver_get_output { #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TFLIB_SUPPORTED 0x2000UL #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_SOC_CAPABLE 0x10000UL u8 roce_fw_maj_8b; u8 roce_fw_min_8b; u8 roce_fw_bld_8b; @@ -843,7 +848,9 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_HW_DOORBELL_RECOVERY_READ_ERROR 0x49UL #define 
ASYNC_EVENT_CMPL_EVENT_ID_CTX_ERROR 0x4aUL #define ASYNC_EVENT_CMPL_EVENT_ID_UDCC_SESSION_CHANGE 0x4bUL - #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x4cUL + #define ASYNC_EVENT_CMPL_EVENT_ID_DBG_BUF_PRODUCER 0x4cUL + #define ASYNC_EVENT_CMPL_EVENT_ID_PEER_MMAP_CHANGE 0x4dUL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x4eUL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -1326,13 +1333,13 @@ struct hwrm_async_event_cmpl_error_report_base { u8 timestamp_lo; __le16 timestamp_hi; __le32 event_data1; - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0 - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL #define 
ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED @@ -1814,6 +1821,9 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT2_SW_MAX_RESOURCE_LIMITS_SUPPORTED 0x800000UL #define FUNC_QCAPS_RESP_FLAGS_EXT2_TF_INGRESS_NIC_FLOW_SUPPORTED 0x1000000UL #define FUNC_QCAPS_RESP_FLAGS_EXT2_LPBK_STATS_SUPPORTED 0x2000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_TF_EGRESS_NIC_FLOW_SUPPORTED 0x4000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_MULTI_LOSSLESS_QUEUES_SUPPORTED 0x8000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_PEER_MMAP_SUPPORTED 0x10000000UL __le16 tunnel_disable_flag; #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_VXLAN 0x1UL #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_NGE 0x2UL @@ -1828,7 +1838,7 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_XID_PARTITION_CAP_RX_CK 0x2UL u8 device_serial_number[8]; __le16 ctxs_per_partition; - u8 unused_2[2]; + __le16 max_tso_segs; __le32 roce_vf_max_av; __le32 roce_vf_max_cq; __le32 roce_vf_max_mrw; @@ -2449,6 +2459,7 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_FLAGS_NPAR_1_2_SUPPORT 0x200UL #define FUNC_DRV_RGTR_REQ_FLAGS_ASYM_QUEUE_CFG_SUPPORT 0x400UL #define FUNC_DRV_RGTR_REQ_FLAGS_TF_INGRESS_NIC_FLOW_MODE 0x800UL + #define FUNC_DRV_RGTR_REQ_FLAGS_TF_EGRESS_NIC_FLOW_MODE 0x1000UL __le32 enables; #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL #define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL @@ -3660,22 +3671,24 @@ struct hwrm_func_backing_store_cfg_v2_input { __le16 target_id; __le64 resp_addr; __le16 type; - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL - 
#define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_XID_PARTITION 0x1dUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT_TRACE 0x1eUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT2_TRACE 0x1fUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT_TRACE 0x20UL @@ -3772,18 +3785,20 @@ struct 
hwrm_func_backing_store_qcfg_v2_output { __le16 seq_id; __le16 resp_len; __le16 type; - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TBL_SCOPE 0x1cUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_XID_PARTITION 0x1dUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT_TRACE 0x1eUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT2_TRACE 0x1fUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT_TRACE 0x20UL @@ -3876,22 +3891,24 @@ struct hwrm_func_backing_store_qcaps_v2_input { __le16 target_id; __le64 resp_addr; __le16 type; - #define 
FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE 0x1cUL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL + #define 
FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION 0x1dUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT_TRACE 0x1eUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT2_TRACE 0x1fUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT_TRACE 0x20UL @@ -3911,22 +3928,24 @@ struct hwrm_func_backing_store_qcaps_v2_output { __le16 seq_id; __le16 resp_len; __le16 type; - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SQ_DB_SHADOW 0x16UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RQ_DB_SHADOW 0x17UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ_DB_SHADOW 0x18UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TBL_SCOPE 0x1cUL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL + #define 
FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ_DB_SHADOW 0x19UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_XID_PARTITION 0x1dUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT_TRACE 0x1eUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT2_TRACE 0x1fUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT_TRACE 0x20UL @@ -4202,7 +4221,8 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_100GB_PAM4_112 0x3eaUL #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_200GB_PAM4_112 0x7d2UL #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_400GB_PAM4_112 0xfa2UL - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_LAST PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_400GB_PAM4_112 + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_800GB_PAM4_112 0x1f42UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_LAST PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_800GB_PAM4_112 __le16 auto_link_speeds2_mask; #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_1GB 0x1UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_10GB 0x2UL @@ -4217,6 +4237,7 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_100GB_PAM4_112 0x400UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_200GB_PAM4_112 0x800UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_400GB_PAM4_112 0x1000UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_800GB_PAM4_112 0x2000UL u8 unused_2[6]; }; @@ -4292,6 +4313,7 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL #define PORT_PHY_QCFG_RESP_LINK_SPEED_200GB 0x7d0UL #define 
PORT_PHY_QCFG_RESP_LINK_SPEED_400GB 0xfa0UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_800GB 0x1f40UL #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB 0xffffUL #define PORT_PHY_QCFG_RESP_LINK_SPEED_LAST PORT_PHY_QCFG_RESP_LINK_SPEED_10MB u8 duplex_cfg; @@ -4451,7 +4473,13 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASESR4 0x35UL #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASELR4 0x36UL #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASEER4 0x37UL - #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASEER4 + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASECR8 0x38UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASESR8 0x39UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASELR8 0x3aUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEER8 0x3bUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEFR8 0x3cUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEDR8 0x3dUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEDR8 u8 media_type; #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL @@ -5049,33 +5077,43 @@ struct hwrm_port_qstats_ext_output { u8 valid; }; -/* hwrm_port_lpbk_qstats_input (size:128b/16B) */ +/* hwrm_port_lpbk_qstats_input (size:256b/32B) */ struct hwrm_port_lpbk_qstats_input { __le16 req_type; __le16 cmpl_ring; __le16 seq_id; __le16 target_id; __le64 resp_addr; + __le16 lpbk_stat_size; + u8 flags; + #define PORT_LPBK_QSTATS_REQ_FLAGS_COUNTER_MASK 0x1UL + u8 unused_0[5]; + __le64 lpbk_stat_host_addr; }; -/* hwrm_port_lpbk_qstats_output (size:768b/96B) */ +/* hwrm_port_lpbk_qstats_output (size:128b/16B) */ struct hwrm_port_lpbk_qstats_output { __le16 error_code; __le16 req_type; __le16 seq_id; __le16 resp_len; + __le16 lpbk_stat_size; + u8 unused_0[5]; + u8 valid; +}; + +/* port_lpbk_stats (size:640b/80B) */ +struct port_lpbk_stats { __le64 lpbk_ucast_frames; __le64 lpbk_mcast_frames; __le64 lpbk_bcast_frames; __le64 lpbk_ucast_bytes; __le64 
lpbk_mcast_bytes; __le64 lpbk_bcast_bytes; - __le64 tx_stat_discard; - __le64 tx_stat_error; - __le64 rx_stat_discard; - __le64 rx_stat_error; - u8 unused_0[7]; - u8 valid; + __le64 lpbk_tx_discards; + __le64 lpbk_tx_errors; + __le64 lpbk_rx_discards; + __le64 lpbk_rx_errors; }; /* hwrm_port_ecn_qstats_input (size:256b/32B) */ @@ -5140,13 +5178,15 @@ struct hwrm_port_clr_stats_output { u8 valid; }; -/* hwrm_port_lpbk_clr_stats_input (size:128b/16B) */ +/* hwrm_port_lpbk_clr_stats_input (size:192b/24B) */ struct hwrm_port_lpbk_clr_stats_input { __le16 req_type; __le16 cmpl_ring; __le16 seq_id; __le16 target_id; __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; }; /* hwrm_port_lpbk_clr_stats_output (size:128b/16B) */ @@ -5287,10 +5327,11 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_100G 0x2UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_200G 0x4UL __le16 flags2; - #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL - #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL - #define PORT_PHY_QCAPS_RESP_FLAGS2_BANK_ADDR_SUPPORTED 0x4UL - #define PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED 0x8UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_BANK_ADDR_SUPPORTED 0x4UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED 0x8UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_REMOTE_LPBK_UNSUPPORTED 0x10UL u8 internal_port_cnt; u8 unused_0; __le16 supported_speeds2_force_mode; @@ -7443,17 +7484,17 @@ struct hwrm_cfa_l2_filter_cfg_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH 0x1UL - #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX 0x0UL - #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX 0x1UL - #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX - #define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP 0x2UL - #define 
CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_MASK 0xcUL - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_SFT 2 - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_NO_ROCE_L2 (0x0UL << 2) - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 2) - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 2) - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH 0x1UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX 0x0UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX 0x1UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX + #define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP 0x2UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_MASK 0xcUL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_SFT 2 + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_NO_ROCE_L2 (0x0UL << 2) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 2) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 2) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_MASK 0x30UL #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_SFT 4 #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_NO_UPDATE (0x0UL << 4) @@ -8520,17 +8561,17 @@ struct hwrm_tunnel_dst_port_query_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ECPRI 0xeUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_SRV6 0xfUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL - #define 
TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GRE 0x11UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ECPRI 0xeUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_SRV6 0xfUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GRE 0x11UL #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL @@ -8576,17 +8617,17 @@ struct hwrm_tunnel_dst_port_alloc_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ECPRI 0xeUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_SRV6 0xfUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GRE 0x11UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + 
#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ECPRI 0xeUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_SRV6 0xfUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GRE 0x11UL #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL @@ -8635,17 +8676,17 @@ struct hwrm_tunnel_dst_port_free_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ECPRI 0xeUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_SRV6 0xfUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GRE 0x11UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ECPRI 0xeUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_SRV6 0xfUL + #define 
TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GRE 0x11UL #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL @@ -9109,6 +9150,7 @@ struct hwrm_struct_hdr { #define STRUCT_HDR_STRUCT_ID_LLDP_GENERIC 0x424UL #define STRUCT_HDR_STRUCT_ID_LLDP_DEVICE 0x426UL #define STRUCT_HDR_STRUCT_ID_POWER_BKUP 0x427UL + #define STRUCT_HDR_STRUCT_ID_PEER_MMAP 0x429UL #define STRUCT_HDR_STRUCT_ID_AFM_OPAQUE 0x1UL #define STRUCT_HDR_STRUCT_ID_PORT_DESCRIPTION 0xaUL #define STRUCT_HDR_STRUCT_ID_RSS_V2 0x64UL @@ -9758,6 +9800,9 @@ struct hwrm_dbg_coredump_initiate_input { __le16 instance; __le16 unused_0; u8 seg_flags; + #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_LIVE_DATA 0x1UL + #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_CRASH_DATA 0x2UL + #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_COLLECT_CTX_L1_CACHE 0x4UL u8 unused_1[7]; }; @@ -10433,13 +10478,13 @@ struct hwrm_selftest_irq_output { /* dbc_dbc (size:64b/8B) */ struct dbc_dbc { - u32 index; + __le32 index; #define DBC_DBC_INDEX_MASK 0xffffffUL #define DBC_DBC_INDEX_SFT 0 #define DBC_DBC_EPOCH 0x1000000UL #define DBC_DBC_TOGGLE_MASK 0x6000000UL #define DBC_DBC_TOGGLE_SFT 25 - u32 type_path_xid; + __le32 type_path_xid; #define DBC_DBC_XID_MASK 0xfffffUL #define DBC_DBC_XID_SFT 0 #define DBC_DBC_PATH_MASK 0x3000000UL diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5e9a93bdb518..23ebeb143987 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2482,6 +2482,18 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) (tx_pool->consumer_index + 1) % tx_pool->num_buffers; tx_buff = &tx_pool->tx_buff[bufidx]; + + /* Sanity checks on our free map to make sure it points to an index + * that is not being occupied by another skb. 
If skb memory is + * not freed then we see congestion control kick in and halt tx. + */ + if (unlikely(tx_buff->skb)) { + dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n", + skb_is_gso(skb) ? "tso_pool" : "tx_pool", + queue_num, bufidx); + dev_kfree_skb_any(tx_buff->skb); + } + tx_buff->skb = skb; tx_buff->index = bufidx; tx_buff->pool_index = queue_num; @@ -4061,6 +4073,12 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free) adapter->num_active_tx_scrqs = 0; } + /* Clean any remaining outstanding SKBs + * we freed the irq so we won't be hearing + * from them + */ + clean_tx_pools(adapter); + if (adapter->rx_scrq) { for (i = 0; i < adapter->num_active_rx_scrqs; i++) { if (!adapter->rx_scrq[i]) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 2e98a2a0bead..ce227b56cf72 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1109,6 +1109,46 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) } /** + * e1000e_force_smbus - Force interfaces to transition to SMBUS mode. + * @hw: pointer to the HW structure + * + * Force the MAC and the PHY to SMBUS mode. Assumes semaphore already + * acquired. + * + * Return: 0 on success, negative errno on failure. 
+ **/ +static s32 e1000e_force_smbus(struct e1000_hw *hw) +{ + u16 smb_ctrl = 0; + u32 ctrl_ext; + s32 ret_val; + + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + + /* Force SMBus mode in the PHY */ + ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &smb_ctrl); + if (ret_val) { + e1000e_enable_phy_retry(hw); + return ret_val; + } + + smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS; + e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, smb_ctrl); + + e1000e_enable_phy_retry(hw); + + /* Force SMBus mode in the MAC */ + ctrl_ext = er32(CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, ctrl_ext); + + return 0; +} + +/** * e1000_enable_ulp_lpt_lp - configure Ultra Low Power mode for LynxPoint-LP * @hw: pointer to the HW structure * @to_sx: boolean indicating a system power state transition to Sx @@ -1165,6 +1205,14 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) if (ret_val) goto out; + if (hw->mac.type != e1000_pch_mtp) { + ret_val = e1000e_force_smbus(hw); + if (ret_val) { + e_dbg("Failed to force SMBUS: %d\n", ret_val); + goto release; + } + } + /* Si workaround for ULP entry flow on i127/rev6 h/w. 
Enable * LPLU and disable Gig speed when entering ULP */ @@ -1225,27 +1273,12 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) } release: - /* Switching PHY interface always returns MDI error - * so disable retry mechanism to avoid wasting time - */ - e1000e_disable_phy_retry(hw); - - /* Force SMBus mode in PHY */ - ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); - if (ret_val) { - e1000e_enable_phy_retry(hw); - hw->phy.ops.release(hw); - goto out; + if (hw->mac.type == e1000_pch_mtp) { + ret_val = e1000e_force_smbus(hw); + if (ret_val) + e_dbg("Failed to force SMBUS over MTL system: %d\n", + ret_val); } - phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; - e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); - - e1000e_enable_phy_retry(hw); - - /* Force SMBus mode in MAC */ - mac_reg = er32(CTRL_EXT); - mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_reg); hw->phy.ops.release(hw); out: diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index da5c59daf8ba..3cd161c6672b 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6363,49 +6363,49 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; ew32(EXTCNF_CTRL, mac_data); - /* Enable the Dynamic Power Gating in the MAC */ - mac_data = er32(FEXTNVM7); - mac_data |= BIT(22); - ew32(FEXTNVM7, mac_data); - /* Disable disconnected cable conditioning for Power Gating */ mac_data = er32(DPGFR); mac_data |= BIT(2); ew32(DPGFR, mac_data); - /* Don't wake from dynamic Power Gating with clock request */ - mac_data = er32(FEXTNVM12); - mac_data |= BIT(12); - ew32(FEXTNVM12, mac_data); - - /* Ungate PGCB clock */ - mac_data = er32(FEXTNVM9); - mac_data &= ~BIT(28); - ew32(FEXTNVM9, mac_data); - - /* Enable K1 off to enable mPHY Power Gating */ - mac_data = er32(FEXTNVM6); - mac_data |= BIT(31); - ew32(FEXTNVM6, mac_data); - - /* 
Enable mPHY power gating for any link and speed */ - mac_data = er32(FEXTNVM8); - mac_data |= BIT(9); - ew32(FEXTNVM8, mac_data); - /* Enable the Dynamic Clock Gating in the DMA and MAC */ mac_data = er32(CTRL_EXT); mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN; ew32(CTRL_EXT, mac_data); - - /* No MAC DPG gating SLP_S0 in modern standby - * Switch the logic of the lanphypc to use PMC counter - */ - mac_data = er32(FEXTNVM5); - mac_data |= BIT(7); - ew32(FEXTNVM5, mac_data); } + /* Enable the Dynamic Power Gating in the MAC */ + mac_data = er32(FEXTNVM7); + mac_data |= BIT(22); + ew32(FEXTNVM7, mac_data); + + /* Don't wake from dynamic Power Gating with clock request */ + mac_data = er32(FEXTNVM12); + mac_data |= BIT(12); + ew32(FEXTNVM12, mac_data); + + /* Ungate PGCB clock */ + mac_data = er32(FEXTNVM9); + mac_data &= ~BIT(28); + ew32(FEXTNVM9, mac_data); + + /* Enable K1 off to enable mPHY Power Gating */ + mac_data = er32(FEXTNVM6); + mac_data |= BIT(31); + ew32(FEXTNVM6, mac_data); + + /* Enable mPHY power gating for any link and speed */ + mac_data = er32(FEXTNVM8); + mac_data |= BIT(9); + ew32(FEXTNVM8, mac_data); + + /* No MAC DPG gating SLP_S0 in modern standby + * Switch the logic of the lanphypc to use PMC counter + */ + mac_data = er32(FEXTNVM5); + mac_data |= BIT(7); + ew32(FEXTNVM5, mac_data); + /* Disable the time synchronization clock */ mac_data = er32(FEXTNVM7); mac_data |= BIT(31); @@ -6498,33 +6498,6 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) } else { /* Request driver unconfigure the device from S0ix */ - /* Disable the Dynamic Power Gating in the MAC */ - mac_data = er32(FEXTNVM7); - mac_data &= 0xFFBFFFFF; - ew32(FEXTNVM7, mac_data); - - /* Disable mPHY power gating for any link and speed */ - mac_data = er32(FEXTNVM8); - mac_data &= ~BIT(9); - ew32(FEXTNVM8, mac_data); - - /* Disable K1 off */ - mac_data = er32(FEXTNVM6); - mac_data &= ~BIT(31); - ew32(FEXTNVM6, mac_data); - - /* Disable Ungate PGCB clock */ - mac_data = 
er32(FEXTNVM9); - mac_data |= BIT(28); - ew32(FEXTNVM9, mac_data); - - /* Cancel not waking from dynamic - * Power Gating with clock request - */ - mac_data = er32(FEXTNVM12); - mac_data &= ~BIT(12); - ew32(FEXTNVM12, mac_data); - /* Cancel disable disconnected cable conditioning * for Power Gating */ @@ -6537,13 +6510,6 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) mac_data &= 0xFFF7FFFF; ew32(CTRL_EXT, mac_data); - /* Revert the lanphypc logic to use the internal Gbe counter - * and not the PMC counter - */ - mac_data = er32(FEXTNVM5); - mac_data &= 0xFFFFFF7F; - ew32(FEXTNVM5, mac_data); - /* Enable the periodic inband message, * Request PCIe clock in K1 page770_17[10:9] =01b */ @@ -6581,6 +6547,40 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) mac_data &= ~BIT(31); mac_data |= BIT(0); ew32(FEXTNVM7, mac_data); + + /* Disable the Dynamic Power Gating in the MAC */ + mac_data = er32(FEXTNVM7); + mac_data &= 0xFFBFFFFF; + ew32(FEXTNVM7, mac_data); + + /* Disable mPHY power gating for any link and speed */ + mac_data = er32(FEXTNVM8); + mac_data &= ~BIT(9); + ew32(FEXTNVM8, mac_data); + + /* Disable K1 off */ + mac_data = er32(FEXTNVM6); + mac_data &= ~BIT(31); + ew32(FEXTNVM6, mac_data); + + /* Disable Ungate PGCB clock */ + mac_data = er32(FEXTNVM9); + mac_data |= BIT(28); + ew32(FEXTNVM9, mac_data); + + /* Cancel not waking from dynamic + * Power Gating with clock request + */ + mac_data = er32(FEXTNVM12); + mac_data &= ~BIT(12); + ew32(FEXTNVM12, mac_data); + + /* Revert the lanphypc logic to use the internal Gbe counter + * and not the PMC counter + */ + mac_data = er32(FEXTNVM5); + mac_data &= 0xFFFFFF7F; + ew32(FEXTNVM5, mac_data); } static int e1000e_pm_freeze(struct device *dev) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h index ee86d2c53079..55b5bb884d73 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h +++ 
b/drivers/net/ethernet/intel/i40e/i40e_adminq.h @@ -109,10 +109,6 @@ static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc) -EFBIG, /* I40E_AQ_RC_EFBIG */ }; - /* aq_rc is invalid if AQ timed out */ - if (aq_ret == -EIO) - return -EAGAIN; - if (!((u32)aq_rc < (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0])))) return -ERANGE; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 284c3fad5a6e..310513d9321b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13293,6 +13293,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, bool need_reset; int i; + /* VSI shall be deleted in a moment, block loading new programs */ + if (prog && test_bit(__I40E_IN_REMOVE, pf->state)) + return -EINVAL; + /* Don't allow frames that span over multiple buffers */ if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) { NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags"); @@ -13301,14 +13305,9 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, /* When turning XDP on->off/off->on we reset and rebuild the rings. 
*/ need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog); - if (need_reset) i40e_prep_for_reset(pf); - /* VSI shall be deleted in a moment, just return EINVAL */ - if (test_bit(__I40E_IN_REMOVE, pf->state)) - return -EINVAL; - old_prog = xchg(&vsi->xdp_prog, prog); if (need_reset) { diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index ce5034ed2b24..f182179529b7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -1339,6 +1339,7 @@ ice_dwnld_cfg_bufs_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 start, for (i = 0; i < count; i++) { bool last = false; + int try_cnt = 0; int status; bh = (struct ice_buf_hdr *)(bufs + start + i); @@ -1346,8 +1347,26 @@ ice_dwnld_cfg_bufs_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 start, if (indicate_last) last = ice_is_last_download_buffer(bh, i, count); - status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, last, - &offset, &info, NULL); + while (1) { + status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, + last, &offset, &info, + NULL); + if (hw->adminq.sq_last_status != ICE_AQ_RC_ENOSEC && + hw->adminq.sq_last_status != ICE_AQ_RC_EBADSIG) + break; + + try_cnt++; + + if (try_cnt == 5) + break; + + msleep(20); + } + + if (try_cnt) + dev_dbg(ice_hw_to_dev(hw), + "ice_aq_download_pkg number of retries: %d\n", + try_cnt); /* Save AQ status from download package */ if (status) { diff --git a/drivers/net/ethernet/intel/ice/ice_hwmon.c b/drivers/net/ethernet/intel/ice/ice_hwmon.c index e4c2c1bff6c0..b7aa6812510a 100644 --- a/drivers/net/ethernet/intel/ice/ice_hwmon.c +++ b/drivers/net/ethernet/intel/ice/ice_hwmon.c @@ -96,7 +96,7 @@ static bool ice_is_internal_reading_supported(struct ice_pf *pf) unsigned long sensors = pf->hw.dev_caps.supported_sensors; - return _test_bit(ICE_SENSOR_SUPPORT_E810_INT_TEMP_BIT, &sensors); + return test_bit(ICE_SENSOR_SUPPORT_E810_INT_TEMP_BIT, &sensors); }; void ice_hwmon_init(struct ice_pf 
*pf) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1b61ca3a6eb6..55a42aad92a5 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -805,6 +805,9 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) } switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_200GB: + speed = "200 G"; + break; case ICE_AQ_LINK_SPEED_100GB: speed = "100 G"; break; @@ -4136,7 +4139,7 @@ bool ice_is_wol_supported(struct ice_hw *hw) int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) { struct ice_pf *pf = vsi->back; - int err = 0, timeout = 50; + int i, err = 0, timeout = 50; if (!new_rx && !new_tx) return -EINVAL; @@ -4162,6 +4165,14 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) ice_vsi_close(vsi); ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT); + + ice_for_each_traffic_class(i) { + if (vsi->tc_cfg.ena_tc & BIT(i)) + netdev_set_tc_queue(vsi->netdev, + vsi->tc_cfg.tc_info[i].netdev_tc, + vsi->tc_cfg.tc_info[i].qcount_tx, + vsi->tc_cfg.tc_info[i].qoffset); + } ice_pf_dcb_recfg(pf, locked); ice_vsi_open(vsi); done: @@ -5564,7 +5575,7 @@ static int ice_suspend(struct device *dev) */ disabled = ice_service_task_stop(pf); - ice_unplug_aux_dev(pf); + ice_deinit_rdma(pf); /* Already suspended?, then there is nothing to do */ if (test_and_set_bit(ICE_SUSPENDED, pf->state)) { @@ -5644,6 +5655,11 @@ static int ice_resume(struct device *dev) if (ret) dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret); + ret = ice_init_rdma(pf); + if (ret) + dev_err(dev, "Reinitialize RDMA during resume failed: %d\n", + ret); + clear_bit(ICE_DOWN, pf->state); /* Now perform PF reset and rebuild */ reset_type = ICE_RESET_PFR; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 0f17fc1181d2..fefaf52fd677 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ 
b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1559,6 +1559,10 @@ void ice_ptp_extts_event(struct ice_pf *pf) u8 chan, tmr_idx; u32 hi, lo; + /* Don't process timestamp events if PTP is not ready */ + if (pf->ptp.state != ICE_PTP_READY) + return; + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; /* Event time is captured by one of the two matched registers * GLTSYN_EVNT_L: 32 LSB of sampled time event @@ -1584,27 +1588,33 @@ void ice_ptp_extts_event(struct ice_pf *pf) /** * ice_ptp_cfg_extts - Configure EXTTS pin and channel * @pf: Board private structure - * @ena: true to enable; false to disable * @chan: GPIO channel (0-3) - * @gpio_pin: GPIO pin - * @extts_flags: request flags from the ptp_extts_request.flags + * @config: desired EXTTS configuration. + * @store: If set to true, the values will be stored + * + * Configure an external timestamp event on the requested channel. + * + * Return: 0 on success, -EOPNOTUSPP on unsupported flags */ -static int -ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin, - unsigned int extts_flags) +static int ice_ptp_cfg_extts(struct ice_pf *pf, unsigned int chan, + struct ice_extts_channel *config, bool store) { u32 func, aux_reg, gpio_reg, irq_reg; struct ice_hw *hw = &pf->hw; u8 tmr_idx; - if (chan > (unsigned int)pf->ptp.info.n_ext_ts) - return -EINVAL; + /* Reject requests with unsupported flags */ + if (config->flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) + return -EOPNOTSUPP; tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; irq_reg = rd32(hw, PFINT_OICR_ENA); - if (ena) { + if (config->ena) { /* Enable the interrupt */ irq_reg |= PFINT_OICR_TSYN_EVNT_M; aux_reg = GLTSYN_AUX_IN_0_INT_ENA_M; @@ -1613,9 +1623,9 @@ ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin, #define GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE BIT(1) /* set event level to requested edge */ - if (extts_flags & PTP_FALLING_EDGE) + if (config->flags 
& PTP_FALLING_EDGE) aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE; - if (extts_flags & PTP_RISING_EDGE) + if (config->flags & PTP_RISING_EDGE) aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE; /* Write GPIO CTL reg. @@ -1636,12 +1646,52 @@ ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin, wr32(hw, PFINT_OICR_ENA, irq_reg); wr32(hw, GLTSYN_AUX_IN(chan, tmr_idx), aux_reg); - wr32(hw, GLGEN_GPIO_CTL(gpio_pin), gpio_reg); + wr32(hw, GLGEN_GPIO_CTL(config->gpio_pin), gpio_reg); + + if (store) + memcpy(&pf->ptp.extts_channels[chan], config, sizeof(*config)); return 0; } /** + * ice_ptp_disable_all_extts - Disable all EXTTS channels + * @pf: Board private structure + */ +static void ice_ptp_disable_all_extts(struct ice_pf *pf) +{ + struct ice_extts_channel extts_cfg = {}; + int i; + + for (i = 0; i < pf->ptp.info.n_ext_ts; i++) { + if (pf->ptp.extts_channels[i].ena) { + extts_cfg.gpio_pin = pf->ptp.extts_channels[i].gpio_pin; + extts_cfg.ena = false; + ice_ptp_cfg_extts(pf, i, &extts_cfg, false); + } + } + + synchronize_irq(pf->oicr_irq.virq); +} + +/** + * ice_ptp_enable_all_extts - Enable all EXTTS channels + * @pf: Board private structure + * + * Called during reset to restore user configuration. + */ +static void ice_ptp_enable_all_extts(struct ice_pf *pf) +{ + int i; + + for (i = 0; i < pf->ptp.info.n_ext_ts; i++) { + if (pf->ptp.extts_channels[i].ena) + ice_ptp_cfg_extts(pf, i, &pf->ptp.extts_channels[i], + false); + } +} + +/** * ice_ptp_cfg_clkout - Configure clock to generate periodic wave * @pf: Board private structure * @chan: GPIO channel (0-3) @@ -1659,6 +1709,9 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan, u32 func, val, gpio_pin; u8 tmr_idx; + if (config && config->flags & ~PTP_PEROUT_PHASE) + return -EOPNOTSUPP; + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; /* 0. 
Reset mode & out_en in AUX_OUT */ @@ -1795,17 +1848,18 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, struct ptp_clock_request *rq, int on) { struct ice_pf *pf = ptp_info_to_pf(info); - struct ice_perout_channel clk_cfg = {0}; bool sma_pres = false; unsigned int chan; u32 gpio_pin; - int err; if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) sma_pres = true; switch (rq->type) { case PTP_CLK_REQ_PEROUT: + { + struct ice_perout_channel clk_cfg = {}; + chan = rq->perout.index; if (sma_pres) { if (chan == ice_pin_desc_e810t[SMA1].chan) @@ -1825,15 +1879,19 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, clk_cfg.gpio_pin = chan; } + clk_cfg.flags = rq->perout.flags; clk_cfg.period = ((rq->perout.period.sec * NSEC_PER_SEC) + rq->perout.period.nsec); clk_cfg.start_time = ((rq->perout.start.sec * NSEC_PER_SEC) + rq->perout.start.nsec); clk_cfg.ena = !!on; - err = ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true); - break; + return ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true); + } case PTP_CLK_REQ_EXTTS: + { + struct ice_extts_channel extts_cfg = {}; + chan = rq->extts.index; if (sma_pres) { if (chan < ice_pin_desc_e810t[SMA2].chan) @@ -1849,14 +1907,15 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, gpio_pin = chan; } - err = ice_ptp_cfg_extts(pf, !!on, chan, gpio_pin, - rq->extts.flags); - break; + extts_cfg.flags = rq->extts.flags; + extts_cfg.gpio_pin = gpio_pin; + extts_cfg.ena = !!on; + + return ice_ptp_cfg_extts(pf, chan, &extts_cfg, true); + } default: return -EOPNOTSUPP; } - - return err; } /** @@ -1869,26 +1928,32 @@ static int ice_ptp_gpio_enable_e823(struct ptp_clock_info *info, struct ptp_clock_request *rq, int on) { struct ice_pf *pf = ptp_info_to_pf(info); - struct ice_perout_channel clk_cfg = {0}; - int err; switch (rq->type) { case PTP_CLK_REQ_PPS: + { + struct ice_perout_channel clk_cfg = {}; + + clk_cfg.flags = rq->perout.flags; clk_cfg.gpio_pin = PPS_PIN_INDEX; clk_cfg.period = NSEC_PER_SEC; clk_cfg.ena = !!on; - err = 
ice_ptp_cfg_clkout(pf, PPS_CLK_GEN_CHAN, &clk_cfg, true); - break; + return ice_ptp_cfg_clkout(pf, PPS_CLK_GEN_CHAN, &clk_cfg, true); + } case PTP_CLK_REQ_EXTTS: - err = ice_ptp_cfg_extts(pf, !!on, rq->extts.index, - TIME_SYNC_PIN_INDEX, rq->extts.flags); - break; + { + struct ice_extts_channel extts_cfg = {}; + + extts_cfg.flags = rq->extts.flags; + extts_cfg.gpio_pin = TIME_SYNC_PIN_INDEX; + extts_cfg.ena = !!on; + + return ice_ptp_cfg_extts(pf, rq->extts.index, &extts_cfg, true); + } default: return -EOPNOTSUPP; } - - return err; } /** @@ -2720,6 +2785,10 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf) ice_ptp_restart_all_phy(pf); } + /* Re-enable all periodic outputs and external timestamp events */ + ice_ptp_enable_all_clkout(pf); + ice_ptp_enable_all_extts(pf); + return 0; } @@ -3275,6 +3344,8 @@ void ice_ptp_release(struct ice_pf *pf) ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx); + ice_ptp_disable_all_extts(pf); + kthread_cancel_delayed_work_sync(&pf->ptp.work); ice_ptp_port_phy_stop(&pf->ptp.port); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 3af20025043a..e2af9749061c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -29,10 +29,17 @@ enum ice_ptp_pin_e810t { struct ice_perout_channel { bool ena; u32 gpio_pin; + u32 flags; u64 period; u64 start_time; }; +struct ice_extts_channel { + bool ena; + u32 gpio_pin; + u32 flags; +}; + /* The ice hardware captures Tx hardware timestamps in the PHY. The timestamp * is stored in a buffer of registers. Depending on the specific hardware, * this buffer might be shared across multiple PHY ports. 
@@ -226,6 +233,7 @@ enum ice_ptp_state { * @ext_ts_irq: the external timestamp IRQ in use * @kworker: kwork thread for handling periodic work * @perout_channels: periodic output data + * @extts_channels: channels for external timestamps * @info: structure defining PTP hardware capabilities * @clock: pointer to registered PTP clock device * @tstamp_config: hardware timestamping configuration @@ -249,6 +257,7 @@ struct ice_ptp { u8 ext_ts_irq; struct kthread_worker *kworker; struct ice_perout_channel perout_channels[GLTSYN_TGT_H_IDX_MAX]; + struct ice_extts_channel extts_channels[GLTSYN_TGT_H_IDX_MAX]; struct ptp_clock_info info; struct ptp_clock *clock; struct hwtstamp_config tstamp_config; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 94d6670d0901..1191031b2a43 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1899,7 +1899,8 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || lkup_type == ICE_SW_LKUP_PROMISC || lkup_type == ICE_SW_LKUP_PROMISC_VLAN || - lkup_type == ICE_SW_LKUP_DFLT) { + lkup_type == ICE_SW_LKUP_DFLT || + lkup_type == ICE_SW_LKUP_LAST) { sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_REP); } else if (lkup_type == ICE_SW_LKUP_VLAN) { if (opc == ice_aqc_opc_alloc_res) @@ -2922,7 +2923,8 @@ ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi, lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || lkup_type == ICE_SW_LKUP_PROMISC || lkup_type == ICE_SW_LKUP_PROMISC_VLAN || - lkup_type == ICE_SW_LKUP_DFLT) + lkup_type == ICE_SW_LKUP_DFLT || + lkup_type == ICE_SW_LKUP_LAST) rule_type = remove ? 
ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR : ICE_AQC_SW_RULES_T_VSI_LIST_SET; else if (lkup_type == ICE_SW_LKUP_VLAN) diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 5352fee62d2b..0b9982804370 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -217,9 +217,9 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch) if (ch->dma.irq) free_irq(ch->dma.irq, priv); if (IS_RX(ch->idx)) { - int desc; + struct ltq_dma_channel *dma = &ch->dma; - for (desc = 0; desc < LTQ_DESC_NUM; desc++) + for (dma->desc = 0; dma->desc < LTQ_DESC_NUM; dma->desc++) dev_kfree_skb_any(ch->skb[ch->dma.desc]); } } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index e91486c48de3..9adf4301c9b1 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4014,7 +4014,10 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, } } - skb = build_skb(data, frag_size); + if (frag_size) + skb = build_skb(data, frag_size); + else + skb = slab_build_skb(data); if (!skb) { netdev_warn(port->dev, "skb build failed\n"); goto err_drop_frame; @@ -6904,6 +6907,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, /* 9704 == 9728 - 20 and rounding to 8 */ dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE; device_set_node(&dev->dev, port_fwnode); + dev->dev_port = port->id; port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops; port->pcs_gmac.neg_mode = true; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 4a77f6fe2622..05b84581d5c5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1745,7 +1745,7 @@ struct cpt_lf_alloc_req_msg { u16 nix_pf_func; u16 sso_pf_func; u16 eng_grpmsk; - int blkaddr; + u8 blkaddr; u8 ctx_ilen_valid : 1; u8 ctx_ilen : 7; }; diff 
--git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index d883157393ea..6c3aca6f278d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -63,8 +63,13 @@ enum npc_kpu_lb_ltype { NPC_LT_LB_CUSTOM1 = 0xF, }; +/* Don't modify ltypes up to IP6_EXT, otherwise length and checksum of IP + * headers may not be checked correctly. IPv4 ltypes and IPv6 ltypes must + * differ only at bit 0 so mask 0xE can be used to detect extended headers. + */ enum npc_kpu_lc_ltype { - NPC_LT_LC_IP = 1, + NPC_LT_LC_PTP = 1, + NPC_LT_LC_IP, NPC_LT_LC_IP_OPT, NPC_LT_LC_IP6, NPC_LT_LC_IP6_EXT, @@ -72,7 +77,6 @@ enum npc_kpu_lc_ltype { NPC_LT_LC_RARP, NPC_LT_LC_MPLS, NPC_LT_LC_NSH, - NPC_LT_LC_PTP, NPC_LT_LC_FCOE, NPC_LT_LC_NGIO, NPC_LT_LC_CUSTOM0 = 0xE, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index ff78251f92d4..5f661e67ccbc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1643,7 +1643,7 @@ static int rvu_check_rsrc_availability(struct rvu *rvu, if (req->ssow > block->lf.max) { dev_err(&rvu->pdev->dev, "Func 0x%x: Invalid SSOW req, %d > max %d\n", - pcifunc, req->sso, block->lf.max); + pcifunc, req->ssow, block->lf.max); return -EINVAL; } mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index f047185f38e0..3e09d2285814 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -696,7 +696,8 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req, struct cpt_rd_wr_reg_msg *rsp) { - int blkaddr; + u64 offset = req->reg_offset; + int blkaddr, lf; blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) @@ 
-707,17 +708,25 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, !is_cpt_vf(rvu, req->hdr.pcifunc)) return CPT_AF_ERR_ACCESS_DENIED; - rsp->reg_offset = req->reg_offset; - rsp->ret_val = req->ret_val; - rsp->is_write = req->is_write; - if (!is_valid_offset(rvu, req)) return CPT_AF_ERR_ACCESS_DENIED; + /* Translate local LF used by VFs to global CPT LF */ + lf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], req->hdr.pcifunc, + (offset & 0xFFF) >> 3); + + /* Translate local LF's offset to global CPT LF's offset */ + offset &= 0xFF000; + offset += lf << 3; + + rsp->reg_offset = offset; + rsp->ret_val = req->ret_val; + rsp->is_write = req->is_write; + if (req->is_write) - rvu_write64(rvu, blkaddr, req->reg_offset, req->val); + rvu_write64(rvu, blkaddr, offset, req->val); else - rsp->val = rvu_read64(rvu, blkaddr, req->reg_offset); + rsp->val = rvu_read64(rvu, blkaddr, offset); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 00af8888e329..3dc828cf6c5a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3864,6 +3864,11 @@ static int get_flowkey_alg_idx(struct nix_hw *nix_hw, u32 flow_cfg) return -ERANGE; } +/* Mask to match ipv6(NPC_LT_LC_IP6) and ipv6 ext(NPC_LT_LC_IP6_EXT) */ +#define NPC_LT_LC_IP6_MATCH_MSK ((~(NPC_LT_LC_IP6 ^ NPC_LT_LC_IP6_EXT)) & 0xf) +/* Mask to match both ipv4(NPC_LT_LC_IP) and ipv4 ext(NPC_LT_LC_IP_OPT) */ +#define NPC_LT_LC_IP_MATCH_MSK ((~(NPC_LT_LC_IP ^ NPC_LT_LC_IP_OPT)) & 0xf) + static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) { int idx, nr_field, key_off, field_marker, keyoff_marker; @@ -3933,7 +3938,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->hdr_offset = 9; /* offset */ field->bytesm1 = 0; /* 1 byte */ field->ltype_match = NPC_LT_LC_IP; - field->ltype_mask = 0xF; + field->ltype_mask = 
NPC_LT_LC_IP_MATCH_MSK; break; case NIX_FLOW_KEY_TYPE_IPV4: case NIX_FLOW_KEY_TYPE_INNR_IPV4: @@ -3960,8 +3965,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->bytesm1 = 3; /* DIP, 4 bytes */ } } - - field->ltype_mask = 0xF; /* Match only IPv4 */ + field->ltype_mask = NPC_LT_LC_IP_MATCH_MSK; keyoff_marker = false; break; case NIX_FLOW_KEY_TYPE_IPV6: @@ -3990,7 +3994,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->bytesm1 = 15; /* DIP,16 bytes */ } } - field->ltype_mask = 0xF; /* Match only IPv6 */ + field->ltype_mask = NPC_LT_LC_IP6_MATCH_MSK; break; case NIX_FLOW_KEY_TYPE_TCP: case NIX_FLOW_KEY_TYPE_UDP: diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 5664f768cb0c..64a97a0a10ed 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -9,10 +9,9 @@ obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o otx2_ptp.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \ otx2_devlink.o qos_sq.o qos.o -rvu_nicvf-y := otx2_vf.o otx2_devlink.o +rvu_nicvf-y := otx2_vf.o rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o -rvu_nicvf-$(CONFIG_DCB) += otx2_dcbnl.o rvu_nicpf-$(CONFIG_MACSEC) += cn10k_macsec.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index a85ac039d779..87d5776e3b88 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -648,14 +648,14 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for } else if (lvl == NIX_TXSCH_LVL_TL4) { parent = schq_list[NIX_TXSCH_LVL_TL3][prio]; req->reg[0] = NIX_AF_TL4X_PARENT(schq); - req->regval[0] = parent << 16; 
+ req->regval[0] = (u64)parent << 16; req->num_regs++; req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq); req->regval[1] = dwrr_val; } else if (lvl == NIX_TXSCH_LVL_TL3) { parent = schq_list[NIX_TXSCH_LVL_TL2][prio]; req->reg[0] = NIX_AF_TL3X_PARENT(schq); - req->regval[0] = parent << 16; + req->regval[0] = (u64)parent << 16; req->num_regs++; req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq); req->regval[1] = dwrr_val; @@ -670,11 +670,11 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for } else if (lvl == NIX_TXSCH_LVL_TL2) { parent = schq_list[NIX_TXSCH_LVL_TL1][prio]; req->reg[0] = NIX_AF_TL2X_PARENT(schq); - req->regval[0] = parent << 16; + req->regval[0] = (u64)parent << 16; req->num_regs++; req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq); - req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val; + req->regval[1] = (u64)hw->txschq_aggr_lvl_rr_prio << 24 | dwrr_val; if (lvl == hw->txschq_link_cfg_lvl) { req->num_regs++; @@ -698,7 +698,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for req->num_regs++; req->reg[1] = NIX_AF_TL1X_TOPOLOGY(schq); - req->regval[1] = (TXSCH_TL1_DFLT_RR_PRIO << 1); + req->regval[1] = hw->txschq_aggr_lvl_rr_prio << 1; req->num_regs++; req->reg[2] = NIX_AF_TL1X_CIR(schq); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c index 28fb643d2917..aa01110f04a3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -54,6 +54,7 @@ int otx2_pfc_txschq_config(struct otx2_nic *pfvf) return 0; } +EXPORT_SYMBOL(otx2_pfc_txschq_config); static int otx2_pfc_txschq_alloc_one(struct otx2_nic *pfvf, u8 prio) { @@ -122,6 +123,7 @@ int otx2_pfc_txschq_alloc(struct otx2_nic *pfvf) return 0; } +EXPORT_SYMBOL(otx2_pfc_txschq_alloc); static int otx2_pfc_txschq_stop_one(struct otx2_nic *pfvf, u8 prio) { @@ -260,6 +262,7 @@ update_sq_smq_map: return 0; } 
+EXPORT_SYMBOL(otx2_pfc_txschq_update); int otx2_pfc_txschq_stop(struct otx2_nic *pfvf) { @@ -282,6 +285,7 @@ int otx2_pfc_txschq_stop(struct otx2_nic *pfvf) return 0; } +EXPORT_SYMBOL(otx2_pfc_txschq_stop); int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf) { @@ -321,6 +325,7 @@ unlock: mutex_unlock(&pfvf->mbox.lock); return err; } +EXPORT_SYMBOL(otx2_config_priority_flow_ctrl); void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx, bool pfc_enable) @@ -385,6 +390,7 @@ out: "Updating BPIDs in CQ and Aura contexts of RQ%d failed with err %d\n", qidx, err); } +EXPORT_SYMBOL(otx2_update_bpid_in_rqctx); static int otx2_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc) { @@ -472,3 +478,4 @@ int otx2_dcbnl_set_ops(struct net_device *dev) return 0; } +EXPORT_SYMBOL(otx2_dcbnl_set_ops); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c index 99ddf31269d9..458d34a62e18 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c @@ -113,6 +113,7 @@ err_dl: devlink_free(dl); return err; } +EXPORT_SYMBOL(otx2_register_dl); void otx2_unregister_dl(struct otx2_nic *pfvf) { @@ -124,3 +125,4 @@ void otx2_unregister_dl(struct otx2_nic *pfvf) ARRAY_SIZE(otx2_dl_params)); devlink_free(dl); } +EXPORT_SYMBOL(otx2_unregister_dl); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h index 45a32e4b49d1..e3aee6e36215 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h @@ -139,33 +139,34 @@ #define NIX_LF_CINTX_ENA_W1C(a) (NIX_LFBASE | 0xD50 | (a) << 12) /* NIX AF transmit scheduler registers */ -#define NIX_AF_SMQX_CFG(a) (0x700 | (a) << 16) -#define NIX_AF_TL1X_SCHEDULE(a) (0xC00 | (a) << 16) -#define NIX_AF_TL1X_CIR(a) (0xC20 | (a) << 16) -#define 
NIX_AF_TL1X_TOPOLOGY(a) (0xC80 | (a) << 16) -#define NIX_AF_TL2X_PARENT(a) (0xE88 | (a) << 16) -#define NIX_AF_TL2X_SCHEDULE(a) (0xE00 | (a) << 16) -#define NIX_AF_TL2X_TOPOLOGY(a) (0xE80 | (a) << 16) -#define NIX_AF_TL2X_CIR(a) (0xE20 | (a) << 16) -#define NIX_AF_TL2X_PIR(a) (0xE30 | (a) << 16) -#define NIX_AF_TL3X_PARENT(a) (0x1088 | (a) << 16) -#define NIX_AF_TL3X_SCHEDULE(a) (0x1000 | (a) << 16) -#define NIX_AF_TL3X_SHAPE(a) (0x1010 | (a) << 16) -#define NIX_AF_TL3X_CIR(a) (0x1020 | (a) << 16) -#define NIX_AF_TL3X_PIR(a) (0x1030 | (a) << 16) -#define NIX_AF_TL3X_TOPOLOGY(a) (0x1080 | (a) << 16) -#define NIX_AF_TL4X_PARENT(a) (0x1288 | (a) << 16) -#define NIX_AF_TL4X_SCHEDULE(a) (0x1200 | (a) << 16) -#define NIX_AF_TL4X_SHAPE(a) (0x1210 | (a) << 16) -#define NIX_AF_TL4X_CIR(a) (0x1220 | (a) << 16) -#define NIX_AF_TL4X_PIR(a) (0x1230 | (a) << 16) -#define NIX_AF_TL4X_TOPOLOGY(a) (0x1280 | (a) << 16) -#define NIX_AF_MDQX_SCHEDULE(a) (0x1400 | (a) << 16) -#define NIX_AF_MDQX_SHAPE(a) (0x1410 | (a) << 16) -#define NIX_AF_MDQX_CIR(a) (0x1420 | (a) << 16) -#define NIX_AF_MDQX_PIR(a) (0x1430 | (a) << 16) -#define NIX_AF_MDQX_PARENT(a) (0x1480 | (a) << 16) -#define NIX_AF_TL3_TL2X_LINKX_CFG(a, b) (0x1700 | (a) << 16 | (b) << 3) +#define NIX_AF_SMQX_CFG(a) (0x700 | (u64)(a) << 16) +#define NIX_AF_TL4X_SDP_LINK_CFG(a) (0xB10 | (u64)(a) << 16) +#define NIX_AF_TL1X_SCHEDULE(a) (0xC00 | (u64)(a) << 16) +#define NIX_AF_TL1X_CIR(a) (0xC20 | (u64)(a) << 16) +#define NIX_AF_TL1X_TOPOLOGY(a) (0xC80 | (u64)(a) << 16) +#define NIX_AF_TL2X_PARENT(a) (0xE88 | (u64)(a) << 16) +#define NIX_AF_TL2X_SCHEDULE(a) (0xE00 | (u64)(a) << 16) +#define NIX_AF_TL2X_TOPOLOGY(a) (0xE80 | (u64)(a) << 16) +#define NIX_AF_TL2X_CIR(a) (0xE20 | (u64)(a) << 16) +#define NIX_AF_TL2X_PIR(a) (0xE30 | (u64)(a) << 16) +#define NIX_AF_TL3X_PARENT(a) (0x1088 | (u64)(a) << 16) +#define NIX_AF_TL3X_SCHEDULE(a) (0x1000 | (u64)(a) << 16) +#define NIX_AF_TL3X_SHAPE(a) (0x1010 | (u64)(a) << 16) +#define 
NIX_AF_TL3X_CIR(a) (0x1020 | (u64)(a) << 16) +#define NIX_AF_TL3X_PIR(a) (0x1030 | (u64)(a) << 16) +#define NIX_AF_TL3X_TOPOLOGY(a) (0x1080 | (u64)(a) << 16) +#define NIX_AF_TL4X_PARENT(a) (0x1288 | (u64)(a) << 16) +#define NIX_AF_TL4X_SCHEDULE(a) (0x1200 | (u64)(a) << 16) +#define NIX_AF_TL4X_SHAPE(a) (0x1210 | (u64)(a) << 16) +#define NIX_AF_TL4X_CIR(a) (0x1220 | (u64)(a) << 16) +#define NIX_AF_TL4X_PIR(a) (0x1230 | (u64)(a) << 16) +#define NIX_AF_TL4X_TOPOLOGY(a) (0x1280 | (u64)(a) << 16) +#define NIX_AF_MDQX_SCHEDULE(a) (0x1400 | (u64)(a) << 16) +#define NIX_AF_MDQX_SHAPE(a) (0x1410 | (u64)(a) << 16) +#define NIX_AF_MDQX_CIR(a) (0x1420 | (u64)(a) << 16) +#define NIX_AF_MDQX_PIR(a) (0x1430 | (u64)(a) << 16) +#define NIX_AF_MDQX_PARENT(a) (0x1480 | (u64)(a) << 16) +#define NIX_AF_TL3_TL2X_LINKX_CFG(a, b) (0x1700 | (u64)(a) << 16 | (b) << 3) /* LMT LF registers */ #define LMT_LFBASE BIT_ULL(RVU_FUNC_BLKADDR_SHIFT) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index a16e9f244117..3eb85949677a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -513,7 +513,7 @@ process_cqe: static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_poll *cq_poll) { - struct dim_sample dim_sample; + struct dim_sample dim_sample = { 0 }; u64 rx_frames, rx_bytes; u64 tx_frames, tx_bytes; @@ -1174,8 +1174,11 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) { /* Insert vlan tag before giving pkt to tso */ - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { skb = __vlan_hwaccel_push_inside(skb); + if (!skb) + return true; + } otx2_sq_append_tso(pfvf, sq, skb, qidx); return true; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c index 
edac008099c0..0f844c14485a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c @@ -153,7 +153,6 @@ static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf, num_regs++; otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); - } else if (level == NIX_TXSCH_LVL_TL4) { otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); } else if (level == NIX_TXSCH_LVL_TL3) { @@ -176,7 +175,7 @@ static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf, /* check if node is root */ if (node->qid == OTX2_QOS_QID_INNER && !node->parent) { cfg->reg[num_regs] = NIX_AF_TL2X_SCHEDULE(node->schq); - cfg->regval[num_regs] = TXSCH_TL1_DFLT_RR_PRIO << 24 | + cfg->regval[num_regs] = (u64)hw->txschq_aggr_lvl_rr_prio << 24 | mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen); num_regs++; diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 31aebeb2e285..25989c79c92e 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1524,6 +1524,7 @@ static int mtk_star_probe(struct platform_device *pdev) { struct device_node *of_node; struct mtk_star_priv *priv; + struct phy_device *phydev; struct net_device *ndev; struct device *dev; void __iomem *base; @@ -1649,6 +1650,12 @@ static int mtk_star_probe(struct platform_device *pdev) netif_napi_add(ndev, &priv->rx_napi, mtk_star_rx_poll); netif_napi_add_tx(ndev, &priv->tx_napi, mtk_star_tx_poll); + phydev = of_phy_find_device(priv->phy_node); + if (phydev) { + phydev->mac_managed_pm = true; + put_device(&phydev->mdio.dev); + } + return devm_register_netdev(dev, ndev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index c54fd01ea635..3d274599015b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -989,7 +989,12 @@ 
static void mlx5e_xfrm_update_stats(struct xfrm_state *x) struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; struct net *net = dev_net(x->xso.dev); + u64 trailer_packets = 0, trailer_bytes = 0; + u64 replay_packets = 0, replay_bytes = 0; + u64 auth_packets = 0, auth_bytes = 0; + u64 success_packets, success_bytes; u64 packets, bytes, lastuse; + size_t headers; lockdep_assert(lockdep_is_held(&x->lock) || lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) || @@ -999,26 +1004,43 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) return; if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) { - mlx5_fc_query_cached(ipsec_rule->auth.fc, &bytes, &packets, &lastuse); - x->stats.integrity_failed += packets; - XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, packets); - - mlx5_fc_query_cached(ipsec_rule->trailer.fc, &bytes, &packets, &lastuse); - XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, packets); + mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes, + &auth_packets, &lastuse); + x->stats.integrity_failed += auth_packets; + XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets); + + mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes, + &trailer_packets, &lastuse); + XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets); } if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) return; - mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse); - x->curlft.packets += packets; - x->curlft.bytes += bytes; - if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) { - mlx5_fc_query_cached(ipsec_rule->replay.fc, &bytes, &packets, &lastuse); - x->stats.replay += packets; - XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, packets); + mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes, + &replay_packets, &lastuse); + x->stats.replay += replay_packets; + XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets); } + + 
mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse); + success_packets = packets - auth_packets - trailer_packets - replay_packets; + x->curlft.packets += success_packets; + /* NIC counts all bytes passed through flow steering and doesn't have + * an ability to count payload data size which is needed for SA. + * + * To overcome HW limitation, let's approximate the payload size + * by removing always available headers. + */ + headers = sizeof(struct ethhdr); + if (sa_entry->attrs.family == AF_INET) + headers += sizeof(struct iphdr); + else + headers += sizeof(struct ipv6hdr); + + success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes; + x->curlft.bytes += success_bytes - headers * success_packets; } static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a605eae56685..eedbcba22689 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5868,6 +5868,11 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) kfree(priv->htb_qos_sq_stats[i]); kvfree(priv->htb_qos_sq_stats); + if (priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + } + memset(priv, 0, sizeof(*priv)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 5693986ae656..ac1565c0c8af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -1197,9 +1197,7 @@ static int get_num_eqs(struct mlx5_core_dev *dev) if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev)) return 1; - max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
- MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); + max_dev_eqs = mlx5_max_eq_cap_get(dev); num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table), max_dev_eqs - MLX5_MAX_ASYNC_EQS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index 50d2ea323979..a436ce895e45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -6,6 +6,9 @@ #include "helper.h" #include "ofld.h" +static int +acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + static bool esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) @@ -123,18 +126,31 @@ static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw, { struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *flow_rule; + bool created = false; int err = 0; + if (!vport->ingress.acl) { + err = acl_ingress_ofld_setup(esw, vport); + if (err) + return err; + created = true; + } + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; flow_act.fg = vport->ingress.offloads.drop_grp; flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); - goto out; + goto err_out; } vport->ingress.offloads.drop_rule = flow_rule; -out: + + return 0; +err_out: + /* Only destroy ingress acl created in this function. 
*/ + if (created) + esw_acl_ingress_ofld_cleanup(esw, vport); return err; } @@ -299,16 +315,12 @@ static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport) } } -int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int +acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { int num_ftes = 0; int err; - if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && - !esw_acl_ingress_prio_tag_enabled(esw, vport)) - return 0; - esw_acl_ingress_allow_rule_destroy(vport); if (mlx5_eswitch_vport_match_metadata_enabled(esw)) @@ -347,6 +359,15 @@ group_err: return err; } +int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !esw_acl_ingress_prio_tag_enabled(esw, vport)) + return 0; + + return acl_ingress_ofld_setup(esw, vport); +} + void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 592143d5e1da..72949cb85244 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4600,20 +4600,26 @@ mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 *max_io_eqs, return -EOPNOTSUPP; } + if (!MLX5_CAP_GEN_2(esw->dev, max_num_eqs_24b)) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support getting the max number of EQs"); + return -EOPNOTSUPP; + } + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); if (!query_ctx) return -ENOMEM; mutex_lock(&esw->state_lock); err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx, - MLX5_CAP_GENERAL); + MLX5_CAP_GENERAL_2); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); goto out; } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, 
max_num_eqs); + max_eqs = MLX5_GET(cmd_hca_cap_2, hca_caps, max_num_eqs_24b); if (max_eqs < MLX5_ESW_MAX_CTRL_EQS) *max_io_eqs = 0; else @@ -4644,6 +4650,12 @@ mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs, return -EOPNOTSUPP; } + if (!MLX5_CAP_GEN_2(esw->dev, max_num_eqs_24b)) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support changing the max number of EQs"); + return -EOPNOTSUPP; + } + if (check_add_overflow(max_io_eqs, MLX5_ESW_MAX_CTRL_EQS, &max_eqs)) { NL_SET_ERR_MSG_MOD(extack, "Supplied value out of range"); return -EINVAL; @@ -4655,17 +4667,17 @@ mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs, mutex_lock(&esw->state_lock); err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx, - MLX5_CAP_GENERAL); + MLX5_CAP_GENERAL_2); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); goto out; } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs); + MLX5_SET(cmd_hca_cap_2, hca_caps, max_num_eqs_24b, max_eqs); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num, - MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2); if (err) NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index c38342b9f320..a7fd18888b6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -383,4 +383,14 @@ static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vpo : vport; } +static inline int mlx5_max_eq_cap_get(const struct mlx5_core_dev *dev) +{ + if (MLX5_CAP_GEN_2(dev, max_num_eqs_24b)) + return MLX5_CAP_GEN_2(dev, max_num_eqs_24b); + + if (MLX5_CAP_GEN(dev, max_num_eqs)) + return MLX5_CAP_GEN(dev, max_num_eqs); + + return 1 << MLX5_CAP_GEN(dev, log_max_eq); +} #endif /* 
__MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index fb8787e30d3f..401d39069680 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -711,9 +711,7 @@ int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table) int mlx5_irq_table_create(struct mlx5_core_dev *dev) { - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? - MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); + int num_eqs = mlx5_max_eq_cap_get(dev); int total_vec; int pcif_vec; int req_vec; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c index 025e0db983fe..b032d5a4b3b8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c @@ -1484,6 +1484,7 @@ err_type_file_file_validate: vfree(types_info->data); err_data_alloc: kfree(types_info); + linecards->types_info = NULL; return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index bf66d996e32e..c0ced4d315f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1594,18 +1594,25 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci, return -EBUSY; } -static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci) +static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci, + bool pci_reset_sbr_supported) { struct pci_dev *pdev = mlxsw_pci->pdev; char mrsr_pl[MLXSW_REG_MRSR_LEN]; int err; + if (!pci_reset_sbr_supported) { + pci_dbg(pdev, "Performing PCI hot reset instead of \"all reset\"\n"); + goto sbr; + } + mlxsw_reg_mrsr_pack(mrsr_pl, MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE); err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); if (err) return err; +sbr: device_lock_assert(&pdev->dev); 
pci_cfg_access_lock(pdev); @@ -1633,6 +1640,7 @@ static int mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id) { struct pci_dev *pdev = mlxsw_pci->pdev; + bool pci_reset_sbr_supported = false; char mcam_pl[MLXSW_REG_MCAM_LEN]; bool pci_reset_supported = false; u32 sys_status; @@ -1652,13 +1660,17 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id) mlxsw_reg_mcam_pack(mcam_pl, MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES); err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl); - if (!err) + if (!err) { mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET, &pci_reset_supported); + mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET_SBR, + &pci_reset_sbr_supported); + } if (pci_reset_supported) { pci_dbg(pdev, "Starting PCI reset flow\n"); - err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci); + err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci, + pci_reset_sbr_supported); } else { pci_dbg(pdev, "Starting software reset flow\n"); err = mlxsw_pci_reset_sw(mlxsw_pci); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 8adf86a6f5cc..3bb89045eaf5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -10671,6 +10671,8 @@ enum mlxsw_reg_mcam_mng_feature_cap_mask_bits { MLXSW_REG_MCAM_MCIA_128B = 34, /* If set, MRSR.command=6 is supported. */ MLXSW_REG_MCAM_PCI_RESET = 48, + /* If set, MRSR.command=6 is supported with Secondary Bus Reset. 
*/ + MLXSW_REG_MCAM_PCI_RESET_SBR = 67, }; #define MLXSW_REG_BYTES_PER_DWORD 0x4 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index c9f1c79f3f9d..ba090262e27e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -1607,8 +1607,8 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, unsigned int sb_index) { + u16 local_port, local_port_1, first_local_port, last_local_port; struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - u16 local_port, local_port_1, last_local_port; struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx; u8 masked_count, current_page = 0; unsigned long cb_priv = 0; @@ -1628,6 +1628,7 @@ next_batch: masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, false); mlxsw_reg_sbsr_port_page_set(sbsr_pl, current_page); + first_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE; last_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE + MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE - 1; @@ -1645,9 +1646,12 @@ next_batch: if (local_port != MLXSW_PORT_CPU_PORT) { /* Ingress quotas are not supported for the CPU port */ mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, - local_port, 1); + local_port - first_local_port, + 1); } - mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, + local_port - first_local_port, + 1); for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, &bulk_list); @@ -1684,7 +1688,7 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, unsigned int sb_index) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - u16 local_port, last_local_port; + u16 local_port, first_local_port, last_local_port; LIST_HEAD(bulk_list); unsigned int masked_count; u8 current_page = 0; @@ 
-1702,6 +1706,7 @@ next_batch: masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, true); mlxsw_reg_sbsr_port_page_set(sbsr_pl, current_page); + first_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE; last_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE + MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE - 1; @@ -1719,9 +1724,12 @@ next_batch: if (local_port != MLXSW_PORT_CPU_PORT) { /* Ingress quotas are not supported for the CPU port */ mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, - local_port, 1); + local_port - first_local_port, + 1); } - mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, + local_port - first_local_port, + 1); for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, &bulk_list); diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 6453c92f0fa7..7fa1820db9cc 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -352,11 +352,11 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) netif_dbg(ks, intr, ks->netdev, "%s: txspace %d\n", __func__, tx_space); - spin_lock(&ks->statelock); + spin_lock_bh(&ks->statelock); ks->tx_space = tx_space; if (netif_queue_stopped(ks->netdev)) netif_wake_queue(ks->netdev); - spin_unlock(&ks->statelock); + spin_unlock_bh(&ks->statelock); } if (status & IRQ_SPIBEI) { @@ -482,6 +482,7 @@ static int ks8851_net_open(struct net_device *dev) ks8851_wrreg16(ks, KS_IER, ks->rc_ier); ks->queued_len = 0; + ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR); netif_start_queue(ks->netdev); netif_dbg(ks, ifup, ks->netdev, "network device up\n"); @@ -635,14 +636,14 @@ static void ks8851_set_rx_mode(struct net_device *dev) /* schedule work to do the actual set of the data if needed */ - spin_lock(&ks->statelock); + spin_lock_bh(&ks->statelock); if (memcmp(&rxctrl, &ks->rxctrl, sizeof(rxctrl)) != 0) { 
memcpy(&ks->rxctrl, &rxctrl, sizeof(ks->rxctrl)); schedule_work(&ks->rxctrl_work); } - spin_unlock(&ks->statelock); + spin_unlock_bh(&ks->statelock); } static int ks8851_set_mac_address(struct net_device *dev, void *addr) @@ -1101,7 +1102,6 @@ int ks8851_probe_common(struct net_device *netdev, struct device *dev, int ret; ks->netdev = netdev; - ks->tx_space = 6144; ks->gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); ret = PTR_ERR_OR_ZERO(ks->gpio); diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 670c1de966db..3062cc0f9199 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -340,10 +340,10 @@ static void ks8851_tx_work(struct work_struct *work) tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR); - spin_lock(&ks->statelock); + spin_lock_bh(&ks->statelock); ks->queued_len -= dequeued_len; ks->tx_space = tx_space; - spin_unlock(&ks->statelock); + spin_unlock_bh(&ks->statelock); ks8851_unlock_spi(ks, &flags); } diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index d0f4ff4ee075..0d1740d64676 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -1127,8 +1127,12 @@ static void lan743x_ethtool_get_wol(struct net_device *netdev, if (netdev->phydev) phy_ethtool_get_wol(netdev->phydev, wol); - wol->supported |= WAKE_BCAST | WAKE_UCAST | WAKE_MCAST | - WAKE_MAGIC | WAKE_PHY | WAKE_ARP; + if (wol->supported != adapter->phy_wol_supported) + netif_warn(adapter, drv, adapter->netdev, + "PHY changed its supported WOL! 
old=%x, new=%x\n", + adapter->phy_wol_supported, wol->supported); + + wol->supported |= MAC_SUPPORTED_WAKES; if (adapter->is_pci11x1x) wol->supported |= WAKE_MAGICSECURE; @@ -1143,7 +1147,39 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev, { struct lan743x_adapter *adapter = netdev_priv(netdev); + /* WAKE_MAGICSECURE is a modifier of and only valid together with + * WAKE_MAGIC + */ + if ((wol->wolopts & WAKE_MAGICSECURE) && !(wol->wolopts & WAKE_MAGIC)) + return -EINVAL; + + if (netdev->phydev) { + struct ethtool_wolinfo phy_wol; + int ret; + + phy_wol.wolopts = wol->wolopts & adapter->phy_wol_supported; + + /* If WAKE_MAGICSECURE was requested, filter out WAKE_MAGIC + * for PHYs that do not support WAKE_MAGICSECURE + */ + if (wol->wolopts & WAKE_MAGICSECURE && + !(adapter->phy_wol_supported & WAKE_MAGICSECURE)) + phy_wol.wolopts &= ~WAKE_MAGIC; + + ret = phy_ethtool_set_wol(netdev->phydev, &phy_wol); + if (ret && (ret != -EOPNOTSUPP)) + return ret; + + if (ret == -EOPNOTSUPP) + adapter->phy_wolopts = 0; + else + adapter->phy_wolopts = phy_wol.wolopts; + } else { + adapter->phy_wolopts = 0; + } + adapter->wolopts = 0; + wol->wolopts &= ~adapter->phy_wolopts; if (wol->wolopts & WAKE_UCAST) adapter->wolopts |= WAKE_UCAST; if (wol->wolopts & WAKE_MCAST) @@ -1164,10 +1200,10 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev, memset(adapter->sopass, 0, sizeof(u8) * SOPASS_MAX); } + wol->wolopts = adapter->wolopts | adapter->phy_wolopts; device_set_wakeup_enable(&adapter->pdev->dev, (bool)wol->wolopts); - return netdev->phydev ? 
phy_ethtool_set_wol(netdev->phydev, wol) - : -ENETDOWN; + return 0; } #endif /* CONFIG_PM */ diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 6be8a43c908a..e418539565b1 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3118,6 +3118,17 @@ static int lan743x_netdev_open(struct net_device *netdev) if (ret) goto close_tx; } + +#ifdef CONFIG_PM + if (adapter->netdev->phydev) { + struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + + phy_ethtool_get_wol(netdev->phydev, &wol); + adapter->phy_wol_supported = wol.supported; + adapter->phy_wolopts = wol.wolopts; + } +#endif + return 0; close_tx: @@ -3575,7 +3586,7 @@ static void lan743x_pm_set_wol(struct lan743x_adapter *adapter) /* clear wake settings */ pmtctl = lan743x_csr_read(adapter, PMT_CTL); - pmtctl |= PMT_CTL_WUPS_MASK_; + pmtctl |= PMT_CTL_WUPS_MASK_ | PMT_CTL_RES_CLR_WKP_MASK_; pmtctl &= ~(PMT_CTL_GPIO_WAKEUP_EN_ | PMT_CTL_EEE_WAKEUP_EN_ | PMT_CTL_WOL_EN_ | PMT_CTL_MAC_D3_RX_CLK_OVR_ | PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_ | PMT_CTL_ETH_PHY_WAKE_EN_); @@ -3587,10 +3598,9 @@ static void lan743x_pm_set_wol(struct lan743x_adapter *adapter) pmtctl |= PMT_CTL_ETH_PHY_D3_COLD_OVR_ | PMT_CTL_ETH_PHY_D3_OVR_; - if (adapter->wolopts & WAKE_PHY) { - pmtctl |= PMT_CTL_ETH_PHY_EDPD_PLL_CTL_; + if (adapter->phy_wolopts) pmtctl |= PMT_CTL_ETH_PHY_WAKE_EN_; - } + if (adapter->wolopts & WAKE_MAGIC) { wucsr |= MAC_WUCSR_MPEN_; macrx |= MAC_RX_RXEN_; @@ -3686,7 +3696,7 @@ static int lan743x_pm_suspend(struct device *dev) lan743x_csr_write(adapter, MAC_WUCSR2, 0); lan743x_csr_write(adapter, MAC_WK_SRC, 0xFFFFFFFF); - if (adapter->wolopts) + if (adapter->wolopts || adapter->phy_wolopts) lan743x_pm_set_wol(adapter); if (adapter->is_pci11x1x) { @@ -3710,6 +3720,7 @@ static int lan743x_pm_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); struct 
lan743x_adapter *adapter = netdev_priv(netdev); + u32 data; int ret; pci_set_power_state(pdev, PCI_D0); @@ -3728,6 +3739,30 @@ static int lan743x_pm_resume(struct device *dev) return ret; } + ret = lan743x_csr_read(adapter, MAC_WK_SRC); + netif_dbg(adapter, drv, adapter->netdev, + "Wakeup source : 0x%08X\n", ret); + + /* Clear the wol configuration and status bits. Note that + * the status bits are "Write One to Clear (W1C)" + */ + data = MAC_WUCSR_EEE_TX_WAKE_ | MAC_WUCSR_EEE_RX_WAKE_ | + MAC_WUCSR_RFE_WAKE_FR_ | MAC_WUCSR_PFDA_FR_ | MAC_WUCSR_WUFR_ | + MAC_WUCSR_MPR_ | MAC_WUCSR_BCAST_FR_; + lan743x_csr_write(adapter, MAC_WUCSR, data); + + data = MAC_WUCSR2_NS_RCD_ | MAC_WUCSR2_ARP_RCD_ | + MAC_WUCSR2_IPV6_TCPSYN_RCD_ | MAC_WUCSR2_IPV4_TCPSYN_RCD_; + lan743x_csr_write(adapter, MAC_WUCSR2, data); + + data = MAC_WK_SRC_ETH_PHY_WK_ | MAC_WK_SRC_IPV6_TCPSYN_RCD_WK_ | + MAC_WK_SRC_IPV4_TCPSYN_RCD_WK_ | MAC_WK_SRC_EEE_TX_WK_ | + MAC_WK_SRC_EEE_RX_WK_ | MAC_WK_SRC_RFE_FR_WK_ | + MAC_WK_SRC_PFDA_FR_WK_ | MAC_WK_SRC_MP_FR_WK_ | + MAC_WK_SRC_BCAST_FR_WK_ | MAC_WK_SRC_WU_FR_WK_ | + MAC_WK_SRC_WK_FR_SAVED_; + lan743x_csr_write(adapter, MAC_WK_SRC, data); + /* open netdev when netdev is at running state while resume. 
* For instance, it is true when system wakesup after pm-suspend * However, it is false when system wakes up after suspend GUI menu @@ -3736,9 +3771,6 @@ static int lan743x_pm_resume(struct device *dev) lan743x_netdev_open(netdev); netif_device_attach(netdev); - ret = lan743x_csr_read(adapter, MAC_WK_SRC); - netif_info(adapter, drv, adapter->netdev, - "Wakeup source : 0x%08X\n", ret); return 0; } diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 645bc048e52e..3b2585a384e2 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -61,6 +61,7 @@ #define PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_ BIT(18) #define PMT_CTL_GPIO_WAKEUP_EN_ BIT(15) #define PMT_CTL_EEE_WAKEUP_EN_ BIT(13) +#define PMT_CTL_RES_CLR_WKP_MASK_ GENMASK(9, 8) #define PMT_CTL_READY_ BIT(7) #define PMT_CTL_ETH_PHY_RST_ BIT(4) #define PMT_CTL_WOL_EN_ BIT(3) @@ -227,12 +228,31 @@ #define MAC_WUCSR (0x140) #define MAC_MP_SO_EN_ BIT(21) #define MAC_WUCSR_RFE_WAKE_EN_ BIT(14) +#define MAC_WUCSR_EEE_TX_WAKE_ BIT(13) +#define MAC_WUCSR_EEE_RX_WAKE_ BIT(11) +#define MAC_WUCSR_RFE_WAKE_FR_ BIT(9) +#define MAC_WUCSR_PFDA_FR_ BIT(7) +#define MAC_WUCSR_WUFR_ BIT(6) +#define MAC_WUCSR_MPR_ BIT(5) +#define MAC_WUCSR_BCAST_FR_ BIT(4) #define MAC_WUCSR_PFDA_EN_ BIT(3) #define MAC_WUCSR_WAKE_EN_ BIT(2) #define MAC_WUCSR_MPEN_ BIT(1) #define MAC_WUCSR_BCST_EN_ BIT(0) #define MAC_WK_SRC (0x144) +#define MAC_WK_SRC_ETH_PHY_WK_ BIT(17) +#define MAC_WK_SRC_IPV6_TCPSYN_RCD_WK_ BIT(16) +#define MAC_WK_SRC_IPV4_TCPSYN_RCD_WK_ BIT(15) +#define MAC_WK_SRC_EEE_TX_WK_ BIT(14) +#define MAC_WK_SRC_EEE_RX_WK_ BIT(13) +#define MAC_WK_SRC_RFE_FR_WK_ BIT(12) +#define MAC_WK_SRC_PFDA_FR_WK_ BIT(11) +#define MAC_WK_SRC_MP_FR_WK_ BIT(10) +#define MAC_WK_SRC_BCAST_FR_WK_ BIT(9) +#define MAC_WK_SRC_WU_FR_WK_ BIT(8) +#define MAC_WK_SRC_WK_FR_SAVED_ BIT(7) + #define MAC_MP_SO_HI (0x148) #define MAC_MP_SO_LO (0x14C) @@ -295,6 +315,10 @@ 
#define RFE_INDX(index) (0x580 + (index << 2)) #define MAC_WUCSR2 (0x600) +#define MAC_WUCSR2_NS_RCD_ BIT(7) +#define MAC_WUCSR2_ARP_RCD_ BIT(6) +#define MAC_WUCSR2_IPV6_TCPSYN_RCD_ BIT(5) +#define MAC_WUCSR2_IPV4_TCPSYN_RCD_ BIT(4) #define SGMII_ACC (0x720) #define SGMII_ACC_SGMII_BZY_ BIT(31) @@ -1018,6 +1042,8 @@ enum lan743x_sgmii_lsd { LINK_2500_SLAVE }; +#define MAC_SUPPORTED_WAKES (WAKE_BCAST | WAKE_UCAST | WAKE_MCAST | \ + WAKE_MAGIC | WAKE_ARP) struct lan743x_adapter { struct net_device *netdev; struct mii_bus *mdiobus; @@ -1025,6 +1051,8 @@ struct lan743x_adapter { #ifdef CONFIG_PM u32 wolopts; u8 sopass[SOPASS_MAX]; + u32 phy_wolopts; + u32 phy_wol_supported; #endif struct pci_dev *pdev; struct lan743x_csr csr; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d087cf954f75..608ad31a9702 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2798,6 +2798,8 @@ static int add_adev(struct gdma_dev *gd) if (ret) goto init_fail; + /* madev is owned by the auxiliary device */ + madev = NULL; ret = auxiliary_device_add(adev); if (ret) goto add_fail; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index f30eee4a5a80..b6c01a88098d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -375,7 +375,9 @@ typedef void (*ionic_cq_done_cb)(void *done_arg); unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do, ionic_cq_cb cb, ionic_cq_done_cb done_cb, void *done_arg); -unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do); +unsigned int ionic_tx_cq_service(struct ionic_cq *cq, + unsigned int work_to_do, + bool in_napi); int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev, struct ionic_queue *q, unsigned int index, const char *name, diff --git 
a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 1934e9d6d9e4..1837a30ba08a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1189,7 +1189,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget) ionic_rx_service, NULL, NULL); if (lif->hwstamp_txq) - tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget); + tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget, !!budget); work_done = max(max(n_work, a_work), max(rx_work, tx_work)); if (work_done < budget && napi_complete_done(napi, work_done)) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 2427610f4306..9fdd7cd3ef19 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -23,7 +23,8 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q, static void ionic_tx_clean(struct ionic_queue *q, struct ionic_tx_desc_info *desc_info, - struct ionic_txq_comp *comp); + struct ionic_txq_comp *comp, + bool in_napi); static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell) { @@ -480,6 +481,20 @@ int ionic_xdp_xmit(struct net_device *netdev, int n, return nxmit; } +static void ionic_xdp_rx_put_bufs(struct ionic_queue *q, + struct ionic_buf_info *buf_info, + int nbufs) +{ + int i; + + for (i = 0; i < nbufs; i++) { + dma_unmap_page(q->dev, buf_info->dma_addr, + IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + buf_info->page = NULL; + buf_info++; + } +} + static bool ionic_run_xdp(struct ionic_rx_stats *stats, struct net_device *netdev, struct bpf_prog *xdp_prog, @@ -493,6 +508,7 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, struct netdev_queue *nq; struct xdp_frame *xdpf; int remain_len; + int nbufs = 1; int frag_len; int err = 0; @@ -542,6 +558,7 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, if 
(page_is_pfmemalloc(bi->page)) xdp_buff_set_frag_pfmemalloc(&xdp_buf); } while (remain_len > 0); + nbufs += sinfo->nr_frags; } xdp_action = bpf_prog_run_xdp(xdp_prog, &xdp_buf); @@ -574,9 +591,6 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, goto out_xdp_abort; } - dma_unmap_page(rxq->dev, buf_info->dma_addr, - IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - err = ionic_xdp_post_frame(txq, xdpf, XDP_TX, buf_info->page, buf_info->page_offset, @@ -586,23 +600,19 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, netdev_dbg(netdev, "tx ionic_xdp_post_frame err %d\n", err); goto out_xdp_abort; } - buf_info->page = NULL; + ionic_xdp_rx_put_bufs(rxq, buf_info, nbufs); stats->xdp_tx++; /* the Tx completion will free the buffers */ break; case XDP_REDIRECT: - /* unmap the pages before handing them to a different device */ - dma_unmap_page(rxq->dev, buf_info->dma_addr, - IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - err = xdp_do_redirect(netdev, &xdp_buf, xdp_prog); if (err) { netdev_dbg(netdev, "xdp_do_redirect err %d\n", err); goto out_xdp_abort; } - buf_info->page = NULL; + ionic_xdp_rx_put_bufs(rxq, buf_info, nbufs); rxq->xdp_flush = true; stats->xdp_redirect++; break; @@ -935,7 +945,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) u32 work_done = 0; u32 flags = 0; - work_done = ionic_tx_cq_service(cq, budget); + work_done = ionic_tx_cq_service(cq, budget, !!budget); if (unlikely(!budget)) return budget; @@ -1019,7 +1029,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) txqcq = lif->txqcqs[qi]; txcq = &lif->txqcqs[qi]->cq; - tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT); + tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, !!budget); if (unlikely(!budget)) return budget; @@ -1152,7 +1162,8 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q, static void ionic_tx_clean(struct ionic_queue *q, struct ionic_tx_desc_info *desc_info, - struct ionic_txq_comp *comp) + struct ionic_txq_comp *comp, + bool 
in_napi) { struct ionic_tx_stats *stats = q_to_tx_stats(q); struct ionic_qcq *qcq = q_to_qcq(q); @@ -1204,11 +1215,13 @@ static void ionic_tx_clean(struct ionic_queue *q, desc_info->bytes = skb->len; stats->clean++; - napi_consume_skb(skb, 1); + napi_consume_skb(skb, likely(in_napi) ? 1 : 0); } static bool ionic_tx_service(struct ionic_cq *cq, - unsigned int *total_pkts, unsigned int *total_bytes) + unsigned int *total_pkts, + unsigned int *total_bytes, + bool in_napi) { struct ionic_tx_desc_info *desc_info; struct ionic_queue *q = cq->bound_q; @@ -1230,7 +1243,7 @@ static bool ionic_tx_service(struct ionic_cq *cq, desc_info->bytes = 0; index = q->tail_idx; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); - ionic_tx_clean(q, desc_info, comp); + ionic_tx_clean(q, desc_info, comp, in_napi); if (desc_info->skb) { pkts++; bytes += desc_info->bytes; @@ -1244,7 +1257,9 @@ static bool ionic_tx_service(struct ionic_cq *cq, return true; } -unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do) +unsigned int ionic_tx_cq_service(struct ionic_cq *cq, + unsigned int work_to_do, + bool in_napi) { unsigned int work_done = 0; unsigned int bytes = 0; @@ -1253,7 +1268,7 @@ unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do) if (work_to_do == 0) return 0; - while (ionic_tx_service(cq, &pkts, &bytes)) { + while (ionic_tx_service(cq, &pkts, &bytes, in_napi)) { if (cq->tail_idx == cq->num_descs - 1) cq->done_color = !cq->done_color; cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1); @@ -1279,7 +1294,7 @@ void ionic_tx_flush(struct ionic_cq *cq) { u32 work_done; - work_done = ionic_tx_cq_service(cq, cq->num_descs); + work_done = ionic_tx_cq_service(cq, cq->num_descs, false); if (work_done) ionic_intr_credits(cq->idev->intr_ctrl, cq->bound_intr->index, work_done, IONIC_INTR_CRED_RESET_COALESCE); @@ -1296,7 +1311,7 @@ void ionic_tx_empty(struct ionic_queue *q) desc_info = &q->tx_info[q->tail_idx]; desc_info->bytes = 0; 
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); - ionic_tx_clean(q, desc_info, NULL); + ionic_tx_clean(q, desc_info, NULL, false); if (desc_info->skb) { pkts++; bytes += desc_info->bytes; diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c index ff3b89e9028e..ad06da0fdaa0 100644 --- a/drivers/net/ethernet/qualcomm/qca_debug.c +++ b/drivers/net/ethernet/qualcomm/qca_debug.c @@ -98,10 +98,8 @@ qcaspi_info_show(struct seq_file *s, void *what) seq_printf(s, "IRQ : %d\n", qca->spi_dev->irq); - seq_printf(s, "INTR REQ : %u\n", - qca->intr_req); - seq_printf(s, "INTR SVC : %u\n", - qca->intr_svc); + seq_printf(s, "INTR : %lx\n", + qca->intr); seq_printf(s, "SPI max speed : %lu\n", (unsigned long)qca->spi_dev->max_speed_hz); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 5799ecc88a87..8f7ce6b51a1c 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -35,6 +35,8 @@ #define MAX_DMA_BURST_LEN 5000 +#define SPI_INTR 0 + /* Modules parameters */ #define QCASPI_CLK_SPEED_MIN 1000000 #define QCASPI_CLK_SPEED_MAX 16000000 @@ -579,14 +581,14 @@ qcaspi_spi_thread(void *data) continue; } - if ((qca->intr_req == qca->intr_svc) && + if (!test_bit(SPI_INTR, &qca->intr) && !qca->txr.skb[qca->txr.head]) schedule(); set_current_state(TASK_RUNNING); - netdev_dbg(qca->net_dev, "have work to do. int: %d, tx_skb: %p\n", - qca->intr_req - qca->intr_svc, + netdev_dbg(qca->net_dev, "have work to do. 
int: %lu, tx_skb: %p\n", + qca->intr, qca->txr.skb[qca->txr.head]); qcaspi_qca7k_sync(qca, QCASPI_EVENT_UPDATE); @@ -600,8 +602,7 @@ qcaspi_spi_thread(void *data) msleep(QCASPI_QCA7K_REBOOT_TIME_MS); } - if (qca->intr_svc != qca->intr_req) { - qca->intr_svc = qca->intr_req; + if (test_and_clear_bit(SPI_INTR, &qca->intr)) { start_spi_intr_handling(qca, &intr_cause); if (intr_cause & SPI_INT_CPU_ON) { @@ -663,7 +664,7 @@ qcaspi_intr_handler(int irq, void *data) { struct qcaspi *qca = data; - qca->intr_req++; + set_bit(SPI_INTR, &qca->intr); if (qca->spi_thread) wake_up_process(qca->spi_thread); @@ -679,8 +680,7 @@ qcaspi_netdev_open(struct net_device *dev) if (!qca) return -EINVAL; - qca->intr_req = 1; - qca->intr_svc = 0; + set_bit(SPI_INTR, &qca->intr); qca->sync = QCASPI_SYNC_UNKNOWN; qcafrm_fsm_init_spi(&qca->frm_handle); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h index d59cb2352cee..8f4808695e82 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.h +++ b/drivers/net/ethernet/qualcomm/qca_spi.h @@ -81,8 +81,7 @@ struct qcaspi { struct qcafrm_handle frm_handle; struct sk_buff *rx_skb; - unsigned int intr_req; - unsigned int intr_svc; + unsigned long intr; u16 reset_count; #ifdef CONFIG_DEBUG_FS diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index dcab638c57fe..24c90d8f5a44 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -871,13 +871,13 @@ static void rswitch_tx_free(struct net_device *ndev) dma_rmb(); skb = gq->skbs[gq->dirty]; if (skb) { + rdev->ndev->stats.tx_packets++; + rdev->ndev->stats.tx_bytes += skb->len; dma_unmap_single(ndev->dev.parent, gq->unmap_addrs[gq->dirty], skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(gq->skbs[gq->dirty]); gq->skbs[gq->dirty] = NULL; - rdev->ndev->stats.tx_packets++; - rdev->ndev->stats.tx_bytes += skb->len; } desc->desc.die_dt = DT_EEMPTY; } diff --git 
a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 65d7370b47d5..466c4002f00d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -272,7 +272,7 @@ static const struct ethqos_emac_por emac_v4_0_0_por[] = { static const struct ethqos_emac_driver_data emac_v4_0_0_data = { .por = emac_v4_0_0_por, - .num_por = ARRAY_SIZE(emac_v3_0_0_por), + .num_por = ARRAY_SIZE(emac_v4_0_0_por), .rgmii_config_loopback_en = false, .has_emac_ge_3 = true, .link_clk_name = "phyaux", diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index f05bd757dfe5..5ef52ef2698f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -218,6 +218,7 @@ static void timestamp_interrupt(struct stmmac_priv *priv) { u32 num_snapshot, ts_status, tsync_int; struct ptp_clock_event event; + u32 acr_value, channel; unsigned long flags; u64 ptp_time; int i; @@ -243,12 +244,15 @@ static void timestamp_interrupt(struct stmmac_priv *priv) num_snapshot = (ts_status & GMAC_TIMESTAMP_ATSNS_MASK) >> GMAC_TIMESTAMP_ATSNS_SHIFT; + acr_value = readl(priv->ptpaddr + PTP_ACR); + channel = ilog2(FIELD_GET(PTP_ACR_MASK, acr_value)); + for (i = 0; i < num_snapshot; i++) { read_lock_irqsave(&priv->ptp_lock, flags); get_ptptime(priv->ptpaddr, &ptp_time); read_unlock_irqrestore(&priv->ptp_lock, flags); event.type = PTP_CLOCK_EXTTS; - event.index = 0; + event.index = channel; event.timestamp = ptp_time; ptp_clock_event(priv->ptp_clock, &event); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b3afc7cb7d72..c58782c41417 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7662,9 +7662,10 @@ int 
stmmac_dvr_probe(struct device *device, #ifdef STMMAC_VLAN_TAG_USED /* Both mac100 and gmac support receive VLAN tag detection */ ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX; - ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; - priv->hw->hw_vlan_en = true; - + if (priv->plat->has_gmac4) { + ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + priv->hw->hw_vlan_en = true; + } if (priv->dma_cap.vlhash) { ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1562fbdd0a04..996f2bcd07a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -358,24 +358,28 @@ static int tc_setup_cbs(struct stmmac_priv *priv, port_transmit_rate_kbps = qopt->idleslope - qopt->sendslope; - /* Port Transmit Rate and Speed Divider */ - switch (div_s64(port_transmit_rate_kbps, 1000)) { - case SPEED_10000: - case SPEED_5000: - ptr = 32; - break; - case SPEED_2500: - case SPEED_1000: - ptr = 8; - break; - case SPEED_100: - ptr = 4; - break; - default: - netdev_err(priv->dev, - "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n", - port_transmit_rate_kbps); - return -EINVAL; + if (qopt->enable) { + /* Port Transmit Rate and Speed Divider */ + switch (div_s64(port_transmit_rate_kbps, 1000)) { + case SPEED_10000: + case SPEED_5000: + ptr = 32; + break; + case SPEED_2500: + case SPEED_1000: + ptr = 8; + break; + case SPEED_100: + ptr = 4; + break; + default: + netdev_err(priv->dev, + "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n", + port_transmit_rate_kbps); + return -EINVAL; + } + } else { + ptr = 0; } mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 7c4b6881a93f..d1b682ce9c6d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c 
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1959,6 +1959,7 @@ int wx_sw_init(struct wx *wx) } bitmap_zero(wx->state, WX_STATE_NBITS); + wx->misc_irq_domain = false; return 0; } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 68bde91b67a0..81bedc8ee8d4 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1686,6 +1686,7 @@ static int wx_set_interrupt_capability(struct wx *wx) } pdev->irq = pci_irq_vector(pdev, 0); + wx->num_q_vectors = 1; return 0; } @@ -1996,7 +1997,8 @@ void wx_free_irq(struct wx *wx) int vector; if (!(pdev->msix_enabled)) { - free_irq(pdev->irq, wx); + if (!wx->misc_irq_domain) + free_irq(pdev->irq, wx); return; } @@ -2011,7 +2013,7 @@ void wx_free_irq(struct wx *wx) free_irq(entry->vector, q_vector); } - if (wx->mac.type == wx_mac_em) + if (!wx->misc_irq_domain) free_irq(wx->msix_entry->vector, wx); } EXPORT_SYMBOL(wx_free_irq); @@ -2026,6 +2028,9 @@ int wx_setup_isb_resources(struct wx *wx) { struct pci_dev *pdev = wx->pdev; + if (wx->isb_mem) + return 0; + wx->isb_mem = dma_alloc_coherent(&pdev->dev, sizeof(u32) * 4, &wx->isb_dma, @@ -2385,7 +2390,6 @@ static void wx_free_all_tx_resources(struct wx *wx) void wx_free_resources(struct wx *wx) { - wx_free_isb_resources(wx); wx_free_all_rx_resources(wx); wx_free_all_tx_resources(wx); } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 5aaf7b1fa2db..0df7f5712b6f 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1058,6 +1058,7 @@ struct wx { dma_addr_t isb_dma; u32 *isb_mem; u32 isb_tag[WX_ISB_MAX]; + bool misc_irq_domain; #define WX_MAX_RETA_ENTRIES 128 #define WX_RSS_INDIR_TBL_MAX 64 diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index e894e01d030d..af30ca0312b8 100644 --- 
a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -387,6 +387,7 @@ err_dis_phy: err_free_irq: wx_free_irq(wx); err_free_resources: + wx_free_isb_resources(wx); wx_free_resources(wx); return err; } @@ -408,6 +409,7 @@ static int ngbe_close(struct net_device *netdev) ngbe_down(wx); wx_free_irq(wx); + wx_free_isb_resources(wx); wx_free_resources(wx); phylink_disconnect_phy(wx->phylink); wx_control_hw(wx, false); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c index b3e3605d1edb..a4cf682dca65 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c @@ -27,57 +27,19 @@ void txgbe_irq_enable(struct wx *wx, bool queues) } /** - * txgbe_intr - msi/legacy mode Interrupt Handler - * @irq: interrupt number - * @data: pointer to a network interface device structure - **/ -static irqreturn_t txgbe_intr(int __always_unused irq, void *data) -{ - struct wx_q_vector *q_vector; - struct wx *wx = data; - struct pci_dev *pdev; - u32 eicr; - - q_vector = wx->q_vector[0]; - pdev = wx->pdev; - - eicr = wx_misc_isb(wx, WX_ISB_VEC0); - if (!eicr) { - /* shared interrupt alert! - * the interrupt that we masked before the ICR read. - */ - if (netif_running(wx->netdev)) - txgbe_irq_enable(wx, true); - return IRQ_NONE; /* Not our interrupt */ - } - wx->isb_mem[WX_ISB_VEC0] = 0; - if (!(pdev->msi_enabled)) - wr32(wx, WX_PX_INTA, 1); - - wx->isb_mem[WX_ISB_MISC] = 0; - /* would disable interrupts here but it is auto disabled */ - napi_schedule_irqoff(&q_vector->napi); - - /* re-enable link(maybe) and non-queue interrupts, no flush. 
- * txgbe_poll will re-enable the queue interrupts - */ - if (netif_running(wx->netdev)) - txgbe_irq_enable(wx, false); - - return IRQ_HANDLED; -} - -/** - * txgbe_request_msix_irqs - Initialize MSI-X interrupts + * txgbe_request_queue_irqs - Initialize MSI-X queue interrupts * @wx: board private structure * - * Allocate MSI-X vectors and request interrupts from the kernel. + * Allocate MSI-X queue vectors and request interrupts from the kernel. **/ -static int txgbe_request_msix_irqs(struct wx *wx) +int txgbe_request_queue_irqs(struct wx *wx) { struct net_device *netdev = wx->netdev; int vector, err; + if (!wx->pdev->msix_enabled) + return 0; + for (vector = 0; vector < wx->num_q_vectors; vector++) { struct wx_q_vector *q_vector = wx->q_vector[vector]; struct msix_entry *entry = &wx->msix_q_entries[vector]; @@ -110,34 +72,6 @@ free_queue_irqs: return err; } -/** - * txgbe_request_irq - initialize interrupts - * @wx: board private structure - * - * Attempt to configure interrupts using the best available - * capabilities of the hardware and kernel. 
- **/ -int txgbe_request_irq(struct wx *wx) -{ - struct net_device *netdev = wx->netdev; - struct pci_dev *pdev = wx->pdev; - int err; - - if (pdev->msix_enabled) - err = txgbe_request_msix_irqs(wx); - else if (pdev->msi_enabled) - err = request_irq(wx->pdev->irq, &txgbe_intr, 0, - netdev->name, wx); - else - err = request_irq(wx->pdev->irq, &txgbe_intr, IRQF_SHARED, - netdev->name, wx); - - if (err) - wx_err(wx, "request_irq failed, Error %d\n", err); - - return err; -} - static int txgbe_request_gpio_irq(struct txgbe *txgbe) { txgbe->gpio_irq = irq_find_mapping(txgbe->misc.domain, TXGBE_IRQ_GPIO); @@ -178,6 +112,36 @@ static const struct irq_domain_ops txgbe_misc_irq_domain_ops = { static irqreturn_t txgbe_misc_irq_handle(int irq, void *data) { + struct wx_q_vector *q_vector; + struct txgbe *txgbe = data; + struct wx *wx = txgbe->wx; + u32 eicr; + + if (wx->pdev->msix_enabled) + return IRQ_WAKE_THREAD; + + eicr = wx_misc_isb(wx, WX_ISB_VEC0); + if (!eicr) { + /* shared interrupt alert! + * the interrupt that we masked before the ICR read. 
+ */ + if (netif_running(wx->netdev)) + txgbe_irq_enable(wx, true); + return IRQ_NONE; /* Not our interrupt */ + } + wx->isb_mem[WX_ISB_VEC0] = 0; + if (!(wx->pdev->msi_enabled)) + wr32(wx, WX_PX_INTA, 1); + + /* would disable interrupts here but it is auto disabled */ + q_vector = wx->q_vector[0]; + napi_schedule_irqoff(&q_vector->napi); + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data) +{ struct txgbe *txgbe = data; struct wx *wx = txgbe->wx; unsigned int nhandled = 0; @@ -223,6 +187,7 @@ void txgbe_free_misc_irq(struct txgbe *txgbe) int txgbe_setup_misc_irq(struct txgbe *txgbe) { + unsigned long flags = IRQF_ONESHOT; struct wx *wx = txgbe->wx; int hwirq, err; @@ -236,14 +201,17 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe) irq_create_mapping(txgbe->misc.domain, hwirq); txgbe->misc.chip = txgbe_irq_chip; - if (wx->pdev->msix_enabled) + if (wx->pdev->msix_enabled) { txgbe->misc.irq = wx->msix_entry->vector; - else + } else { txgbe->misc.irq = wx->pdev->irq; + if (!wx->pdev->msi_enabled) + flags |= IRQF_SHARED; + } - err = request_threaded_irq(txgbe->misc.irq, NULL, - txgbe_misc_irq_handle, - IRQF_ONESHOT, + err = request_threaded_irq(txgbe->misc.irq, txgbe_misc_irq_handle, + txgbe_misc_irq_thread_fn, + flags, wx->netdev->name, txgbe); if (err) goto del_misc_irq; @@ -256,6 +224,8 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe) if (err) goto free_gpio_irq; + wx->misc_irq_domain = true; + return 0; free_gpio_irq: diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h index b77945e7a0f2..e6285b94625e 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h @@ -2,6 +2,6 @@ /* Copyright (c) 2015 - 2024 Beijing WangXun Technology Co., Ltd. 
*/ void txgbe_irq_enable(struct wx *wx, bool queues); -int txgbe_request_irq(struct wx *wx); +int txgbe_request_queue_irqs(struct wx *wx); void txgbe_free_misc_irq(struct txgbe *txgbe); int txgbe_setup_misc_irq(struct txgbe *txgbe); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 8c7a74981b90..ca74d9422065 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -294,9 +294,9 @@ static int txgbe_open(struct net_device *netdev) wx_configure(wx); - err = txgbe_request_irq(wx); + err = txgbe_request_queue_irqs(wx); if (err) - goto err_free_isb; + goto err_free_resources; /* Notify the stack of the actual queue counts. */ err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues); @@ -313,8 +313,8 @@ static int txgbe_open(struct net_device *netdev) err_free_irq: wx_free_irq(wx); -err_free_isb: - wx_free_isb_resources(wx); +err_free_resources: + wx_free_resources(wx); err_reset: txgbe_reset(wx); @@ -729,6 +729,7 @@ static void txgbe_remove(struct pci_dev *pdev) txgbe_remove_phy(txgbe); txgbe_free_misc_irq(txgbe); + wx_free_isb_resources(wx); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index ffe7f463e16e..ef6df0e37bea 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -119,7 +119,7 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, skb->protocol = eth_type_trans(skb, ndev); skb->ip_summed = CHECKSUM_NONE; - if (__netif_rx(skb) == NET_RX_DROP) { + if (netif_rx(skb) == NET_RX_DROP) { ndev->stats.rx_errors++; ndev->stats.rx_dropped++; } else { diff --git a/drivers/net/phy/aquantia/aquantia.h b/drivers/net/phy/aquantia/aquantia.h index 1c19ae74ad2b..4830b25e6c7d 100644 --- a/drivers/net/phy/aquantia/aquantia.h +++ b/drivers/net/phy/aquantia/aquantia.h @@ -6,6 +6,9 @@ * Author: Heiner Kallweit 
<hkallweit1@gmail.com> */ +#ifndef AQUANTIA_H +#define AQUANTIA_H + #include <linux/device.h> #include <linux/phy.h> @@ -120,3 +123,5 @@ static inline int aqr_hwmon_probe(struct phy_device *phydev) { return 0; } #endif int aqr_firmware_load(struct phy_device *phydev); + +#endif /* AQUANTIA_H */ diff --git a/drivers/net/phy/dp83tg720.c b/drivers/net/phy/dp83tg720.c index 326c9770a6dc..c706429b225a 100644 --- a/drivers/net/phy/dp83tg720.c +++ b/drivers/net/phy/dp83tg720.c @@ -17,6 +17,11 @@ #define DP83TG720S_PHY_RESET 0x1f #define DP83TG720S_HW_RESET BIT(15) +#define DP83TG720S_LPS_CFG3 0x18c +/* Power modes are documented as bit fields but used as values */ +/* Power Mode 0 is Normal mode */ +#define DP83TG720S_LPS_CFG3_PWR_MODE_0 BIT(0) + #define DP83TG720S_RGMII_DELAY_CTRL 0x602 /* In RGMII mode, Enable or disable the internal delay for RXD */ #define DP83TG720S_RGMII_RX_CLK_SEL BIT(1) @@ -31,11 +36,20 @@ static int dp83tg720_config_aneg(struct phy_device *phydev) { + int ret; + /* Autoneg is not supported and this PHY supports only one speed. * We need to care only about master/slave configuration if it was * changed by user. */ - return genphy_c45_pma_baset1_setup_master_slave(phydev); + ret = genphy_c45_pma_baset1_setup_master_slave(phydev); + if (ret) + return ret; + + /* Re-read role configuration to make changes visible even if + * the link is in administrative down state. + */ + return genphy_c45_pma_baset1_read_master_slave(phydev); } static int dp83tg720_read_status(struct phy_device *phydev) @@ -64,6 +78,8 @@ static int dp83tg720_read_status(struct phy_device *phydev) return ret; /* After HW reset we need to restore master/slave configuration. + * genphy_c45_pma_baset1_read_master_slave() call will be done + * by the dp83tg720_config_aneg() function. 
*/ ret = dp83tg720_config_aneg(phydev); if (ret) @@ -154,10 +170,24 @@ static int dp83tg720_config_init(struct phy_device *phydev) */ usleep_range(1000, 2000); - if (phy_interface_is_rgmii(phydev)) - return dp83tg720_config_rgmii_delay(phydev); + if (phy_interface_is_rgmii(phydev)) { + ret = dp83tg720_config_rgmii_delay(phydev); + if (ret) + return ret; + } + + /* In case the PHY is bootstrapped in managed mode, we need to + * wake it. + */ + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_LPS_CFG3, + DP83TG720S_LPS_CFG3_PWR_MODE_0); + if (ret) + return ret; - return 0; + /* Make role configuration visible for ethtool on init and after + * reset. + */ + return genphy_c45_pma_baset1_read_master_slave(phydev); } static struct phy_driver dp83tg720_driver[] = { diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 5aada7cf3da7..ebafedde0ab7 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -5607,6 +5607,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ8081, MICREL_PHY_ID_MASK }, { PHY_ID_KSZ8873MLL, MICREL_PHY_ID_MASK }, { PHY_ID_KSZ886X, MICREL_PHY_ID_MASK }, + { PHY_ID_KSZ9477, MICREL_PHY_ID_MASK }, { PHY_ID_LAN8814, MICREL_PHY_ID_MASK }, { PHY_ID_LAN8804, MICREL_PHY_ID_MASK }, { PHY_ID_LAN8841, MICREL_PHY_ID_MASK }, diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index a838b61cd844..a35528497a57 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -748,7 +748,7 @@ static int lan87xx_cable_test_report(struct phy_device *phydev) ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A, lan87xx_cable_test_report_trans(detect)); - return 0; + return phy_init_hw(phydev); } static int lan87xx_cable_test_get_status(struct phy_device *phydev, diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index b2d36a3a96f1..e5f8ac4b4604 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -107,6 +107,7 @@ struct 
gpy_priv { u8 fw_major; u8 fw_minor; + u32 wolopts; /* It takes 3 seconds to fully switch out of loopback mode before * it can safely re-enter loopback mode. Record the time when @@ -221,6 +222,15 @@ static int gpy_hwmon_register(struct phy_device *phydev) } #endif +static int gpy_ack_interrupt(struct phy_device *phydev) +{ + int ret; + + /* Clear all pending interrupts */ + ret = phy_read(phydev, PHY_ISTAT); + return ret < 0 ? ret : 0; +} + static int gpy_mbox_read(struct phy_device *phydev, u32 addr) { struct gpy_priv *priv = phydev->priv; @@ -262,16 +272,8 @@ out: static int gpy_config_init(struct phy_device *phydev) { - int ret; - - /* Mask all interrupts */ - ret = phy_write(phydev, PHY_IMASK, 0); - if (ret) - return ret; - - /* Clear all pending interrupts */ - ret = phy_read(phydev, PHY_ISTAT); - return ret < 0 ? ret : 0; + /* Nothing to configure. Configuration Requirement Placeholder */ + return 0; } static int gpy21x_config_init(struct phy_device *phydev) @@ -627,11 +629,23 @@ static int gpy_read_status(struct phy_device *phydev) static int gpy_config_intr(struct phy_device *phydev) { + struct gpy_priv *priv = phydev->priv; u16 mask = 0; + int ret; + + ret = gpy_ack_interrupt(phydev); + if (ret) + return ret; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) mask = PHY_IMASK_MASK; + if (priv->wolopts & WAKE_MAGIC) + mask |= PHY_IMASK_WOL; + + if (priv->wolopts & WAKE_PHY) + mask |= PHY_IMASK_LSTC; + return phy_write(phydev, PHY_IMASK, mask); } @@ -678,6 +692,7 @@ static int gpy_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { struct net_device *attach_dev = phydev->attached_dev; + struct gpy_priv *priv = phydev->priv; int ret; if (wol->wolopts & WAKE_MAGIC) { @@ -725,6 +740,8 @@ static int gpy_set_wol(struct phy_device *phydev, ret = phy_read(phydev, PHY_ISTAT); if (ret < 0) return ret; + + priv->wolopts |= WAKE_MAGIC; } else { /* Disable magic packet matching */ ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, @@ -732,6 +749,13 @@ static 
int gpy_set_wol(struct phy_device *phydev, WOL_EN); if (ret < 0) return ret; + + /* Disable the WOL interrupt */ + ret = phy_clear_bits(phydev, PHY_IMASK, PHY_IMASK_WOL); + if (ret < 0) + return ret; + + priv->wolopts &= ~WAKE_MAGIC; } if (wol->wolopts & WAKE_PHY) { @@ -748,9 +772,11 @@ static int gpy_set_wol(struct phy_device *phydev, if (ret & (PHY_IMASK_MASK & ~PHY_IMASK_LSTC)) phy_trigger_machine(phydev); + priv->wolopts |= WAKE_PHY; return 0; } + priv->wolopts &= ~WAKE_PHY; /* Disable the link state change interrupt */ return phy_clear_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC); } @@ -758,18 +784,10 @@ static int gpy_set_wol(struct phy_device *phydev, static void gpy_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { - int ret; + struct gpy_priv *priv = phydev->priv; wol->supported = WAKE_MAGIC | WAKE_PHY; - wol->wolopts = 0; - - ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, VPSPEC2_WOL_CTL); - if (ret & WOL_EN) - wol->wolopts |= WAKE_MAGIC; - - ret = phy_read(phydev, PHY_IMASK); - if (ret & PHY_IMASK_LSTC) - wol->wolopts |= WAKE_PHY; + wol->wolopts = priv->wolopts; } static int gpy_loopback(struct phy_device *phydev, bool enable) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 0a65b6d690fe..eb9acfcaeb09 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -70,6 +70,7 @@ #define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */ #define PPP_PROTO_LEN 2 +#define PPP_LCP_HDRLEN 4 /* * An instance of /dev/ppp can be associated with either a ppp @@ -493,6 +494,15 @@ static ssize_t ppp_read(struct file *file, char __user *buf, return ret; } +static bool ppp_check_packet(struct sk_buff *skb, size_t count) +{ + /* LCP packets must include LCP header which 4 bytes long: + * 1-byte code, 1-byte identifier, and 2-byte length. 
+ */ + return get_unaligned_be16(skb->data) != PPP_LCP || + count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN; +} + static ssize_t ppp_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -515,6 +525,11 @@ static ssize_t ppp_write(struct file *file, const char __user *buf, kfree_skb(skb); goto out; } + ret = -EINVAL; + if (unlikely(!ppp_check_packet(skb, count))) { + kfree_skb(skb); + goto out; + } switch (pf->kind) { case INTERFACE: diff --git a/drivers/net/pse-pd/Kconfig b/drivers/net/pse-pd/Kconfig index 577ea904b3d9..7fab916a7f46 100644 --- a/drivers/net/pse-pd/Kconfig +++ b/drivers/net/pse-pd/Kconfig @@ -23,6 +23,7 @@ config PSE_REGULATOR config PSE_PD692X0 tristate "PD692X0 PSE controller" depends on I2C + select FW_LOADER select FW_UPLOAD help This module provides support for PD692x0 regulator based Ethernet diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 51c295e1e823..b034ef8a73ea 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -174,7 +174,6 @@ struct ax88179_data { u32 wol_supported; u32 wolopts; u8 disconnecting; - u8 initialized; }; struct ax88179_int_data { @@ -327,7 +326,8 @@ static void ax88179_status(struct usbnet *dev, struct urb *urb) if (netif_carrier_ok(dev->net) != link) { usbnet_link_change(dev, link, 1); - netdev_info(dev->net, "ax88179 - Link status is: %d\n", link); + if (!link) + netdev_info(dev->net, "ax88179 - Link status is: 0\n"); } } @@ -1543,6 +1543,7 @@ static int ax88179_link_reset(struct usbnet *dev) GMII_PHY_PHYSR, 2, &tmp16); if (!(tmp16 & GMII_PHY_PHYSR_LINK)) { + netdev_info(dev->net, "ax88179 - Link status is: 0\n"); return 0; } else if (GMII_PHY_PHYSR_GIGA == (tmp16 & GMII_PHY_PHYSR_SMASK)) { mode |= AX_MEDIUM_GIGAMODE | AX_MEDIUM_EN_125MHZ; @@ -1580,6 +1581,8 @@ static int ax88179_link_reset(struct usbnet *dev) netif_carrier_on(dev->net); + netdev_info(dev->net, "ax88179 - Link status is: 1\n"); + return 0; } @@ -1678,12 +1681,21 @@ 
static int ax88179_reset(struct usbnet *dev) static int ax88179_net_reset(struct usbnet *dev) { - struct ax88179_data *ax179_data = dev->driver_priv; + u16 tmp16; - if (ax179_data->initialized) + ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, GMII_PHY_PHYSR, + 2, &tmp16); + if (tmp16) { + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + if (!(tmp16 & AX_MEDIUM_RECEIVE_EN)) { + tmp16 |= AX_MEDIUM_RECEIVE_EN; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + } + } else { ax88179_reset(dev); - else - ax179_data->initialized = 1; + } return 0; } diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 663e46348ce3..386d62769ded 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1372,6 +1372,8 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x3000, 0)}, /* Telit FN912 series */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x3001, 0)}, /* Telit FN912 series */ {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9803, 4)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 97afd7335d86..01a3b2417a54 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -778,7 +778,8 @@ static int rtl8150_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *ecmd) { rtl8150_t *dev = netdev_priv(netdev); - short lpa, bmcr; + short lpa = 0; + short bmcr = 0; u32 supported; supported = (SUPPORTED_10baseT_Half | diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 61a57d134544..ea10db9a09fa 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ 
-1360,6 +1360,10 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, if (unlikely(hdr->hdr.gso_type)) goto err_xdp; + /* Partially checksummed packets must be dropped. */ + if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + goto err_xdp; + buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -1677,6 +1681,10 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, if (unlikely(hdr->hdr.gso_type)) return NULL; + /* Partially checksummed packets must be dropped. */ + if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return NULL; + /* Now XDP core assumes frag size is PAGE_SIZE, but buffers * with headroom may add hole in truesize, which * make their length exceed PAGE_SIZE. So we disabled the @@ -1943,6 +1951,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, struct net_device *dev = vi->dev; struct sk_buff *skb; struct virtio_net_common_hdr *hdr; + u8 flags; if (unlikely(len < vi->hdr_len + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); @@ -1951,6 +1960,15 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, return; } + /* 1. Save the flags early, as the XDP program might overwrite them. + * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID + * stay valid after XDP processing. + * 2. XDP doesn't work with partially checksummed packets (refer to + * virtnet_xdp_set()), so packets marked as + * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 
+ */ + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; + if (vi->mergeable_rx_bufs) skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); @@ -1966,7 +1984,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); - if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) + if (flags & VIRTIO_NET_HDR_F_DATA_VALID) skb->ip_summed = CHECKSUM_UNNECESSARY; if (virtio_net_hdr_to_skb(skb, &hdr->hdr, @@ -5666,8 +5684,16 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= dev->hw_features & NETIF_F_ALL_TSO; /* (!csum && gso) case will be fixed by register_netdev() */ } - if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) - dev->features |= NETIF_F_RXCSUM; + + /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't + * need to calculate checksums for partially checksummed packets, + * as they're considered valid by the upper layer. + * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only + * receives fully checksummed packets. The device may assist in + * validating these packets' checksums, so the driver won't have to. 
+ */ + dev->features |= NETIF_F_RXCSUM; + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) dev->features |= NETIF_F_GRO_HW; diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 567cb3faab70..ba59e92ab941 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2339,7 +2339,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel_key *pkey; struct ip_tunnel_key key; struct vxlan_dev *vxlan = netdev_priv(dev); - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; unsigned int pkt_len = skb->len; @@ -2353,8 +2353,15 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, bool use_cache; bool udp_sum = false; bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev)); + bool no_eth_encap; __be32 vni = 0; + no_eth_encap = flags & VXLAN_F_GPE && skb->protocol != htons(ETH_P_TEB); + if (!skb_vlan_inet_prepare(skb, no_eth_encap)) + goto drop; + + old_iph = ip_hdr(skb); + info = skb_tunnel_info(skb); use_cache = ip_tunnel_dst_cache_usable(skb, info); diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index 0ba714ca5185..4b8528206cc8 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -15,8 +15,8 @@ static void swap_endian(u8 *dst, const u8 *src, u8 bits) if (bits == 32) { *(u32 *)dst = be32_to_cpu(*(const __be32 *)src); } else if (bits == 128) { - ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]); - ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]); + ((u64 *)dst)[0] = get_unaligned_be64(src); + ((u64 *)dst)[1] = get_unaligned_be64(src + 8); } } diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index 1ea4f874e367..7eb76724b3ed 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -124,10 
+124,10 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) */ static inline int wg_cpumask_next_online(int *last_cpu) { - int cpu = cpumask_next(*last_cpu, cpu_online_mask); + int cpu = cpumask_next(READ_ONCE(*last_cpu), cpu_online_mask); if (cpu >= nr_cpu_ids) cpu = cpumask_first(cpu_online_mask); - *last_cpu = cpu; + WRITE_ONCE(*last_cpu, cpu); return cpu; } diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index 0d48e0f4a1ba..26e09c30d596 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -222,7 +222,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer) { struct sk_buff *skb; - if (skb_queue_empty(&peer->staged_packet_queue)) { + if (skb_queue_empty_lockless(&peer->staged_packet_queue)) { skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, GFP_ATOMIC); if (unlikely(!skb)) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index de9f0b446545..dac6155ae1bd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -654,7 +654,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->features |= NL80211_FEATURE_WFA_TPC_IE_IN_PROBES; if (iwl_fw_lookup_cmd_ver(mvm->fw, WOWLAN_KEK_KCK_MATERIAL, - IWL_FW_CMD_VER_UNKNOWN) == 3) + IWL_FW_CMD_VER_UNKNOWN) >= 3) hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK; if (fw_has_api(&mvm->fw->ucode_capa, @@ -1656,7 +1656,8 @@ static void iwl_mvm_prevent_esr_done_wk(struct wiphy *wiphy, struct iwl_mvm_vif *mvmvif = container_of(wk, struct iwl_mvm_vif, prevent_esr_done_wk.work); struct iwl_mvm *mvm = mvmvif->mvm; - struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm); + struct ieee80211_vif *vif = + container_of((void *)mvmvif, struct ieee80211_vif, drv_priv); mutex_lock(&mvm->mutex); iwl_mvm_unblock_esr(mvm, vif, IWL_MVM_ESR_BLOCKED_PREVENTION); @@ -1682,7 +1683,8 @@ static void 
iwl_mvm_unblock_esr_tpt(struct wiphy *wiphy, struct wiphy_work *wk) struct iwl_mvm_vif *mvmvif = container_of(wk, struct iwl_mvm_vif, unblock_esr_tpt_wk); struct iwl_mvm *mvm = mvmvif->mvm; - struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm); + struct ieee80211_vif *vif = + container_of((void *)mvmvif, struct ieee80211_vif, drv_priv); mutex_lock(&mvm->mutex); iwl_mvm_unblock_esr(mvm, vif, IWL_MVM_ESR_BLOCKED_TPT); @@ -4795,7 +4797,7 @@ static int iwl_mvm_roc_station(struct iwl_mvm *mvm, if (fw_ver == IWL_FW_CMD_VER_UNKNOWN) { ret = iwl_mvm_send_aux_roc_cmd(mvm, channel, vif, duration); - } else if (fw_ver == 3) { + } else if (fw_ver >= 3) { ret = iwl_mvm_roc_add_cmd(mvm, channel, vif, duration, ROC_ACTIVITY_HOTSPOT); } else { @@ -6410,11 +6412,9 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, if (sync) { lockdep_assert_held(&mvm->mutex); ret = wait_event_timeout(mvm->rx_sync_waitq, - READ_ONCE(mvm->queue_sync_state) == 0 || - iwl_mvm_is_radio_hw_killed(mvm), + READ_ONCE(mvm->queue_sync_state) == 0, SYNC_RX_QUEUE_TIMEOUT); - WARN_ONCE(!ret && !iwl_mvm_is_radio_hw_killed(mvm), - "queue sync: failed to sync, state is 0x%lx, cookie %d\n", + WARN_ONCE(!ret, "queue sync: failed to sync, state is 0x%lx, cookie %d\n", mvm->queue_sync_state, mvm->queue_sync_cookie); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 53283d052e18..d343432474db 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -153,7 +153,7 @@ static void iwl_mvm_rx_esr_mode_notif(struct iwl_mvm *mvm, struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm); /* FW recommendations is only for entering EMLSR */ - if (!vif || iwl_mvm_vif_from_mac80211(vif)->esr_active) + if (IS_ERR_OR_NULL(vif) || iwl_mvm_vif_from_mac80211(vif)->esr_active) return; if (le32_to_cpu(notif->action) == ESR_RECOMMEND_ENTER) @@ -1912,12 +1912,10 @@ static bool iwl_mvm_set_hw_rfkill_state(struct 
iwl_op_mode *op_mode, bool state) bool rfkill_safe_init_done = READ_ONCE(mvm->rfkill_safe_init_done); bool unified = iwl_mvm_has_unified_ucode(mvm); - if (state) { + if (state) set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status); - wake_up(&mvm->rx_sync_waitq); - } else { + else clear_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status); - } iwl_mvm_set_rfkill_state(mvm); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 4fa8066a89b6..6e933907f985 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -557,12 +557,10 @@ struct iwl_mvm_stat_data_all_macs { }; static void iwl_mvm_update_link_sig(struct ieee80211_vif *vif, int sig, - struct iwl_mvm_vif_link_info *link_info) + struct iwl_mvm_vif_link_info *link_info, + struct ieee80211_bss_conf *bss_conf) { struct iwl_mvm *mvm = iwl_mvm_vif_from_mac80211(vif)->mvm; - struct ieee80211_bss_conf *bss_conf = - iwl_mvm_rcu_fw_link_id_to_link_conf(mvm, link_info->fw_link_id, - false); int thold = bss_conf->cqm_rssi_thold; int hyst = bss_conf->cqm_rssi_hyst; int last_event; @@ -670,7 +668,7 @@ static void iwl_mvm_stat_iterator(void *_data, u8 *mac, mvmvif->deflink.beacon_stats.num_beacons; /* This is used in pre-MLO API so use deflink */ - iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink); + iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink, &vif->bss_conf); } static void iwl_mvm_stat_iterator_all_macs(void *_data, u8 *mac, @@ -705,7 +703,7 @@ static void iwl_mvm_stat_iterator_all_macs(void *_data, u8 *mac, sig = -le32_to_cpu(mac_stats->beacon_filter_average_energy); /* This is used in pre-MLO API so use deflink */ - iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink); + iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink, &vif->bss_conf); } static inline void @@ -921,7 +919,8 @@ iwl_mvm_stat_iterator_all_links(struct iwl_mvm *mvm, mvmvif->link[link_id]->beacon_stats.num_beacons; sig = 
-le32_to_cpu(link_stats->beacon_filter_average_energy); - iwl_mvm_update_link_sig(bss_conf->vif, sig, link_info); + iwl_mvm_update_link_sig(bss_conf->vif, sig, link_info, + bss_conf); if (WARN_ONCE(mvmvif->id >= MAC_INDEX_AUX, "invalid mvmvif id: %d", mvmvif->id)) @@ -967,7 +966,7 @@ static void iwl_mvm_update_esr_mode_tpt(struct iwl_mvm *mvm) lockdep_assert_held(&mvm->mutex); - if (!bss_vif) + if (IS_ERR_OR_NULL(bss_vif)) return; mvmvif = iwl_mvm_vif_from_mac80211(bss_vif); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index b5f664ae5a17..e975f5ff17b5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1830,7 +1830,7 @@ iwl_mvm_umac_scan_cfg_channels_v7_6g(struct iwl_mvm *mvm, */ if (!iwl_mvm_is_scan_fragmented(params->type)) { if (!cfg80211_channel_is_psc(params->channels[i]) || - flags & IWL_UHB_CHAN_CFG_FLAG_PSC_CHAN_NO_LISTEN) { + psc_no_listen) { if (unsolicited_probe_on_chan) { max_s_ssids = 2; max_bssids = 6; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 8ee4498f4245..31bc80cdcb7d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -1238,6 +1238,7 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif) if (te_data->id >= SESSION_PROTECT_CONF_MAX_ID) { IWL_DEBUG_TE(mvm, "No remain on channel event\n"); + mutex_unlock(&mvm->mutex); return; } @@ -1253,6 +1254,7 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif) te_data = iwl_mvm_get_roc_te(mvm); if (!te_data) { IWL_WARN(mvm, "No remain on channel event\n"); + mutex_unlock(&mvm->mutex); return; } diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index f1085ccb7eed..7719e4f3e2a2 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ 
b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -382,7 +382,8 @@ wilc_parse_join_bss_param(struct cfg80211_bss *bss, struct ieee80211_p2p_noa_attr noa_attr; const struct cfg80211_bss_ies *ies; struct wilc_join_bss_param *param; - u8 rates_len = 0, ies_len; + u8 rates_len = 0; + int ies_len; int ret; param = kzalloc(sizeof(*param), GFP_KERNEL); diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index a939fd89a7f5..92fc2d456c2c 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -1566,13 +1566,6 @@ int wl12xx_cmd_add_peer(struct wl1271 *wl, struct wl12xx_vif *wlvif, cpu_to_le32(wl1271_tx_enabled_rates_get(wl, sta_rates, wlvif->band)); - if (!cmd->supported_rates) { - wl1271_debug(DEBUG_CMD, - "peer has no supported rates yet, configuring basic rates: 0x%x", - wlvif->basic_rate_set); - cmd->supported_rates = cpu_to_le32(wlvif->basic_rate_set); - } - wl1271_debug(DEBUG_CMD, "new peer rates=0x%x queues=0x%x", cmd->supported_rates, sta->uapsd_queues); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index ef12169f8044..492cd7aef44f 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5139,19 +5139,23 @@ static int wl12xx_update_sta_state(struct wl1271 *wl, /* Add station (AP mode) */ if (is_ap && - old_state == IEEE80211_STA_NOTEXIST && - new_state == IEEE80211_STA_NONE) { + old_state == IEEE80211_STA_AUTH && + new_state == IEEE80211_STA_ASSOC) { ret = wl12xx_sta_add(wl, wlvif, sta); if (ret) return ret; + wl_sta->fw_added = true; + wlcore_update_inconn_sta(wl, wlvif, wl_sta, true); } /* Remove station (AP mode) */ if (is_ap && - old_state == IEEE80211_STA_NONE && - new_state == IEEE80211_STA_NOTEXIST) { + old_state == IEEE80211_STA_ASSOC && + new_state == IEEE80211_STA_AUTH) { + wl_sta->fw_added = false; + /* must not fail */ wl12xx_sta_remove(wl, wlvif, sta); @@ -5165,11 +5169,6 @@ static int 
wl12xx_update_sta_state(struct wl1271 *wl, if (ret < 0) return ret; - /* reconfigure rates */ - ret = wl12xx_cmd_add_peer(wl, wlvif, sta, wl_sta->hlid); - if (ret < 0) - return ret; - ret = wl1271_acx_set_ht_capabilities(wl, &sta->deflink.ht_cap, true, wl_sta->hlid); diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c index 7bd3ce2f0804..464587d16ab2 100644 --- a/drivers/net/wireless/ti/wlcore/tx.c +++ b/drivers/net/wireless/ti/wlcore/tx.c @@ -140,11 +140,8 @@ EXPORT_SYMBOL(wl12xx_is_dummy_packet); static u8 wl12xx_tx_get_hlid_ap(struct wl1271 *wl, struct wl12xx_vif *wlvif, struct sk_buff *skb, struct ieee80211_sta *sta) { - if (sta) { - struct wl1271_station *wl_sta; - - wl_sta = (struct wl1271_station *)sta->drv_priv; - return wl_sta->hlid; + if (sta && wl1271_station(sta)->fw_added) { + return wl1271_station(sta)->hlid; } else { struct ieee80211_hdr *hdr; diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index eefae3f867b9..817a8a61cac6 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -324,6 +324,7 @@ struct wl12xx_rx_filter { struct wl1271_station { u8 hlid; + bool fw_added; bool in_connection; /* @@ -335,6 +336,11 @@ struct wl1271_station { u64 total_freed_pkts; }; +static inline struct wl1271_station *wl1271_station(struct ieee80211_sta *sta) +{ + return (struct wl1271_station *)sta->drv_priv; +} + struct wl12xx_vif { struct wl1271 *wl; struct list_head list; diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index dd6ec0865141..0cfa39361d3b 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1602,4 +1602,5 @@ static struct platform_driver apple_nvme_driver = { module_platform_driver(apple_nvme_driver); MODULE_AUTHOR("Sven Peter <sven@svenpeter.dev>"); +MODULE_DESCRIPTION("Apple ANS NVM Express device driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/nvme/host/nvme.h 
b/drivers/nvme/host/nvme.h index f3a41133ac3f..68b400f9c42d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -502,7 +502,7 @@ static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head) enum nvme_ns_features { NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */ - NVME_NS_DEAC, /* DEAC bit in Write Zeores supported */ + NVME_NS_DEAC = 1 << 2, /* DEAC bit in Write Zeores supported */ }; struct nvme_ns { diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index bd87dfd173a4..685e89b35d33 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -410,7 +410,29 @@ static ssize_t nvmet_addr_tsas_show(struct config_item *item, return sprintf(page, "%s\n", nvmet_addr_tsas_rdma[i].name); } } - return sprintf(page, "reserved\n"); + return sprintf(page, "\n"); +} + +static u8 nvmet_addr_tsas_rdma_store(const char *page) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_rdma); i++) { + if (sysfs_streq(page, nvmet_addr_tsas_rdma[i].name)) + return nvmet_addr_tsas_rdma[i].type; + } + return NVMF_RDMA_QPTYPE_INVALID; +} + +static u8 nvmet_addr_tsas_tcp_store(const char *page) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) { + if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name)) + return nvmet_addr_tsas_tcp[i].type; + } + return NVMF_TCP_SECTYPE_INVALID; } static ssize_t nvmet_addr_tsas_store(struct config_item *item, @@ -418,20 +440,19 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item, { struct nvmet_port *port = to_nvmet_port(item); u8 treq = nvmet_port_disc_addr_treq_mask(port); - u8 sectype; - int i; + u8 sectype, qptype; if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - if (port->disc_addr.trtype != NVMF_TRTYPE_TCP) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) { - if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name)) { - sectype = 
nvmet_addr_tsas_tcp[i].type; + if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) { + qptype = nvmet_addr_tsas_rdma_store(page); + if (qptype == port->disc_addr.tsas.rdma.qptype) + return count; + } else if (port->disc_addr.trtype == NVMF_TRTYPE_TCP) { + sectype = nvmet_addr_tsas_tcp_store(page); + if (sectype != NVMF_TCP_SECTYPE_INVALID) goto found; - } } pr_err("Invalid value '%s' for tsas\n", page); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 337ee1cb09ae..381b4394731f 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -148,7 +148,7 @@ struct nvmet_fc_tgt_queue { struct workqueue_struct *work_q; struct kref ref; /* array of fcp_iods */ - struct nvmet_fc_fcp_iod fod[] __counted_by(sqsize); + struct nvmet_fc_fcp_iod fod[] /* __counted_by(sqsize) */; } __aligned(sizeof(unsigned long long)); struct nvmet_fc_hostport { diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index e1ec3b7200d7..f8dd7eb40fbe 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -396,10 +396,9 @@ static int nvmem_sysfs_setup_compat(struct nvmem_device *nvmem, if (!config->base_dev) return -EINVAL; - if (config->type == NVMEM_TYPE_FRAM) - bin_attr_nvmem_eeprom_compat.attr.name = "fram"; - nvmem->eeprom = bin_attr_nvmem_eeprom_compat; + if (config->type == NVMEM_TYPE_FRAM) + nvmem->eeprom.attr.name = "fram"; nvmem->eeprom.attr.mode = nvmem_bin_attr_get_umode(nvmem); nvmem->eeprom.size = nvmem->size; #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -463,7 +462,7 @@ static int nvmem_populate_sysfs_cells(struct nvmem_device *nvmem) "%s@%x,%x", entry->name, entry->offset, entry->bit_offset); - attrs[i].attr.mode = 0444; + attrs[i].attr.mode = 0444 & nvmem_bin_attr_get_umode(nvmem); attrs[i].size = entry->bytes; attrs[i].read = &nvmem_cell_attr_read; attrs[i].private = entry; diff --git a/drivers/nvmem/meson-efuse.c b/drivers/nvmem/meson-efuse.c index 33678d0af2c2..6c2f80e166e2 100644 --- a/drivers/nvmem/meson-efuse.c +++ b/drivers/nvmem/meson-efuse.c 
@@ -18,18 +18,24 @@ static int meson_efuse_read(void *context, unsigned int offset, void *val, size_t bytes) { struct meson_sm_firmware *fw = context; + int ret; - return meson_sm_call_read(fw, (u8 *)val, bytes, SM_EFUSE_READ, offset, - bytes, 0, 0, 0); + ret = meson_sm_call_read(fw, (u8 *)val, bytes, SM_EFUSE_READ, offset, + bytes, 0, 0, 0); + + return ret < 0 ? ret : 0; } static int meson_efuse_write(void *context, unsigned int offset, void *val, size_t bytes) { struct meson_sm_firmware *fw = context; + int ret; + + ret = meson_sm_call_write(fw, (u8 *)val, bytes, SM_EFUSE_WRITE, offset, + bytes, 0, 0, 0); - return meson_sm_call_write(fw, (u8 *)val, bytes, SM_EFUSE_WRITE, offset, - bytes, 0, 0, 0); + return ret < 0 ? ret : 0; } static const struct of_device_id meson_efuse_match[] = { diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c index 752d0bf4445e..7f907c5a445e 100644 --- a/drivers/nvmem/rmem.c +++ b/drivers/nvmem/rmem.c @@ -46,7 +46,10 @@ static int rmem_read(void *context, unsigned int offset, memunmap(addr); - return count; + if (count < 0) + return count; + + return count == bytes ? 0 : -EIO; } static int rmem_probe(struct platform_device *pdev) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 462375b293e4..c94203ce65bb 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -81,7 +81,8 @@ EXPORT_SYMBOL_GPL(of_irq_find_parent); /* * These interrupt controllers abuse interrupt-map for unspeakable * reasons and rely on the core code to *ignore* it (the drivers do - * their own parsing of the property). + * their own parsing of the property). The PAsemi entry covers a + * non-sensical interrupt-map that is better left ignored. * * If you think of adding to the list for something *new*, think * again. 
There is a high chance that you will be sent back to the @@ -95,6 +96,7 @@ static const char * const of_irq_imap_abusers[] = { "fsl,ls1043a-extirq", "fsl,ls1088a-extirq", "renesas,rza1-irqc", + "pasemi,rootbus", NULL, }; @@ -293,20 +295,8 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) imaplen -= imap - oldimap; pr_debug(" -> imaplen=%d\n", imaplen); } - if (!match) { - if (intc) { - /* - * The PASEMI Nemo is a known offender, so - * let's only warn for anyone else. - */ - WARN(!IS_ENABLED(CONFIG_PPC_PASEMI), - "%pOF interrupt-map failed, using interrupt-controller\n", - ipar); - return 0; - } - + if (!match) goto fail; - } /* * Successfully parsed an interrupt-map translation; copy new diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index c5625dd9bf49..3a45879d85db 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -352,7 +352,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, struct irq_affinity *affd) { struct irq_affinity_desc *masks = NULL; - struct msi_desc *entry; + struct msi_desc *entry, desc; int ret; /* Reject multi-MSI early on irq domain enabled architectures */ @@ -377,6 +377,12 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, /* All MSIs are unmasked by default; mask them all */ entry = msi_first_desc(&dev->dev, MSI_DESC_ALL); pci_msi_mask(entry, msi_multi_mask(entry)); + /* + * Copy the MSI descriptor for the error path because + * pci_msi_setup_msi_irqs() will free it for the hierarchical + * interrupt domain case. 
+ */ + memcpy(&desc, entry, sizeof(desc)); /* Configure MSI capability structure */ ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); @@ -396,7 +402,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, goto unlock; err: - pci_msi_unmask(entry, msi_multi_mask(entry)); + pci_msi_unmask(&desc, msi_multi_mask(&desc)); pci_free_msi_irqs(dev); fail: dev->msi_enabled = 0; diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 78c490e0505a..0a02e85a8951 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -167,7 +167,7 @@ u64 riscv_pmu_event_update(struct perf_event *event) unsigned long cmask; u64 oldval, delta; - if (!rvpmu->ctr_read) + if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE)) return 0; cmask = riscv_pmu_ctr_get_width_mask(event); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index a2e4005e1fd0..4e842dcedfba 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -20,6 +20,7 @@ #include <linux/cpu_pm.h> #include <linux/sched/clock.h> #include <linux/soc/andes/irq.h> +#include <linux/workqueue.h> #include <asm/errata_list.h> #include <asm/sbi.h> @@ -114,7 +115,7 @@ struct sbi_pmu_event_data { }; }; -static const struct sbi_pmu_event_data pmu_hw_event_map[] = { +static struct sbi_pmu_event_data pmu_hw_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = { SBI_PMU_HW_CPU_CYCLES, SBI_PMU_EVENT_TYPE_HW, 0}}, @@ -148,7 +149,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = { }; #define C(x) PERF_COUNT_HW_CACHE_##x -static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] +static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { [C(L1D)] = { @@ -293,6 +294,34 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M }, }; +static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) +{ + 
struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, + 0, cmask, 0, edata->event_idx, 0, 0); + if (!ret.error) { + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, + ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); + } else if (ret.error == SBI_ERR_NOT_SUPPORTED) { + /* This event cannot be monitored by any counter */ + edata->event_idx = -EINVAL; + } +} + +static void pmu_sbi_check_std_events(struct work_struct *work) +{ + for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) + pmu_sbi_check_event(&pmu_hw_event_map[i]); + + for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) + for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) + for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) + pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]); +} + +static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events); + static int pmu_sbi_ctr_get_width(int idx) { return pmu_ctr_list[idx].width; @@ -478,6 +507,12 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) u64 raw_config_val; int ret; + /* + * Ensure we are finished checking standard hardware events for + * validity before allowing userspace to configure any events. + */ + flush_work(&check_std_events_work); + switch (type) { case PERF_TYPE_HARDWARE: if (config >= PERF_COUNT_HW_MAX) @@ -762,7 +797,7 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu) * which may include counters that are not enabled yet. 
*/ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, - 0, pmu->cmask, 0, 0, 0, 0); + 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); } static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu) @@ -1359,6 +1394,9 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) if (ret) goto out_unregister; + /* Asynchronously check which standard events are available */ + schedule_work(&check_std_events_work); + return 0; out_unregister: diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 7f999e8a433d..7b00945f7191 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -187,6 +187,31 @@ static const unsigned int qmp_v6_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_TX_TRANSCEIVER_BIAS_EN] = QSERDES_V6_TX_TRANSCEIVER_BIAS_EN, }; +static const unsigned int qmp_v6_n4_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { + [QPHY_SW_RESET] = QPHY_V6_N4_PCS_SW_RESET, + [QPHY_START_CTRL] = QPHY_V6_N4_PCS_START_CONTROL, + [QPHY_PCS_STATUS] = QPHY_V6_N4_PCS_PCS_STATUS1, + [QPHY_PCS_POWER_DOWN_CONTROL] = QPHY_V6_N4_PCS_POWER_DOWN_CONTROL, + + /* In PCS_USB */ + [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = QPHY_V6_PCS_USB3_AUTONOMOUS_MODE_CTRL, + [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = QPHY_V6_PCS_USB3_LFPS_RXTERM_IRQ_CLEAR, + + [QPHY_COM_RESETSM_CNTRL] = QSERDES_V6_COM_RESETSM_CNTRL, + [QPHY_COM_C_READY_STATUS] = QSERDES_V6_COM_C_READY_STATUS, + [QPHY_COM_CMN_STATUS] = QSERDES_V6_COM_CMN_STATUS, + [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN, + + [QPHY_DP_PHY_STATUS] = QSERDES_V6_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V6_DP_PHY_VCO_DIV, + + [QPHY_TX_TX_POL_INV] = QSERDES_V6_N4_TX_TX_POL_INV, + [QPHY_TX_TX_DRV_LVL] = QSERDES_V6_N4_TX_TX_DRV_LVL, + [QPHY_TX_TX_EMP_POST1_LVL] = QSERDES_V6_N4_TX_TX_EMP_POST1_LVL, + [QPHY_TX_HIGHZ_DRVR_EN] = QSERDES_V6_N4_TX_HIGHZ_DRVR_EN, + [QPHY_TX_TRANSCEIVER_BIAS_EN] = QSERDES_V6_N4_TX_TRANSCEIVER_BIAS_EN, +}; + static const 
struct qmp_phy_init_tbl qmp_v3_usb3_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_IVCO, 0x07), QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYSCLK_EN_SEL, 0x14), @@ -997,6 +1022,31 @@ static const struct qmp_phy_init_tbl qmp_v6_dp_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0x0f), }; +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SVS_MODE_CLK_SEL, 0x15), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0x3b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYS_CLK_CTRL, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CLK_ENABLE1, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_BUF_ENABLE, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CLK_SELECT, 0x30), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE0, 0xc0), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x12), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN0_MODE0, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN1_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BG_TIMER, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CORE_CLK_DIV_MODE0, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_CTRL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN, 0x17), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0x0f), +}; + static const struct qmp_phy_init_tbl qmp_v6_dp_tx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V6_TX_VMODE_CTRL1, 0x40), QMP_PHY_INIT_CFG(QSERDES_V6_TX_PRE_STALL_LDO_BOOST_EN, 0x30), @@ -1011,6 +1061,19 @@ static const struct qmp_phy_init_tbl qmp_v6_dp_tx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V6_TX_TX_BAND, 0x4), }; +static const struct qmp_phy_init_tbl 
qmp_v6_n4_dp_tx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_VMODE_CTRL1, 0x40), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_PRE_STALL_LDO_BOOST_EN, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_INTERFACE_SELECT, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_CLKBUF_ENABLE, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RESET_TSYNC_EN, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_TRAN_DRVR_EMP_EN, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_PARRATE_REC_DETECT_IDLE_EN, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_TX, 0x11), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_RX, 0x11), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_TX_BAND, 0x1), +}; + static const struct qmp_phy_init_tbl qmp_v6_dp_serdes_tbl_rbr[] = { QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x05), QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34), @@ -1059,6 +1122,74 @@ static const struct qmp_phy_init_tbl qmp_v6_dp_serdes_tbl_hbr3[] = { QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c), }; +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_rbr[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x0b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x37), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x71), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x01), +}; + +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_hbr[] = { + 
QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x0b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x71), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x01), +}; + +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_hbr2[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x46), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x97), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x10), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x02), +}; + +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_hbr3[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x08), + 
QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x0b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x17), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x15), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x71), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x01), +}; + static const struct qmp_phy_init_tbl sc8280xp_usb43dp_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_EN_CENTER, 0x01), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31), @@ -1273,20 +1404,20 @@ static const struct qmp_phy_init_tbl x1e80100_usb43dp_rx_tbl[] = { }; static const struct qmp_phy_init_tbl x1e80100_usb43dp_pcs_tbl[] = { - QMP_PHY_INIT_CFG(QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG1, 0xc4), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG2, 0x89), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG3, 0x20), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG6, 0x13), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_REFGEN_REQ_CONFIG1, 0x21), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_RX_SIGDET_LVL, 0x55), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_CDR_RESET_TIME, 0x0a), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_ALIGN_DETECT_CONFIG1, 0xd4), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_ALIGN_DETECT_CONFIG2, 0x30), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_PCS_TX_RX_CONFIG, 0x0c), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_EQ_CONFIG1, 0x4b), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_EQ_CONFIG5, 0x10), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG1, 0xc4), + 
QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG2, 0x89), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG3, 0x20), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG6, 0x13), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_REFGEN_REQ_CONFIG1, 0x21), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RX_SIGDET_LVL, 0x55), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RX_CONFIG, 0x0a), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG1, 0xd4), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG2, 0x30), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_PCS_TX_RX_CONFIG, 0x0c), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_EQ_CONFIG1, 0x4b), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_EQ_CONFIG5, 0x10), }; static const struct qmp_phy_init_tbl x1e80100_usb43dp_pcs_usb_tbl[] = { @@ -1794,22 +1925,22 @@ static const struct qmp_phy_cfg x1e80100_usb3dpphy_cfg = { .pcs_usb_tbl = x1e80100_usb43dp_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(x1e80100_usb43dp_pcs_usb_tbl), - .dp_serdes_tbl = qmp_v6_dp_serdes_tbl, - .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl), - .dp_tx_tbl = qmp_v6_dp_tx_tbl, - .dp_tx_tbl_num = ARRAY_SIZE(qmp_v6_dp_tx_tbl), + .dp_serdes_tbl = qmp_v6_n4_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl), + .dp_tx_tbl = qmp_v6_n4_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v6_n4_dp_tx_tbl), - .serdes_tbl_rbr = qmp_v6_dp_serdes_tbl_rbr, - .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_rbr), - .serdes_tbl_hbr = qmp_v6_dp_serdes_tbl_hbr, - .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr), - .serdes_tbl_hbr2 = qmp_v6_dp_serdes_tbl_hbr2, - .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr2), - .serdes_tbl_hbr3 = qmp_v6_dp_serdes_tbl_hbr3, - .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr3), + .serdes_tbl_rbr = qmp_v6_n4_dp_serdes_tbl_rbr, + .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_rbr), + .serdes_tbl_hbr = qmp_v6_n4_dp_serdes_tbl_hbr, + .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_hbr), + .serdes_tbl_hbr2 = qmp_v6_n4_dp_serdes_tbl_hbr2, 
+ .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_hbr2), + .serdes_tbl_hbr3 = qmp_v6_n4_dp_serdes_tbl_hbr3, + .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_hbr3), - .swing_hbr_rbr = &qmp_dp_v5_voltage_swing_hbr_rbr, - .pre_emphasis_hbr_rbr = &qmp_dp_v5_pre_emphasis_hbr_rbr, + .swing_hbr_rbr = &qmp_dp_v6_voltage_swing_hbr_rbr, + .pre_emphasis_hbr_rbr = &qmp_dp_v6_pre_emphasis_hbr_rbr, .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2, @@ -1822,7 +1953,7 @@ static const struct qmp_phy_cfg x1e80100_usb3dpphy_cfg = { .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v45_usb3phy_regs_layout, + .regs = qmp_v6_n4_usb3phy_regs_layout, }; static const struct qmp_phy_cfg sm6350_usb3dpphy_cfg = { diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h new file mode 100644 index 000000000000..b3024714dab4 --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023, Linaro Limited + */ + +#ifndef QCOM_PHY_QMP_PCS_V6_N4_H_ +#define QCOM_PHY_QMP_PCS_V6_N4_H_ + +/* Only for QMP V6 N4 PHY - USB/PCIe PCS registers */ +#define QPHY_V6_N4_PCS_SW_RESET 0x000 +#define QPHY_V6_N4_PCS_PCS_STATUS1 0x014 +#define QPHY_V6_N4_PCS_POWER_DOWN_CONTROL 0x040 +#define QPHY_V6_N4_PCS_START_CONTROL 0x044 +#define QPHY_V6_N4_PCS_POWER_STATE_CONFIG1 0x090 +#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG1 0x0c4 +#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG2 0x0c8 +#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG3 0x0cc +#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG6 0x0d8 +#define QPHY_V6_N4_PCS_REFGEN_REQ_CONFIG1 0x0dc +#define QPHY_V6_N4_PCS_RX_SIGDET_LVL 0x188 +#define QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_L 0x190 +#define QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_H 0x194 +#define QPHY_V6_N4_PCS_RATE_SLEW_CNTRL1 0x198 
+#define QPHY_V6_N4_PCS_RX_CONFIG 0x1b0 +#define QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG1 0x1c0 +#define QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG2 0x1c4 +#define QPHY_V6_N4_PCS_PCS_TX_RX_CONFIG 0x1d0 +#define QPHY_V6_N4_PCS_EQ_CONFIG1 0x1dc +#define QPHY_V6_N4_PCS_EQ_CONFIG2 0x1e0 +#define QPHY_V6_N4_PCS_EQ_CONFIG5 0x1ec + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h index a814ad11af07..d37cc0d4fd36 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h @@ -6,11 +6,24 @@ #ifndef QCOM_PHY_QMP_QSERDES_TXRX_V6_N4_H_ #define QCOM_PHY_QMP_QSERDES_TXRX_V6_N4_H_ +#define QSERDES_V6_N4_TX_CLKBUF_ENABLE 0x08 +#define QSERDES_V6_N4_TX_TX_EMP_POST1_LVL 0x0c +#define QSERDES_V6_N4_TX_TX_DRV_LVL 0x14 +#define QSERDES_V6_N4_TX_RESET_TSYNC_EN 0x1c +#define QSERDES_V6_N4_TX_PRE_STALL_LDO_BOOST_EN 0x20 #define QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_TX 0x30 #define QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_RX 0x34 +#define QSERDES_V6_N4_TX_TRANSCEIVER_BIAS_EN 0x48 +#define QSERDES_V6_N4_TX_HIGHZ_DRVR_EN 0x4c +#define QSERDES_V6_N4_TX_TX_POL_INV 0x50 +#define QSERDES_V6_N4_TX_PARRATE_REC_DETECT_IDLE_EN 0x54 #define QSERDES_V6_N4_TX_LANE_MODE_1 0x78 #define QSERDES_V6_N4_TX_LANE_MODE_2 0x7c #define QSERDES_V6_N4_TX_LANE_MODE_3 0x80 +#define QSERDES_V6_N4_TX_TRAN_DRVR_EMP_EN 0xac +#define QSERDES_V6_N4_TX_TX_BAND 0xd8 +#define QSERDES_V6_N4_TX_INTERFACE_SELECT 0xe4 +#define QSERDES_V6_N4_TX_VMODE_CTRL1 0xb0 #define QSERDES_V6_N4_RX_UCDR_FO_GAIN_RATE2 0x8 #define QSERDES_V6_N4_RX_UCDR_SO_GAIN_RATE2 0x18 diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h index d10b8f653c4b..d0f41e4aaa85 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp.h @@ -46,6 +46,8 @@ #include "phy-qcom-qmp-pcs-v6.h" +#include "phy-qcom-qmp-pcs-v6-n4.h" + #include "phy-qcom-qmp-pcs-v6_20.h" #include 
"phy-qcom-qmp-pcs-v7.h" diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 7178a38475cc..27fd54795791 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -245,7 +245,7 @@ static const char * const irq_type_names[] = { }; static bool persist_gpio_outputs; -module_param(persist_gpio_outputs, bool, 0644); +module_param(persist_gpio_outputs, bool, 0444); MODULE_PARM_DESC(persist_gpio_outputs, "Enable GPIO_OUT persistence when pin is freed"); static inline u32 bcm2835_gpio_rd(struct bcm2835_pinctrl *pc, unsigned reg) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index cffeb869130d..f424a57f0013 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1106,8 +1106,8 @@ static struct pinctrl *create_pinctrl(struct device *dev, * an -EPROBE_DEFER later, as that is the worst case. */ if (ret == -EPROBE_DEFER) { - pinctrl_free(p, false); mutex_unlock(&pinctrl_maps_mutex); + pinctrl_free(p, false); return ERR_PTR(ret); } } diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 3bedf36a0019..3f56991f5b89 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -634,23 +634,68 @@ static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = { static struct rockchip_mux_recalced_data rk3328_mux_recalced_data[] = { { - .num = 2, - .pin = 12, - .reg = 0x24, - .bit = 8, - .mask = 0x3 - }, { + /* gpio2_b7_sel */ .num = 2, .pin = 15, .reg = 0x28, .bit = 0, .mask = 0x7 }, { + /* gpio2_c7_sel */ .num = 2, .pin = 23, .reg = 0x30, .bit = 14, .mask = 0x3 + }, { + /* gpio3_b1_sel */ + .num = 3, + .pin = 9, + .reg = 0x44, + .bit = 2, + .mask = 0x3 + }, { + /* gpio3_b2_sel */ + .num = 3, + .pin = 10, + .reg = 0x44, + .bit = 4, + .mask = 0x3 + }, { + /* gpio3_b3_sel */ + .num = 3, + .pin = 11, + .reg = 0x44, + .bit = 6, + .mask = 0x3 + }, { + /* gpio3_b4_sel */ + .num = 3, + .pin = 12, + .reg = 
0x44, + .bit = 8, + .mask = 0x3 + }, { + /* gpio3_b5_sel */ + .num = 3, + .pin = 13, + .reg = 0x44, + .bit = 10, + .mask = 0x3 + }, { + /* gpio3_b6_sel */ + .num = 3, + .pin = 14, + .reg = 0x44, + .bit = 12, + .mask = 0x3 + }, { + /* gpio3_b7_sel */ + .num = 3, + .pin = 15, + .reg = 0x44, + .bit = 14, + .mask = 0x3 }, }; @@ -2433,6 +2478,7 @@ static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num) case RK3188: case RK3288: case RK3308: + case RK3328: case RK3368: case RK3399: case RK3568: @@ -2491,6 +2537,7 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank, case RK3188: case RK3288: case RK3308: + case RK3328: case RK3368: case RK3399: case RK3568: @@ -2704,8 +2751,10 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, if (ret) { /* revert the already done pin settings */ - for (cnt--; cnt >= 0; cnt--) + for (cnt--; cnt >= 0; cnt--) { + bank = pin_to_bank(info, pins[cnt]); rockchip_set_mux(bank, pins[cnt] - bank->pin_base, 0); + } return ret; } @@ -2753,6 +2802,7 @@ static bool rockchip_pinconf_pull_valid(struct rockchip_pin_ctrl *ctrl, case RK3188: case RK3288: case RK3308: + case RK3328: case RK3368: case RK3399: case RK3568: @@ -3763,7 +3813,7 @@ static struct rockchip_pin_bank rk3328_pin_banks[] = { PIN_BANK_IOMUX_FLAGS(0, 32, "gpio0", 0, 0, 0, 0), PIN_BANK_IOMUX_FLAGS(1, 32, "gpio1", 0, 0, 0, 0), PIN_BANK_IOMUX_FLAGS(2, 32, "gpio2", 0, - IOMUX_WIDTH_3BIT, + 0, IOMUX_WIDTH_3BIT, 0), PIN_BANK_IOMUX_FLAGS(3, 32, "gpio3", @@ -3777,7 +3827,7 @@ static struct rockchip_pin_ctrl rk3328_pin_ctrl = { .pin_banks = rk3328_pin_banks, .nr_banks = ARRAY_SIZE(rk3328_pin_banks), .label = "RK3328-GPIO", - .type = RK3288, + .type = RK3328, .grf_mux_offset = 0x0, .iomux_recalced = rk3328_mux_recalced_data, .niomux_recalced = ARRAY_SIZE(rk3328_mux_recalced_data), diff --git a/drivers/pinctrl/pinctrl-rockchip.h b/drivers/pinctrl/pinctrl-rockchip.h index 4759f336941e..849266f8b191 100644 --- 
a/drivers/pinctrl/pinctrl-rockchip.h +++ b/drivers/pinctrl/pinctrl-rockchip.h @@ -193,6 +193,7 @@ enum rockchip_pinctrl_type { RK3188, RK3288, RK3308, + RK3328, RK3368, RK3399, RK3568, diff --git a/drivers/pinctrl/pinctrl-tps6594.c b/drivers/pinctrl/pinctrl-tps6594.c index 085047320853..5e7c7cf93445 100644 --- a/drivers/pinctrl/pinctrl-tps6594.c +++ b/drivers/pinctrl/pinctrl-tps6594.c @@ -486,6 +486,7 @@ static int tps6594_pinctrl_probe(struct platform_device *pdev) break; case TPS6593: case TPS6594: + case LP8764: pctrl_desc->pins = tps6594_pins; pctrl_desc->npins = ARRAY_SIZE(tps6594_pins); diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index 4e80c7204e5f..4abd6f18bbef 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -1207,7 +1207,6 @@ static const struct of_device_id pmic_gpio_of_match[] = { { .compatible = "qcom,pm7325-gpio", .data = (void *) 10 }, { .compatible = "qcom,pm7550ba-gpio", .data = (void *) 8}, { .compatible = "qcom,pm8005-gpio", .data = (void *) 4 }, - { .compatible = "qcom,pm8008-gpio", .data = (void *) 2 }, { .compatible = "qcom,pm8019-gpio", .data = (void *) 6 }, /* pm8150 has 10 GPIOs with holes on 2, 5, 7 and 8 */ { .compatible = "qcom,pm8150-gpio", .data = (void *) 10 }, diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index c3256bfde502..60be78da9f52 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -2071,11 +2071,11 @@ static void rzg2l_gpio_irq_restore(struct rzg2l_pinctrl *pctrl) * This has to be atomically executed to protect against a concurrent * interrupt. 
*/ - raw_spin_lock_irqsave(&pctrl->lock.rlock, flags); + spin_lock_irqsave(&pctrl->lock, flags); ret = rzg2l_gpio_irq_set_type(data, irqd_get_trigger_type(data)); if (!ret && !irqd_irq_disabled(data)) rzg2l_gpio_irq_enable(data); - raw_spin_unlock_irqrestore(&pctrl->lock.rlock, flags); + spin_unlock_irqrestore(&pctrl->lock, flags); if (ret) dev_crit(pctrl->dev, "Failed to set IRQ type for virq=%u\n", virq); diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c index 3ef655591424..abe7be602f84 100644 --- a/drivers/platform/mellanox/nvsw-sn2201.c +++ b/drivers/platform/mellanox/nvsw-sn2201.c @@ -1198,6 +1198,7 @@ static int nvsw_sn2201_config_pre_init(struct nvsw_sn2201 *nvsw_sn2201) static int nvsw_sn2201_probe(struct platform_device *pdev) { struct nvsw_sn2201 *nvsw_sn2201; + int ret; nvsw_sn2201 = devm_kzalloc(&pdev->dev, sizeof(*nvsw_sn2201), GFP_KERNEL); if (!nvsw_sn2201) @@ -1205,8 +1206,10 @@ static int nvsw_sn2201_probe(struct platform_device *pdev) nvsw_sn2201->dev = &pdev->dev; platform_set_drvdata(pdev, nvsw_sn2201); - platform_device_add_resources(pdev, nvsw_sn2201_lpc_io_resources, + ret = platform_device_add_resources(pdev, nvsw_sn2201_lpc_io_resources, ARRAY_SIZE(nvsw_sn2201_lpc_io_resources)); + if (ret) + return ret; nvsw_sn2201->main_mux_deferred_nr = NVSW_SN2201_MAIN_MUX_DEFER_NR; nvsw_sn2201->main_mux_devs = nvsw_sn2201_main_mux_brdinfo; diff --git a/drivers/platform/x86/amilo-rfkill.c b/drivers/platform/x86/amilo-rfkill.c index efcf909786a5..2423dc91debb 100644 --- a/drivers/platform/x86/amilo-rfkill.c +++ b/drivers/platform/x86/amilo-rfkill.c @@ -171,6 +171,7 @@ static void __exit amilo_rfkill_exit(void) } MODULE_AUTHOR("Ben Hutchings <ben@decadent.org.uk>"); +MODULE_DESCRIPTION("Fujitsu-Siemens Amilo rfkill support"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(dmi, amilo_rfkill_id_table); diff --git a/drivers/platform/x86/firmware_attributes_class.c b/drivers/platform/x86/firmware_attributes_class.c 
index dd8240009565..182a07d8ae3d 100644 --- a/drivers/platform/x86/firmware_attributes_class.c +++ b/drivers/platform/x86/firmware_attributes_class.c @@ -49,4 +49,5 @@ int fw_attributes_class_put(void) EXPORT_SYMBOL_GPL(fw_attributes_class_put); MODULE_AUTHOR("Mark Pearson <markpearson@lenovo.com>"); +MODULE_DESCRIPTION("Firmware attributes class helper module"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c index 1d4bbae115f1..231b37909801 100644 --- a/drivers/platform/x86/ibm_rtl.c +++ b/drivers/platform/x86/ibm_rtl.c @@ -29,6 +29,7 @@ static bool debug; module_param(debug, bool, 0644); MODULE_PARM_DESC(debug, "Show debug output"); +MODULE_DESCRIPTION("IBM Premium Real Time Mode (PRTM) driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Keith Mannthey <kmmanth@us.ibm.com>"); MODULE_AUTHOR("Vernon Mauery <vernux@us.ibm.com>"); diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index c7a827645864..10cd65497cc1 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -38,6 +38,7 @@ MODULE_PARM_DESC(enable_sw_tablet_mode, /* When NOT in tablet mode, VGBS returns with the flag 0x40 */ #define TABLET_MODE_FLAG BIT(6) +MODULE_DESCRIPTION("Intel HID Event hotkey driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); diff --git a/drivers/platform/x86/intel/pmc/pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c index ddfba38c2104..f2cb87dc2d37 100644 --- a/drivers/platform/x86/intel/pmc/pltdrv.c +++ b/drivers/platform/x86/intel/pmc/pltdrv.c @@ -86,4 +86,5 @@ static void __exit pmc_core_platform_exit(void) module_init(pmc_core_platform_init); module_exit(pmc_core_platform_exit); +MODULE_DESCRIPTION("Intel PMC Core platform driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel/rst.c b/drivers/platform/x86/intel/rst.c index 6bc9c4a603e0..f3a60e14d4c1 100644 --- a/drivers/platform/x86/intel/rst.c +++ b/drivers/platform/x86/intel/rst.c @@ 
-7,6 +7,7 @@ #include <linux/module.h> #include <linux/slab.h> +MODULE_DESCRIPTION("Intel Rapid Start Technology Driver"); MODULE_LICENSE("GPL"); static ssize_t irst_show_wakeup_events(struct device *dev, diff --git a/drivers/platform/x86/intel/smartconnect.c b/drivers/platform/x86/intel/smartconnect.c index cd25d0585324..31019a1a6d5e 100644 --- a/drivers/platform/x86/intel/smartconnect.c +++ b/drivers/platform/x86/intel/smartconnect.c @@ -6,6 +6,7 @@ #include <linux/acpi.h> #include <linux/module.h> +MODULE_DESCRIPTION("Intel Smart Connect disabling driver"); MODULE_LICENSE("GPL"); static int smartconnect_acpi_init(struct acpi_device *acpi) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 84c1353eb12b..9b7ce03ba085 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -24,6 +24,7 @@ #define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | VGBS_TABLET_MODE_FLAG_ALT) +MODULE_DESCRIPTION("Intel Virtual Button driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("AceLan Kao"); diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index d0fee5d375d7..9c7857842caf 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -39,8 +39,6 @@ MODULE_LICENSE("GPL"); #define WMI_METHOD_WMBB "2B4F501A-BD3C-4394-8DCF-00A7D2BC8210" #define WMI_EVENT_GUID WMI_EVENT_GUID0 -#define WMAB_METHOD "\\XINI.WMAB" -#define WMBB_METHOD "\\XINI.WMBB" #define SB_GGOV_METHOD "\\_SB.GGOV" #define GOV_TLED 0x2020008 #define WM_GET 1 @@ -74,7 +72,7 @@ static u32 inited; static int battery_limit_use_wmbb; static struct led_classdev kbd_backlight; -static enum led_brightness get_kbd_backlight_level(void); +static enum led_brightness get_kbd_backlight_level(struct device *dev); static const struct key_entry wmi_keymap[] = { {KE_KEY, 0x70, {KEY_F15} }, /* LG control panel (F1) */ @@ -84,7 +82,6 @@ static const struct key_entry wmi_keymap[] = { * this key both sends an event and 
* changes backlight level. */ - {KE_KEY, 0x80, {KEY_RFKILL} }, {KE_END, 0} }; @@ -128,11 +125,10 @@ static int ggov(u32 arg0) return res; } -static union acpi_object *lg_wmab(u32 method, u32 arg1, u32 arg2) +static union acpi_object *lg_wmab(struct device *dev, u32 method, u32 arg1, u32 arg2) { union acpi_object args[3]; acpi_status status; - acpi_handle handle; struct acpi_object_list arg; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -143,29 +139,22 @@ static union acpi_object *lg_wmab(u32 method, u32 arg1, u32 arg2) args[2].type = ACPI_TYPE_INTEGER; args[2].integer.value = arg2; - status = acpi_get_handle(NULL, (acpi_string) WMAB_METHOD, &handle); - if (ACPI_FAILURE(status)) { - pr_err("Cannot get handle"); - return NULL; - } - arg.count = 3; arg.pointer = args; - status = acpi_evaluate_object(handle, NULL, &arg, &buffer); + status = acpi_evaluate_object(ACPI_HANDLE(dev), "WMAB", &arg, &buffer); if (ACPI_FAILURE(status)) { - acpi_handle_err(handle, "WMAB: call failed.\n"); + dev_err(dev, "WMAB: call failed.\n"); return NULL; } return buffer.pointer; } -static union acpi_object *lg_wmbb(u32 method_id, u32 arg1, u32 arg2) +static union acpi_object *lg_wmbb(struct device *dev, u32 method_id, u32 arg1, u32 arg2) { union acpi_object args[3]; acpi_status status; - acpi_handle handle; struct acpi_object_list arg; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; u8 buf[32]; @@ -181,18 +170,12 @@ static union acpi_object *lg_wmbb(u32 method_id, u32 arg1, u32 arg2) args[2].buffer.length = 32; args[2].buffer.pointer = buf; - status = acpi_get_handle(NULL, (acpi_string)WMBB_METHOD, &handle); - if (ACPI_FAILURE(status)) { - pr_err("Cannot get handle"); - return NULL; - } - arg.count = 3; arg.pointer = args; - status = acpi_evaluate_object(handle, NULL, &arg, &buffer); + status = acpi_evaluate_object(ACPI_HANDLE(dev), "WMBB", &arg, &buffer); if (ACPI_FAILURE(status)) { - acpi_handle_err(handle, "WMAB: call failed.\n"); + dev_err(dev, "WMBB: call 
failed.\n"); return NULL; } @@ -223,7 +206,7 @@ static void wmi_notify(u32 value, void *context) if (eventcode == 0x10000000) { led_classdev_notify_brightness_hw_changed( - &kbd_backlight, get_kbd_backlight_level()); + &kbd_backlight, get_kbd_backlight_level(kbd_backlight.dev->parent)); } else { key = sparse_keymap_entry_from_scancode( wmi_input_dev, eventcode); @@ -272,14 +255,7 @@ static void wmi_input_setup(void) static void acpi_notify(struct acpi_device *device, u32 event) { - struct key_entry *key; - acpi_handle_debug(device->handle, "notify: %d\n", event); - if (inited & INIT_SPARSE_KEYMAP) { - key = sparse_keymap_entry_from_scancode(wmi_input_dev, 0x80); - if (key && key->type == KE_KEY) - sparse_keymap_report_entry(wmi_input_dev, key, 1, true); - } } static ssize_t fan_mode_store(struct device *dev, @@ -295,7 +271,7 @@ static ssize_t fan_mode_store(struct device *dev, if (ret) return ret; - r = lg_wmab(WM_FAN_MODE, WM_GET, 0); + r = lg_wmab(dev, WM_FAN_MODE, WM_GET, 0); if (!r) return -EIO; @@ -306,9 +282,9 @@ static ssize_t fan_mode_store(struct device *dev, m = r->integer.value; kfree(r); - r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xffffff0f) | (value << 4)); + r = lg_wmab(dev, WM_FAN_MODE, WM_SET, (m & 0xffffff0f) | (value << 4)); kfree(r); - r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xfffffff0) | value); + r = lg_wmab(dev, WM_FAN_MODE, WM_SET, (m & 0xfffffff0) | value); kfree(r); return count; @@ -320,7 +296,7 @@ static ssize_t fan_mode_show(struct device *dev, unsigned int status; union acpi_object *r; - r = lg_wmab(WM_FAN_MODE, WM_GET, 0); + r = lg_wmab(dev, WM_FAN_MODE, WM_GET, 0); if (!r) return -EIO; @@ -347,7 +323,7 @@ static ssize_t usb_charge_store(struct device *dev, if (ret) return ret; - r = lg_wmbb(WMBB_USB_CHARGE, WM_SET, value); + r = lg_wmbb(dev, WMBB_USB_CHARGE, WM_SET, value); if (!r) return -EIO; @@ -361,7 +337,7 @@ static ssize_t usb_charge_show(struct device *dev, unsigned int status; union acpi_object *r; - r = lg_wmbb(WMBB_USB_CHARGE, 
WM_GET, 0); + r = lg_wmbb(dev, WMBB_USB_CHARGE, WM_GET, 0); if (!r) return -EIO; @@ -389,7 +365,7 @@ static ssize_t reader_mode_store(struct device *dev, if (ret) return ret; - r = lg_wmab(WM_READER_MODE, WM_SET, value); + r = lg_wmab(dev, WM_READER_MODE, WM_SET, value); if (!r) return -EIO; @@ -403,7 +379,7 @@ static ssize_t reader_mode_show(struct device *dev, unsigned int status; union acpi_object *r; - r = lg_wmab(WM_READER_MODE, WM_GET, 0); + r = lg_wmab(dev, WM_READER_MODE, WM_GET, 0); if (!r) return -EIO; @@ -431,7 +407,7 @@ static ssize_t fn_lock_store(struct device *dev, if (ret) return ret; - r = lg_wmab(WM_FN_LOCK, WM_SET, value); + r = lg_wmab(dev, WM_FN_LOCK, WM_SET, value); if (!r) return -EIO; @@ -445,7 +421,7 @@ static ssize_t fn_lock_show(struct device *dev, unsigned int status; union acpi_object *r; - r = lg_wmab(WM_FN_LOCK, WM_GET, 0); + r = lg_wmab(dev, WM_FN_LOCK, WM_GET, 0); if (!r) return -EIO; @@ -475,9 +451,9 @@ static ssize_t charge_control_end_threshold_store(struct device *dev, union acpi_object *r; if (battery_limit_use_wmbb) - r = lg_wmbb(WMBB_BATT_LIMIT, WM_SET, value); + r = lg_wmbb(&pf_device->dev, WMBB_BATT_LIMIT, WM_SET, value); else - r = lg_wmab(WM_BATT_LIMIT, WM_SET, value); + r = lg_wmab(&pf_device->dev, WM_BATT_LIMIT, WM_SET, value); if (!r) return -EIO; @@ -496,7 +472,7 @@ static ssize_t charge_control_end_threshold_show(struct device *device, union acpi_object *r; if (battery_limit_use_wmbb) { - r = lg_wmbb(WMBB_BATT_LIMIT, WM_GET, 0); + r = lg_wmbb(&pf_device->dev, WMBB_BATT_LIMIT, WM_GET, 0); if (!r) return -EIO; @@ -507,7 +483,7 @@ static ssize_t charge_control_end_threshold_show(struct device *device, status = r->buffer.pointer[0x10]; } else { - r = lg_wmab(WM_BATT_LIMIT, WM_GET, 0); + r = lg_wmab(&pf_device->dev, WM_BATT_LIMIT, WM_GET, 0); if (!r) return -EIO; @@ -586,7 +562,7 @@ static void tpad_led_set(struct led_classdev *cdev, { union acpi_object *r; - r = lg_wmab(WM_TLED, WM_SET, brightness > LED_OFF); + r = 
lg_wmab(cdev->dev->parent, WM_TLED, WM_SET, brightness > LED_OFF); kfree(r); } @@ -608,16 +584,16 @@ static void kbd_backlight_set(struct led_classdev *cdev, val = 0; if (brightness >= LED_FULL) val = 0x24; - r = lg_wmab(WM_KEY_LIGHT, WM_SET, val); + r = lg_wmab(cdev->dev->parent, WM_KEY_LIGHT, WM_SET, val); kfree(r); } -static enum led_brightness get_kbd_backlight_level(void) +static enum led_brightness get_kbd_backlight_level(struct device *dev) { union acpi_object *r; int val; - r = lg_wmab(WM_KEY_LIGHT, WM_GET, 0); + r = lg_wmab(dev, WM_KEY_LIGHT, WM_GET, 0); if (!r) return LED_OFF; @@ -645,7 +621,7 @@ static enum led_brightness get_kbd_backlight_level(void) static enum led_brightness kbd_backlight_get(struct led_classdev *cdev) { - return get_kbd_backlight_level(); + return get_kbd_backlight_level(cdev->dev->parent); } static LED_DEVICE(kbd_backlight, 255, LED_BRIGHT_HW_CHANGED); @@ -672,6 +648,11 @@ static struct platform_driver pf_driver = { static int acpi_add(struct acpi_device *device) { + struct platform_device_info pdev_info = { + .fwnode = acpi_fwnode_handle(device), + .name = PLATFORM_NAME, + .id = PLATFORM_DEVID_NONE, + }; int ret; const char *product; int year = 2017; @@ -683,9 +664,7 @@ static int acpi_add(struct acpi_device *device) if (ret) return ret; - pf_device = platform_device_register_simple(PLATFORM_NAME, - PLATFORM_DEVID_NONE, - NULL, 0); + pf_device = platform_device_register_full(&pdev_info); if (IS_ERR(pf_device)) { ret = PTR_ERR(pf_device); pf_device = NULL; @@ -776,7 +755,7 @@ static void acpi_remove(struct acpi_device *device) } static const struct acpi_device_id device_ids[] = { - {"LGEX0815", 0}, + {"LGEX0820", 0}, {"", 0} }; MODULE_DEVICE_TABLE(acpi, device_ids); diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c b/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c index 31a139d87d9a..5edc294de6e4 100644 --- a/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c +++ 
b/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c @@ -45,6 +45,7 @@ static struct platform_driver simatic_ipc_batt_driver = { module_platform_driver(simatic_ipc_batt_driver); +MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Apollo Lake GPIO"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" KBUILD_MODNAME); MODULE_SOFTDEP("pre: simatic-ipc-batt platform:apollolake-pinctrl"); diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c b/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c index a7676f224075..e6a56d14b505 100644 --- a/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c +++ b/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c @@ -45,6 +45,7 @@ static struct platform_driver simatic_ipc_batt_driver = { module_platform_driver(simatic_ipc_batt_driver); +MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Elkhart Lake GPIO"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" KBUILD_MODNAME); MODULE_SOFTDEP("pre: simatic-ipc-batt platform:elkhartlake-pinctrl"); diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c b/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c index 5e77e05fdb5d..f8849d0e48a8 100644 --- a/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c +++ b/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c @@ -81,6 +81,7 @@ static struct platform_driver simatic_ipc_batt_driver = { module_platform_driver(simatic_ipc_batt_driver); +MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Nuvoton GPIO"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" KBUILD_MODNAME); MODULE_SOFTDEP("pre: simatic-ipc-batt gpio_f7188x platform:elkhartlake-pinctrl platform:alderlake-pinctrl"); diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt.c b/drivers/platform/x86/siemens/simatic-ipc-batt.c index c6dd263b4ee3..d9aff10608cf 100644 --- a/drivers/platform/x86/siemens/simatic-ipc-batt.c +++ 
b/drivers/platform/x86/siemens/simatic-ipc-batt.c @@ -247,6 +247,7 @@ static struct platform_driver simatic_ipc_batt_driver = { module_platform_driver(simatic_ipc_batt_driver); +MODULE_DESCRIPTION("CMOS core battery driver for Siemens Simatic IPCs"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" KBUILD_MODNAME); MODULE_AUTHOR("Henning Schild <henning.schild@siemens.com>"); diff --git a/drivers/platform/x86/siemens/simatic-ipc.c b/drivers/platform/x86/siemens/simatic-ipc.c index 8ca6e277fa03..7039874d8f11 100644 --- a/drivers/platform/x86/siemens/simatic-ipc.c +++ b/drivers/platform/x86/siemens/simatic-ipc.c @@ -231,6 +231,7 @@ static void __exit simatic_ipc_exit_module(void) module_init(simatic_ipc_init_module); module_exit(simatic_ipc_exit_module); +MODULE_DESCRIPTION("Siemens SIMATIC IPC platform driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Gerd Haeussler <gerd.haeussler.ext@siemens.com>"); MODULE_ALIAS("dmi:*:svnSIEMENSAG:*"); diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 3a8d8df89186..78a5aac2dcfd 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -3271,7 +3271,7 @@ static const char *find_hci_method(acpi_handle handle) */ #define QUIRK_HCI_HOTKEY_QUICKSTART BIT(1) -static const struct dmi_system_id toshiba_dmi_quirks[] = { +static const struct dmi_system_id toshiba_dmi_quirks[] __initconst = { { /* Toshiba Portégé R700 */ /* https://bugzilla.kernel.org/show_bug.cgi?id=21012 */ @@ -3299,6 +3299,7 @@ static const struct dmi_system_id toshiba_dmi_quirks[] = { }, .driver_data = (void *)(QUIRK_TURN_ON_PANEL_ON_RESUME | QUIRK_HCI_HOTKEY_QUICKSTART), }, + { } }; static int toshiba_acpi_add(struct acpi_device *acpi_dev) @@ -3306,8 +3307,6 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev) struct toshiba_acpi_dev *dev; const char *hci_method; u32 dummy; - const struct dmi_system_id *dmi_id; - long quirks = 0; int ret = 0; if (toshiba_acpi) @@ -3460,16 +3459,6 @@ 
iio_error: } #endif - dmi_id = dmi_first_match(toshiba_dmi_quirks); - if (dmi_id) - quirks = (long)dmi_id->driver_data; - - if (turn_on_panel_on_resume == -1) - turn_on_panel_on_resume = !!(quirks & QUIRK_TURN_ON_PANEL_ON_RESUME); - - if (hci_hotkey_quickstart == -1) - hci_hotkey_quickstart = !!(quirks & QUIRK_HCI_HOTKEY_QUICKSTART); - toshiba_wwan_available(dev); if (dev->wwan_supported) toshiba_acpi_setup_wwan_rfkill(dev); @@ -3618,10 +3607,27 @@ static struct acpi_driver toshiba_acpi_driver = { .drv.pm = &toshiba_acpi_pm, }; +static void __init toshiba_dmi_init(void) +{ + const struct dmi_system_id *dmi_id; + long quirks = 0; + + dmi_id = dmi_first_match(toshiba_dmi_quirks); + if (dmi_id) + quirks = (long)dmi_id->driver_data; + + if (turn_on_panel_on_resume == -1) + turn_on_panel_on_resume = !!(quirks & QUIRK_TURN_ON_PANEL_ON_RESUME); + + if (hci_hotkey_quickstart == -1) + hci_hotkey_quickstart = !!(quirks & QUIRK_HCI_HOTKEY_QUICKSTART); +} + static int __init toshiba_acpi_init(void) { int ret; + toshiba_dmi_init(); toshiba_proc_dir = proc_mkdir(PROC_TOSHIBA, acpi_root_dir); if (!toshiba_proc_dir) { pr_err("Unable to create proc dir " PROC_TOSHIBA "\n"); diff --git a/drivers/platform/x86/uv_sysfs.c b/drivers/platform/x86/uv_sysfs.c index 37372d7cc54a..f6a0627f36db 100644 --- a/drivers/platform/x86/uv_sysfs.c +++ b/drivers/platform/x86/uv_sysfs.c @@ -929,4 +929,5 @@ module_init(uv_sysfs_init); module_exit(uv_sysfs_exit); MODULE_AUTHOR("Hewlett Packard Enterprise"); +MODULE_DESCRIPTION("Sysfs structure for HPE UV systems"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c index e95cdbbfb708..a220fe4f9ef8 100644 --- a/drivers/platform/x86/wireless-hotkey.c +++ b/drivers/platform/x86/wireless-hotkey.c @@ -14,11 +14,13 @@ #include <linux/acpi.h> #include <acpi/acpi_bus.h> +MODULE_DESCRIPTION("Airplane mode button for AMD, HP & Xiaomi laptops"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); 
MODULE_ALIAS("acpi*:HPQ6001:*"); MODULE_ALIAS("acpi*:WSTADEF:*"); MODULE_ALIAS("acpi*:AMDI0051:*"); +MODULE_ALIAS("acpi*:LGEX0815:*"); struct wl_button { struct input_dev *input_dev; @@ -29,6 +31,7 @@ static const struct acpi_device_id wl_ids[] = { {"HPQ6001", 0}, {"WSTADEF", 0}, {"AMDI0051", 0}, + {"LGEX0815", 0}, {"", 0}, }; diff --git a/drivers/platform/x86/xo1-rfkill.c b/drivers/platform/x86/xo1-rfkill.c index e64d5646b4c7..5fe68296501c 100644 --- a/drivers/platform/x86/xo1-rfkill.c +++ b/drivers/platform/x86/xo1-rfkill.c @@ -74,5 +74,6 @@ static struct platform_driver xo1_rfkill_driver = { module_platform_driver(xo1_rfkill_driver); MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>"); +MODULE_DESCRIPTION("OLPC XO-1 software RF kill switch"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:xo1-rfkill"); diff --git a/drivers/pmdomain/qcom/rpmhpd.c b/drivers/pmdomain/qcom/rpmhpd.c index de9121ef4216..d2cb4271a1ca 100644 --- a/drivers/pmdomain/qcom/rpmhpd.c +++ b/drivers/pmdomain/qcom/rpmhpd.c @@ -40,6 +40,7 @@ * @addr: Resource address as looped up using resource name from * cmd-db * @state_synced: Indicator that sync_state has been invoked for the rpmhpd resource + * @skip_retention_level: Indicate that retention level should not be used for the power domain */ struct rpmhpd { struct device *dev; @@ -56,6 +57,7 @@ struct rpmhpd { const char *res_name; u32 addr; bool state_synced; + bool skip_retention_level; }; struct rpmhpd_desc { @@ -173,6 +175,7 @@ static struct rpmhpd mxc = { .pd = { .name = "mxc", }, .peer = &mxc_ao, .res_name = "mxc.lvl", + .skip_retention_level = true, }; static struct rpmhpd mxc_ao = { @@ -180,6 +183,7 @@ static struct rpmhpd mxc_ao = { .active_only = true, .peer = &mxc, .res_name = "mxc.lvl", + .skip_retention_level = true, }; static struct rpmhpd nsp = { @@ -819,6 +823,9 @@ static int rpmhpd_update_level_mapping(struct rpmhpd *rpmhpd) return -EINVAL; for (i = 0; i < rpmhpd->level_count; i++) { + if (rpmhpd->skip_retention_level && buf[i] == 
RPMH_REGULATOR_LEVEL_RETENTION) + continue; + rpmhpd->level[i] = buf[i]; /* Remember the first corner with non-zero level */ diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index a15460aaa03b..6b1b8f57cd95 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -296,8 +296,7 @@ static ssize_t max_vclocks_store(struct device *dev, if (max < ptp->n_vclocks) goto out; - size = sizeof(int) * max; - vclock_index = kzalloc(size, GFP_KERNEL); + vclock_index = kcalloc(max, sizeof(int), GFP_KERNEL); if (!vclock_index) { err = -ENOMEM; goto out; diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index a2f231d13a9f..8bae3fd2b330 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -321,22 +321,30 @@ static int stm32_pwm_config(struct stm32_pwm *priv, unsigned int ch, * First we need to find the minimal value for prescaler such that * * period_ns * clkrate - * ------------------------------ + * ------------------------------ < max_arr + 1 * NSEC_PER_SEC * (prescaler + 1) * - * isn't bigger than max_arr. + * This equation is equivalent to + * + * period_ns * clkrate + * ---------------------------- < prescaler + 1 + * NSEC_PER_SEC * (max_arr + 1) + * + * Using integer division and knowing that the right hand side is + * integer, this is further equivalent to + * + * (period_ns * clkrate) // (NSEC_PER_SEC * (max_arr + 1)) ≤ prescaler */ prescaler = mul_u64_u64_div_u64(period_ns, clk_get_rate(priv->clk), - (u64)NSEC_PER_SEC * priv->max_arr); - if (prescaler > 0) - prescaler -= 1; - + (u64)NSEC_PER_SEC * ((u64)priv->max_arr + 1)); if (prescaler > MAX_TIM_PSC) return -EINVAL; prd = mul_u64_u64_div_u64(period_ns, clk_get_rate(priv->clk), (u64)NSEC_PER_SEC * (prescaler + 1)); + if (!prd) + return -EINVAL; /* * All channels share the same prescaler and counter so when two @@ -673,7 +681,8 @@ static int stm32_pwm_probe(struct platform_device *pdev) * .apply() won't overflow. 
*/ if (clk_get_rate(priv->clk) > 1000000000) - return dev_err_probe(dev, -EINVAL, "Failed to lock clock\n"); + return dev_err_probe(dev, -EINVAL, "Clock freq too high (%lu)\n", + clk_get_rate(priv->clk)); chip->ops = &stm32pwm_ops; diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index 34fcdd82b2ea..f3c447ecdc3b 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -140,7 +140,7 @@ #define AXP717_DCDC1_NUM_VOLTAGES 88 #define AXP717_DCDC2_NUM_VOLTAGES 107 -#define AXP717_DCDC3_NUM_VOLTAGES 104 +#define AXP717_DCDC3_NUM_VOLTAGES 103 #define AXP717_DCDC_V_OUT_MASK GENMASK(6, 0) #define AXP717_LDO_V_OUT_MASK GENMASK(4, 0) @@ -763,10 +763,15 @@ static const struct linear_range axp717_dcdc1_ranges[] = { REGULATOR_LINEAR_RANGE(1220000, 71, 87, 20000), }; +/* + * The manual says that the last voltage is 3.4V, encoded as 0b1101011 (107), + * but every other method proves that this is wrong, so it's really 106 that + * programs the final 3.4V. 
+ */ static const struct linear_range axp717_dcdc2_ranges[] = { REGULATOR_LINEAR_RANGE(500000, 0, 70, 10000), REGULATOR_LINEAR_RANGE(1220000, 71, 87, 20000), - REGULATOR_LINEAR_RANGE(1600000, 88, 107, 100000), + REGULATOR_LINEAR_RANGE(1600000, 88, 106, 100000), }; static const struct linear_range axp717_dcdc3_ranges[] = { @@ -790,40 +795,40 @@ static const struct regulator_desc axp717_regulators[] = { AXP_DESC(AXP717, DCDC4, "dcdc4", "vin4", 1000, 3700, 100, AXP717_DCDC4_CONTROL, AXP717_DCDC_V_OUT_MASK, AXP717_DCDC_OUTPUT_CONTROL, BIT(3)), - AXP_DESC(AXP717, ALDO1, "aldo1", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, ALDO1, "aldo1", "aldoin", 500, 3500, 100, AXP717_ALDO1_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(0)), - AXP_DESC(AXP717, ALDO2, "aldo2", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, ALDO2, "aldo2", "aldoin", 500, 3500, 100, AXP717_ALDO2_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(1)), - AXP_DESC(AXP717, ALDO3, "aldo3", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, ALDO3, "aldo3", "aldoin", 500, 3500, 100, AXP717_ALDO3_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(2)), - AXP_DESC(AXP717, ALDO4, "aldo4", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, ALDO4, "aldo4", "aldoin", 500, 3500, 100, AXP717_ALDO4_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(3)), - AXP_DESC(AXP717, BLDO1, "bldo1", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, BLDO1, "bldo1", "bldoin", 500, 3500, 100, AXP717_BLDO1_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(4)), - AXP_DESC(AXP717, BLDO2, "bldo2", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, BLDO2, "bldo2", "bldoin", 500, 3500, 100, AXP717_BLDO2_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(5)), - AXP_DESC(AXP717, BLDO3, "bldo3", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, BLDO3, "bldo3", "bldoin", 500, 3500, 100, AXP717_BLDO3_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(6)), - AXP_DESC(AXP717, BLDO4, "bldo4", 
"vin1", 500, 3500, 100, + AXP_DESC(AXP717, BLDO4, "bldo4", "bldoin", 500, 3500, 100, AXP717_BLDO4_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(7)), - AXP_DESC(AXP717, CLDO1, "cldo1", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, CLDO1, "cldo1", "cldoin", 500, 3500, 100, AXP717_CLDO1_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO1_OUTPUT_CONTROL, BIT(0)), - AXP_DESC(AXP717, CLDO2, "cldo2", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, CLDO2, "cldo2", "cldoin", 500, 3500, 100, AXP717_CLDO2_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO1_OUTPUT_CONTROL, BIT(1)), - AXP_DESC(AXP717, CLDO3, "cldo3", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, CLDO3, "cldo3", "cldoin", 500, 3500, 100, AXP717_CLDO3_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO1_OUTPUT_CONTROL, BIT(2)), - AXP_DESC(AXP717, CLDO4, "cldo4", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, CLDO4, "cldo4", "cldoin", 500, 3500, 100, AXP717_CLDO4_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO1_OUTPUT_CONTROL, BIT(3)), AXP_DESC(AXP717, CPUSLDO, "cpusldo", "vin1", 500, 1400, 50, diff --git a/drivers/regulator/bd71815-regulator.c b/drivers/regulator/bd71815-regulator.c index 26192d55a685..79fbb45297f6 100644 --- a/drivers/regulator/bd71815-regulator.c +++ b/drivers/regulator/bd71815-regulator.c @@ -256,7 +256,7 @@ static int buck12_set_hw_dvs_levels(struct device_node *np, * 10: 2.50mV/usec 10mV 4uS * 11: 1.25mV/usec 10mV 8uS */ -static const unsigned int bd7181x_ramp_table[] = { 1250, 2500, 5000, 10000 }; +static const unsigned int bd7181x_ramp_table[] = { 10000, 5000, 2500, 1250 }; static int bd7181x_led_set_current_limit(struct regulator_dev *rdev, int min_uA, int max_uA) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 5794f4e9dd52..844e9587a880 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3347,6 +3347,7 @@ struct regmap *regulator_get_regmap(struct regulator *regulator) return map ? 
map : ERR_PTR(-EOPNOTSUPP); } +EXPORT_SYMBOL_GPL(regulator_get_regmap); /** * regulator_get_hardware_vsel_register - get the HW voltage selector register diff --git a/drivers/regulator/tps6594-regulator.c b/drivers/regulator/tps6594-regulator.c index 4a859f4c0f83..ac53792e3fed 100644 --- a/drivers/regulator/tps6594-regulator.c +++ b/drivers/regulator/tps6594-regulator.c @@ -653,18 +653,14 @@ static int tps6594_regulator_probe(struct platform_device *pdev) } } - if (tps->chip_id == LP8764) { - nr_buck = ARRAY_SIZE(buck_regs); - nr_ldo = 0; - nr_types = REGS_INT_NB; - } else if (tps->chip_id == TPS65224) { + if (tps->chip_id == TPS65224) { nr_buck = ARRAY_SIZE(tps65224_buck_regs); nr_ldo = ARRAY_SIZE(tps65224_ldo_regs); - nr_types = REGS_INT_NB; + nr_types = TPS65224_REGS_INT_NB; } else { nr_buck = ARRAY_SIZE(buck_regs); - nr_ldo = ARRAY_SIZE(tps6594_ldo_regs); - nr_types = TPS65224_REGS_INT_NB; + nr_ldo = (tps->chip_id == LP8764) ? 0 : ARRAY_SIZE(tps6594_ldo_regs); + nr_types = REGS_INT_NB; } reg_irq_nb = nr_types * (nr_buck + nr_ldo); diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index 7112f5932609..6bb5d9e372e4 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -68,6 +68,7 @@ config RESET_BRCMSTB_RESCAL config RESET_GPIO tristate "GPIO reset controller" + depends on GPIOLIB help This enables a generic reset controller for resets attached via GPIOs. 
Typically for OF platforms this driver expects "reset-gpios" diff --git a/drivers/reset/hisilicon/hi6220_reset.c b/drivers/reset/hisilicon/hi6220_reset.c index 5c3267acd2b1..65aa5ff5ed82 100644 --- a/drivers/reset/hisilicon/hi6220_reset.c +++ b/drivers/reset/hisilicon/hi6220_reset.c @@ -219,4 +219,5 @@ static int __init hi6220_reset_init(void) postcore_initcall(hi6220_reset_init); +MODULE_DESCRIPTION("Hisilicon Hi6220 reset controller driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 2f16f543079b..a76c6af9ea63 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4906,7 +4906,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req) ccw++; if (dst) { if (ccw->flags & CCW_FLAG_IDA) - cda = *((char **)dma32_to_virt(ccw->cda)); + cda = dma64_to_virt(*((dma64_t *)dma32_to_virt(ccw->cda))); else cda = dma32_to_virt(ccw->cda); if (dst != cda) { @@ -5525,7 +5525,7 @@ dasd_eckd_dump_ccw_range(struct dasd_device *device, struct ccw1 *from, /* get pointer to data (consider IDALs) */ if (from->flags & CCW_FLAG_IDA) - datap = (char *)*((addr_t *)dma32_to_virt(from->cda)); + datap = dma64_to_virt(*((dma64_t *)dma32_to_virt(from->cda))); else datap = dma32_to_virt(from->cda); diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 361e9bd75257..9f2023a077c2 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -585,7 +585,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) ccw++; if (dst) { if (ccw->flags & CCW_FLAG_IDA) - cda = *((char **)dma32_to_virt(ccw->cda)); + cda = dma64_to_virt(*((dma64_t *)dma32_to_virt(ccw->cda))); else cda = dma32_to_virt(ccw->cda); if (dst != cda) { diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index d53ee34d398f..fbe29cabcbb8 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -1293,6 +1293,7 @@ sclp_init(void) 
fail_unregister_reboot_notifier: unregister_reboot_notifier(&sclp_reboot_notifier); fail_init_state_uninitialized: + list_del(&sclp_state_change_event.list); sclp_init_state = sclp_init_state_uninitialized; free_page((unsigned long) sclp_read_sccb); free_page((unsigned long) sclp_init_sccb); diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 6e5c508b1e07..5f6e10225627 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -490,13 +490,14 @@ static int ccwchain_fetch_tic(struct ccw1 *ccw, struct channel_program *cp) { struct ccwchain *iter; - u32 cda, ccw_head; + u32 offset, ccw_head; list_for_each_entry(iter, &cp->ccwchain_list, next) { ccw_head = iter->ch_iova; if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) { - cda = (u64)iter->ch_ccw + dma32_to_u32(ccw->cda) - ccw_head; - ccw->cda = u32_to_dma32(cda); + /* Calculate offset of TIC target */ + offset = dma32_to_u32(ccw->cda) - ccw_head; + ccw->cda = virt_to_dma32((void *)iter->ch_ccw + offset); return 0; } } @@ -914,7 +915,7 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw) * in the ioctl directly. Path status changes etc. */ list_for_each_entry(chain, &cp->ccwchain_list, next) { - ccw_head = (u32)(u64)chain->ch_ccw; + ccw_head = dma32_to_u32(virt_to_dma32(chain->ch_ccw)); /* * On successful execution, cpa points just beyond the end * of the chain. 
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index d7569f395559..d6491fc84e8c 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -698,6 +698,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, dma64_t *indicatorp = NULL; int ret, i, queue_idx = 0; struct ccw1 *ccw; + dma32_t indicatorp_dma = 0; ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw), NULL); if (!ccw) @@ -725,7 +726,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, */ indicatorp = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*indicatorp), - &ccw->cda); + &indicatorp_dma); if (!indicatorp) goto out; *indicatorp = indicators_dma(vcdev); @@ -735,6 +736,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, /* no error, just fall back to legacy interrupts */ vcdev->is_thinint = false; } + ccw->cda = indicatorp_dma; if (!vcdev->is_thinint) { /* Register queue indicators with host. 
*/ *indicators(vcdev) = 0; diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 4c69fc63c119..cbbe43d8ef87 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -610,15 +610,15 @@ int sas_ata_init(struct domain_device *found_dev) rc = ata_sas_tport_add(ata_host->dev, ap); if (rc) - goto destroy_port; + goto free_port; found_dev->sata_dev.ata_host = ata_host; found_dev->sata_dev.ap = ap; return 0; -destroy_port: - kfree(ap); +free_port: + ata_port_free(ap); free_host: ata_host_put(ata_host); return rc; diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 8fb7c41c0962..48d975c6dbf2 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -301,7 +301,7 @@ void sas_free_device(struct kref *kref) if (dev_is_sata(dev) && dev->sata_dev.ap) { ata_sas_tport_delete(dev->sata_dev.ap); - kfree(dev->sata_dev.ap); + ata_port_free(dev->sata_dev.ap); ata_host_put(dev->sata_dev.ata_host); dev->sata_dev.ata_host = NULL; dev->sata_dev.ap = NULL; diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 85948963fb97..03d6ec1eb970 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -145,6 +145,20 @@ static inline void sas_fail_probe(struct domain_device *dev, const char *func, i func, dev->parent ? 
"exp-attached" : "direct-attached", SAS_ADDR(dev->sas_addr), err); + + /* + * If the device probe failed, the expander phy attached address + * needs to be reset so that the phy will not be treated as flutter + * in the next revalidation + */ + if (dev->parent && !dev_is_expander(dev->dev_type)) { + struct sas_phy *phy = dev->phy; + struct domain_device *parent = dev->parent; + struct ex_phy *ex_phy = &parent->ex_dev.ex_phy[phy->number]; + + memset(ex_phy->attached_sas_addr, 0, SAS_ADDR_SIZE); + } + sas_unregister_dev(dev->port, dev); } diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index acf0592d63da..91f022fb8d0c 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -926,6 +926,7 @@ static const int device_qfull_result = static const int condition_met_result = SAM_STAT_CONDITION_MET; static struct dentry *sdebug_debugfs_root; +static ASYNC_DOMAIN_EXCLUSIVE(sdebug_async_domain); static void sdebug_err_free(struct rcu_head *head) { @@ -1148,6 +1149,8 @@ static int sdebug_target_alloc(struct scsi_target *starget) if (!targetip) return -ENOMEM; + async_synchronize_full_domain(&sdebug_async_domain); + targetip->debugfs_entry = debugfs_create_dir(dev_name(&starget->dev), sdebug_debugfs_root); @@ -1174,7 +1177,8 @@ static void sdebug_target_destroy(struct scsi_target *starget) targetip = (struct sdebug_target_info *)starget->hostdata; if (targetip) { starget->hostdata = NULL; - async_schedule(sdebug_tartget_cleanup_async, targetip); + async_schedule_domain(sdebug_tartget_cleanup_async, targetip, + &sdebug_async_domain); } } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 37dd6ead72a4..1b7561abe05d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -63,6 +63,7 @@ #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include <scsi/scsi_device.h> +#include <scsi/scsi_devinfo.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> @@ -3118,6 +3119,9 @@ static void 
sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer) struct scsi_mode_data data; int res; + if (sdp->sdev_bflags & BLIST_SKIP_IO_HINTS) + return; + res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a, /*subpage=*/0x05, buffer, SD_BUF_SIZE, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); @@ -4115,8 +4119,6 @@ static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); - if (opal_unlock_from_suspend(sdkp->opal_dev)) { sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); return -EIO; @@ -4133,12 +4135,13 @@ static int sd_resume_common(struct device *dev, bool runtime) if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + if (!sd_do_start_stop(sdkp->device, runtime)) { sdkp->suspended = false; return 0; } - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { sd_resume(dev); diff --git a/drivers/soc/litex/Kconfig b/drivers/soc/litex/Kconfig index e6ba3573a772..f3f869639588 100644 --- a/drivers/soc/litex/Kconfig +++ b/drivers/soc/litex/Kconfig @@ -7,7 +7,7 @@ config LITEX config LITEX_SOC_CONTROLLER tristate "Enable LiteX SoC Controller driver" - depends on OF || COMPILE_TEST + depends on OF depends on HAS_IOMEM select LITEX help diff --git a/drivers/soc/litex/litex_soc_ctrl.c b/drivers/soc/litex/litex_soc_ctrl.c index 10813299aa10..72c44119dd54 100644 --- a/drivers/soc/litex/litex_soc_ctrl.c +++ b/drivers/soc/litex/litex_soc_ctrl.c @@ -82,13 +82,11 @@ static int litex_reset_handler(struct notifier_block *this, unsigned long mode, return NOTIFY_DONE; } -#ifdef CONFIG_OF static const struct of_device_id litex_soc_ctrl_of_match[] = { {.compatible = "litex,soc-controller"}, {}, }; MODULE_DEVICE_TABLE(of, litex_soc_ctrl_of_match); -#endif /* CONFIG_OF */ static int litex_soc_ctrl_probe(struct platform_device *pdev) { @@ -130,7 +128,7 @@ static void 
litex_soc_ctrl_remove(struct platform_device *pdev) static struct platform_driver litex_soc_ctrl_driver = { .driver = { .name = "litex-soc-controller", - .of_match_table = of_match_ptr(litex_soc_ctrl_of_match) + .of_match_table = litex_soc_ctrl_of_match, }, .probe = litex_soc_ctrl_probe, .remove_new = litex_soc_ctrl_remove, diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c index 40fb09d69014..65279243072c 100644 --- a/drivers/soc/qcom/pmic_glink.c +++ b/drivers/soc/qcom/pmic_glink.c @@ -348,11 +348,15 @@ static void pmic_glink_remove(struct platform_device *pdev) mutex_unlock(&__pmic_glink_lock); } +static const unsigned long pmic_glink_sc8280xp_client_mask = BIT(PMIC_GLINK_CLIENT_BATT) | + BIT(PMIC_GLINK_CLIENT_ALTMODE); + static const unsigned long pmic_glink_sm8450_client_mask = BIT(PMIC_GLINK_CLIENT_BATT) | BIT(PMIC_GLINK_CLIENT_ALTMODE) | BIT(PMIC_GLINK_CLIENT_UCSI); static const struct of_device_id pmic_glink_of_match[] = { + { .compatible = "qcom,sc8280xp-pmic-glink", .data = &pmic_glink_sc8280xp_client_mask }, { .compatible = "qcom,pmic-glink", .data = &pmic_glink_sm8450_client_mask }, {} }; diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index b6bfd6729df3..d27667283846 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -127,8 +127,8 @@ static void tegra_fuse_print_sku_info(struct tegra_sku_info *tegra_sku_info) static int tegra_fuse_add_lookups(struct tegra_fuse *fuse) { - fuse->lookups = kmemdup_array(fuse->soc->lookups, sizeof(*fuse->lookups), - fuse->soc->num_lookups, GFP_KERNEL); + fuse->lookups = kmemdup_array(fuse->soc->lookups, fuse->soc->num_lookups, + sizeof(*fuse->lookups), GFP_KERNEL); if (!fuse->lookups) return -ENOMEM; diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 20d94bcfc9b4..795e223f7e5c 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -571,6 +571,9 @@ static 
int sdw_master_read_amd_prop(struct sdw_bus *bus) amd_manager->wake_en_mask = wake_en_mask; fwnode_property_read_u32(link, "amd-sdw-power-mode", &power_mode_mask); amd_manager->power_mode_mask = power_mode_mask; + + fwnode_handle_put(link); + return 0; } diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 17cf27e6ea73..18517121cc89 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -155,8 +155,10 @@ static int sdw_master_read_intel_prop(struct sdw_bus *bus) SDW_MASTER_QUIRKS_CLEAR_INITIAL_PARITY; intel_prop = devm_kzalloc(bus->dev, sizeof(*intel_prop), GFP_KERNEL); - if (!intel_prop) + if (!intel_prop) { + fwnode_handle_put(link); return -ENOMEM; + } /* initialize with hardware defaults, in case the properties are not found */ intel_prop->doaise = 0x1; @@ -184,6 +186,8 @@ static int sdw_master_read_intel_prop(struct sdw_bus *bus) intel_prop->dodse, intel_prop->dods); + fwnode_handle_put(link); + return 0; } diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c index 55a9c51c84c1..e5d9df26d4dc 100644 --- a/drivers/soundwire/mipi_disco.c +++ b/drivers/soundwire/mipi_disco.c @@ -66,8 +66,10 @@ int sdw_master_read_prop(struct sdw_bus *bus) prop->clk_freq = devm_kcalloc(bus->dev, prop->num_clk_freq, sizeof(*prop->clk_freq), GFP_KERNEL); - if (!prop->clk_freq) + if (!prop->clk_freq) { + fwnode_handle_put(link); return -ENOMEM; + } fwnode_property_read_u32_array(link, "mipi-sdw-clock-frequencies-supported", @@ -92,8 +94,10 @@ int sdw_master_read_prop(struct sdw_bus *bus) prop->clk_gears = devm_kcalloc(bus->dev, prop->num_clk_gears, sizeof(*prop->clk_gears), GFP_KERNEL); - if (!prop->clk_gears) + if (!prop->clk_gears) { + fwnode_handle_put(link); return -ENOMEM; + } fwnode_property_read_u32_array(link, "mipi-sdw-supported-clock-gears", @@ -116,6 +120,8 @@ int sdw_master_read_prop(struct sdw_bus *bus) fwnode_property_read_u32(link, "mipi-sdw-command-error-threshold", 
&prop->err_threshold); + fwnode_handle_put(link); + return 0; } EXPORT_SYMBOL(sdw_master_read_prop); @@ -197,8 +203,10 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, dpn[i].num_words, sizeof(*dpn[i].words), GFP_KERNEL); - if (!dpn[i].words) + if (!dpn[i].words) { + fwnode_handle_put(node); return -ENOMEM; + } fwnode_property_read_u32_array(node, "mipi-sdw-port-wordlength-configs", @@ -236,8 +244,10 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, dpn[i].num_channels, sizeof(*dpn[i].channels), GFP_KERNEL); - if (!dpn[i].channels) + if (!dpn[i].channels) { + fwnode_handle_put(node); return -ENOMEM; + } fwnode_property_read_u32_array(node, "mipi-sdw-channel-number-list", @@ -251,8 +261,10 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, dpn[i].num_ch_combinations, sizeof(*dpn[i].ch_combinations), GFP_KERNEL); - if (!dpn[i].ch_combinations) + if (!dpn[i].ch_combinations) { + fwnode_handle_put(node); return -ENOMEM; + } fwnode_property_read_u32_array(node, "mipi-sdw-channel-combination-list", @@ -274,6 +286,8 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, /* TODO: Read audio mode */ + fwnode_handle_put(node); + i++; } @@ -348,10 +362,14 @@ int sdw_slave_read_prop(struct sdw_slave *slave) prop->dp0_prop = devm_kzalloc(&slave->dev, sizeof(*prop->dp0_prop), GFP_KERNEL); - if (!prop->dp0_prop) + if (!prop->dp0_prop) { + fwnode_handle_put(port); return -ENOMEM; + } sdw_slave_read_dp0(slave, port, prop->dp0_prop); + + fwnode_handle_put(port); } /* diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index e358ac5b4509..96a524772549 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -164,16 +164,20 @@ static void spi_engine_gen_xfer(struct spi_engine_program *p, bool dry, } static void spi_engine_gen_sleep(struct spi_engine_program *p, bool dry, - int delay_ns, u32 sclk_hz) + int delay_ns, int inst_ns, u32 sclk_hz) { unsigned int t; - /* negative delay indicates error, e.g. 
from spi_delay_to_ns() */ - if (delay_ns <= 0) + /* + * Negative delay indicates error, e.g. from spi_delay_to_ns(). And if + * delay is less that the instruction execution time, there is no need + * for an extra sleep instruction since the instruction execution time + * will already cover the required delay. + */ + if (delay_ns < 0 || delay_ns <= inst_ns) return; - /* rounding down since executing the instruction adds a couple of ticks delay */ - t = DIV_ROUND_DOWN_ULL((u64)delay_ns * sclk_hz, NSEC_PER_SEC); + t = DIV_ROUND_UP_ULL((u64)(delay_ns - inst_ns) * sclk_hz, NSEC_PER_SEC); while (t) { unsigned int n = min(t, 256U); @@ -220,10 +224,16 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry, struct spi_device *spi = msg->spi; struct spi_controller *host = spi->controller; struct spi_transfer *xfer; - int clk_div, new_clk_div; + int clk_div, new_clk_div, inst_ns; bool keep_cs = false; u8 bits_per_word = 0; + /* + * Take into account instruction execution time for more accurate sleep + * times, especially when the delay is small. 
+ */ + inst_ns = DIV_ROUND_UP(NSEC_PER_SEC, host->max_speed_hz); + clk_div = 1; spi_engine_program_add_cmd(p, dry, @@ -252,7 +262,7 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry, spi_engine_gen_xfer(p, dry, xfer); spi_engine_gen_sleep(p, dry, spi_delay_to_ns(&xfer->delay, xfer), - xfer->effective_speed_hz); + inst_ns, xfer->effective_speed_hz); if (xfer->cs_change) { if (list_is_last(&xfer->transfer_list, &msg->transfers)) { @@ -262,7 +272,7 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry, spi_engine_gen_cs(p, dry, spi, false); spi_engine_gen_sleep(p, dry, spi_delay_to_ns( - &xfer->cs_change_delay, xfer), + &xfer->cs_change_delay, xfer), inst_ns, xfer->effective_speed_hz); if (!list_next_entry(xfer, transfer_list)->cs_off) diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c index 9d747ea69926..8b618ef0f711 100644 --- a/drivers/spi/spi-cs42l43.c +++ b/drivers/spi/spi-cs42l43.c @@ -26,7 +26,7 @@ #include <linux/units.h> #define CS42L43_FIFO_SIZE 16 -#define CS42L43_SPI_ROOT_HZ (40 * HZ_PER_MHZ) +#define CS42L43_SPI_ROOT_HZ 49152000 #define CS42L43_SPI_MAX_LENGTH 65532 enum cs42l43_spi_cmd { @@ -54,7 +54,7 @@ static const struct software_node ampr = { static struct spi_board_info ampl_info = { .modalias = "cs35l56", - .max_speed_hz = 20 * HZ_PER_MHZ, + .max_speed_hz = 11 * HZ_PER_MHZ, .chip_select = 0, .mode = SPI_MODE_0, .swnode = &l, @@ -62,7 +62,7 @@ static struct spi_board_info ampl_info = { static struct spi_board_info ampr_info = { .modalias = "cs35l56", - .max_speed_hz = 20 * HZ_PER_MHZ, + .max_speed_hz = 11 * HZ_PER_MHZ, .chip_select = 1, .mode = SPI_MODE_0, .swnode = &r, diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index be3998104bfb..f7e8b5efa50e 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -984,6 +984,9 @@ static int davinci_spi_probe(struct platform_device *pdev) return ret; free_dma: + /* This bit needs to be cleared to disable 
dpsi->clk */ + clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK); + if (dspi->dma_rx) { dma_release_channel(dspi->dma_rx); dma_release_channel(dspi->dma_tx); @@ -1013,6 +1016,9 @@ static void davinci_spi_remove(struct platform_device *pdev) spi_bitbang_stop(&dspi->bitbang); + /* This bit needs to be cleared to disable dpsi->clk */ + clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK); + if (dspi->dma_rx) { dma_release_channel(dspi->dma_rx); dma_release_channel(dspi->dma_tx); diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index f4006c82f867..1439883326cf 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -660,18 +660,8 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, ctrl |= (spi_imx->target_burst * 8 - 1) << MX51_ECSPI_CTRL_BL_OFFSET; else { - if (spi_imx->usedma) { - ctrl |= (spi_imx->bits_per_word - 1) - << MX51_ECSPI_CTRL_BL_OFFSET; - } else { - if (spi_imx->count >= MX51_ECSPI_CTRL_MAX_BURST) - ctrl |= (MX51_ECSPI_CTRL_MAX_BURST * BITS_PER_BYTE - 1) - << MX51_ECSPI_CTRL_BL_OFFSET; - else - ctrl |= (spi_imx->count / DIV_ROUND_UP(spi_imx->bits_per_word, - BITS_PER_BYTE) * spi_imx->bits_per_word - 1) - << MX51_ECSPI_CTRL_BL_OFFSET; - } + ctrl |= (spi_imx->bits_per_word - 1) + << MX51_ECSPI_CTRL_BL_OFFSET; } /* set clock speed */ @@ -1060,7 +1050,7 @@ static struct spi_imx_devtype_data imx35_cspi_devtype_data = { .rx_available = mx31_rx_available, .reset = mx31_reset, .fifo_size = 8, - .has_dmamode = true, + .has_dmamode = false, .dynamic_burst = false, .has_targetmode = false, .devtype = IMX35_CSPI, diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c index 5d72e3d59df8..c02c4204442f 100644 --- a/drivers/spi/spi-mux.c +++ b/drivers/spi/spi-mux.c @@ -158,12 +158,14 @@ static int spi_mux_probe(struct spi_device *spi) /* supported modes are the same as our parent's */ ctlr->mode_bits = spi->controller->mode_bits; ctlr->flags = spi->controller->flags; + ctlr->bits_per_word_mask = 
spi->controller->bits_per_word_mask; ctlr->transfer_one_message = spi_mux_transfer_one_message; ctlr->setup = spi_mux_setup; ctlr->num_chipselect = mux_control_states(priv->mux); ctlr->bus_num = -1; ctlr->dev.of_node = spi->dev.of_node; ctlr->must_async = true; + ctlr->defer_optimize_message = true; ret = devm_spi_register_controller(&spi->dev, ctlr); if (ret) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 7e3083b83534..002f29dbcea6 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -1277,24 +1277,11 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, /* * Check if this transfer contains only one word; - * OR contains 1 to 4 words, with bits_per_word == 8 and no delay between each word - * OR contains 1 to 2 words, with bits_per_word == 16 and no delay between each word - * - * If one of the two last case is true, this also change the bits_per_word of this - * transfer to make it a bit faster. - * It's not an issue to change the bits_per_word here even if the multi-mode is not - * applicable for this message, the signal on the wire will be the same. 
*/ if (bits_per_word < 8 && tr->len == 1) { /* multi-mode is applicable, only one word (1..7 bits) */ - } else if (tr->word_delay.value == 0 && bits_per_word == 8 && tr->len <= 4) { - /* multi-mode is applicable, only one "bigger" word (8,16,24,32 bits) */ - tr->bits_per_word = tr->len * bits_per_word; - } else if (tr->word_delay.value == 0 && bits_per_word == 16 && tr->len <= 2) { - /* multi-mode is applicable, only one "bigger" word (16,32 bits) */ - tr->bits_per_word = tr->len * bits_per_word / 2; } else if (bits_per_word >= 8 && tr->len == bits_per_word / 8) { - /* multi-mode is applicable, only one word (9..15,17..32 bits) */ + /* multi-mode is applicable, only one word (8..32 bits) */ } else { /* multi-mode is not applicable: more than one word in the transfer */ mcspi->use_multi_mode = false; diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index f1e922fd362a..955c920c4b63 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -349,7 +349,7 @@ static int stm32_qspi_wait_poll_status(struct stm32_qspi *qspi) static int stm32_qspi_get_mode(u8 buswidth) { - if (buswidth == 4) + if (buswidth >= 4) return CCR_BUSWIDTH_4; return buswidth; @@ -653,9 +653,7 @@ static int stm32_qspi_setup(struct spi_device *spi) return -EINVAL; mode = spi->mode & (SPI_TX_OCTAL | SPI_RX_OCTAL); - if ((mode == SPI_TX_OCTAL || mode == SPI_RX_OCTAL) || - ((mode == (SPI_TX_OCTAL | SPI_RX_OCTAL)) && - gpiod_count(qspi->dev, "cs") == -ENOENT)) { + if (mode && gpiod_count(qspi->dev, "cs") == -ENOENT) { dev_err(qspi->dev, "spi-rx-bus-width\\/spi-tx-bus-width\\/cs-gpios\n"); dev_err(qspi->dev, "configuration not supported\n"); @@ -676,10 +674,10 @@ static int stm32_qspi_setup(struct spi_device *spi) qspi->cr_reg = CR_APMS | 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN; /* - * Dual flash mode is only enable in case SPI_TX_OCTAL and SPI_TX_OCTAL - * are both set in spi->mode and "cs-gpios" properties is found in DT + * Dual flash mode is only enable in 
case SPI_TX_OCTAL or SPI_RX_OCTAL + * is set in spi->mode and "cs-gpios" properties is found in DT */ - if (mode == (SPI_TX_OCTAL | SPI_RX_OCTAL)) { + if (mode) { qspi->cr_reg |= CR_DFM; dev_dbg(qspi->dev, "Dual flash mode enable"); } diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 9bc9fd10d538..0f04e832f9ec 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -689,10 +689,12 @@ static int __spi_add_device(struct spi_device *spi) * Make sure that multiple logical CS doesn't map to the same physical CS. * For example, spi->chip_select[0] != spi->chip_select[1] and so on. */ - for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { - status = spi_dev_check_cs(dev, spi, idx, spi, idx + 1); - if (status) - return status; + if (!spi_controller_is_target(ctlr)) { + for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { + status = spi_dev_check_cs(dev, spi, idx, spi, idx + 1); + if (status) + return status; + } } /* Set the bus ID string */ @@ -2149,7 +2151,8 @@ static void __spi_unoptimize_message(struct spi_message *msg) */ static void spi_maybe_unoptimize_message(struct spi_message *msg) { - if (!msg->pre_optimized && msg->optimized) + if (!msg->pre_optimized && msg->optimized && + !msg->spi->controller->defer_optimize_message) __spi_unoptimize_message(msg); } @@ -4156,7 +4159,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) return -EINVAL; if (xfer->tx_nbits != SPI_NBITS_SINGLE && xfer->tx_nbits != SPI_NBITS_DUAL && - xfer->tx_nbits != SPI_NBITS_QUAD) + xfer->tx_nbits != SPI_NBITS_QUAD && + xfer->tx_nbits != SPI_NBITS_OCTAL) return -EINVAL; if ((xfer->tx_nbits == SPI_NBITS_DUAL) && !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD))) @@ -4171,7 +4175,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) return -EINVAL; if (xfer->rx_nbits != SPI_NBITS_SINGLE && xfer->rx_nbits != SPI_NBITS_DUAL && - xfer->rx_nbits != SPI_NBITS_QUAD) + xfer->rx_nbits != SPI_NBITS_QUAD && + xfer->rx_nbits != SPI_NBITS_OCTAL) return -EINVAL; 
if ((xfer->rx_nbits == SPI_NBITS_DUAL) && !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD))) @@ -4290,6 +4295,11 @@ static int __spi_optimize_message(struct spi_device *spi, static int spi_maybe_optimize_message(struct spi_device *spi, struct spi_message *msg) { + if (spi->controller->defer_optimize_message) { + msg->spi = spi; + return 0; + } + if (msg->pre_optimized) return 0; @@ -4320,6 +4330,13 @@ int spi_optimize_message(struct spi_device *spi, struct spi_message *msg) { int ret; + /* + * Pre-optimization is not supported and optimization is deferred e.g. + * when using spi-mux. + */ + if (spi->controller->defer_optimize_message) + return 0; + ret = __spi_optimize_message(spi, msg); if (ret) return ret; @@ -4346,6 +4363,9 @@ EXPORT_SYMBOL_GPL(spi_optimize_message); */ void spi_unoptimize_message(struct spi_message *msg) { + if (msg->spi->controller->defer_optimize_message) + return; + __spi_unoptimize_message(msg); msg->pre_optimized = false; } @@ -4428,8 +4448,6 @@ int spi_async(struct spi_device *spi, struct spi_message *message) spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags); - spi_maybe_unoptimize_message(message); - return ret; } EXPORT_SYMBOL_GPL(spi_async); diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 69daeba974f2..5f518e5a9273 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -707,7 +707,7 @@ int vchiq_initialise(struct vchiq_state *state, struct vchiq_instance **instance * block forever. 
*/ for (i = 0; i < VCHIQ_INIT_RETRIES; i++) { - if (state) + if (vchiq_remote_initialised(state)) break; usleep_range(500, 600); } @@ -1202,7 +1202,7 @@ void vchiq_dump_platform_instances(struct vchiq_state *state, struct seq_file *f { int i; - if (!state) + if (!vchiq_remote_initialised(state)) return; /* diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h index 8af209e34fb2..382ec08f6a14 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h @@ -413,6 +413,11 @@ struct vchiq_state { struct opaque_platform_state *platform_state; }; +static inline bool vchiq_remote_initialised(const struct vchiq_state *state) +{ + return state->remote && state->remote->initialised; +} + struct bulk_waiter { struct vchiq_bulk *bulk; struct completion event; diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c index 1f74d0bb33ba..d5f7f61c5626 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c @@ -138,7 +138,7 @@ void vchiq_debugfs_deinit(void) #else /* CONFIG_DEBUG_FS */ -void vchiq_debugfs_init(void) +void vchiq_debugfs_init(struct vchiq_state *state) { } diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c index 3c63347d2d08..430f2ed2ccd3 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c @@ -1170,6 +1170,11 @@ static int vchiq_open(struct inode *inode, struct file *file) dev_dbg(state->dev, "arm: vchiq open\n"); + if (!vchiq_remote_initialised(state)) { + dev_dbg(state->dev, "arm: vchiq has no connection to VideoCore\n"); + return 
-ENOTCONN; + } + instance = kzalloc(sizeof(*instance), GFP_KERNEL); if (!instance) return -ENOMEM; @@ -1200,7 +1205,7 @@ static int vchiq_release(struct inode *inode, struct file *file) dev_dbg(state->dev, "arm: instance=%p\n", instance); - if (!state) { + if (!vchiq_remote_initialised(state)) { ret = -EPERM; goto out; } diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c index 3235e1c719e8..3e73efa51bba 100644 --- a/drivers/tee/optee/ffa_abi.c +++ b/drivers/tee/optee/ffa_abi.c @@ -660,7 +660,9 @@ static bool optee_ffa_api_is_compatbile(struct ffa_device *ffa_dev, const struct ffa_ops *ops) { const struct ffa_msg_ops *msg_ops = ops->msg_ops; - struct ffa_send_direct_data data = { OPTEE_FFA_GET_API_VERSION }; + struct ffa_send_direct_data data = { + .data0 = OPTEE_FFA_GET_API_VERSION, + }; int rc; msg_ops->mode_32bit_set(ffa_dev); @@ -677,7 +679,9 @@ static bool optee_ffa_api_is_compatbile(struct ffa_device *ffa_dev, return false; } - data = (struct ffa_send_direct_data){ OPTEE_FFA_GET_OS_VERSION }; + data = (struct ffa_send_direct_data){ + .data0 = OPTEE_FFA_GET_OS_VERSION, + }; rc = msg_ops->sync_send_receive(ffa_dev, &data); if (rc) { pr_err("Unexpected error %d\n", rc); @@ -698,7 +702,9 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev, unsigned int *rpc_param_count, unsigned int *max_notif_value) { - struct ffa_send_direct_data data = { OPTEE_FFA_EXCHANGE_CAPABILITIES }; + struct ffa_send_direct_data data = { + .data0 = OPTEE_FFA_EXCHANGE_CAPABILITIES, + }; int rc; rc = ops->msg_ops->sync_send_receive(ffa_dev, &data); diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 45f04a25255a..1b2345a697c5 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -759,6 +759,9 @@ static void power_allocator_manage(struct thermal_zone_device *tz) return; } + if (!params->trip_max) + return; + allocate_power(tz, params->trip_max->temperature); 
params->update_cdevs = true; } diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c index 65974fe8be0d..fd5527188cf9 100644 --- a/drivers/thermal/gov_step_wise.c +++ b/drivers/thermal/gov_step_wise.c @@ -55,7 +55,11 @@ static unsigned long get_target_state(struct thermal_instance *instance, if (cur_state <= instance->lower) return THERMAL_NO_TARGET; - return clamp(cur_state - 1, instance->lower, instance->upper); + /* + * If 'throttle' is false, no mitigation is necessary, so + * request the lower state for this instance. + */ + return instance->lower; } return instance->target; @@ -93,23 +97,6 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, if (instance->initialized && old_target == instance->target) continue; - if (trip->type == THERMAL_TRIP_PASSIVE) { - /* - * If the target state for this thermal instance - * changes from THERMAL_NO_TARGET to something else, - * ensure that the zone temperature will be updated - * (assuming enabled passive cooling) until it becomes - * THERMAL_NO_TARGET again, or the cooling device may - * not be reset to its initial state. 
- */ - if (old_target == THERMAL_NO_TARGET && - instance->target != THERMAL_NO_TARGET) - tz->passive++; - else if (old_target != THERMAL_NO_TARGET && - instance->target == THERMAL_NO_TARGET) - tz->passive--; - } - instance->initialized = true; mutex_lock(&instance->cdev->lock); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index 14e34eabc419..4a1bfebb1b8e 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -150,7 +150,7 @@ static irqreturn_t proc_thermal_irq_handler(int irq, void *devid) { struct proc_thermal_pci *pci_info = devid; struct proc_thermal_device *proc_priv; - int ret = IRQ_HANDLED; + int ret = IRQ_NONE; u32 status; proc_priv = pci_info->proc_priv; @@ -175,6 +175,7 @@ static irqreturn_t proc_thermal_irq_handler(int irq, void *devid) /* Disable enable interrupt flag */ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0); pkg_thermal_schedule_work(&pci_info->work); + ret = IRQ_HANDLED; } pci_write_config_byte(pci_info->pdev, 0xdc, 0x01); diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index 0bb3a495b56e..819ed0110f3e 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ b/drivers/thermal/mediatek/lvts_thermal.c @@ -769,7 +769,11 @@ static int lvts_golden_temp_init(struct device *dev, u8 *calib, */ gt = (((u32 *)calib)[0] >> lvts_data->gt_calib_bit_offset) & 0xff; - if (gt && gt < LVTS_GOLDEN_TEMP_MAX) + /* A zero value for gt means that device has invalid efuse data */ + if (!gt) + return -ENODATA; + + if (gt < LVTS_GOLDEN_TEMP_MAX) golden_temp = gt; golden_temp_offset = golden_temp * 500 + lvts_data->temp_offset; @@ -1458,7 +1462,6 @@ static const struct lvts_ctrl_data mt8188_lvts_mcu_data_ctrl[] = { }, VALID_SENSOR_MAP(1, 1, 1, 1), .offset = 0x0, - .mode = 
LVTS_MSR_FILTERED_MODE, }, { .lvts_sensor = { @@ -1469,7 +1472,6 @@ static const struct lvts_ctrl_data mt8188_lvts_mcu_data_ctrl[] = { }, VALID_SENSOR_MAP(1, 1, 0, 0), .offset = 0x100, - .mode = LVTS_MSR_FILTERED_MODE, } }; @@ -1483,7 +1485,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = { }, VALID_SENSOR_MAP(0, 1, 0, 0), .offset = 0x0, - .mode = LVTS_MSR_FILTERED_MODE, }, { .lvts_sensor = { @@ -1496,7 +1497,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = { }, VALID_SENSOR_MAP(1, 1, 1, 0), .offset = 0x100, - .mode = LVTS_MSR_FILTERED_MODE, }, { .lvts_sensor = { @@ -1507,7 +1507,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = { }, VALID_SENSOR_MAP(1, 1, 0, 0), .offset = 0x200, - .mode = LVTS_MSR_FILTERED_MODE, }, { .lvts_sensor = { @@ -1518,7 +1517,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = { }, VALID_SENSOR_MAP(1, 1, 0, 0), .offset = 0x300, - .mode = LVTS_MSR_FILTERED_MODE, } }; diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index d70e76dd3c94..ecc748d15eb7 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -300,6 +300,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz) thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies); else if (tz->polling_delay_jiffies) thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies); + else if (tz->temperature == THERMAL_TEMP_INVALID) + thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS)); } static struct thermal_governor *thermal_get_tz_governor(struct thermal_zone_device *tz) @@ -482,16 +484,14 @@ static void thermal_trip_crossed(struct thermal_zone_device *tz, thermal_governor_trip_crossed(governor, tz, trip, crossed_up); } -static int thermal_trip_notify_cmp(void *ascending, const struct list_head *a, +static int thermal_trip_notify_cmp(void *not_used, const struct list_head *a, const struct list_head *b) { struct 
thermal_trip_desc *tda = container_of(a, struct thermal_trip_desc, notify_list_node); struct thermal_trip_desc *tdb = container_of(b, struct thermal_trip_desc, notify_list_node); - int ret = tdb->notify_temp - tda->notify_temp; - - return ascending ? ret : -ret; + return tda->notify_temp - tdb->notify_temp; } void __thermal_zone_device_update(struct thermal_zone_device *tz, @@ -511,7 +511,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, update_temperature(tz); if (tz->temperature == THERMAL_TEMP_INVALID) - return; + goto monitor; __thermal_zone_set_trips(tz); @@ -520,12 +520,12 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, for_each_trip_desc(tz, td) handle_thermal_trip(tz, td, &way_up_list, &way_down_list); - list_sort(&way_up_list, &way_up_list, thermal_trip_notify_cmp); + list_sort(NULL, &way_up_list, thermal_trip_notify_cmp); list_for_each_entry(td, &way_up_list, notify_list_node) thermal_trip_crossed(tz, &td->trip, governor, true); list_sort(NULL, &way_down_list, thermal_trip_notify_cmp); - list_for_each_entry(td, &way_down_list, notify_list_node) + list_for_each_entry_reverse(td, &way_down_list, notify_list_node) thermal_trip_crossed(tz, &td->trip, governor, false); if (governor->manage) @@ -533,6 +533,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, thermal_debug_update_trip_stats(tz); +monitor: monitor_thermal_zone(tz); } @@ -1406,6 +1407,7 @@ thermal_zone_device_register_with_trips(const char *type, ida_init(&tz->ida); mutex_init(&tz->lock); init_completion(&tz->removal); + init_completion(&tz->resume); id = ida_alloc(&thermal_tz_ida, GFP_KERNEL); if (id < 0) { result = id; @@ -1651,6 +1653,9 @@ static void thermal_zone_device_resume(struct work_struct *work) thermal_zone_device_init(tz); __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); + complete(&tz->resume); + tz->resuming = false; + mutex_unlock(&tz->lock); } @@ -1668,6 +1673,20 @@ static int thermal_pm_notify(struct 
notifier_block *nb, list_for_each_entry(tz, &thermal_tz_list, node) { mutex_lock(&tz->lock); + if (tz->resuming) { + /* + * thermal_zone_device_resume() queued up for + * this zone has not acquired the lock yet, so + * release it to let the function run and wait + * util it has done the work. + */ + mutex_unlock(&tz->lock); + + wait_for_completion(&tz->resume); + + mutex_lock(&tz->lock); + } + tz->suspended = true; mutex_unlock(&tz->lock); @@ -1685,6 +1704,9 @@ static int thermal_pm_notify(struct notifier_block *nb, cancel_delayed_work(&tz->poll_queue); + reinit_completion(&tz->resume); + tz->resuming = true; + /* * Replace the work function with the resume one, which * will restore the original work function and schedule @@ -1709,6 +1731,12 @@ static int thermal_pm_notify(struct notifier_block *nb, static struct notifier_block thermal_pm_nb = { .notifier_call = thermal_pm_notify, + /* + * Run at the lowest priority to avoid interference between the thermal + * zone resume work items spawned by thermal_pm_notify() and the other + * PM notifiers. 
+ */ + .priority = INT_MIN, }; static int __init thermal_init(void) diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 20e7b45673d6..94eeb4011a48 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -55,6 +55,7 @@ struct thermal_governor { * @type: the thermal zone device type * @device: &struct device for this thermal zone * @removal: removal completion + * @resume: resume completion * @trip_temp_attrs: attributes for trip points for sysfs: trip temperature * @trip_type_attrs: attributes for trip points for sysfs: trip type * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis @@ -89,6 +90,7 @@ struct thermal_governor { * @poll_queue: delayed work for polling * @notify_event: Last notification event * @suspended: thermal zone suspend indicator + * @resuming: indicates whether or not thermal zone resume is in progress * @trips: array of struct thermal_trip objects */ struct thermal_zone_device { @@ -96,6 +98,7 @@ struct thermal_zone_device { char type[THERMAL_NAME_LENGTH]; struct device device; struct completion removal; + struct completion resume; struct attribute_group trips_attribute_group; struct thermal_attr *trip_temp_attrs; struct thermal_attr *trip_type_attrs; @@ -123,12 +126,19 @@ struct thermal_zone_device { struct delayed_work poll_queue; enum thermal_notify_event notify_event; bool suspended; + bool resuming; #ifdef CONFIG_THERMAL_DEBUGFS struct thermal_debugfs *debugfs; #endif struct thermal_trip_desc trips[] __counted_by(num_trips); }; +/* + * Default delay after a failing thermal zone temperature check before + * attempting to check it again. 
+ */ +#define THERMAL_RECHECK_DELAY_MS 250 + /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) #define DEFAULT_THERMAL_GOVERNOR "step_wise" diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c index 458bb1280ebf..5b97e420a95f 100644 --- a/drivers/tty/mxser.c +++ b/drivers/tty/mxser.c @@ -288,7 +288,7 @@ struct mxser_board { enum mxser_must_hwid must_hwid; speed_t max_baud; - struct mxser_port ports[] __counted_by(nports); + struct mxser_port ports[] /* __counted_by(nports) */; }; static DECLARE_BITMAP(mxser_boards, MXSER_BOARDS); diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index ff15022369e4..b0adafc44747 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -15,7 +15,6 @@ */ #include <linux/acpi.h> -#include <linux/cleanup.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/ioport.h> @@ -42,8 +41,6 @@ #include <asm/irq.h> -#include "../serial_base.h" /* For serial_base_add_isa_preferred_console() */ - #include "8250.h" /* @@ -563,8 +560,6 @@ static void __init serial8250_isa_init_ports(void) port->irqflags |= irqflag; if (serial8250_isa_config != NULL) serial8250_isa_config(i, &up->port, &up->capabilities); - - serial_base_add_isa_preferred_console(serial8250_reg.dev_name, i); } } diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 170639d12b2a..1af9aed99c65 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -115,6 +115,10 @@ /* RX FIFO occupancy indicator */ #define UART_OMAP_RX_LVL 0x19 +/* Timeout low and High */ +#define UART_OMAP_TO_L 0x26 +#define UART_OMAP_TO_H 0x27 + /* * Copy of the genpd flags for the console. 
* Only used if console suspend is disabled @@ -663,13 +667,25 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) /* * On K3 SoCs, it is observed that RX TIMEOUT is signalled after - * FIFO has been drained, in which case a dummy read of RX FIFO - * is required to clear RX TIMEOUT condition. + * FIFO has been drained or erroneously. + * So apply solution of Errata i2310 as mentioned in + * https://www.ti.com/lit/pdf/sprz536 */ if (priv->habit & UART_RX_TIMEOUT_QUIRK && (iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT && serial_port_in(port, UART_OMAP_RX_LVL) == 0) { - serial_port_in(port, UART_RX); + unsigned char efr2, timeout_h, timeout_l; + + efr2 = serial_in(up, UART_OMAP_EFR2); + timeout_h = serial_in(up, UART_OMAP_TO_H); + timeout_l = serial_in(up, UART_OMAP_TO_L); + serial_out(up, UART_OMAP_TO_H, 0xFF); + serial_out(up, UART_OMAP_TO_L, 0xFF); + serial_out(up, UART_OMAP_EFR2, UART_OMAP_EFR2_TIMEOUT_BEHAVE); + serial_in(up, UART_IIR); + serial_out(up, UART_OMAP_EFR2, efr2); + serial_out(up, UART_OMAP_TO_H, timeout_h); + serial_out(up, UART_OMAP_TO_L, timeout_l); } /* Stop processing interrupts on input overrun */ diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 40af74b55933..e1d7aa2fa347 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1985,6 +1985,17 @@ enum { MOXA_SUPP_RS485 = BIT(2), }; +static unsigned short moxa_get_nports(unsigned short device) +{ + switch (device) { + case PCI_DEVICE_ID_MOXA_CP116E_A_A: + case PCI_DEVICE_ID_MOXA_CP116E_A_B: + return 8; + } + + return FIELD_GET(0x00F0, device); +} + static bool pci_moxa_is_mini_pcie(unsigned short device) { if (device == PCI_DEVICE_ID_MOXA_CP102N || @@ -2038,7 +2049,7 @@ static int pci_moxa_init(struct pci_dev *dev) { unsigned short device = dev->device; resource_size_t iobar_addr = pci_resource_start(dev, 2); - unsigned int num_ports = (device & 0x00F0) >> 4, i; + unsigned int i, num_ports = 
moxa_get_nports(device); u8 val, init_mode = MOXA_RS232; if (!(pci_moxa_supported_rs(dev) & MOXA_SUPP_RS232)) { diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c index 34801a6f300b..b88cc28c94e3 100644 --- a/drivers/tty/serial/bcm63xx_uart.c +++ b/drivers/tty/serial/bcm63xx_uart.c @@ -308,8 +308,8 @@ static void bcm_uart_do_tx(struct uart_port *port) val = bcm_uart_readl(port, UART_MCTL_REG); val = (val & UART_MCTL_TXFIFOFILL_MASK) >> UART_MCTL_TXFIFOFILL_SHIFT; - - pending = uart_port_tx_limited(port, ch, port->fifosize - val, + pending = uart_port_tx_limited_flags(port, ch, UART_TX_NOSTOP, + port->fifosize - val, true, bcm_uart_writel(port, ch, UART_FIFO_REG), ({})); @@ -320,6 +320,9 @@ static void bcm_uart_do_tx(struct uart_port *port) val = bcm_uart_readl(port, UART_IR_REG); val &= ~UART_TX_INT_MASK; bcm_uart_writel(port, val, UART_IR_REG); + + if (uart_tx_stopped(port)) + bcm_uart_stop_tx(port); } /* diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 2eb22594960f..ff32cd2d2863 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -120,6 +120,7 @@ #define UCR4_OREN (1<<1) /* Receiver overrun interrupt enable */ #define UCR4_DREN (1<<0) /* Recv data ready interrupt enable */ #define UFCR_RXTL_SHF 0 /* Receiver trigger level shift */ +#define UFCR_RXTL_MASK 0x3F /* Receiver trigger 6 bits wide */ #define UFCR_DCEDTE (1<<6) /* DCE/DTE mode select */ #define UFCR_RFDIV (7<<7) /* Reference freq divider mask */ #define UFCR_RFDIV_REG(x) (((x) < 7 ? 
6 - (x) : 6) << 7) @@ -1551,6 +1552,7 @@ static void imx_uart_shutdown(struct uart_port *port) struct imx_port *sport = (struct imx_port *)port; unsigned long flags; u32 ucr1, ucr2, ucr4, uts; + int loops; if (sport->dma_is_enabled) { dmaengine_terminate_sync(sport->dma_chan_tx); @@ -1613,6 +1615,56 @@ static void imx_uart_shutdown(struct uart_port *port) ucr4 &= ~UCR4_TCEN; imx_uart_writel(sport, ucr4, UCR4); + /* + * We have to ensure the tx state machine ends up in OFF. This + * is especially important for rs485 where we must not leave + * the RTS signal high, blocking the bus indefinitely. + * + * All interrupts are now disabled, so imx_uart_stop_tx() will + * no longer be called from imx_uart_transmit_buffer(). It may + * still be called via the hrtimers, and if those are in play, + * we have to honour the delays. + */ + if (sport->tx_state == WAIT_AFTER_RTS || sport->tx_state == SEND) + imx_uart_stop_tx(port); + + /* + * In many cases (rs232 mode, or if tx_state was + * WAIT_AFTER_RTS, or if tx_state was SEND and there is no + * delay_rts_after_send), this will have moved directly to + * OFF. In rs485 mode, tx_state might already have been + * WAIT_AFTER_SEND and the hrtimer thus already started, or + * the above imx_uart_stop_tx() call could have started it. In + * those cases, we have to wait for the hrtimer to fire and + * complete the transition to OFF. + */ + loops = port->rs485.flags & SER_RS485_ENABLED ? + port->rs485.delay_rts_after_send : 0; + while (sport->tx_state != OFF && loops--) { + uart_port_unlock_irqrestore(&sport->port, flags); + msleep(1); + uart_port_lock_irqsave(&sport->port, &flags); + } + + if (sport->tx_state != OFF) { + dev_warn(sport->port.dev, "unexpected tx_state %d\n", + sport->tx_state); + /* + * This machine may be busted, but ensure the RTS + * signal is inactive in order not to block other + * devices. 
+ */ + if (port->rs485.flags & SER_RS485_ENABLED) { + ucr2 = imx_uart_readl(sport, UCR2); + if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND) + imx_uart_rts_active(sport, &ucr2); + else + imx_uart_rts_inactive(sport, &ucr2); + imx_uart_writel(sport, ucr2, UCR2); + } + sport->tx_state = OFF; + } + uart_port_unlock_irqrestore(&sport->port, flags); clk_disable_unprepare(sport->clk_per); @@ -1933,7 +1985,7 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio struct serial_rs485 *rs485conf) { struct imx_port *sport = (struct imx_port *)port; - u32 ucr2; + u32 ucr2, ufcr; if (rs485conf->flags & SER_RS485_ENABLED) { /* Enable receiver if low-active RTS signal is requested */ @@ -1952,8 +2004,13 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio /* Make sure Rx is enabled in case Tx is active with Rx disabled */ if (!(rs485conf->flags & SER_RS485_ENABLED) || - rs485conf->flags & SER_RS485_RX_DURING_TX) + rs485conf->flags & SER_RS485_RX_DURING_TX) { + /* If the receiver trigger is 0, set it to a default value */ + ufcr = imx_uart_readl(sport, UFCR); + if ((ufcr & UFCR_RXTL_MASK) == 0) + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); imx_uart_start_rx(port); + } return 0; } diff --git a/drivers/tty/serial/ma35d1_serial.c b/drivers/tty/serial/ma35d1_serial.c index 19f0a305cc43..3b4206e815fe 100644 --- a/drivers/tty/serial/ma35d1_serial.c +++ b/drivers/tty/serial/ma35d1_serial.c @@ -688,12 +688,13 @@ static int ma35d1serial_probe(struct platform_device *pdev) struct uart_ma35d1_port *up; int ret = 0; - if (pdev->dev.of_node) { - ret = of_alias_get_id(pdev->dev.of_node, "serial"); - if (ret < 0) { - dev_err(&pdev->dev, "failed to get alias/pdev id, errno %d\n", ret); - return ret; - } + if (!pdev->dev.of_node) + return -ENODEV; + + ret = of_alias_get_id(pdev->dev.of_node, "serial"); + if (ret < 0) { + dev_err(&pdev->dev, "failed to get alias/pdev id, errno %d\n", ret); + return ret; } up = 
&ma35d1serial_ports[ret]; up->port.line = ret; diff --git a/drivers/tty/serial/mcf.c b/drivers/tty/serial/mcf.c index b0604d6da025..58858dd352c5 100644 --- a/drivers/tty/serial/mcf.c +++ b/drivers/tty/serial/mcf.c @@ -462,7 +462,7 @@ static const struct uart_ops mcf_uart_ops = { .verify_port = mcf_verify_port, }; -static struct mcf_uart mcf_ports[4]; +static struct mcf_uart mcf_ports[10]; #define MCF_MAXPORTS ARRAY_SIZE(mcf_ports) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 2bd25afe0d92..69a632fefc41 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -649,15 +649,25 @@ static void qcom_geni_serial_start_tx_dma(struct uart_port *uport) static void qcom_geni_serial_start_tx_fifo(struct uart_port *uport) { + unsigned char c; u32 irq_en; - if (qcom_geni_serial_main_active(uport) || - !qcom_geni_serial_tx_empty(uport)) - return; + /* + * Start a new transfer in case the previous command was cancelled and + * left data in the FIFO which may prevent the watermark interrupt + * from triggering. Note that the stale data is discarded. 
+ */ + if (!qcom_geni_serial_main_active(uport) && + !qcom_geni_serial_tx_empty(uport)) { + if (uart_fifo_out(uport, &c, 1) == 1) { + writel(M_CMD_DONE_EN, uport->membase + SE_GENI_M_IRQ_CLEAR); + qcom_geni_serial_setup_tx(uport, 1); + writel(c, uport->membase + SE_GENI_TX_FIFOn); + } + } irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN); irq_en |= M_TX_FIFO_WATERMARK_EN | M_CMD_DONE_EN; - writel(DEF_TX_WM, uport->membase + SE_GENI_TX_WATERMARK_REG); writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN); } @@ -665,13 +675,17 @@ static void qcom_geni_serial_start_tx_fifo(struct uart_port *uport) static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport) { u32 irq_en; - struct qcom_geni_serial_port *port = to_dev_port(uport); irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN); irq_en &= ~(M_CMD_DONE_EN | M_TX_FIFO_WATERMARK_EN); writel(0, uport->membase + SE_GENI_TX_WATERMARK_REG); writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN); - /* Possible stop tx is called multiple times. */ +} + +static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport) +{ + struct qcom_geni_serial_port *port = to_dev_port(uport); + if (!qcom_geni_serial_main_active(uport)) return; @@ -684,6 +698,8 @@ static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport) writel(M_CMD_ABORT_EN, uport->membase + SE_GENI_M_IRQ_CLEAR); } writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR); + + port->tx_remaining = 0; } static void qcom_geni_serial_handle_rx_fifo(struct uart_port *uport, bool drop) @@ -862,7 +878,7 @@ static void qcom_geni_serial_send_chunk_fifo(struct uart_port *uport, memset(buf, 0, sizeof(buf)); tx_bytes = min(remaining, BYTES_PER_FIFO_WORD); - tx_bytes = uart_fifo_out(uport, buf, tx_bytes); + uart_fifo_out(uport, buf, tx_bytes); iowrite32_rep(uport->membase + SE_GENI_TX_FIFOn, buf, 1); @@ -890,13 +906,17 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport, else pending = kfifo_len(&tport->xmit_fifo); - /* All data has been transmitted and 
acknowledged as received */ - if (!pending && !status && done) { + /* All data has been transmitted or command has been cancelled */ + if (!pending && done) { qcom_geni_serial_stop_tx_fifo(uport); goto out_write_wakeup; } - avail = port->tx_fifo_depth - (status & TX_FIFO_WC); + if (active) + avail = port->tx_fifo_depth - (status & TX_FIFO_WC); + else + avail = port->tx_fifo_depth; + avail *= BYTES_PER_FIFO_WORD; chunk = min(avail, pending); @@ -1069,11 +1089,15 @@ static void qcom_geni_serial_shutdown(struct uart_port *uport) { disable_irq(uport->irq); - if (uart_console(uport)) - return; - qcom_geni_serial_stop_tx(uport); qcom_geni_serial_stop_rx(uport); + + qcom_geni_serial_cancel_tx_cmd(uport); +} + +static void qcom_geni_serial_flush_buffer(struct uart_port *uport) +{ + qcom_geni_serial_cancel_tx_cmd(uport); } static int qcom_geni_serial_port_setup(struct uart_port *uport) @@ -1532,6 +1556,7 @@ static const struct uart_ops qcom_geni_console_pops = { .request_port = qcom_geni_serial_request_port, .config_port = qcom_geni_serial_config_port, .shutdown = qcom_geni_serial_shutdown, + .flush_buffer = qcom_geni_serial_flush_buffer, .type = qcom_geni_serial_get_type, .set_mctrl = qcom_geni_serial_set_mctrl, .get_mctrl = qcom_geni_serial_get_mctrl, diff --git a/drivers/tty/serial/serial_base.h b/drivers/tty/serial/serial_base.h index 743a72ac34f3..b6c38d2edfd4 100644 --- a/drivers/tty/serial/serial_base.h +++ b/drivers/tty/serial/serial_base.h @@ -49,33 +49,3 @@ void serial_ctrl_unregister_port(struct uart_driver *drv, struct uart_port *port int serial_core_register_port(struct uart_driver *drv, struct uart_port *port); void serial_core_unregister_port(struct uart_driver *drv, struct uart_port *port); - -#ifdef CONFIG_SERIAL_CORE_CONSOLE - -int serial_base_add_preferred_console(struct uart_driver *drv, - struct uart_port *port); - -#else - -static inline -int serial_base_add_preferred_console(struct uart_driver *drv, - struct uart_port *port) -{ - return 0; -} - 
-#endif - -#ifdef CONFIG_SERIAL_8250_CONSOLE - -int serial_base_add_isa_preferred_console(const char *name, int idx); - -#else - -static inline -int serial_base_add_isa_preferred_console(const char *name, int idx) -{ - return 0; -} - -#endif diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c index 73c6ee540c83..4df2a4b10445 100644 --- a/drivers/tty/serial/serial_base_bus.c +++ b/drivers/tty/serial/serial_base_bus.c @@ -8,7 +8,6 @@ * The serial core bus manages the serial core controller instances. */ -#include <linux/cleanup.h> #include <linux/container_of.h> #include <linux/device.h> #include <linux/idr.h> @@ -205,134 +204,6 @@ void serial_base_port_device_remove(struct serial_port_device *port_dev) put_device(&port_dev->dev); } -#ifdef CONFIG_SERIAL_CORE_CONSOLE - -static int serial_base_add_one_prefcon(const char *match, const char *dev_name, - int port_id) -{ - int ret; - - ret = add_preferred_console_match(match, dev_name, port_id); - if (ret == -ENOENT) - return 0; - - return ret; -} - -#ifdef __sparc__ - -/* Handle Sparc ttya and ttyb options as done in console_setup() */ -static int serial_base_add_sparc_console(const char *dev_name, int idx) -{ - const char *name; - - switch (idx) { - case 0: - name = "ttya"; - break; - case 1: - name = "ttyb"; - break; - default: - return 0; - } - - return serial_base_add_one_prefcon(name, dev_name, idx); -} - -#else - -static inline int serial_base_add_sparc_console(const char *dev_name, int idx) -{ - return 0; -} - -#endif - -static int serial_base_add_prefcon(const char *name, int idx) -{ - const char *char_match __free(kfree) = NULL; - const char *nmbr_match __free(kfree) = NULL; - int ret; - - /* Handle ttyS specific options */ - if (strstarts(name, "ttyS")) { - /* No name, just a number */ - nmbr_match = kasprintf(GFP_KERNEL, "%i", idx); - if (!nmbr_match) - return -ENODEV; - - ret = serial_base_add_one_prefcon(nmbr_match, name, idx); - if (ret) - return ret; - - /* Sparc ttya 
and ttyb */ - ret = serial_base_add_sparc_console(name, idx); - if (ret) - return ret; - } - - /* Handle the traditional character device name style console=ttyS0 */ - char_match = kasprintf(GFP_KERNEL, "%s%i", name, idx); - if (!char_match) - return -ENOMEM; - - return serial_base_add_one_prefcon(char_match, name, idx); -} - -/** - * serial_base_add_preferred_console - Adds a preferred console - * @drv: Serial port device driver - * @port: Serial port instance - * - * Tries to add a preferred console for a serial port if specified in the - * kernel command line. Supports both the traditional character device such - * as console=ttyS0, and a hardware addressing based console=DEVNAME:0.0 - * style name. - * - * Translates the kernel command line option using a hardware based addressing - * console=DEVNAME:0.0 to the serial port character device such as ttyS0. - * Cannot be called early for ISA ports, depends on struct device. - * - * Note that duplicates are ignored by add_preferred_console(). - * - * Return: 0 on success, negative error code on failure. - */ -int serial_base_add_preferred_console(struct uart_driver *drv, - struct uart_port *port) -{ - const char *port_match __free(kfree) = NULL; - int ret; - - ret = serial_base_add_prefcon(drv->dev_name, port->line); - if (ret) - return ret; - - port_match = kasprintf(GFP_KERNEL, "%s:%i.%i", dev_name(port->dev), - port->ctrl_id, port->port_id); - if (!port_match) - return -ENOMEM; - - /* Translate a hardware addressing style console=DEVNAME:0.0 */ - return serial_base_add_one_prefcon(port_match, drv->dev_name, port->line); -} - -#endif - -#ifdef CONFIG_SERIAL_8250_CONSOLE - -/* - * Early ISA ports initialize the console before there is no struct device. - * This should be only called from serial8250_isa_init_preferred_console(), - * other callers are likely wrong and should rely on earlycon instead. 
- */ -int serial_base_add_isa_preferred_console(const char *name, int idx) -{ - return serial_base_add_prefcon(name, idx); -} - -#endif - static int serial_base_init(void) { int ret; diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 0c4d60976663..2a8006e3d687 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -3422,10 +3422,6 @@ int serial_core_register_port(struct uart_driver *drv, struct uart_port *port) if (ret) goto err_unregister_ctrl_dev; - ret = serial_base_add_preferred_console(drv, port); - if (ret) - goto err_unregister_port_dev; - ret = serial_core_add_one_port(drv, port); if (ret) goto err_unregister_port_dev; diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 8944548c30fa..c532416aec22 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -105,16 +105,15 @@ EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac); * @hba: per adapter instance * @req: pointer to the request to be issued * - * Return: the hardware queue instance on which the request would - * be queued. + * Return: the hardware queue instance on which the request will be or has + * been queued. %NULL if the request has already been freed. */ struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba, struct request *req) { - u32 utag = blk_mq_unique_tag(req); - u32 hwq = blk_mq_unique_tag_to_hwq(utag); + struct blk_mq_hw_ctx *hctx = READ_ONCE(req->mq_hctx); - return &hba->uhq[hwq]; + return hctx ? 
&hba->uhq[hctx->queue_num] : NULL; } /** @@ -515,6 +514,8 @@ int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag) if (!cmd) return -EINVAL; hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) + return 0; } else { hwq = hba->dev_cmd_queue; } diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index e5e9da61f15d..46433ecf0c4d 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6456,6 +6456,8 @@ static bool ufshcd_abort_one(struct request *rq, void *priv) /* Release cmd in MCQ mode if abort succeeds */ if (is_mcq_enabled(hba) && (*ret == 0)) { hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd)); + if (!hwq) + return 0; spin_lock_irqsave(&hwq->cq_lock, flags); if (ufshcd_cmd_inflight(lrbp->cmd)) ufshcd_release_scsi_cmd(hba, lrbp); @@ -8787,6 +8789,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params) (hba->quirks & UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH)) { /* Reset the device and controller before doing reinit */ ufshcd_device_reset(hba); + ufs_put_device_desc(hba); ufshcd_hba_stop(hba); ufshcd_vops_reinit_notify(hba); ret = ufshcd_hba_enable(hba); diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index 4ce7cba2b48a..8f3b9a0a38e1 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -1131,6 +1131,7 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, struct cxacru_data *instance; struct usb_device *usb_dev = interface_to_usbdev(intf); struct usb_host_endpoint *cmd_ep = usb_dev->ep_in[CXACRU_EP_CMD]; + struct usb_endpoint_descriptor *in, *out; int ret; /* instance init */ @@ -1177,6 +1178,19 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, goto fail; } + if (usb_endpoint_xfer_int(&cmd_ep->desc)) + ret = usb_find_common_endpoints(intf->cur_altsetting, + NULL, NULL, &in, &out); + else + ret = usb_find_common_endpoints(intf->cur_altsetting, + &in, &out, NULL, NULL); + + if (ret) { + usb_err(usbatm_instance, "cxacru_bind: 
interface has incorrect endpoints\n"); + ret = -ENODEV; + goto fail; + } + if ((cmd_ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT) { usb_fill_int_urb(instance->rcv_urb, diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 3362af165ef5..880d52c0949d 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -291,6 +291,20 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, if (ifp->desc.bNumEndpoints >= num_ep) goto skip_to_next_endpoint_or_interface_descriptor; + /* Save a copy of the descriptor and use it instead of the original */ + endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; + memcpy(&endpoint->desc, d, n); + d = &endpoint->desc; + + /* Clear the reserved bits in bEndpointAddress */ + i = d->bEndpointAddress & + (USB_ENDPOINT_DIR_MASK | USB_ENDPOINT_NUMBER_MASK); + if (i != d->bEndpointAddress) { + dev_notice(ddev, "config %d interface %d altsetting %d has an endpoint descriptor with address 0x%X, changing to 0x%X\n", + cfgno, inum, asnum, d->bEndpointAddress, i); + endpoint->desc.bEndpointAddress = i; + } + /* Check for duplicate endpoint addresses */ if (config_endpoint_is_duplicate(config, inum, asnum, d)) { dev_notice(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", @@ -308,10 +322,8 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, } } - endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; + /* Accept this endpoint */ ++ifp->desc.bNumEndpoints; - - memcpy(&endpoint->desc, d, n); INIT_LIST_HEAD(&endpoint->urb_list); /* diff --git a/drivers/usb/core/of.c b/drivers/usb/core/of.c index f1a499ee482c..763e4122ed5b 100644 --- a/drivers/usb/core/of.c +++ b/drivers/usb/core/of.c @@ -84,9 +84,12 @@ static bool usb_of_has_devices_or_graph(const struct usb_device *hub) if (of_graph_is_present(np)) return true; - for_each_child_of_node(np, child) - if (of_property_present(child, "reg")) + for_each_child_of_node(np, 
child) { + if (of_property_present(child, "reg")) { + of_node_put(child); return true; + } + } return false; } diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index b4783574b8e6..13171454f959 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -506,6 +506,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1b1c, 0x1b38), .driver_info = USB_QUIRK_DELAY_INIT | USB_QUIRK_DELAY_CTRL_MSG }, + /* START BP-850k Printer */ + { USB_DEVICE(0x1bc3, 0x0003), .driver_info = USB_QUIRK_NO_SET_INTF }, + /* MIDI keyboard WORLDE MINI */ { USB_DEVICE(0x1c75, 0x0204), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 7ee61a89520b..cb82557678dd 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -957,12 +957,16 @@ static bool dwc3_core_is_valid(struct dwc3 *dwc) static void dwc3_core_setup_global_control(struct dwc3 *dwc) { + unsigned int power_opt; + unsigned int hw_mode; u32 reg; reg = dwc3_readl(dwc->regs, DWC3_GCTL); reg &= ~DWC3_GCTL_SCALEDOWN_MASK; + hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); + power_opt = DWC3_GHWPARAMS1_EN_PWROPT(dwc->hwparams.hwparams1); - switch (DWC3_GHWPARAMS1_EN_PWROPT(dwc->hwparams.hwparams1)) { + switch (power_opt) { case DWC3_GHWPARAMS1_EN_PWROPT_CLK: /** * WORKAROUND: DWC3 revisions between 2.10a and 2.50a have an @@ -995,6 +999,20 @@ static void dwc3_core_setup_global_control(struct dwc3 *dwc) break; } + /* + * This is a workaround for STAR#4846132, which only affects + * DWC_usb31 version2.00a operating in host mode. + * + * There is a problem in DWC_usb31 version 2.00a operating + * in host mode that would cause a CSR read timeout When CSR + * read coincides with RAM Clock Gating Entry. By disable + * Clock Gating, sacrificing power consumption for normal + * operation. 
+ */ + if (power_opt != DWC3_GHWPARAMS1_EN_PWROPT_NO && + hw_mode != DWC3_GHWPARAMS0_MODE_GADGET && DWC3_VER_IS(DWC31, 200A)) + reg |= DWC3_GCTL_DSBLCLKGTNG; + /* check if current dwc3 is on simulation board */ if (dwc->hwparams.hwparams6 & DWC3_GHWPARAMS6_EN_FPGA) { dev_info(dwc->dev, "Running with FPGA optimizations\n"); @@ -2250,7 +2268,6 @@ assert_reset: static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) { - unsigned long flags; u32 reg; int i; @@ -2293,9 +2310,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) break; if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) { - spin_lock_irqsave(&dwc->lock, flags); dwc3_gadget_suspend(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); synchronize_irq(dwc->irq_gadget); } @@ -2312,7 +2327,6 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) { - unsigned long flags; int ret; u32 reg; int i; @@ -2366,9 +2380,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) if (dwc->current_otg_role == DWC3_OTG_ROLE_HOST) { dwc3_otg_host_init(dwc); } else if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) { - spin_lock_irqsave(&dwc->lock, flags); dwc3_gadget_resume(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); } break; diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 9ef821ca2fc7..052852f80146 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -54,6 +54,10 @@ #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e #define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 +#define PCI_DEVICE_ID_INTEL_PTLH 0xe332 +#define PCI_DEVICE_ID_INTEL_PTLH_PCH 0xe37e +#define PCI_DEVICE_ID_INTEL_PTLU 0xe432 +#define PCI_DEVICE_ID_INTEL_PTLU_PCH 0xe47e #define PCI_DEVICE_ID_AMD_MR 0x163a #define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" @@ -430,6 +434,10 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { 
PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, PTLH, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, PTLH_PCH, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, PTLU, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, PTLU_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(AMD, NL_USB, &dwc3_pci_amd_swnode) }, { PCI_DEVICE_DATA(AMD, MR, &dwc3_pci_amd_mr_swnode) }, diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index ce3cfa1f36f5..0e7c1e947c0a 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -115,9 +115,12 @@ static int usb_string_copy(const char *s, char **s_copy) int ret; char *str; char *copy = *s_copy; + ret = strlen(s); if (ret > USB_MAX_STRING_LEN) return -EOVERFLOW; + if (ret < 1) + return -EINVAL; if (copy) { str = copy; diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index ba7d180cc9e6..44e20c6c36d3 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -213,6 +213,7 @@ static inline struct usb_endpoint_descriptor *ep_desc(struct usb_gadget *gadget, struct usb_endpoint_descriptor *ss) { switch (gadget->speed) { + case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: return ss; case USB_SPEED_HIGH: @@ -449,11 +450,8 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr) mutex_lock(&dev->lock_printer_io); spin_lock_irqsave(&dev->lock, flags); - if (dev->interface < 0) { - spin_unlock_irqrestore(&dev->lock, flags); - mutex_unlock(&dev->lock_printer_io); - return -ENODEV; - } + if (dev->interface < 0) + goto out_disabled; /* We will use this flag later to check if a printer reset happened * after we turn interrupts back on. 
@@ -461,6 +459,9 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr) dev->reset_printer = 0; setup_rx_reqs(dev); + /* this dropped the lock - need to retest */ + if (dev->interface < 0) + goto out_disabled; bytes_copied = 0; current_rx_req = dev->current_rx_req; @@ -494,6 +495,8 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr) wait_event_interruptible(dev->rx_wait, (likely(!list_empty(&dev->rx_buffers)))); spin_lock_irqsave(&dev->lock, flags); + if (dev->interface < 0) + goto out_disabled; } /* We have data to return then copy it to the caller's buffer.*/ @@ -537,6 +540,9 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr) return -EAGAIN; } + if (dev->interface < 0) + goto out_disabled; + /* If we not returning all the data left in this RX request * buffer then adjust the amount of data left in the buffer. * Othewise if we are done with this RX request buffer then @@ -566,6 +572,11 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr) return bytes_copied; else return -EAGAIN; + +out_disabled: + spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock_printer_io); + return -ENODEV; } static ssize_t @@ -586,11 +597,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) mutex_lock(&dev->lock_printer_io); spin_lock_irqsave(&dev->lock, flags); - if (dev->interface < 0) { - spin_unlock_irqrestore(&dev->lock, flags); - mutex_unlock(&dev->lock_printer_io); - return -ENODEV; - } + if (dev->interface < 0) + goto out_disabled; /* Check if a printer reset happens while we have interrupts on */ dev->reset_printer = 0; @@ -613,6 +621,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) wait_event_interruptible(dev->tx_wait, (likely(!list_empty(&dev->tx_reqs)))); spin_lock_irqsave(&dev->lock, flags); + if (dev->interface < 0) + goto out_disabled; } while (likely(!list_empty(&dev->tx_reqs)) && len) { @@ -662,6 
+672,9 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) return -EAGAIN; } + if (dev->interface < 0) + goto out_disabled; + list_add(&req->list, &dev->tx_reqs_active); /* here, we unlock, and only unlock, to avoid deadlock. */ @@ -674,6 +687,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) mutex_unlock(&dev->lock_printer_io); return -EAGAIN; } + if (dev->interface < 0) + goto out_disabled; } spin_unlock_irqrestore(&dev->lock, flags); @@ -685,6 +700,11 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) return bytes_copied; else return -EAGAIN; + +out_disabled: + spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock_printer_io); + return -ENODEV; } static int diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 11dd0b9e847f..95191083b455 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -1163,8 +1163,6 @@ struct net_device *gether_connect(struct gether *link) if (netif_running(dev->net)) eth_start(dev, GFP_ATOMIC); - netif_device_attach(dev->net); - /* on error, disable any endpoints */ } else { (void) usb_ep_disable(link->out_ep); @@ -1202,7 +1200,7 @@ void gether_disconnect(struct gether *link) DBG(dev, "%s\n", __func__); - netif_device_detach(dev->net); + netif_stop_queue(dev->net); netif_carrier_off(dev->net); /* disable endpoints, forcing (synchronous) completion diff --git a/drivers/usb/gadget/udc/aspeed_udc.c b/drivers/usb/gadget/udc/aspeed_udc.c index 3916c8e2ba01..821a6ab5da56 100644 --- a/drivers/usb/gadget/udc/aspeed_udc.c +++ b/drivers/usb/gadget/udc/aspeed_udc.c @@ -66,8 +66,8 @@ #define USB_UPSTREAM_EN BIT(0) /* Main config reg */ -#define UDC_CFG_SET_ADDR(x) ((x) & 0x3f) -#define UDC_CFG_ADDR_MASK (0x3f) +#define UDC_CFG_SET_ADDR(x) ((x) & UDC_CFG_ADDR_MASK) +#define UDC_CFG_ADDR_MASK GENMASK(6, 0) /* Interrupt ctrl & status reg */ #define 
UDC_IRQ_EP_POOL_NAK BIT(17) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 37eb37b0affa..0a8cf6c17f82 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1125,10 +1125,20 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) xhci_dbg(xhci, "Start the secondary HCD\n"); retval = xhci_run(xhci->shared_hcd); } - + if (retval) + return retval; + /* + * Resume roothubs unconditionally as PORTSC change bits are not + * immediately visible after xHC reset + */ hcd->state = HC_STATE_SUSPENDED; - if (xhci->shared_hcd) + + if (xhci->shared_hcd) { xhci->shared_hcd->state = HC_STATE_SUSPENDED; + usb_hcd_resume_root_hub(xhci->shared_hcd); + } + usb_hcd_resume_root_hub(hcd); + goto done; } @@ -1152,7 +1162,6 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) xhci_dbc_resume(xhci); - done: if (retval == 0) { /* * Resume roothubs only if there are pending events. @@ -1178,6 +1187,7 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) usb_hcd_resume_root_hub(hcd); } } +done: /* * If system is subject to the Quirk, Compliance Mode Timer needs to * be re-initialized Always after a system resume. 
Ports are subject diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 8abf3a567e30..108d9a593a80 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -556,7 +556,7 @@ static int da8xx_probe(struct platform_device *pdev) ret = of_platform_populate(pdev->dev.of_node, NULL, da8xx_auxdata_lookup, &pdev->dev); if (ret) - return ret; + goto err_unregister_phy; pinfo = da8xx_dev_info; pinfo.parent = &pdev->dev; @@ -571,9 +571,13 @@ static int da8xx_probe(struct platform_device *pdev) ret = PTR_ERR_OR_ZERO(glue->musb); if (ret) { dev_err(&pdev->dev, "failed to register musb device: %d\n", ret); - usb_phy_generic_unregister(glue->usb_phy); + goto err_unregister_phy; } + return 0; + +err_unregister_phy: + usb_phy_generic_unregister(glue->usb_phy); return ret; } diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 8b0308d84270..85697466b147 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1737,6 +1737,49 @@ static void mos7840_port_remove(struct usb_serial_port *port) kfree(mos7840_port); } +static int mos7840_suspend(struct usb_serial *serial, pm_message_t message) +{ + struct moschip_port *mos7840_port; + struct usb_serial_port *port; + int i; + + for (i = 0; i < serial->num_ports; ++i) { + port = serial->port[i]; + if (!tty_port_initialized(&port->port)) + continue; + + mos7840_port = usb_get_serial_port_data(port); + + usb_kill_urb(mos7840_port->read_urb); + mos7840_port->read_urb_busy = false; + } + + return 0; +} + +static int mos7840_resume(struct usb_serial *serial) +{ + struct moschip_port *mos7840_port; + struct usb_serial_port *port; + int res; + int i; + + for (i = 0; i < serial->num_ports; ++i) { + port = serial->port[i]; + if (!tty_port_initialized(&port->port)) + continue; + + mos7840_port = usb_get_serial_port_data(port); + + mos7840_port->read_urb_busy = true; + res = usb_submit_urb(mos7840_port->read_urb, GFP_NOIO); + if (res) + mos7840_port->read_urb_busy = 
false; + } + + return 0; +} + static struct usb_serial_driver moschip7840_4port_device = { .driver = { .owner = THIS_MODULE, @@ -1764,6 +1807,8 @@ static struct usb_serial_driver moschip7840_4port_device = { .port_probe = mos7840_port_probe, .port_remove = mos7840_port_remove, .read_bulk_callback = mos7840_bulk_in_callback, + .suspend = mos7840_suspend, + .resume = mos7840_resume, }; static struct usb_serial_driver * const serial_drivers[] = { diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8a5846d4adf6..311040f9b935 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1425,6 +1425,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x3000, 0xff), /* Telit FN912 */ + .driver_info = RSVD(0) | NCTRL(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x3001, 0xff), /* Telit FN912 */ + .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7010, 0xff), /* Telit LE910-S1 (RNDIS) */ .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */ @@ -1433,6 +1437,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701b, 0xff), /* Telit LE910R1 (ECM) */ .driver_info = NCTRL(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x9000, 0xff), /* Telit generic core-dump device */ + .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */ .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9200), /* Telit LE910S1 flashing device */ @@ -2224,6 +2230,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7106_2COM, 
0x02, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x7126, 0xff, 0x00, 0x00), + .driver_info = NCTRL(2) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x7127, 0xff, 0x00, 0x00), + .driver_info = NCTRL(2) | NCTRL(3) | NCTRL(4) }, { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MPL200), .driver_info = RSVD(1) | RSVD(4) }, @@ -2284,6 +2294,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0f0, 0xff), /* Foxconn T99W373 MBIM */ .driver_info = RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe145, 0xff), /* Foxconn T99W651 RNDIS */ + .driver_info = RSVD(5) | RSVD(6) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */ @@ -2321,6 +2333,32 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0115, 0xff), /* Rolling RW135-GL (laptop MBIM) */ .driver_info = RSVD(5) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0802, 0xff), /* Rolling RW350-GL (laptop MBIM) */ + .driver_info = RSVD(5) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for Global */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0101, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WRD for Global SKU */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0101, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0101, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 
0x0106, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WRD for China SKU */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0106, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0106, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0111, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for SA */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0111, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0111, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0112, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for EU */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0112, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0112, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0113, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for NA */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0113, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0113, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0115, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for China EDU */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0115, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0115, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for Golbal EDU */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index b31464740f6c..8c8b5e6041cc 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -79,6 +79,12 @@ static int slave_alloc (struct scsi_device *sdev) if 
(us->protocol == USB_PR_BULK && us->max_lun > 0) sdev->sdev_bflags |= BLIST_FORCELUN; + /* + * Some USB storage devices reset if the IO advice hints grouping mode + * page is queried. Hence skip that mode page. + */ + sdev->sdev_bflags |= BLIST_SKIP_IO_HINTS; + return 0; } diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index a48870a87a29..b610a2de4ae5 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -21,6 +21,7 @@ #include <scsi/scsi.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_dbg.h> +#include <scsi/scsi_devinfo.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> @@ -820,6 +821,12 @@ static int uas_slave_alloc(struct scsi_device *sdev) struct uas_dev_info *devinfo = (struct uas_dev_info *)sdev->host->hostdata; + /* + * Some USB storage devices reset if the IO advice hints grouping mode + * page is queried. Hence skip that mode page. + */ + sdev->sdev_bflags |= BLIST_SKIP_IO_HINTS; + sdev->hostdata = devinfo; return 0; } diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 8d112c3edae5..adf32ca0f761 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -25,6 +25,7 @@ struct ucsi_acpi { unsigned long flags; #define UCSI_ACPI_COMMAND_PENDING 1 #define UCSI_ACPI_ACK_PENDING 2 +#define UCSI_ACPI_CHECK_BOGUS_EVENT 3 guid_t guid; u64 cmd; }; @@ -128,6 +129,58 @@ static const struct ucsi_operations ucsi_zenbook_ops = { .async_write = ucsi_acpi_async_write }; +static int ucsi_gram_read(struct ucsi *ucsi, unsigned int offset, + void *val, size_t val_len) +{ + u16 bogus_change = UCSI_CONSTAT_POWER_LEVEL_CHANGE | + UCSI_CONSTAT_PDOS_CHANGE; + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + struct ucsi_connector_status *status; + int ret; + + ret = ucsi_acpi_read(ucsi, offset, val, val_len); + if (ret < 0) + return ret; + + if (UCSI_COMMAND(ua->cmd) == UCSI_GET_CONNECTOR_STATUS && + 
test_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags) && + offset == UCSI_MESSAGE_IN) { + status = (struct ucsi_connector_status *)val; + + /* Clear the bogus change */ + if (status->change == bogus_change) + status->change = 0; + + clear_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags); + } + + return ret; +} + +static int ucsi_gram_sync_write(struct ucsi *ucsi, unsigned int offset, + const void *val, size_t val_len) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + int ret; + + ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); + if (ret < 0) + return ret; + + if (UCSI_COMMAND(ua->cmd) == UCSI_GET_PDOS && + ua->cmd & UCSI_GET_PDOS_PARTNER_PDO(1) && + ua->cmd & UCSI_GET_PDOS_SRC_PDOS) + set_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags); + + return ret; +} + +static const struct ucsi_operations ucsi_gram_ops = { + .read = ucsi_gram_read, + .sync_write = ucsi_gram_sync_write, + .async_write = ucsi_acpi_async_write +}; + static const struct dmi_system_id ucsi_acpi_quirks[] = { { .matches = { @@ -136,6 +189,14 @@ static const struct dmi_system_id ucsi_acpi_quirks[] = { }, .driver_data = (void *)&ucsi_zenbook_ops, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "LG gram PC"), + DMI_MATCH(DMI_PRODUCT_NAME, "90Q"), + }, + .driver_data = (void *)&ucsi_gram_ops, + }, { } }; diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index 985a880e86da..2fa973afe4e6 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -372,6 +372,7 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev, ret = fwnode_property_read_u32(fwnode, "reg", &port); if (ret < 0) { dev_err(dev, "missing reg property of %pOFn\n", fwnode); + fwnode_handle_put(fwnode); return ret; } @@ -386,9 +387,11 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev, if (!desc) continue; - if (IS_ERR(desc)) + if (IS_ERR(desc)) { + fwnode_handle_put(fwnode); return 
dev_err_probe(dev, PTR_ERR(desc), "unable to acquire orientation gpio\n"); + } ucsi->port_orientation[port] = desc; } diff --git a/drivers/usb/typec/ucsi/ucsi_stm32g0.c b/drivers/usb/typec/ucsi/ucsi_stm32g0.c index ac48b7763114..ac69288e8bb0 100644 --- a/drivers/usb/typec/ucsi/ucsi_stm32g0.c +++ b/drivers/usb/typec/ucsi/ucsi_stm32g0.c @@ -65,6 +65,7 @@ struct ucsi_stm32g0 { struct device *dev; unsigned long flags; #define COMMAND_PENDING 1 +#define ACK_PENDING 2 const char *fw_name; struct ucsi *ucsi; bool suspended; @@ -396,9 +397,13 @@ static int ucsi_stm32g0_sync_write(struct ucsi *ucsi, unsigned int offset, const size_t len) { struct ucsi_stm32g0 *g0 = ucsi_get_drvdata(ucsi); + bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI; int ret; - set_bit(COMMAND_PENDING, &g0->flags); + if (ack) + set_bit(ACK_PENDING, &g0->flags); + else + set_bit(COMMAND_PENDING, &g0->flags); ret = ucsi_stm32g0_async_write(ucsi, offset, val, len); if (ret) @@ -406,9 +411,14 @@ static int ucsi_stm32g0_sync_write(struct ucsi *ucsi, unsigned int offset, const if (!wait_for_completion_timeout(&g0->complete, msecs_to_jiffies(5000))) ret = -ETIMEDOUT; + else + return 0; out_clear_bit: - clear_bit(COMMAND_PENDING, &g0->flags); + if (ack) + clear_bit(ACK_PENDING, &g0->flags); + else + clear_bit(COMMAND_PENDING, &g0->flags); return ret; } @@ -429,8 +439,9 @@ static irqreturn_t ucsi_stm32g0_irq_handler(int irq, void *data) if (UCSI_CCI_CONNECTOR(cci)) ucsi_connector_change(g0->ucsi, UCSI_CCI_CONNECTOR(cci)); - if (test_bit(COMMAND_PENDING, &g0->flags) && - cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE)) + if (cci & UCSI_CCI_ACK_COMPLETE && test_and_clear_bit(ACK_PENDING, &g0->flags)) + complete(&g0->complete); + if (cci & UCSI_CCI_COMMAND_COMPLETE && test_and_clear_bit(COMMAND_PENDING, &g0->flags)) complete(&g0->complete); return IRQ_HANDLED; diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 987c7921affa..ba0ce0075b2f 100644 --- 
a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1260,7 +1260,7 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( struct vfio_pci_hot_reset_info hdr; struct vfio_pci_fill_info fill = {}; bool slot = false; - int ret, count; + int ret, count = 0; if (copy_from_user(&hdr, arg, minsz)) return -EFAULT; diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 85eea38dbdf4..2882944d23cc 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -257,6 +257,7 @@ config GPIO_WATCHDOG_ARCH_INITCALL config LENOVO_SE10_WDT tristate "Lenovo SE10 Watchdog" depends on (X86 && DMI) || COMPILE_TEST + depends on HAS_IOPORT select WATCHDOG_CORE help If you say yes here you get support for the watchdog diff --git a/drivers/watchdog/menz69_wdt.c b/drivers/watchdog/menz69_wdt.c index c7de30270043..0508a65acfa6 100644 --- a/drivers/watchdog/menz69_wdt.c +++ b/drivers/watchdog/menz69_wdt.c @@ -161,6 +161,7 @@ static struct mcb_driver men_z069_driver = { module_mcb_driver(men_z069_driver); MODULE_AUTHOR("Johannes Thumshirn <jth@kernel.org>"); +MODULE_DESCRIPTION("Watchdog driver for the MEN z069 IP-Core"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("mcb:16z069"); MODULE_IMPORT_NS(MCB); diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index a7a12f2fe9de..b6e0236509bb 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -370,5 +370,6 @@ static struct platform_driver omap_wdt_driver = { module_platform_driver(omap_wdt_driver); MODULE_AUTHOR("George G. 
Davis"); +MODULE_DESCRIPTION("Driver for the TI OMAP 16xx/24xx/34xx 32KHz (non-secure) watchdog"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:omap_wdt"); diff --git a/drivers/watchdog/simatic-ipc-wdt.c b/drivers/watchdog/simatic-ipc-wdt.c index cdc1a2e15180..1e91f0a560ff 100644 --- a/drivers/watchdog/simatic-ipc-wdt.c +++ b/drivers/watchdog/simatic-ipc-wdt.c @@ -227,6 +227,7 @@ static struct platform_driver simatic_ipc_wdt_driver = { module_platform_driver(simatic_ipc_wdt_driver); +MODULE_DESCRIPTION("Siemens SIMATIC IPC driver for Watchdogs"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:" KBUILD_MODNAME); MODULE_AUTHOR("Gerd Haeussler <gerd.haeussler.ext@siemens.com>"); diff --git a/drivers/watchdog/ts4800_wdt.c b/drivers/watchdog/ts4800_wdt.c index 0099403f4992..24b1ad52102e 100644 --- a/drivers/watchdog/ts4800_wdt.c +++ b/drivers/watchdog/ts4800_wdt.c @@ -200,5 +200,6 @@ static struct platform_driver ts4800_wdt_driver = { module_platform_driver(ts4800_wdt_driver); MODULE_AUTHOR("Damien Riegel <damien.riegel@savoirfairelinux.com>"); +MODULE_DESCRIPTION("Watchdog driver for TS-4800 based boards"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:ts4800_wdt"); diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c index 09d17e20f4a7..8c80d04811e4 100644 --- a/drivers/watchdog/twl4030_wdt.c +++ b/drivers/watchdog/twl4030_wdt.c @@ -118,6 +118,7 @@ static struct platform_driver twl4030_wdt_driver = { module_platform_driver(twl4030_wdt_driver); MODULE_AUTHOR("Nokia Corporation"); +MODULE_DESCRIPTION("TWL4030 Watchdog"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:twl4030_wdt"); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 15bb7989c387..3acf5e050072 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -512,7 +512,7 @@ static int afs_iget5_set_root(struct inode *inode, void *opaque) struct afs_vnode *vnode = AFS_FS_I(inode); vnode->volume = as->volume; - vnode->fid.vid = as->volume->vid, + vnode->fid.vid = as->volume->vid; 
vnode->fid.vnode = 1; vnode->fid.unique = 1; inode->i_ino = 1; @@ -545,7 +545,7 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key) BUG_ON(!(inode->i_state & I_NEW)); vnode = AFS_FS_I(inode); - vnode->cb_v_check = atomic_read(&as->volume->cb_v_break), + vnode->cb_v_check = atomic_read(&as->volume->cb_v_break); afs_set_netfs_context(vnode); op = afs_alloc_operation(key, as->volume); diff --git a/fs/attr.c b/fs/attr.c index 960a310581eb..825007d5cda4 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -17,8 +17,6 @@ #include <linux/filelock.h> #include <linux/security.h> -#include "internal.h" - /** * setattr_should_drop_sgid - determine whether the setgid bit needs to be * removed diff --git a/fs/autofs/init.c b/fs/autofs/init.c index b5e4dfa04ed0..1d644a35ffa0 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -38,4 +38,5 @@ static void __exit exit_autofs_fs(void) module_init(init_autofs_fs) module_exit(exit_autofs_fs) +MODULE_DESCRIPTION("Kernel automounter support"); MODULE_LICENSE("GPL"); diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 1f5db6863663..cf792d4de4f1 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -126,7 +126,7 @@ enum { const struct fs_parameter_spec autofs_param_specs[] = { fsparam_flag ("direct", Opt_direct), fsparam_fd ("fd", Opt_fd), - fsparam_u32 ("gid", Opt_gid), + fsparam_gid ("gid", Opt_gid), fsparam_flag ("ignore", Opt_ignore), fsparam_flag ("indirect", Opt_indirect), fsparam_u32 ("maxproto", Opt_maxproto), @@ -134,7 +134,7 @@ const struct fs_parameter_spec autofs_param_specs[] = { fsparam_flag ("offset", Opt_offset), fsparam_u32 ("pgrp", Opt_pgrp), fsparam_flag ("strictexpire", Opt_strictexpire), - fsparam_u32 ("uid", Opt_uid), + fsparam_uid ("uid", Opt_uid), {} }; @@ -193,8 +193,6 @@ static int autofs_parse_param(struct fs_context *fc, struct fs_parameter *param) struct autofs_fs_context *ctx = fc->fs_private; struct autofs_sb_info *sbi = fc->s_fs_info; struct fs_parse_result result; - kuid_t uid; - 
kgid_t gid; int opt; opt = fs_parse(fc, autofs_param_specs, param, &result); @@ -205,16 +203,10 @@ static int autofs_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_fd: return autofs_parse_fd(fc, sbi, param, &result); case Opt_uid: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) - return invalfc(fc, "Invalid uid"); - ctx->uid = uid; + ctx->uid = result.uid; break; case Opt_gid: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) - return invalfc(fc, "Invalid gid"); - ctx->gid = gid; + ctx->gid = result.gid; break; case Opt_pgrp: ctx->pgrp = result.uint_32; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index c4b6601f5b74..658f11aebda1 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -3,6 +3,7 @@ #include "alloc_background.h" #include "alloc_foreground.h" #include "backpointers.h" +#include "bkey_buf.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_key_cache.h" @@ -29,7 +30,7 @@ #include <linux/sched/task.h> #include <linux/sort.h> -static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket); +static void bch2_discard_one_bucket_fast(struct bch_dev *, u64); /* Persistent alloc info: */ @@ -259,6 +260,14 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, "invalid data type (got %u should be %u)", a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); + for (unsigned i = 0; i < 2; i++) + bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, + c, err, + alloc_key_io_time_bad, + "invalid io_time[%s]: %llu, max %llu", + i == READ ? 
"read" : "write", + a.v->io_time[i], LRU_TIME_MAX); + switch (a.v->data_type) { case BCH_DATA_free: case BCH_DATA_need_gc_gens: @@ -757,8 +766,8 @@ int bch2_trigger_alloc(struct btree_trans *trans, alloc_data_type_set(new_a, new_a->data_type); if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) { - new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); - new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now)); + new_a->io_time[READ] = bch2_current_io_time(c, READ); + new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE); SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); } @@ -768,6 +777,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) { new_a->gen++; SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); + alloc_data_type_set(new_a, new_a->data_type); } if (old_a->data_type != new_a->data_type || @@ -781,7 +791,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, if (new_a->data_type == BCH_DATA_cached && !new_a->io_time[READ]) - new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); + new_a->io_time[READ] = bch2_current_io_time(c, READ); u64 old_lru = alloc_lru_idx_read(*old_a); u64 new_lru = alloc_lru_idx_read(*new_a); @@ -882,14 +892,14 @@ int bch2_trigger_alloc(struct btree_trans *trans, closure_wake_up(&c->freelist_wait); if (statechange(a->data_type == BCH_DATA_need_discard) && - !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) && + !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) && bucket_flushed(new_a)) - bch2_discard_one_bucket_fast(c, new.k->p); + bch2_discard_one_bucket_fast(ca, new.k->p.offset); if (statechange(a->data_type == BCH_DATA_cached) && !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) && should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) - bch2_do_invalidates(c); + bch2_dev_do_invalidates(ca); if (statechange(a->data_type == 
BCH_DATA_need_gc_gens)) bch2_gc_gens_async(c); @@ -1544,13 +1554,13 @@ err: } static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, - struct btree_iter *alloc_iter) + struct btree_iter *alloc_iter, + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; - struct btree_iter lru_iter; struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a; - struct bkey_s_c alloc_k, lru_k; + struct bkey_s_c alloc_k; struct printbuf buf = PRINTBUF; int ret; @@ -1564,6 +1574,14 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, a = bch2_alloc_to_v4(alloc_k, &a_convert); + if (a->fragmentation_lru) { + ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START, + a->fragmentation_lru, + alloc_k, last_flushed); + if (ret) + return ret; + } + if (a->data_type != BCH_DATA_cached) return 0; @@ -1579,7 +1597,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, if (ret) goto err; - a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now); + a_mut->v.io_time[READ] = bch2_current_io_time(c, READ); ret = bch2_trans_update(trans, alloc_iter, &a_mut->k_i, BTREE_TRIGGER_norun); if (ret) @@ -1588,73 +1606,66 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, a = &a_mut->v; } - lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, - lru_pos(alloc_k.k->p.inode, - bucket_to_u64(alloc_k.k->p), - a->io_time[READ]), 0); - ret = bkey_err(lru_k); + ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ], + alloc_k, last_flushed); if (ret) - return ret; - - if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, - alloc_key_to_missing_lru_entry, - "missing lru entry\n" - " %s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { - ret = bch2_lru_set(trans, - alloc_k.k->p.inode, - bucket_to_u64(alloc_k.k->p), - a->io_time[READ]); - if (ret) - goto err; - } + goto err; err: fsck_err: - bch2_trans_iter_exit(trans, &lru_iter); printbuf_exit(&buf); return ret; } int 
bch2_check_alloc_to_lru_refs(struct bch_fs *c) { + struct bkey_buf last_flushed; + + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_alloc_to_lru_ref(trans, &iter))); + bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))); + + bch2_bkey_buf_exit(&last_flushed, c); bch_err_fn(c, ret); return ret; } -static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket) +static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress) { int ret; - mutex_lock(&c->discard_buckets_in_flight_lock); - darray_for_each(c->discard_buckets_in_flight, i) - if (bkey_eq(*i, bucket)) { - ret = -EEXIST; + mutex_lock(&ca->discard_buckets_in_flight_lock); + darray_for_each(ca->discard_buckets_in_flight, i) + if (i->bucket == bucket) { + ret = -BCH_ERR_EEXIST_discard_in_flight_add; goto out; } - ret = darray_push(&c->discard_buckets_in_flight, bucket); + ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) { + .in_progress = in_progress, + .bucket = bucket, + })); out: - mutex_unlock(&c->discard_buckets_in_flight_lock); + mutex_unlock(&ca->discard_buckets_in_flight_lock); return ret; } -static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket) +static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket) { - mutex_lock(&c->discard_buckets_in_flight_lock); - darray_for_each(c->discard_buckets_in_flight, i) - if (bkey_eq(*i, bucket)) { - darray_remove_item(&c->discard_buckets_in_flight, i); + mutex_lock(&ca->discard_buckets_in_flight_lock); + darray_for_each(ca->discard_buckets_in_flight, i) + if (i->bucket == bucket) { + BUG_ON(!i->in_progress); + darray_remove_item(&ca->discard_buckets_in_flight, i); goto found; } BUG(); found: - mutex_unlock(&c->discard_buckets_in_flight_lock); + 
mutex_unlock(&ca->discard_buckets_in_flight_lock); } struct discard_buckets_state { @@ -1662,26 +1673,11 @@ struct discard_buckets_state { u64 open; u64 need_journal_commit; u64 discarded; - struct bch_dev *ca; u64 need_journal_commit_this_dev; }; -static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca) -{ - if (s->ca == ca) - return; - - if (s->ca && s->need_journal_commit_this_dev > - bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets) - bch2_journal_flush_async(&c->journal, NULL); - - if (s->ca) - percpu_ref_put(&s->ca->io_ref); - s->ca = ca; - s->need_journal_commit_this_dev = 0; -} - static int bch2_discard_one_bucket(struct btree_trans *trans, + struct bch_dev *ca, struct btree_iter *need_discard_iter, struct bpos *discard_pos_done, struct discard_buckets_state *s) @@ -1695,16 +1691,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, bool discard_locked = false; int ret = 0; - struct bch_dev *ca = s->ca && s->ca->dev_idx == pos.inode - ? 
s->ca - : bch2_dev_get_ioref(c, pos.inode, WRITE); - if (!ca) { - bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0)); - return 0; - } - - discard_buckets_next_dev(c, s, ca); - if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) { s->open++; goto out; @@ -1764,7 +1750,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, goto out; } - if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true))) + if (discard_in_flight_add(ca, iter.pos.offset, true)) goto out; discard_locked = true; @@ -1788,8 +1774,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, } SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); - alloc_data_type_set(&a->v, a->v.data_type); write: + alloc_data_type_set(&a->v, a->v.data_type); + ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, BCH_WATERMARK_btree| @@ -1801,7 +1788,7 @@ write: s->discarded++; out: if (discard_locked) - discard_in_flight_remove(c, iter.pos); + discard_in_flight_remove(ca, iter.pos.offset); s->seen++; bch2_trans_iter_exit(trans, &iter); printbuf_exit(&buf); @@ -1810,7 +1797,8 @@ out: static void bch2_do_discards_work(struct work_struct *work) { - struct bch_fs *c = container_of(work, struct bch_fs, discard_work); + struct bch_dev *ca = container_of(work, struct bch_dev, discard_work); + struct bch_fs *c = ca->fs; struct discard_buckets_state s = {}; struct bpos discard_pos_done = POS_MAX; int ret; @@ -1821,23 +1809,41 @@ static void bch2_do_discards_work(struct work_struct *work) * successful commit: */ ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, - BTREE_ID_need_discard, POS_MIN, 0, k, - bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s))); - - discard_buckets_next_dev(c, &s, NULL); + for_each_btree_key_upto(trans, iter, + BTREE_ID_need_discard, + POS(ca->dev_idx, 0), + POS(ca->dev_idx, U64_MAX), 0, k, + bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s))); trace_discard_buckets(c, s.seen, s.open, 
s.need_journal_commit, s.discarded, bch2_err_str(ret)); bch2_write_ref_put(c, BCH_WRITE_REF_discard); + percpu_ref_put(&ca->io_ref); +} + +void bch2_dev_do_discards(struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + return; + + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) + goto put_ioref; + + if (queue_work(c->write_ref_wq, &ca->discard_work)) + return; + + bch2_write_ref_put(c, BCH_WRITE_REF_discard); +put_ioref: + percpu_ref_put(&ca->io_ref); } void bch2_do_discards(struct bch_fs *c) { - if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) && - !queue_work(c->write_ref_wq, &c->discard_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_discard); + for_each_member_device(c, ca) + bch2_dev_do_discards(ca); } static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket) @@ -1866,68 +1872,69 @@ err: static void bch2_do_discards_fast_work(struct work_struct *work) { - struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work); + struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work); + struct bch_fs *c = ca->fs; while (1) { bool got_bucket = false; - struct bpos bucket; - struct bch_dev *ca; - - mutex_lock(&c->discard_buckets_in_flight_lock); - darray_for_each(c->discard_buckets_in_flight, i) { - if (i->snapshot) - continue; + u64 bucket; - ca = bch2_dev_get_ioref(c, i->inode, WRITE); - if (!ca) { - darray_remove_item(&c->discard_buckets_in_flight, i); + mutex_lock(&ca->discard_buckets_in_flight_lock); + darray_for_each(ca->discard_buckets_in_flight, i) { + if (i->in_progress) continue; - } got_bucket = true; - bucket = *i; - i->snapshot = true; + bucket = i->bucket; + i->in_progress = true; break; } - mutex_unlock(&c->discard_buckets_in_flight_lock); + mutex_unlock(&ca->discard_buckets_in_flight_lock); if (!got_bucket) break; if (ca->mi.discard && !c->opts.nochanges) blkdev_issue_discard(ca->disk_sb.bdev, - bucket.offset * ca->mi.bucket_size, + 
bucket_to_sector(ca, bucket), ca->mi.bucket_size, GFP_KERNEL); int ret = bch2_trans_do(c, NULL, NULL, - BCH_WATERMARK_btree| - BCH_TRANS_COMMIT_no_enospc, - bch2_clear_bucket_needs_discard(trans, bucket)); + BCH_WATERMARK_btree| + BCH_TRANS_COMMIT_no_enospc, + bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket))); bch_err_fn(c, ret); - percpu_ref_put(&ca->io_ref); - discard_in_flight_remove(c, bucket); + discard_in_flight_remove(ca, bucket); if (ret) break; } bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); + percpu_ref_put(&ca->io_ref); } -static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket) +static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) { - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu(c, bucket.inode); - bool dead = !ca || percpu_ref_is_dying(&ca->io_ref); - rcu_read_unlock(); + struct bch_fs *c = ca->fs; + + if (discard_in_flight_add(ca, bucket, false)) + return; + + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + return; - if (!dead && - !discard_in_flight_add(c, bucket) && - bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) && - !queue_work(c->write_ref_wq, &c->discard_fast_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast)) + goto put_ioref; + + if (queue_work(c->write_ref_wq, &ca->discard_fast_work)) + return; + + bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); +put_ioref: + percpu_ref_put(&ca->io_ref); } static int invalidate_one_bucket(struct btree_trans *trans, @@ -1975,8 +1982,8 @@ static int invalidate_one_bucket(struct btree_trans *trans, a->v.data_type = 0; a->v.dirty_sectors = 0; a->v.cached_sectors = 0; - a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now); - a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now); + a->v.io_time[READ] = bch2_current_io_time(c, READ); + a->v.io_time[WRITE] = bch2_current_io_time(c, WRITE); ret = bch2_trans_commit(trans, NULL, NULL, BCH_WATERMARK_btree| @@ 
-2011,9 +2018,25 @@ err: goto out; } +static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter, + struct bch_dev *ca, bool *wrapped) +{ + struct bkey_s_c k; +again: + k = bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); + if (!k.k && !*wrapped) { + bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0)); + *wrapped = true; + goto again; + } + + return k; +} + static void bch2_do_invalidates_work(struct work_struct *work) { - struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work); + struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work); + struct bch_fs *c = ca->fs; struct btree_trans *trans = bch2_trans_get(c); int ret = 0; @@ -2021,31 +2044,63 @@ static void bch2_do_invalidates_work(struct work_struct *work) if (ret) goto err; - for_each_member_device(c, ca) { - s64 nr_to_invalidate = - should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); + s64 nr_to_invalidate = + should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); + struct btree_iter iter; + bool wrapped = false; - ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru, - lru_pos(ca->dev_idx, 0, 0), - lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX), - BTREE_ITER_intent, k, - invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate)); + bch2_trans_iter_init(trans, &iter, BTREE_ID_lru, + lru_pos(ca->dev_idx, 0, + ((bch2_current_io_time(c, READ) + U32_MAX) & + LRU_TIME_MAX)), 0); - if (ret < 0) { - bch2_dev_put(ca); + while (true) { + bch2_trans_begin(trans); + + struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped); + ret = bkey_err(k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) break; - } + if (!k.k) + break; + + ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate); + if (ret) + break; + + bch2_btree_iter_advance(&iter); } + bch2_trans_iter_exit(trans, &iter); err: bch2_trans_put(trans); bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); + 
percpu_ref_put(&ca->io_ref); +} + +void bch2_dev_do_invalidates(struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + return; + + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate)) + goto put_ioref; + + if (queue_work(c->write_ref_wq, &ca->invalidate_work)) + return; + + bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); +put_ioref: + percpu_ref_put(&ca->io_ref); } void bch2_do_invalidates(struct bch_fs *c) { - if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) && - !queue_work(c->write_ref_wq, &c->invalidate_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); + for_each_member_device(c, ca) + bch2_dev_do_invalidates(ca); } int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, @@ -2204,7 +2259,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, if (ret) return ret; - now = atomic64_read(&c->io_clock[rw].now); + now = bch2_current_io_time(c, rw); if (a->v.io_time[rw] == now) goto out; @@ -2361,16 +2416,20 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) set_bit(ca->dev_idx, c->rw_devs[i].d); } -void bch2_fs_allocator_background_exit(struct bch_fs *c) +void bch2_dev_allocator_background_exit(struct bch_dev *ca) +{ + darray_exit(&ca->discard_buckets_in_flight); +} + +void bch2_dev_allocator_background_init(struct bch_dev *ca) { - darray_exit(&c->discard_buckets_in_flight); + mutex_init(&ca->discard_buckets_in_flight_lock); + INIT_WORK(&ca->discard_work, bch2_do_discards_work); + INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work); + INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work); } void bch2_fs_allocator_background_init(struct bch_fs *c) { spin_lock_init(&c->freelist_lock); - mutex_init(&c->discard_buckets_in_flight_lock); - INIT_WORK(&c->discard_work, bch2_do_discards_work); - INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work); - INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work); } diff --git 
a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index ae31a94be6f9..ba2c5557a3f0 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -141,7 +141,13 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, !bch2_bucket_sectors_fragmented(ca, a)) return 0; - u64 d = bch2_bucket_sectors_dirty(a); + /* + * avoid overflowing LRU_TIME_BITS on a corrupted fs, when + * bucket_sectors_dirty is (much) bigger than bucket_size + */ + u64 d = min(bch2_bucket_sectors_dirty(a), + ca->mi.bucket_size); + return div_u64(d * (1ULL << 31), ca->mi.bucket_size); } @@ -269,6 +275,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *); +void bch2_dev_do_discards(struct bch_dev *); void bch2_do_discards(struct bch_fs *); static inline u64 should_invalidate_buckets(struct bch_dev *ca, @@ -283,6 +290,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca, return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets); } +void bch2_dev_do_invalidates(struct bch_dev *); void bch2_do_invalidates(struct bch_fs *); static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a) @@ -306,7 +314,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *); void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); -void bch2_fs_allocator_background_exit(struct bch_fs *); +void bch2_dev_allocator_background_exit(struct bch_dev *); +void bch2_dev_allocator_background_init(struct bch_dev *); + void bch2_fs_allocator_background_init(struct bch_fs *); #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */ diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 927a5f300b30..27d97c22ae27 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ 
-621,13 +621,13 @@ again: avail = dev_buckets_free(ca, *usage, watermark); if (usage->d[BCH_DATA_need_discard].buckets > avail) - bch2_do_discards(c); + bch2_dev_do_discards(ca); if (usage->d[BCH_DATA_need_gc_gens].buckets > avail) bch2_gc_gens_async(c); if (should_invalidate_buckets(ca, *usage)) - bch2_do_invalidates(c); + bch2_dev_do_invalidates(ca); if (!avail) { if (cl && !waiting) { @@ -1703,6 +1703,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c) for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++) nr[c->open_buckets[i].data_type]++; + printbuf_tabstops_reset(out); printbuf_tabstop_push(out, 24); percpu_down_read(&c->mark_lock); @@ -1736,6 +1737,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++) nr[c->open_buckets[i].data_type]++; + printbuf_tabstops_reset(out); printbuf_tabstop_push(out, 12); printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16); diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 4321f9fb73bd..6d8b1bc90be0 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -434,13 +434,6 @@ int bch2_check_btree_backpointers(struct bch_fs *c) return ret; } -static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) -{ - return bpos_eq(l.k->p, r.k->p) && - bkey_bytes(l.k) == bkey_bytes(r.k) && - !memcmp(l.v, r.v, bkey_val_bytes(l.k)); -} - struct extents_to_bp_state { struct bpos bucket_start; struct bpos bucket_end; @@ -536,11 +529,8 @@ static int check_bp_exists(struct btree_trans *trans, struct btree_iter other_extent_iter = {}; struct printbuf buf = PRINTBUF; struct bkey_s_c bp_k; - struct bkey_buf tmp; int ret = 0; - bch2_bkey_buf_init(&tmp); - struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket); if (!ca) { prt_str(&buf, "extent for nonexistent device:bucket "); @@ -565,22 +555,9 @@ static int check_bp_exists(struct btree_trans *trans, if (bp_k.k->type != 
KEY_TYPE_backpointer || memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { - bch2_bkey_buf_reassemble(&tmp, c, orig_k); - - if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) { - if (bp.level) { - bch2_trans_unlock(trans); - bch2_btree_interior_updates_flush(c); - } - - ret = bch2_btree_write_buffer_flush_sync(trans); - if (ret) - goto err; - - bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k); - ret = -BCH_ERR_transaction_restart_write_buffer_flush; - goto out; - } + ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); + if (ret) + goto err; goto check_existing_bp; } @@ -589,7 +566,6 @@ err: fsck_err: bch2_trans_iter_exit(trans, &other_extent_iter); bch2_trans_iter_exit(trans, &bp_iter); - bch2_bkey_buf_exit(&tmp, c); bch2_dev_put(ca); printbuf_exit(&buf); return ret; @@ -794,6 +770,8 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, !((1U << btree) & btree_interior_mask)) continue; + bch2_trans_begin(trans); + __for_each_btree_node(trans, iter, btree, btree == start.btree ? start.pos : POS_MIN, 0, depth, BTREE_ITER_prefetch, b, ret) { @@ -905,7 +883,7 @@ static int check_one_backpointer(struct btree_trans *trans, struct bbpos start, struct bbpos end, struct bkey_s_c_backpointer bp, - struct bpos *last_flushed_pos) + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -925,20 +903,18 @@ static int check_one_backpointer(struct btree_trans *trans, if (ret) return ret; - if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) { - *last_flushed_pos = bp.k->p; - ret = bch2_btree_write_buffer_flush_sync(trans) ?: - -BCH_ERR_transaction_restart_write_buffer_flush; - goto out; - } + if (!k.k) { + ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed); + if (ret) + goto out; - if (fsck_err_on(!k.k, c, - backpointer_to_missing_ptr, - "backpointer for missing %s\n %s", - bp.v->level ? 
"btree node" : "extent", - (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { - ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); - goto out; + if (fsck_err(c, backpointer_to_missing_ptr, + "backpointer for missing %s\n %s", + bp.v->level ? "btree node" : "extent", + (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { + ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); + goto out; + } } out: fsck_err: @@ -951,14 +927,20 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, struct bbpos start, struct bbpos end) { - struct bpos last_flushed_pos = SPOS_MAX; + struct bkey_buf last_flushed; - return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_one_backpointer(trans, start, end, bkey_s_c_to_backpointer(k), - &last_flushed_pos)); + &last_flushed)); + + bch2_bkey_buf_exit(&last_flushed, trans->c); + return ret; } int bch2_check_backpointers_to_extents(struct bch_fs *c) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 2992a644d822..1106fec6e155 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -493,6 +493,11 @@ struct io_count { u64 sectors[2][BCH_DATA_NR]; }; +struct discard_in_flight { + bool in_progress:1; + u64 bucket:63; +}; + struct bch_dev { struct kobject kobj; #ifdef CONFIG_BCACHEFS_DEBUG @@ -554,6 +559,12 @@ struct bch_dev { size_t inc_gen_really_needs_gc; size_t buckets_waiting_on_journal; + struct work_struct invalidate_work; + struct work_struct discard_work; + struct mutex discard_buckets_in_flight_lock; + DARRAY(struct discard_in_flight) discard_buckets_in_flight; + struct work_struct discard_fast_work; + atomic64_t rebalance_work; struct journal_device journal; @@ -915,11 +926,6 @@ struct bch_fs { 
unsigned write_points_nr; struct buckets_waiting_for_journal buckets_waiting_for_journal; - struct work_struct invalidate_work; - struct work_struct discard_work; - struct mutex discard_buckets_in_flight_lock; - DARRAY(struct bpos) discard_buckets_in_flight; - struct work_struct discard_fast_work; /* GARBAGE COLLECTION */ struct work_struct gc_gens_work; @@ -1214,6 +1220,11 @@ static inline s64 bch2_current_time(const struct bch_fs *c) return timespec_to_bch2_time(c, now); } +static inline u64 bch2_current_io_time(const struct bch_fs *c, int rw) +{ + return max(1ULL, (u64) atomic64_read(&c->io_clock[rw].now) & LRU_TIME_MAX); +} + static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c) { struct stdio_redirect *stdio = c->stdio; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 90c12fe2a2cd..e3b1bde489c3 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -476,6 +476,9 @@ struct bch_lru { #define LRU_ID_STRIPES (1U << 16) +#define LRU_TIME_BITS 48 +#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1) + /* Optional/variable size superblock sections: */ struct bch_sb_field { @@ -987,8 +990,9 @@ enum bch_version_upgrade_opts { #define BCH_ERROR_ACTIONS() \ x(continue, 0) \ - x(ro, 1) \ - x(panic, 2) + x(fix_safe, 1) \ + x(panic, 2) \ + x(ro, 3) enum bch_error_actions { #define x(t, n) BCH_ON_ERROR_##t = n, @@ -1382,9 +1386,10 @@ enum btree_id { /* * Maximum number of btrees that we will _ever_ have under the current scheme, - * where we refer to them with bitfields + * where we refer to them with 64 bit bitfields - and we also need a bit for + * the interior btree node type: */ -#define BTREE_ID_NR_MAX 64 +#define BTREE_ID_NR_MAX 63 static inline bool btree_id_is_alloc(enum btree_id id) { diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index f46978e5cb7c..587d7318a2e8 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -660,8 +660,9 @@ int bch2_bkey_format_invalid(struct bch_fs 
*c, bch2_bkey_format_field_overflows(f, i)) { unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); - u64 packed_max = f->bits_per_field[i] - ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1)) + unsigned packed_bits = min(64, f->bits_per_field[i]); + u64 packed_max = packed_bits + ? ~((~0ULL << 1) << (packed_bits - 1)) : 0; prt_printf(err, "field %u too large: %llu + %llu > %llu", @@ -1064,7 +1065,7 @@ void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k) { const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current; u8 *l = k->key_start; - u8 *h = (u8 *) (k->_data + f->key_u64s) - 1; + u8 *h = (u8 *) ((u64 *) k->_data + f->key_u64s) - 1; while (l < h) { swap(*l, *h); diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index fcd43915df07..936357149cf0 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -194,6 +194,13 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r) return bkey_gt(l, r) ? l : r; } +static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) +{ + return bpos_eq(l.k->p, r.k->p) && + bkey_bytes(l.k) == bkey_bytes(r.k) && + !memcmp(l.v, r.v, bkey_val_bytes(l.k)); +} + void bch2_bpos_swab(struct bpos *); void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index c2c3dae52186..bd32aac05192 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -398,8 +398,12 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, for (i = 0; i < nr_compat; i++) switch (!write ? 
i : nr_compat - 1 - i) { case 0: - if (big_endian != CPU_BIG_ENDIAN) + if (big_endian != CPU_BIG_ENDIAN) { + bch2_bkey_swab_key(f, k); + } else if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { bch2_bkey_swab_key(f, k); + bch2_bkey_swab_key(f, k); + } break; case 1: if (version < bcachefs_metadata_version_bkey_renumber) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 726ef7483763..baef0722f5fb 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -129,7 +129,8 @@ static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id, struct bkey_packed *k) { if (version < bcachefs_metadata_version_current || - big_endian != CPU_BIG_ENDIAN) + big_endian != CPU_BIG_ENDIAN || + IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) __bch2_bkey_compat(level, btree_id, version, big_endian, write, f, k); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 0e477a926579..a0deb8266011 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -641,16 +641,30 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in target_depth = 0; /* root */ - mutex_lock(&c->btree_root_lock); - struct btree *b = bch2_btree_id_root(c, btree)->b; - if (!btree_node_fake(b)) { + do { +retry_root: + bch2_trans_begin(trans); + + struct btree_iter iter; + bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, + 0, bch2_btree_id_root(c, btree)->b->c.level, 0); + struct btree *b = bch2_btree_iter_peek_node(&iter); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + goto err_root; + + if (b != btree_node_root(c, b)) { + bch2_trans_iter_exit(trans, &iter); + goto retry_root; + } + gc_pos_set(c, gc_pos_btree(btree, b->c.level + 1, SPOS_MAX)); - ret = lockrestart_do(trans, - bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1, - NULL, NULL, bkey_i_to_s_c(&b->key), initial)); + struct bkey_s_c k = bkey_i_to_s_c(&b->key); + ret = bch2_gc_mark_key(trans, btree, b->c.level + 1, NULL, NULL, k, initial); level = b->c.level; - } - 
mutex_unlock(&c->btree_root_lock); +err_root: + bch2_trans_iter_exit(trans, &iter); + } while (bch2_err_matches(ret, BCH_ERR_transaction_restart)); if (ret) return ret; @@ -903,6 +917,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans, bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true); percpu_up_read(&c->mark_lock); + gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca); + if (fsck_err_on(new.data_type != gc.data_type, c, alloc_key_data_type_wrong, "bucket %llu:%llu gen %u has wrong data_type" @@ -916,23 +932,19 @@ static int bch2_alloc_write_key(struct btree_trans *trans, #define copy_bucket_field(_errtype, _f) \ if (fsck_err_on(new._f != gc._f, c, _errtype, \ "bucket %llu:%llu gen %u data type %s has wrong " #_f \ - ": got %u, should be %u", \ + ": got %llu, should be %llu", \ iter->pos.inode, iter->pos.offset, \ gc.gen, \ bch2_data_type_str(gc.data_type), \ - new._f, gc._f)) \ + (u64) new._f, (u64) gc._f)) \ new._f = gc._f; \ - copy_bucket_field(alloc_key_gen_wrong, - gen); - copy_bucket_field(alloc_key_dirty_sectors_wrong, - dirty_sectors); - copy_bucket_field(alloc_key_cached_sectors_wrong, - cached_sectors); - copy_bucket_field(alloc_key_stripe_wrong, - stripe); - copy_bucket_field(alloc_key_stripe_redundancy_wrong, - stripe_redundancy); + copy_bucket_field(alloc_key_gen_wrong, gen); + copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors); + copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors); + copy_bucket_field(alloc_key_stripe_wrong, stripe); + copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy); + copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru); #undef copy_bucket_field if (!bch2_alloc_v4_cmp(*old, new)) @@ -946,7 +958,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, a->v = new; /* - * The trigger normally makes sure this is set, but we're not running + * The trigger normally makes sure these are set, but we're not running * triggers: */ if 
(a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ]) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 3694c600a3ad..19352a08ea20 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -996,7 +996,7 @@ retry_all: bch2_trans_unlock(trans); cond_resched(); - trans->locked = true; + trans_set_locked(trans); if (unlikely(trans->memory_allocation_failure)) { struct closure cl; @@ -3089,7 +3089,8 @@ u32 bch2_trans_begin(struct btree_trans *trans) bch2_trans_srcu_unlock(trans); trans->last_begin_ip = _RET_IP_; - trans->locked = true; + + trans_set_locked(trans); if (trans->restarted) { bch2_btree_path_traverse_all(trans); @@ -3130,7 +3131,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS); memset(trans, 0, sizeof(*trans)); - closure_init_stack(&trans->ref); seqmutex_lock(&c->btree_trans_lock); if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { @@ -3150,22 +3150,16 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) BUG_ON(pos_task && pid == pos_task->pid && pos->locked); - - if (pos_task && pid < pos_task->pid) { - list_add_tail(&trans->list, &pos->list); - goto list_add_done; - } } } - list_add_tail(&trans->list, &c->btree_trans_list); -list_add_done: + + list_add(&trans->list, &c->btree_trans_list); seqmutex_unlock(&c->btree_trans_lock); got_trans: trans->c = c; trans->last_begin_time = local_clock(); trans->fn_idx = fn_idx; trans->locking_wait.task = current; - trans->locked = true; trans->journal_replay_not_finished = unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags)) && atomic_inc_not_zero(&c->journal_keys.ref); @@ -3199,6 +3193,9 @@ got_trans: trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_lock_time = jiffies; trans->srcu_held = true; + trans_set_locked(trans); + + closure_init_stack_release(&trans->ref); return trans; } @@ -3235,7 +3232,6 @@ void bch2_trans_put(struct btree_trans *trans) 
trans_for_each_update(trans, i) __btree_path_put(trans->paths + i->path, true); trans->nr_updates = 0; - trans->locking_wait.task = NULL; check_btree_paths_leaked(trans); @@ -3256,6 +3252,13 @@ void bch2_trans_put(struct btree_trans *trans) if (unlikely(trans->journal_replay_not_finished)) bch2_journal_keys_put(c); + /* + * trans->ref protects trans->locking_wait.task, btree_paths array; used + * by cycle detector + */ + closure_return_sync(&trans->ref); + trans->locking_wait.task = NULL; + unsigned long *paths_allocated = trans->paths_allocated; trans->paths_allocated = NULL; trans->paths = NULL; @@ -3273,8 +3276,6 @@ void bch2_trans_put(struct btree_trans *trans) trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans); if (trans) { - closure_sync(&trans->ref); - seqmutex_lock(&c->btree_trans_lock); list_del(&trans->list); seqmutex_unlock(&c->btree_trans_lock); @@ -3380,8 +3381,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) per_cpu_ptr(c->btree_trans_bufs, cpu)->trans; if (trans) { - closure_sync(&trans->ref); - seqmutex_lock(&c->btree_trans_lock); list_del(&trans->list); seqmutex_unlock(&c->btree_trans_lock); diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index d66fff22109a..c51826fd557f 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -231,7 +231,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) prt_newline(&buf); } - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf); printbuf_exit(&buf); BUG(); } @@ -792,7 +792,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) return bch2_trans_relock_fail(trans, path, &f, trace); } - trans->locked = true; + trans_set_locked(trans); out: bch2_trans_verify_locks(trans); return 0; @@ -812,16 +812,14 @@ void bch2_trans_unlock_noassert(struct btree_trans *trans) { __bch2_trans_unlock(trans); - trans->locked = false; - trans->last_unlock_ip = _RET_IP_; + 
trans_set_unlocked(trans); } void bch2_trans_unlock(struct btree_trans *trans) { __bch2_trans_unlock(trans); - trans->locked = false; - trans->last_unlock_ip = _RET_IP_; + trans_set_unlocked(trans); } void bch2_trans_unlock_long(struct btree_trans *trans) diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 7f41545b9147..75a6274c7d27 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -193,6 +193,28 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p); /* lock: */ +static inline void trans_set_locked(struct btree_trans *trans) +{ + if (!trans->locked) { + trans->locked = true; + trans->last_unlock_ip = 0; + + trans->pf_memalloc_nofs = (current->flags & PF_MEMALLOC_NOFS) != 0; + current->flags |= PF_MEMALLOC_NOFS; + } +} + +static inline void trans_set_unlocked(struct btree_trans *trans) +{ + if (trans->locked) { + trans->locked = false; + trans->last_unlock_ip = _RET_IP_; + + if (!trans->pf_memalloc_nofs) + current->flags &= ~PF_MEMALLOC_NOFS; + } +} + static inline int __btree_node_lock_nopath(struct btree_trans *trans, struct btree_bkey_cached_common *b, enum six_lock_type type, diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index d63db4fefe73..48cb1a7d31c5 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -484,6 +484,7 @@ struct btree_trans { bool lock_may_not_fail:1; bool srcu_held:1; bool locked:1; + bool pf_memalloc_nofs:1; bool write_locked:1; bool used_mempool:1; bool in_traverse_all:1; @@ -761,13 +762,13 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type) static inline bool btree_node_type_is_extents(enum btree_node_type type) { - const unsigned mask = 0 + const u64 mask = 0 #define x(name, nr, flags, ...) 
|((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1)) BCH_BTREE_IDS() #undef x ; - return (1U << type) & mask; + return BIT_ULL(type) & mask; } static inline bool btree_id_is_extents(enum btree_id btree) @@ -777,35 +778,35 @@ static inline bool btree_id_is_extents(enum btree_id btree) static inline bool btree_type_has_snapshots(enum btree_id id) { - const unsigned mask = 0 + const u64 mask = 0 #define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr) BCH_BTREE_IDS() #undef x ; - return (1U << id) & mask; + return BIT_ULL(id) & mask; } static inline bool btree_type_has_snapshot_field(enum btree_id id) { - const unsigned mask = 0 + const u64 mask = 0 #define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr) BCH_BTREE_IDS() #undef x ; - return (1U << id) & mask; + return BIT_ULL(id) & mask; } static inline bool btree_type_has_ptrs(enum btree_id id) { - const unsigned mask = 0 + const u64 mask = 0 #define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr) BCH_BTREE_IDS() #undef x ; - return (1U << id) & mask; + return BIT_ULL(id) & mask; } struct btree_root { diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 75c8a196b3f6..d0e92d948002 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_buf.h" #include "btree_locking.h" #include "btree_update.h" #include "btree_update_interior.h" #include "btree_write_buffer.h" #include "error.h" +#include "extents.h" #include "journal.h" #include "journal_io.h" #include "journal_reclaim.h" @@ -492,6 +494,41 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans) return ret; } +/** + * In check and repair code, when checking references to write buffer btrees we + * need to issue a flush before we have a definitive error: this issues a flush + * if this is a key we haven't yet checked. 
+ */ +int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, + struct bkey_s_c referring_k, + struct bkey_buf *last_flushed) +{ + struct bch_fs *c = trans->c; + struct bkey_buf tmp; + int ret = 0; + + bch2_bkey_buf_init(&tmp); + + if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { + bch2_bkey_buf_reassemble(&tmp, c, referring_k); + + if (bkey_is_btree_ptr(referring_k.k)) { + bch2_trans_unlock(trans); + bch2_btree_interior_updates_flush(c); + } + + ret = bch2_btree_write_buffer_flush_sync(trans); + if (ret) + goto err; + + bch2_bkey_buf_copy(last_flushed, c, tmp.k); + ret = -BCH_ERR_transaction_restart_write_buffer_flush; + } +err: + bch2_bkey_buf_exit(&tmp, c); + return ret; +} + static void bch2_btree_write_buffer_flush_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work); diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index eebcd2b15249..dd5e64218b50 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -23,6 +23,9 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *); int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); int bch2_btree_write_buffer_tryflush(struct btree_trans *); +struct bkey_buf; +int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *); + struct journal_keys_to_wb { struct btree_write_buffer_keys *wb; size_t room; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 743d57eba760..314ee3e0187f 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -805,7 +805,7 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" "while marking %s", ptr->dev, bucket_nr, b_gen, - *bucket_gen(ca, bucket_nr), + bucket_gen_get(ca, bucket_nr), bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen, 
(printbuf_reset(&buf), diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 80ee0be9793e..8ad4be73860c 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -116,6 +116,14 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) return gens->b + b; } +static inline u8 bucket_gen_get(struct bch_dev *ca, size_t b) +{ + rcu_read_lock(); + u8 gen = *bucket_gen(ca, b); + rcu_read_unlock(); + return gen; +} + static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, const struct bch_extent_ptr *ptr) { diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 9e54323f0f5f..6d82e1165adc 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -216,7 +216,8 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a ret = PTR_ERR_OR_ZERO(optstr) ?: bch2_parse_mount_opts(NULL, &thr->opts, optstr); - kfree(optstr); + if (!IS_ERR(optstr)) + kfree(optstr); if (ret) goto err; @@ -319,7 +320,8 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) return ret; ret = bch2_dev_add(c, path); - kfree(path); + if (!IS_ERR(path)) + kfree(path); return ret; } @@ -850,7 +852,8 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c, ret = PTR_ERR_OR_ZERO(optstr) ?: bch2_parse_mount_opts(c, &thr->opts, optstr); - kfree(optstr); + if (!IS_ERR(optstr)) + kfree(optstr); if (ret) goto err; diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index 363644451106..0f40b585ce2b 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -132,14 +132,9 @@ static struct io_timer *get_expired_timer(struct io_clock *clock, { struct io_timer *ret = NULL; - spin_lock(&clock->timer_lock); - if (clock->timers.used && time_after_eq(now, clock->timers.data[0]->expire)) heap_pop(&clock->timers, ret, io_timer_cmp, NULL); - - spin_unlock(&clock->timer_lock); - return ret; } @@ -148,8 +143,10 @@ void __bch2_increment_clock(struct io_clock *clock, unsigned sectors) struct io_timer *timer; unsigned long now = 
atomic64_add_return(sectors, &clock->now); + spin_lock(&clock->timer_lock); while ((timer = get_expired_timer(clock, now))) timer->fn(timer); + spin_unlock(&clock->timer_lock); } void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 1a0072eef109..0087b8555ead 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -5,7 +5,9 @@ #include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" +#include "compress.h" #include "data_update.h" +#include "disk_groups.h" #include "ec.h" #include "error.h" #include "extents.h" @@ -454,6 +456,38 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans, } } +void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + printbuf_tabstop_push(out, 20); + prt_str(out, "rewrite ptrs:\t"); + bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); + prt_newline(out); + + prt_str(out, "kill ptrs:\t"); + bch2_prt_u64_base2(out, data_opts->kill_ptrs); + prt_newline(out); + + prt_str(out, "target:\t"); + bch2_target_to_text(out, c, data_opts->target); + prt_newline(out); + + prt_str(out, "compression:\t"); + bch2_compression_opt_to_text(out, background_compression(*io_opts)); + prt_newline(out); + + prt_str(out, "extra replicas:\t"); + prt_u64(out, data_opts->extra_replicas); +} + +void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) +{ + bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); + prt_newline(out); + bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); +} + int bch2_extent_drop_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, @@ -643,6 +677,16 @@ int bch2_data_update_init(struct btree_trans *trans, if (!(durability_have + durability_removing)) m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); + if (!m->op.nr_replicas) { + struct 
printbuf buf = PRINTBUF; + + bch2_data_update_to_text(&buf, m); + WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); + printbuf_exit(&buf); + ret = -BCH_ERR_data_update_done; + goto done; + } + m->op.nr_replicas_required = m->op.nr_replicas; if (reserve_sectors) { diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 991095bbd469..8d36365bdea8 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -17,6 +17,9 @@ struct data_update_opts { unsigned write_flags; }; +void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *, + struct bch_io_opts *, struct data_update_opts *); + struct data_update { /* extent being updated: */ enum btree_id btree_id; @@ -27,6 +30,8 @@ struct data_update { struct bch_write_op op; }; +void bch2_data_update_to_text(struct printbuf *, struct data_update *); + int bch2_data_update_index_update(struct bch_write_op *); void bch2_data_update_read_done(struct data_update *, diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 51cbf3928361..ebabab171fe5 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -568,6 +568,32 @@ static const struct file_operations cached_btree_nodes_ops = { .read = bch2_cached_btree_nodes_read, }; +typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r); + +static void list_sort(struct list_head *head, list_cmp_fn cmp) +{ + struct list_head *pos; + + list_for_each(pos, head) + while (!list_is_last(pos, head) && + cmp(pos, pos->next) > 0) { + struct list_head *pos2, *next = pos->next; + + list_del(next); + list_for_each(pos2, head) + if (cmp(next, pos2) < 0) + goto pos_found; + BUG(); +pos_found: + list_add_tail(next, pos2); + } +} + +static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r) +{ + return cmp_int(l, r); +} + static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { @@ -575,41 +601,39 @@ static ssize_t 
bch2_btree_transactions_read(struct file *file, char __user *buf, struct bch_fs *c = i->c; struct btree_trans *trans; ssize_t ret = 0; - u32 seq; i->ubuf = buf; i->size = size; i->ret = 0; restart: seqmutex_lock(&c->btree_trans_lock); - list_for_each_entry(trans, &c->btree_trans_list, list) { - struct task_struct *task = READ_ONCE(trans->locking_wait.task); + list_sort(&c->btree_trans_list, list_ptr_order_cmp); - if (!task || task->pid <= i->iter) + list_for_each_entry(trans, &c->btree_trans_list, list) { + if ((ulong) trans <= i->iter) continue; - closure_get(&trans->ref); - seq = seqmutex_seq(&c->btree_trans_lock); - seqmutex_unlock(&c->btree_trans_lock); + i->iter = (ulong) trans; - ret = flush_buf(i); - if (ret) { - closure_put(&trans->ref); - goto unlocked; - } + if (!closure_get_not_zero(&trans->ref)) + continue; + + u32 seq = seqmutex_unlock(&c->btree_trans_lock); bch2_btree_trans_to_text(&i->buf, trans); prt_printf(&i->buf, "backtrace:\n"); printbuf_indent_add(&i->buf, 2); - bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL); + bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL); printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); - i->iter = task->pid; - closure_put(&trans->ref); + ret = flush_buf(i); + if (ret) + goto unlocked; + if (!seqmutex_relock(&c->btree_trans_lock, seq)) goto restart; } @@ -804,50 +828,55 @@ static const struct file_operations btree_transaction_stats_op = { .read = btree_transaction_stats_read, }; -static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, - size_t size, loff_t *ppos) +/* walk btree transactions until we find a deadlock and print it */ +static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c) { - struct dump_iter *i = file->private_data; - struct bch_fs *c = i->c; struct btree_trans *trans; - ssize_t ret = 0; - u32 seq; - - i->ubuf = buf; - i->size = size; - i->ret = 0; - - if (i->iter) - goto out; + ulong iter = 0; restart: 
seqmutex_lock(&c->btree_trans_lock); - list_for_each_entry(trans, &c->btree_trans_list, list) { - struct task_struct *task = READ_ONCE(trans->locking_wait.task); + list_sort(&c->btree_trans_list, list_ptr_order_cmp); - if (!task || task->pid <= i->iter) + list_for_each_entry(trans, &c->btree_trans_list, list) { + if ((ulong) trans <= iter) continue; - closure_get(&trans->ref); - seq = seqmutex_seq(&c->btree_trans_lock); - seqmutex_unlock(&c->btree_trans_lock); + iter = (ulong) trans; - ret = flush_buf(i); - if (ret) { - closure_put(&trans->ref); - goto out; - } + if (!closure_get_not_zero(&trans->ref)) + continue; - bch2_check_for_deadlock(trans, &i->buf); + u32 seq = seqmutex_unlock(&c->btree_trans_lock); - i->iter = task->pid; + bool found = bch2_check_for_deadlock(trans, out) != 0; closure_put(&trans->ref); + if (found) + return; + if (!seqmutex_relock(&c->btree_trans_lock, seq)) goto restart; } seqmutex_unlock(&c->btree_trans_lock); -out: +} + +static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + ssize_t ret = 0; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + if (!i->iter) { + btree_deadlock_to_text(&i->buf, c); + i->iter++; + } + if (i->buf.allocation_failure) ret = -ENOMEM; diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index dbe35b80bc0b..58612abf7927 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -116,6 +116,9 @@ x(ENOENT, ENOENT_dev_idx_not_found) \ x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \ x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \ + x(EEXIST, EEXIST_str_hash_set) \ + x(EEXIST, EEXIST_discard_in_flight_add) \ + x(EEXIST, EEXIST_subvolume_create) \ x(0, open_buckets_empty) \ x(0, freelist_empty) \ x(BCH_ERR_freelist_empty, no_buckets_found) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index c66eeffcd7f2..d95c40f1b6af 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ 
-15,6 +15,7 @@ bool bch2_inconsistent_error(struct bch_fs *c) switch (c->opts.errors) { case BCH_ON_ERROR_continue: return false; + case BCH_ON_ERROR_fix_safe: case BCH_ON_ERROR_ro: if (bch2_fs_emergency_read_only(c)) bch_err(c, "inconsistency detected - emergency read only at journal seq %llu", @@ -191,6 +192,12 @@ static void prt_actioning(struct printbuf *out, const char *action) prt_str(out, "ing"); } +static const u8 fsck_flags_extra[] = { +#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags, + BCH_SB_ERRS() +#undef x +}; + int bch2_fsck_err(struct bch_fs *c, enum bch_fsck_flags flags, enum bch_sb_error_id err, @@ -203,6 +210,9 @@ int bch2_fsck_err(struct bch_fs *c, int ret = -BCH_ERR_fsck_ignore; const char *action_orig = "fix?", *action = action_orig; + if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) + flags |= fsck_flags_extra[err]; + if ((flags & FSCK_CAN_FIX) && test_bit(err, c->sb.errors_silent)) return -BCH_ERR_fsck_fix; @@ -265,7 +275,14 @@ int bch2_fsck_err(struct bch_fs *c, prt_printf(out, bch2_log_msg(c, "")); #endif - if (!test_bit(BCH_FS_fsck_running, &c->flags)) { + if ((flags & FSCK_CAN_FIX) && + (flags & FSCK_AUTOFIX) && + (c->opts.errors == BCH_ON_ERROR_continue || + c->opts.errors == BCH_ON_ERROR_fix_safe)) { + prt_str(out, ", "); + prt_actioning(out, action); + ret = -BCH_ERR_fsck_fix; + } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { prt_str(out, ", shutting down"); diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 36caedf72d89..777711504c35 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -108,13 +108,6 @@ struct fsck_err_state { char *last_msg; }; -enum bch_fsck_flags { - FSCK_CAN_FIX = 1 << 0, - FSCK_CAN_IGNORE = 1 << 1, - FSCK_NEED_FSCK = 1 << 2, - FSCK_NO_RATELIMIT = 1 << 3, -}; - #define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err) __printf(4, 5) __cold diff --git a/fs/bcachefs/eytzinger.h 
b/fs/bcachefs/eytzinger.h index 24840aee335c..795f4fc0bab1 100644 --- a/fs/bcachefs/eytzinger.h +++ b/fs/bcachefs/eytzinger.h @@ -48,7 +48,7 @@ static inline unsigned eytzinger1_right_child(unsigned i) static inline unsigned eytzinger1_first(unsigned size) { - return rounddown_pow_of_two(size); + return size ? rounddown_pow_of_two(size) : 0; } static inline unsigned eytzinger1_last(unsigned size) @@ -101,7 +101,9 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size) static inline unsigned eytzinger1_extra(unsigned size) { - return (size + 1 - rounddown_pow_of_two(size)) << 1; + return size + ? (size + 1 - rounddown_pow_of_two(size)) << 1 + : 0; } static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 3551a737181b..79a0c8732bce 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -373,7 +373,7 @@ retry: } if (dst_dentry->d_inode) { - error = -EEXIST; + error = -BCH_ERR_EEXIST_subvolume_create; goto err3; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 77126992dba8..0c7d1bc0548a 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -188,6 +188,18 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino BUG_ON(!old); if (unlikely(old != inode)) { + /* + * bcachefs doesn't use I_NEW; we have no use for it since we + * only insert fully created inodes in the inode hash table. But + * discard_new_inode() expects it to be set... + */ + inode->v.i_flags |= I_NEW; + /* + * We don't want bch2_evict_inode() to delete the inode on disk, + * we just raced and had another inode in cache. Normally new + * inodes don't have nlink == 0 - except tmpfiles do... 
+ */ + set_nlink(&inode->v, 1); discard_new_inode(&inode->v); inode = old; } else { @@ -195,8 +207,10 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); mutex_unlock(&c->vfs_inodes_lock); /* - * we really don't want insert_inode_locked2() to be setting - * I_NEW... + * Again, I_NEW makes no sense for bcachefs. This is only needed + * for clearing I_NEW, but since the inode was already fully + * created and initialized we didn't actually want + * inode_insert5() to set it for us. */ unlock_new_inode(&inode->v); } @@ -230,7 +244,6 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c) inode->ei_flags = 0; mutex_init(&inode->ei_quota_lock); memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); - inode->v.i_state = 0; if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) { kmem_cache_free(bch2_inode_cache, inode); @@ -1157,6 +1170,7 @@ static const struct file_operations bch_file_operations = { .read_iter = bch2_read_iter, .write_iter = bch2_write_iter, .mmap = bch2_mmap, + .get_unmapped_area = thp_get_unmapped_area, .fsync = bch2_fsync, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, @@ -1488,11 +1502,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, bch2_iget5_set(&inode->v, &inum); bch2_inode_update_after_write(trans, inode, bi, ~0); - if (BCH_SUBVOLUME_SNAP(subvol)) - set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); - else - clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); - inode->v.i_blocks = bi->bi_sectors; inode->v.i_ino = bi->bi_inum; inode->v.i_rdev = bi->bi_dev; @@ -1504,6 +1513,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, inode->ei_qid = bch_qid(bi); inode->ei_subvol = inum.subvol; + if (BCH_SUBVOLUME_SNAP(subvol)) + set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); + inode->v.i_mapping->a_ops = &bch_address_space_operations; switch 
(inode->v.i_mode & S_IFMT) { @@ -2019,6 +2031,8 @@ err_put_super: __bch2_fs_stop(c); deactivate_locked_super(sb); err: + if (ret) + pr_err("error: %s", bch2_err_str(ret)); /* * On an inconsistency error in recovery we might see an -EROFS derived * errorcode (from the journal), but we don't want to return that to diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 4ec979b4b23e..4583c9386e8c 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -125,7 +125,7 @@ err_noprint: bch2_bkey_buf_exit(&old, c); if (closure_nr_remaining(&cl) != 1) { - bch2_trans_unlock(trans); + bch2_trans_unlock_long(trans); closure_sync(&cl); } diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index c97fa7002b06..ebf39ef72fb2 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -389,7 +389,6 @@ retry: bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - bch2_trans_unlock(trans); if (!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, @@ -1004,6 +1003,9 @@ get_bio: rbio->promote = promote; INIT_WORK(&rbio->work, NULL); + if (flags & BCH_READ_NODECODE) + orig->pick = pick; + rbio->bio.bi_opf = orig->bio.bi_opf; rbio->bio.bi_iter.bi_sector = pick.ptr.offset; rbio->bio.bi_end_io = bch2_read_endio; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index adec8e1ea73e..10b19791ec98 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1095,7 +1095,7 @@ unlock: return ret; } -int bch2_dev_journal_alloc(struct bch_dev *ca) +int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) { unsigned nr; int ret; @@ -1117,7 +1117,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca) min(1 << 13, (1 << 24) / ca->mi.bucket_size)); - ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL); + ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL); err: bch_err_fn(ca, ret); return ret; @@ -1129,7 +1129,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c) if (ca->journal.nr) continue; - int ret = bch2_dev_journal_alloc(ca); + int ret = 
bch2_dev_journal_alloc(ca, true); if (ret) { percpu_ref_put(&ca->io_ref); return ret; @@ -1167,6 +1167,9 @@ void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) void bch2_fs_journal_stop(struct journal *j) { + if (!test_bit(JOURNAL_running, &j->flags)) + return; + bch2_journal_reclaim_stop(j); bch2_journal_flush_all_pins(j); @@ -1181,9 +1184,11 @@ void bch2_fs_journal_stop(struct journal *j) journal_quiesce(j); cancel_delayed_work_sync(&j->write_work); - BUG_ON(!bch2_journal_error(j) && - test_bit(JOURNAL_replay_done, &j->flags) && - j->last_empty_seq != journal_cur_seq(j)); + WARN(!bch2_journal_error(j) && + test_bit(JOURNAL_replay_done, &j->flags) && + j->last_empty_seq != journal_cur_seq(j), + "journal shutdown error: cur seq %llu but last empty seq %llu", + journal_cur_seq(j), j->last_empty_seq); if (!bch2_journal_error(j)) clear_bit(JOURNAL_running, &j->flags); @@ -1415,8 +1420,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) unsigned long now = jiffies; u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes; - if (!out->nr_tabstops) - printbuf_tabstop_push(out, 28); + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 28); out->atomic++; rcu_read_lock(); @@ -1518,6 +1523,11 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 struct journal_entry_pin *pin; spin_lock(&j->lock); + if (!test_bit(JOURNAL_running, &j->flags)) { + spin_unlock(&j->lock); + return true; + } + *seq = max(*seq, j->pin.front); if (*seq >= j->pin.back) { diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index fd1f7cdaa8bc..bc6b9c39dcb4 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -433,7 +433,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned nr); -int bch2_dev_journal_alloc(struct bch_dev *); +int bch2_dev_journal_alloc(struct bch_dev *, bool); int 
bch2_fs_journal_alloc(struct bch_fs *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index cdcb1ad49af4..2326e2cb9cd2 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -415,6 +415,8 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c, flags|BCH_VALIDATE_journal); if (ret == FSCK_DELETED_KEY) continue; + else if (ret) + return ret; k = bkey_next(k); } @@ -1677,6 +1679,13 @@ static CLOSURE_CALLBACK(journal_write_done) mod_delayed_work(j->wq, &j->write_work, max(0L, delta)); } + /* + * We don't typically trigger journal writes from here - the next journal + * write will be triggered immediately after the previous one is + * allocated, in bch2_journal_write() - but the journal write error path + * is special: + */ + bch2_journal_do_writes(j); spin_unlock(&j->lock); } @@ -1755,11 +1764,13 @@ static CLOSURE_CALLBACK(journal_write_preflush) if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { spin_lock(&j->lock); - closure_wait(&j->async_wait, cl); + if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { + closure_wait(&j->async_wait, cl); + spin_unlock(&j->lock); + continue_at(cl, journal_write_preflush, j->wq); + return; + } spin_unlock(&j->lock); - - continue_at(cl, journal_write_preflush, j->wq); - return; } if (w->separate_flush) { @@ -1967,7 +1978,6 @@ CLOSURE_CALLBACK(bch2_journal_write) struct journal *j = container_of(w, struct journal, buf[w->idx]); struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_replicas_padded replicas; - struct printbuf journal_debug_buf = PRINTBUF; unsigned nr_rw_members = 0; int ret; @@ -2011,11 +2021,15 @@ CLOSURE_CALLBACK(bch2_journal_write) } if (ret) { - __bch2_journal_debug_to_text(&journal_debug_buf, j); + struct printbuf buf = PRINTBUF; + buf.atomic++; + + prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write: %s"), + bch2_err_str(ret)); + __bch2_journal_debug_to_text(&buf, j); 
spin_unlock(&j->lock); - bch_err(c, "Unable to allocate journal write:\n%s", - journal_debug_buf.buf); - printbuf_exit(&journal_debug_buf); + bch2_print_string_as_lines(KERN_ERR, buf.buf); + printbuf_exit(&buf); goto err; } diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index ed4846709611..1f25c111c54c 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -232,7 +232,7 @@ bool bch2_blacklist_entries_gc(struct bch_fs *c) BUG_ON(nr != t->nr); unsigned i; - for (src = bl->start, i = eytzinger0_first(t->nr); + for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr); src < bl->start + nr; src++, i = eytzinger0_next(i, nr)) { BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index a40d116224ed..b12894ef44f3 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -77,6 +77,45 @@ static const char * const bch2_lru_types[] = { NULL }; +int bch2_lru_check_set(struct btree_trans *trans, + u16 lru_id, u64 time, + struct bkey_s_c referring_k, + struct bkey_buf *last_flushed) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + struct btree_iter lru_iter; + struct bkey_s_c lru_k = + bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, + lru_pos(lru_id, + bucket_to_u64(referring_k.k->p), + time), 0); + int ret = bkey_err(lru_k); + if (ret) + return ret; + + if (lru_k.k->type != KEY_TYPE_set) { + ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed); + if (ret) + goto err; + + if (fsck_err(c, alloc_key_to_missing_lru_entry, + "missing %s lru entry\n" + " %s", + bch2_lru_types[lru_type(lru_k)], + (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) { + ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time); + if (ret) + goto err; + } + } +err: +fsck_err: + bch2_trans_iter_exit(trans, &lru_iter); + printbuf_exit(&buf); + return ret; +} + static int bch2_check_lru_key(struct 
btree_trans *trans, struct btree_iter *lru_iter, struct bkey_s_c lru_k, diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index fb11ab0dd00e..ed75bcf59d47 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -2,9 +2,6 @@ #ifndef _BCACHEFS_LRU_H #define _BCACHEFS_LRU_H -#define LRU_TIME_BITS 48 -#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1) - static inline u64 lru_pos_id(struct bpos pos) { return pos.inode >> LRU_TIME_BITS; @@ -64,6 +61,9 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64); int bch2_lru_set(struct btree_trans *, u16, u64, u64); int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64); +struct bkey_buf; +int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *); + int bch2_check_lrus(struct bch_fs *); #endif /* _BCACHEFS_LRU_H */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 6e477fadaa2a..e714e3bd5bbb 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -36,31 +36,6 @@ const char * const bch2_data_ops_strs[] = { NULL }; -static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - printbuf_tabstop_push(out, 20); - prt_str(out, "rewrite ptrs:\t"); - bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); - prt_newline(out); - - prt_str(out, "kill ptrs:\t"); - bch2_prt_u64_base2(out, data_opts->kill_ptrs); - prt_newline(out); - - prt_str(out, "target:\t"); - bch2_target_to_text(out, c, data_opts->target); - prt_newline(out); - - prt_str(out, "compression:\t"); - bch2_compression_opt_to_text(out, background_compression(*io_opts)); - prt_newline(out); - - prt_str(out, "extra replicas:\t"); - prt_u64(out, data_opts->extra_replicas); -} - static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 25530e0bb2f3..b197ec90d4cb 100644 --- a/fs/bcachefs/opts.h +++ 
b/fs/bcachefs/opts.h @@ -137,7 +137,7 @@ enum fsck_err_opts { x(errors, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_STR(bch2_error_actions), \ - BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \ + BCH_SB_ERROR_ACTION, BCH_ON_ERROR_fix_safe, \ NULL, "Action to take on filesystem error") \ x(metadata_replicas, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index cf513fc79ce4..1f9d044ed920 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -326,6 +326,12 @@ static int journal_replay_entry_early(struct bch_fs *c, case BCH_JSET_ENTRY_btree_root: { struct btree_root *r; + if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX, + c, invalid_btree_id, + "invalid btree id %u (max %u)", + entry->btree_id, BTREE_ID_NR_MAX)) + return 0; + while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) { ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL }); if (ret) @@ -415,7 +421,7 @@ static int journal_replay_entry_early(struct bch_fs *c, atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time)); } } - +fsck_err: return ret; } @@ -658,10 +664,10 @@ int bch2_fs_recovery(struct bch_fs *c) if (check_version_upgrade(c)) write_sb = true; + c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + if (write_sb) bch2_write_super(c); - - c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); mutex_unlock(&c->sb_lock); if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 3fb23e399ffb..4710b61631f0 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -228,7 +228,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) dst = (void *) &darray_top(table); dst->version = cpu_to_le16(src->version); - dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes); + 
dst->recovery_passes[0] = cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes)); dst->recovery_passes[1] = 0; dst->nr_errors = cpu_to_le16(src->nr_errors); for (unsigned i = 0; i < src->nr_errors; i++) diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index bda33e59e226..c1270d790e43 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -110,19 +110,25 @@ out: void bch2_sb_errors_from_cpu(struct bch_fs *c) { bch_sb_errors_cpu *src = &c->fsck_error_counts; - struct bch_sb_field_errors *dst = - bch2_sb_field_resize(&c->disk_sb, errors, - bch2_sb_field_errors_u64s(src->nr)); + struct bch_sb_field_errors *dst; unsigned i; + mutex_lock(&c->fsck_error_counts_lock); + + dst = bch2_sb_field_resize(&c->disk_sb, errors, + bch2_sb_field_errors_u64s(src->nr)); + if (!dst) - return; + goto err; for (i = 0; i < src->nr; i++) { SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id); SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr); dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time); } + +err: + mutex_unlock(&c->fsck_error_counts_lock); } static int bch2_sb_errors_to_cpu(struct bch_fs *c) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 84d2763bd597..d54121ec093f 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -2,281 +2,295 @@ #ifndef _BCACHEFS_SB_ERRORS_FORMAT_H #define _BCACHEFS_SB_ERRORS_FORMAT_H -#define BCH_SB_ERRS() \ - x(clean_but_journal_not_empty, 0) \ - x(dirty_but_no_journal_entries, 1) \ - x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \ - x(sb_clean_journal_seq_mismatch, 3) \ - x(sb_clean_btree_root_mismatch, 4) \ - x(sb_clean_missing, 5) \ - x(jset_unsupported_version, 6) \ - x(jset_unknown_csum, 7) \ - x(jset_last_seq_newer_than_seq, 8) \ - x(jset_past_bucket_end, 9) \ - x(jset_seq_blacklisted, 10) \ - x(journal_entries_missing, 11) \ - x(journal_entry_replicas_not_marked, 12) \ - 
x(journal_entry_past_jset_end, 13) \ - x(journal_entry_replicas_data_mismatch, 14) \ - x(journal_entry_bkey_u64s_0, 15) \ - x(journal_entry_bkey_past_end, 16) \ - x(journal_entry_bkey_bad_format, 17) \ - x(journal_entry_bkey_invalid, 18) \ - x(journal_entry_btree_root_bad_size, 19) \ - x(journal_entry_blacklist_bad_size, 20) \ - x(journal_entry_blacklist_v2_bad_size, 21) \ - x(journal_entry_blacklist_v2_start_past_end, 22) \ - x(journal_entry_usage_bad_size, 23) \ - x(journal_entry_data_usage_bad_size, 24) \ - x(journal_entry_clock_bad_size, 25) \ - x(journal_entry_clock_bad_rw, 26) \ - x(journal_entry_dev_usage_bad_size, 27) \ - x(journal_entry_dev_usage_bad_dev, 28) \ - x(journal_entry_dev_usage_bad_pad, 29) \ - x(btree_node_unreadable, 30) \ - x(btree_node_fault_injected, 31) \ - x(btree_node_bad_magic, 32) \ - x(btree_node_bad_seq, 33) \ - x(btree_node_unsupported_version, 34) \ - x(btree_node_bset_older_than_sb_min, 35) \ - x(btree_node_bset_newer_than_sb, 36) \ - x(btree_node_data_missing, 37) \ - x(btree_node_bset_after_end, 38) \ - x(btree_node_replicas_sectors_written_mismatch, 39) \ - x(btree_node_replicas_data_mismatch, 40) \ - x(bset_unknown_csum, 41) \ - x(bset_bad_csum, 42) \ - x(bset_past_end_of_btree_node, 43) \ - x(bset_wrong_sector_offset, 44) \ - x(bset_empty, 45) \ - x(bset_bad_seq, 46) \ - x(bset_blacklisted_journal_seq, 47) \ - x(first_bset_blacklisted_journal_seq, 48) \ - x(btree_node_bad_btree, 49) \ - x(btree_node_bad_level, 50) \ - x(btree_node_bad_min_key, 51) \ - x(btree_node_bad_max_key, 52) \ - x(btree_node_bad_format, 53) \ - x(btree_node_bkey_past_bset_end, 54) \ - x(btree_node_bkey_bad_format, 55) \ - x(btree_node_bad_bkey, 56) \ - x(btree_node_bkey_out_of_order, 57) \ - x(btree_root_bkey_invalid, 58) \ - x(btree_root_read_error, 59) \ - x(btree_root_bad_min_key, 60) \ - x(btree_root_bad_max_key, 61) \ - x(btree_node_read_error, 62) \ - x(btree_node_topology_bad_min_key, 63) \ - x(btree_node_topology_bad_max_key, 64) \ - 
x(btree_node_topology_overwritten_by_prev_node, 65) \ - x(btree_node_topology_overwritten_by_next_node, 66) \ - x(btree_node_topology_interior_node_empty, 67) \ - x(fs_usage_hidden_wrong, 68) \ - x(fs_usage_btree_wrong, 69) \ - x(fs_usage_data_wrong, 70) \ - x(fs_usage_cached_wrong, 71) \ - x(fs_usage_reserved_wrong, 72) \ - x(fs_usage_persistent_reserved_wrong, 73) \ - x(fs_usage_nr_inodes_wrong, 74) \ - x(fs_usage_replicas_wrong, 75) \ - x(dev_usage_buckets_wrong, 76) \ - x(dev_usage_sectors_wrong, 77) \ - x(dev_usage_fragmented_wrong, 78) \ - x(dev_usage_buckets_ec_wrong, 79) \ - x(bkey_version_in_future, 80) \ - x(bkey_u64s_too_small, 81) \ - x(bkey_invalid_type_for_btree, 82) \ - x(bkey_extent_size_zero, 83) \ - x(bkey_extent_size_greater_than_offset, 84) \ - x(bkey_size_nonzero, 85) \ - x(bkey_snapshot_nonzero, 86) \ - x(bkey_snapshot_zero, 87) \ - x(bkey_at_pos_max, 88) \ - x(bkey_before_start_of_btree_node, 89) \ - x(bkey_after_end_of_btree_node, 90) \ - x(bkey_val_size_nonzero, 91) \ - x(bkey_val_size_too_small, 92) \ - x(alloc_v1_val_size_bad, 93) \ - x(alloc_v2_unpack_error, 94) \ - x(alloc_v3_unpack_error, 95) \ - x(alloc_v4_val_size_bad, 96) \ - x(alloc_v4_backpointers_start_bad, 97) \ - x(alloc_key_data_type_bad, 98) \ - x(alloc_key_empty_but_have_data, 99) \ - x(alloc_key_dirty_sectors_0, 100) \ - x(alloc_key_data_type_inconsistency, 101) \ - x(alloc_key_to_missing_dev_bucket, 102) \ - x(alloc_key_cached_inconsistency, 103) \ - x(alloc_key_cached_but_read_time_zero, 104) \ - x(alloc_key_to_missing_lru_entry, 105) \ - x(alloc_key_data_type_wrong, 106) \ - x(alloc_key_gen_wrong, 107) \ - x(alloc_key_dirty_sectors_wrong, 108) \ - x(alloc_key_cached_sectors_wrong, 109) \ - x(alloc_key_stripe_wrong, 110) \ - x(alloc_key_stripe_redundancy_wrong, 111) \ - x(bucket_sector_count_overflow, 112) \ - x(bucket_metadata_type_mismatch, 113) \ - x(need_discard_key_wrong, 114) \ - x(freespace_key_wrong, 115) \ - x(freespace_hole_missing, 116) \ - 
x(bucket_gens_val_size_bad, 117) \ - x(bucket_gens_key_wrong, 118) \ - x(bucket_gens_hole_wrong, 119) \ - x(bucket_gens_to_invalid_dev, 120) \ - x(bucket_gens_to_invalid_buckets, 121) \ - x(bucket_gens_nonzero_for_invalid_buckets, 122) \ - x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ - x(need_discard_freespace_key_bad, 124) \ - x(backpointer_bucket_offset_wrong, 125) \ - x(backpointer_to_missing_device, 126) \ - x(backpointer_to_missing_alloc, 127) \ - x(backpointer_to_missing_ptr, 128) \ - x(lru_entry_at_time_0, 129) \ - x(lru_entry_to_invalid_bucket, 130) \ - x(lru_entry_bad, 131) \ - x(btree_ptr_val_too_big, 132) \ - x(btree_ptr_v2_val_too_big, 133) \ - x(btree_ptr_has_non_ptr, 134) \ - x(extent_ptrs_invalid_entry, 135) \ - x(extent_ptrs_no_ptrs, 136) \ - x(extent_ptrs_too_many_ptrs, 137) \ - x(extent_ptrs_redundant_crc, 138) \ - x(extent_ptrs_redundant_stripe, 139) \ - x(extent_ptrs_unwritten, 140) \ - x(extent_ptrs_written_and_unwritten, 141) \ - x(ptr_to_invalid_device, 142) \ - x(ptr_to_duplicate_device, 143) \ - x(ptr_after_last_bucket, 144) \ - x(ptr_before_first_bucket, 145) \ - x(ptr_spans_multiple_buckets, 146) \ - x(ptr_to_missing_backpointer, 147) \ - x(ptr_to_missing_alloc_key, 148) \ - x(ptr_to_missing_replicas_entry, 149) \ - x(ptr_to_missing_stripe, 150) \ - x(ptr_to_incorrect_stripe, 151) \ - x(ptr_gen_newer_than_bucket_gen, 152) \ - x(ptr_too_stale, 153) \ - x(stale_dirty_ptr, 154) \ - x(ptr_bucket_data_type_mismatch, 155) \ - x(ptr_cached_and_erasure_coded, 156) \ - x(ptr_crc_uncompressed_size_too_small, 157) \ - x(ptr_crc_csum_type_unknown, 158) \ - x(ptr_crc_compression_type_unknown, 159) \ - x(ptr_crc_redundant, 160) \ - x(ptr_crc_uncompressed_size_too_big, 161) \ - x(ptr_crc_nonce_mismatch, 162) \ - x(ptr_stripe_redundant, 163) \ - x(reservation_key_nr_replicas_invalid, 164) \ - x(reflink_v_refcount_wrong, 165) \ - x(reflink_p_to_missing_reflink_v, 166) \ - x(stripe_pos_bad, 167) \ - x(stripe_val_size_bad, 168) \ - 
x(stripe_sector_count_wrong, 169) \ - x(snapshot_tree_pos_bad, 170) \ - x(snapshot_tree_to_missing_snapshot, 171) \ - x(snapshot_tree_to_missing_subvol, 172) \ - x(snapshot_tree_to_wrong_subvol, 173) \ - x(snapshot_tree_to_snapshot_subvol, 174) \ - x(snapshot_pos_bad, 175) \ - x(snapshot_parent_bad, 176) \ - x(snapshot_children_not_normalized, 177) \ - x(snapshot_child_duplicate, 178) \ - x(snapshot_child_bad, 179) \ - x(snapshot_skiplist_not_normalized, 180) \ - x(snapshot_skiplist_bad, 181) \ - x(snapshot_should_not_have_subvol, 182) \ - x(snapshot_to_bad_snapshot_tree, 183) \ - x(snapshot_bad_depth, 184) \ - x(snapshot_bad_skiplist, 185) \ - x(subvol_pos_bad, 186) \ - x(subvol_not_master_and_not_snapshot, 187) \ - x(subvol_to_missing_root, 188) \ - x(subvol_root_wrong_bi_subvol, 189) \ - x(bkey_in_missing_snapshot, 190) \ - x(inode_pos_inode_nonzero, 191) \ - x(inode_pos_blockdev_range, 192) \ - x(inode_unpack_error, 193) \ - x(inode_str_hash_invalid, 194) \ - x(inode_v3_fields_start_bad, 195) \ - x(inode_snapshot_mismatch, 196) \ - x(inode_unlinked_but_clean, 197) \ - x(inode_unlinked_but_nlink_nonzero, 198) \ - x(inode_checksum_type_invalid, 199) \ - x(inode_compression_type_invalid, 200) \ - x(inode_subvol_root_but_not_dir, 201) \ - x(inode_i_size_dirty_but_clean, 202) \ - x(inode_i_sectors_dirty_but_clean, 203) \ - x(inode_i_sectors_wrong, 204) \ - x(inode_dir_wrong_nlink, 205) \ - x(inode_dir_multiple_links, 206) \ - x(inode_multiple_links_but_nlink_0, 207) \ - x(inode_wrong_backpointer, 208) \ - x(inode_wrong_nlink, 209) \ - x(inode_unreachable, 210) \ - x(deleted_inode_but_clean, 211) \ - x(deleted_inode_missing, 212) \ - x(deleted_inode_is_dir, 213) \ - x(deleted_inode_not_unlinked, 214) \ - x(extent_overlapping, 215) \ - x(extent_in_missing_inode, 216) \ - x(extent_in_non_reg_inode, 217) \ - x(extent_past_end_of_inode, 218) \ - x(dirent_empty_name, 219) \ - x(dirent_val_too_big, 220) \ - x(dirent_name_too_long, 221) \ - x(dirent_name_embedded_nul, 222) 
\ - x(dirent_name_dot_or_dotdot, 223) \ - x(dirent_name_has_slash, 224) \ - x(dirent_d_type_wrong, 225) \ - x(inode_bi_parent_wrong, 226) \ - x(dirent_in_missing_dir_inode, 227) \ - x(dirent_in_non_dir_inode, 228) \ - x(dirent_to_missing_inode, 229) \ - x(dirent_to_missing_subvol, 230) \ - x(dirent_to_itself, 231) \ - x(quota_type_invalid, 232) \ - x(xattr_val_size_too_small, 233) \ - x(xattr_val_size_too_big, 234) \ - x(xattr_invalid_type, 235) \ - x(xattr_name_invalid_chars, 236) \ - x(xattr_in_missing_inode, 237) \ - x(root_subvol_missing, 238) \ - x(root_dir_missing, 239) \ - x(root_inode_not_dir, 240) \ - x(dir_loop, 241) \ - x(hash_table_key_duplicate, 242) \ - x(hash_table_key_wrong_offset, 243) \ - x(unlinked_inode_not_on_deleted_list, 244) \ - x(reflink_p_front_pad_bad, 245) \ - x(journal_entry_dup_same_device, 246) \ - x(inode_bi_subvol_missing, 247) \ - x(inode_bi_subvol_wrong, 248) \ - x(inode_points_to_missing_dirent, 249) \ - x(inode_points_to_wrong_dirent, 250) \ - x(inode_bi_parent_nonzero, 251) \ - x(dirent_to_missing_parent_subvol, 252) \ - x(dirent_not_visible_in_parent_subvol, 253) \ - x(subvol_fs_path_parent_wrong, 254) \ - x(subvol_root_fs_path_parent_nonzero, 255) \ - x(subvol_children_not_set, 256) \ - x(subvol_children_bad, 257) \ - x(subvol_loop, 258) \ - x(subvol_unreachable, 259) \ - x(btree_node_bkey_bad_u64s, 260) \ - x(btree_node_topology_empty_interior_node, 261) \ - x(btree_ptr_v2_min_key_bad, 262) \ - x(btree_root_unreadable_and_scan_found_nothing, 263) \ - x(snapshot_node_missing, 264) \ - x(dup_backpointer_to_bad_csum_extent, 265) \ - x(btree_bitmap_not_marked, 266) \ - x(sb_clean_entry_overrun, 267) \ - x(btree_ptr_v2_written_0, 268) \ - x(subvol_snapshot_bad, 269) \ - x(subvol_inode_bad, 270) +enum bch_fsck_flags { + FSCK_CAN_FIX = 1 << 0, + FSCK_CAN_IGNORE = 1 << 1, + FSCK_NEED_FSCK = 1 << 2, + FSCK_NO_RATELIMIT = 1 << 3, + FSCK_AUTOFIX = 1 << 4, +}; + +#define BCH_SB_ERRS() \ + x(clean_but_journal_not_empty, 0, 0) \ + 
x(dirty_but_no_journal_entries, 1, 0) \ + x(dirty_but_no_journal_entries_post_drop_nonflushes, 2, 0) \ + x(sb_clean_journal_seq_mismatch, 3, 0) \ + x(sb_clean_btree_root_mismatch, 4, 0) \ + x(sb_clean_missing, 5, 0) \ + x(jset_unsupported_version, 6, 0) \ + x(jset_unknown_csum, 7, 0) \ + x(jset_last_seq_newer_than_seq, 8, 0) \ + x(jset_past_bucket_end, 9, 0) \ + x(jset_seq_blacklisted, 10, 0) \ + x(journal_entries_missing, 11, 0) \ + x(journal_entry_replicas_not_marked, 12, 0) \ + x(journal_entry_past_jset_end, 13, 0) \ + x(journal_entry_replicas_data_mismatch, 14, 0) \ + x(journal_entry_bkey_u64s_0, 15, 0) \ + x(journal_entry_bkey_past_end, 16, 0) \ + x(journal_entry_bkey_bad_format, 17, 0) \ + x(journal_entry_bkey_invalid, 18, 0) \ + x(journal_entry_btree_root_bad_size, 19, 0) \ + x(journal_entry_blacklist_bad_size, 20, 0) \ + x(journal_entry_blacklist_v2_bad_size, 21, 0) \ + x(journal_entry_blacklist_v2_start_past_end, 22, 0) \ + x(journal_entry_usage_bad_size, 23, 0) \ + x(journal_entry_data_usage_bad_size, 24, 0) \ + x(journal_entry_clock_bad_size, 25, 0) \ + x(journal_entry_clock_bad_rw, 26, 0) \ + x(journal_entry_dev_usage_bad_size, 27, 0) \ + x(journal_entry_dev_usage_bad_dev, 28, 0) \ + x(journal_entry_dev_usage_bad_pad, 29, 0) \ + x(btree_node_unreadable, 30, 0) \ + x(btree_node_fault_injected, 31, 0) \ + x(btree_node_bad_magic, 32, 0) \ + x(btree_node_bad_seq, 33, 0) \ + x(btree_node_unsupported_version, 34, 0) \ + x(btree_node_bset_older_than_sb_min, 35, 0) \ + x(btree_node_bset_newer_than_sb, 36, 0) \ + x(btree_node_data_missing, 37, 0) \ + x(btree_node_bset_after_end, 38, 0) \ + x(btree_node_replicas_sectors_written_mismatch, 39, 0) \ + x(btree_node_replicas_data_mismatch, 40, 0) \ + x(bset_unknown_csum, 41, 0) \ + x(bset_bad_csum, 42, 0) \ + x(bset_past_end_of_btree_node, 43, 0) \ + x(bset_wrong_sector_offset, 44, 0) \ + x(bset_empty, 45, 0) \ + x(bset_bad_seq, 46, 0) \ + x(bset_blacklisted_journal_seq, 47, 0) \ + 
x(first_bset_blacklisted_journal_seq, 48, 0) \ + x(btree_node_bad_btree, 49, 0) \ + x(btree_node_bad_level, 50, 0) \ + x(btree_node_bad_min_key, 51, 0) \ + x(btree_node_bad_max_key, 52, 0) \ + x(btree_node_bad_format, 53, 0) \ + x(btree_node_bkey_past_bset_end, 54, 0) \ + x(btree_node_bkey_bad_format, 55, 0) \ + x(btree_node_bad_bkey, 56, 0) \ + x(btree_node_bkey_out_of_order, 57, 0) \ + x(btree_root_bkey_invalid, 58, 0) \ + x(btree_root_read_error, 59, 0) \ + x(btree_root_bad_min_key, 60, 0) \ + x(btree_root_bad_max_key, 61, 0) \ + x(btree_node_read_error, 62, 0) \ + x(btree_node_topology_bad_min_key, 63, 0) \ + x(btree_node_topology_bad_max_key, 64, 0) \ + x(btree_node_topology_overwritten_by_prev_node, 65, 0) \ + x(btree_node_topology_overwritten_by_next_node, 66, 0) \ + x(btree_node_topology_interior_node_empty, 67, 0) \ + x(fs_usage_hidden_wrong, 68, FSCK_AUTOFIX) \ + x(fs_usage_btree_wrong, 69, FSCK_AUTOFIX) \ + x(fs_usage_data_wrong, 70, FSCK_AUTOFIX) \ + x(fs_usage_cached_wrong, 71, FSCK_AUTOFIX) \ + x(fs_usage_reserved_wrong, 72, FSCK_AUTOFIX) \ + x(fs_usage_persistent_reserved_wrong, 73, FSCK_AUTOFIX) \ + x(fs_usage_nr_inodes_wrong, 74, FSCK_AUTOFIX) \ + x(fs_usage_replicas_wrong, 75, FSCK_AUTOFIX) \ + x(dev_usage_buckets_wrong, 76, FSCK_AUTOFIX) \ + x(dev_usage_sectors_wrong, 77, FSCK_AUTOFIX) \ + x(dev_usage_fragmented_wrong, 78, FSCK_AUTOFIX) \ + x(dev_usage_buckets_ec_wrong, 79, FSCK_AUTOFIX) \ + x(bkey_version_in_future, 80, 0) \ + x(bkey_u64s_too_small, 81, 0) \ + x(bkey_invalid_type_for_btree, 82, 0) \ + x(bkey_extent_size_zero, 83, 0) \ + x(bkey_extent_size_greater_than_offset, 84, 0) \ + x(bkey_size_nonzero, 85, 0) \ + x(bkey_snapshot_nonzero, 86, 0) \ + x(bkey_snapshot_zero, 87, 0) \ + x(bkey_at_pos_max, 88, 0) \ + x(bkey_before_start_of_btree_node, 89, 0) \ + x(bkey_after_end_of_btree_node, 90, 0) \ + x(bkey_val_size_nonzero, 91, 0) \ + x(bkey_val_size_too_small, 92, 0) \ + x(alloc_v1_val_size_bad, 93, 0) \ + x(alloc_v2_unpack_error, 94, 0) \ + 
x(alloc_v3_unpack_error, 95, 0) \ + x(alloc_v4_val_size_bad, 96, 0) \ + x(alloc_v4_backpointers_start_bad, 97, 0) \ + x(alloc_key_data_type_bad, 98, 0) \ + x(alloc_key_empty_but_have_data, 99, 0) \ + x(alloc_key_dirty_sectors_0, 100, 0) \ + x(alloc_key_data_type_inconsistency, 101, 0) \ + x(alloc_key_to_missing_dev_bucket, 102, 0) \ + x(alloc_key_cached_inconsistency, 103, 0) \ + x(alloc_key_cached_but_read_time_zero, 104, 0) \ + x(alloc_key_to_missing_lru_entry, 105, 0) \ + x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \ + x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \ + x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \ + x(alloc_key_cached_sectors_wrong, 109, FSCK_AUTOFIX) \ + x(alloc_key_stripe_wrong, 110, FSCK_AUTOFIX) \ + x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \ + x(bucket_sector_count_overflow, 112, 0) \ + x(bucket_metadata_type_mismatch, 113, 0) \ + x(need_discard_key_wrong, 114, 0) \ + x(freespace_key_wrong, 115, 0) \ + x(freespace_hole_missing, 116, 0) \ + x(bucket_gens_val_size_bad, 117, 0) \ + x(bucket_gens_key_wrong, 118, 0) \ + x(bucket_gens_hole_wrong, 119, 0) \ + x(bucket_gens_to_invalid_dev, 120, 0) \ + x(bucket_gens_to_invalid_buckets, 121, 0) \ + x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \ + x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ + x(need_discard_freespace_key_bad, 124, 0) \ + x(backpointer_bucket_offset_wrong, 125, 0) \ + x(backpointer_to_missing_device, 126, 0) \ + x(backpointer_to_missing_alloc, 127, 0) \ + x(backpointer_to_missing_ptr, 128, 0) \ + x(lru_entry_at_time_0, 129, 0) \ + x(lru_entry_to_invalid_bucket, 130, 0) \ + x(lru_entry_bad, 131, 0) \ + x(btree_ptr_val_too_big, 132, 0) \ + x(btree_ptr_v2_val_too_big, 133, 0) \ + x(btree_ptr_has_non_ptr, 134, 0) \ + x(extent_ptrs_invalid_entry, 135, 0) \ + x(extent_ptrs_no_ptrs, 136, 0) \ + x(extent_ptrs_too_many_ptrs, 137, 0) \ + x(extent_ptrs_redundant_crc, 138, 0) \ + x(extent_ptrs_redundant_stripe, 139, 0) \ + x(extent_ptrs_unwritten, 140, 
0) \ + x(extent_ptrs_written_and_unwritten, 141, 0) \ + x(ptr_to_invalid_device, 142, 0) \ + x(ptr_to_duplicate_device, 143, 0) \ + x(ptr_after_last_bucket, 144, 0) \ + x(ptr_before_first_bucket, 145, 0) \ + x(ptr_spans_multiple_buckets, 146, 0) \ + x(ptr_to_missing_backpointer, 147, 0) \ + x(ptr_to_missing_alloc_key, 148, 0) \ + x(ptr_to_missing_replicas_entry, 149, 0) \ + x(ptr_to_missing_stripe, 150, 0) \ + x(ptr_to_incorrect_stripe, 151, 0) \ + x(ptr_gen_newer_than_bucket_gen, 152, 0) \ + x(ptr_too_stale, 153, 0) \ + x(stale_dirty_ptr, 154, 0) \ + x(ptr_bucket_data_type_mismatch, 155, 0) \ + x(ptr_cached_and_erasure_coded, 156, 0) \ + x(ptr_crc_uncompressed_size_too_small, 157, 0) \ + x(ptr_crc_csum_type_unknown, 158, 0) \ + x(ptr_crc_compression_type_unknown, 159, 0) \ + x(ptr_crc_redundant, 160, 0) \ + x(ptr_crc_uncompressed_size_too_big, 161, 0) \ + x(ptr_crc_nonce_mismatch, 162, 0) \ + x(ptr_stripe_redundant, 163, 0) \ + x(reservation_key_nr_replicas_invalid, 164, 0) \ + x(reflink_v_refcount_wrong, 165, 0) \ + x(reflink_p_to_missing_reflink_v, 166, 0) \ + x(stripe_pos_bad, 167, 0) \ + x(stripe_val_size_bad, 168, 0) \ + x(stripe_sector_count_wrong, 169, 0) \ + x(snapshot_tree_pos_bad, 170, 0) \ + x(snapshot_tree_to_missing_snapshot, 171, 0) \ + x(snapshot_tree_to_missing_subvol, 172, 0) \ + x(snapshot_tree_to_wrong_subvol, 173, 0) \ + x(snapshot_tree_to_snapshot_subvol, 174, 0) \ + x(snapshot_pos_bad, 175, 0) \ + x(snapshot_parent_bad, 176, 0) \ + x(snapshot_children_not_normalized, 177, 0) \ + x(snapshot_child_duplicate, 178, 0) \ + x(snapshot_child_bad, 179, 0) \ + x(snapshot_skiplist_not_normalized, 180, 0) \ + x(snapshot_skiplist_bad, 181, 0) \ + x(snapshot_should_not_have_subvol, 182, 0) \ + x(snapshot_to_bad_snapshot_tree, 183, 0) \ + x(snapshot_bad_depth, 184, 0) \ + x(snapshot_bad_skiplist, 185, 0) \ + x(subvol_pos_bad, 186, 0) \ + x(subvol_not_master_and_not_snapshot, 187, 0) \ + x(subvol_to_missing_root, 188, 0) \ + x(subvol_root_wrong_bi_subvol, 
189, 0) \ + x(bkey_in_missing_snapshot, 190, 0) \ + x(inode_pos_inode_nonzero, 191, 0) \ + x(inode_pos_blockdev_range, 192, 0) \ + x(inode_unpack_error, 193, 0) \ + x(inode_str_hash_invalid, 194, 0) \ + x(inode_v3_fields_start_bad, 195, 0) \ + x(inode_snapshot_mismatch, 196, 0) \ + x(inode_unlinked_but_clean, 197, 0) \ + x(inode_unlinked_but_nlink_nonzero, 198, 0) \ + x(inode_checksum_type_invalid, 199, 0) \ + x(inode_compression_type_invalid, 200, 0) \ + x(inode_subvol_root_but_not_dir, 201, 0) \ + x(inode_i_size_dirty_but_clean, 202, 0) \ + x(inode_i_sectors_dirty_but_clean, 203, 0) \ + x(inode_i_sectors_wrong, 204, 0) \ + x(inode_dir_wrong_nlink, 205, 0) \ + x(inode_dir_multiple_links, 206, 0) \ + x(inode_multiple_links_but_nlink_0, 207, 0) \ + x(inode_wrong_backpointer, 208, 0) \ + x(inode_wrong_nlink, 209, 0) \ + x(inode_unreachable, 210, 0) \ + x(deleted_inode_but_clean, 211, 0) \ + x(deleted_inode_missing, 212, 0) \ + x(deleted_inode_is_dir, 213, 0) \ + x(deleted_inode_not_unlinked, 214, 0) \ + x(extent_overlapping, 215, 0) \ + x(extent_in_missing_inode, 216, 0) \ + x(extent_in_non_reg_inode, 217, 0) \ + x(extent_past_end_of_inode, 218, 0) \ + x(dirent_empty_name, 219, 0) \ + x(dirent_val_too_big, 220, 0) \ + x(dirent_name_too_long, 221, 0) \ + x(dirent_name_embedded_nul, 222, 0) \ + x(dirent_name_dot_or_dotdot, 223, 0) \ + x(dirent_name_has_slash, 224, 0) \ + x(dirent_d_type_wrong, 225, 0) \ + x(inode_bi_parent_wrong, 226, 0) \ + x(dirent_in_missing_dir_inode, 227, 0) \ + x(dirent_in_non_dir_inode, 228, 0) \ + x(dirent_to_missing_inode, 229, 0) \ + x(dirent_to_missing_subvol, 230, 0) \ + x(dirent_to_itself, 231, 0) \ + x(quota_type_invalid, 232, 0) \ + x(xattr_val_size_too_small, 233, 0) \ + x(xattr_val_size_too_big, 234, 0) \ + x(xattr_invalid_type, 235, 0) \ + x(xattr_name_invalid_chars, 236, 0) \ + x(xattr_in_missing_inode, 237, 0) \ + x(root_subvol_missing, 238, 0) \ + x(root_dir_missing, 239, 0) \ + x(root_inode_not_dir, 240, 0) \ + x(dir_loop, 241, 0) 
\ + x(hash_table_key_duplicate, 242, 0) \ + x(hash_table_key_wrong_offset, 243, 0) \ + x(unlinked_inode_not_on_deleted_list, 244, 0) \ + x(reflink_p_front_pad_bad, 245, 0) \ + x(journal_entry_dup_same_device, 246, 0) \ + x(inode_bi_subvol_missing, 247, 0) \ + x(inode_bi_subvol_wrong, 248, 0) \ + x(inode_points_to_missing_dirent, 249, 0) \ + x(inode_points_to_wrong_dirent, 250, 0) \ + x(inode_bi_parent_nonzero, 251, 0) \ + x(dirent_to_missing_parent_subvol, 252, 0) \ + x(dirent_not_visible_in_parent_subvol, 253, 0) \ + x(subvol_fs_path_parent_wrong, 254, 0) \ + x(subvol_root_fs_path_parent_nonzero, 255, 0) \ + x(subvol_children_not_set, 256, 0) \ + x(subvol_children_bad, 257, 0) \ + x(subvol_loop, 258, 0) \ + x(subvol_unreachable, 259, 0) \ + x(btree_node_bkey_bad_u64s, 260, 0) \ + x(btree_node_topology_empty_interior_node, 261, 0) \ + x(btree_ptr_v2_min_key_bad, 262, 0) \ + x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ + x(snapshot_node_missing, 264, 0) \ + x(dup_backpointer_to_bad_csum_extent, 265, 0) \ + x(btree_bitmap_not_marked, 266, 0) \ + x(sb_clean_entry_overrun, 267, 0) \ + x(btree_ptr_v2_written_0, 268, 0) \ + x(subvol_snapshot_bad, 269, 0) \ + x(subvol_inode_bad, 270, 0) \ + x(alloc_key_stripe_sectors_wrong, 271, 0) \ + x(accounting_mismatch, 272, 0) \ + x(accounting_replicas_not_marked, 273, 0) \ + x(invalid_btree_id, 274, 0) \ + x(alloc_key_io_time_bad, 275, 0) \ + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) enum bch_sb_error_id { -#define x(t, n) BCH_FSCK_ERR_##t = n, +#define x(t, n, ...) 
BCH_FSCK_ERR_##t = n, BCH_SB_ERRS() #undef x BCH_SB_ERR_MAX diff --git a/fs/bcachefs/seqmutex.h b/fs/bcachefs/seqmutex.h index c1860d8163fb..c4b3d8d3f414 100644 --- a/fs/bcachefs/seqmutex.h +++ b/fs/bcachefs/seqmutex.h @@ -19,17 +19,14 @@ static inline bool seqmutex_trylock(struct seqmutex *lock) static inline void seqmutex_lock(struct seqmutex *lock) { mutex_lock(&lock->lock); -} - -static inline void seqmutex_unlock(struct seqmutex *lock) -{ lock->seq++; - mutex_unlock(&lock->lock); } -static inline u32 seqmutex_seq(struct seqmutex *lock) +static inline u32 seqmutex_unlock(struct seqmutex *lock) { - return lock->seq; + u32 seq = lock->seq; + mutex_unlock(&lock->lock); + return seq; } static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 51918acfd726..24023d6a9698 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -168,6 +168,9 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1)); size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]); + if (unlikely(new_bytes > INT_MAX)) + return NULL; + new = kvzalloc(new_bytes, GFP_KERNEL); if (!new) return NULL; @@ -1565,13 +1568,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) return 0; - if (!test_bit(BCH_FS_started, &c->flags)) { - ret = bch2_fs_read_write_early(c); - bch_err_msg(c, ret, "deleting dead snapshots: error going rw"); - if (ret) - return ret; - } - trans = bch2_trans_get(c); /* @@ -1687,6 +1683,8 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); + set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name); + bch2_delete_dead_snapshots(c); bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); } diff --git a/fs/bcachefs/str_hash.h 
b/fs/bcachefs/str_hash.h index cbad9b27874f..c8c266cb5797 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -300,7 +300,7 @@ not_found: if (!found && (flags & STR_HASH_must_replace)) { ret = -BCH_ERR_ENOENT_str_hash_set_must_replace; } else if (found && (flags & STR_HASH_must_create)) { - ret = -EEXIST; + ret = -BCH_ERR_EEXIST_str_hash_set; } else { if (!found && slot.path) swap(iter, slot); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 055478d21e9e..b156fc85b8a3 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -649,9 +649,10 @@ reread: bytes = vstruct_bytes(sb->sb); - if (bytes > 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits)) { - prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)", - bytes, 512UL << sb->sb->layout.sb_max_size_bits); + u64 sb_size = 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits); + if (bytes > sb_size) { + prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %llu)", + bytes, sb_size); return -BCH_ERR_invalid_sb_too_big; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 65e239d32915..da735608d47c 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -536,7 +536,6 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); - bch2_fs_allocator_background_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); @@ -564,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c) BUG_ON(atomic_read(&c->journal_keys.ref)); bch2_fs_btree_write_buffer_exit(c); percpu_free_rwsem(&c->mark_lock); - EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved)); - free_percpu(c->online_reserved); + if (c->online_reserved) { + u64 v = percpu_u64_get(c->online_reserved); + WARN(v, "online_reserved not 0 at shutdown: %lli", v); + free_percpu(c->online_reserved); + } 
darray_exit(&c->btree_roots_extra); free_percpu(c->pcpu); @@ -912,9 +914,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_io_clock_init(&c->io_clock[WRITE]) ?: bch2_fs_journal_init(&c->journal) ?: bch2_fs_replicas_init(c) ?: + bch2_fs_btree_iter_init(c) ?: bch2_fs_btree_cache_init(c) ?: bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: - bch2_fs_btree_iter_init(c) ?: bch2_fs_btree_interior_update_init(c) ?: bch2_fs_buckets_waiting_for_journal_init(c) ?: bch2_fs_btree_write_buffer_init(c) ?: @@ -931,12 +933,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; - for (i = 0; i < c->sb.nr_devices; i++) - if (bch2_member_exists(c->disk_sb.sb, i) && - bch2_dev_alloc(c, i)) { - ret = -EEXIST; + for (i = 0; i < c->sb.nr_devices; i++) { + if (!bch2_member_exists(c->disk_sb.sb, i)) + continue; + ret = bch2_dev_alloc(c, i); + if (ret) goto err; - } + } bch2_journal_entry_res_resize(&c->journal, &c->btree_root_journal_res, @@ -1194,6 +1197,7 @@ static void bch2_dev_free(struct bch_dev *ca) kfree(ca->buckets_nouse); bch2_free_super(&ca->disk_sb); + bch2_dev_allocator_background_exit(ca); bch2_dev_journal_exit(ca); free_percpu(ca->io_done); @@ -1316,6 +1320,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, atomic_long_set(&ca->ref, 1); #endif + bch2_dev_allocator_background_init(ca); + if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, PERCPU_REF_INIT_DEAD, GFP_KERNEL) || !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) || @@ -1528,6 +1534,7 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) * The allocator thread itself allocates btree nodes, so stop it first: */ bch2_dev_allocator_remove(c, ca); + bch2_recalc_capacity(c); bch2_dev_journal_stop(&c->journal, ca); } @@ -1539,6 +1546,7 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); + 
bch2_dev_do_discards(ca); } int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, @@ -1764,7 +1772,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (ret) goto err; - ret = bch2_dev_journal_alloc(ca); + ret = bch2_dev_journal_alloc(ca, true); bch_err_msg(c, ret, "allocating journal"); if (ret) goto err; @@ -1924,7 +1932,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path) } if (!ca->journal.nr) { - ret = bch2_dev_journal_alloc(ca); + ret = bch2_dev_journal_alloc(ca, false); bch_err_msg(ca, ret, "allocating journal"); if (ret) goto err; diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index de331dec2a99..4ec7e44d6e36 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -252,8 +252,10 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v) bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); } -void bch2_print_string_as_lines(const char *prefix, const char *lines) +static void __bch2_print_string_as_lines(const char *prefix, const char *lines, + bool nonblocking) { + bool locked = false; const char *p; if (!lines) { @@ -261,7 +263,13 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines) return; } - console_lock(); + if (!nonblocking) { + console_lock(); + locked = true; + } else { + locked = console_trylock(); + } + while (1) { p = strchrnul(lines, '\n'); printk("%s%.*s\n", prefix, (int) (p - lines), lines); @@ -269,7 +277,18 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines) break; lines = p + 1; } - console_unlock(); + if (locked) + console_unlock(); +} + +void bch2_print_string_as_lines(const char *prefix, const char *lines) +{ + return __bch2_print_string_as_lines(prefix, lines, false); +} + +void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines) +{ + return __bch2_print_string_as_lines(prefix, lines, true); } int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr, diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 
5d2c470a49ac..5b0533ec4c7e 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -315,6 +315,7 @@ void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned); void bch2_prt_u64_base2(struct printbuf *, u64); void bch2_print_string_as_lines(const char *prefix, const char *lines); +void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines); typedef DARRAY(unsigned long) bch_stacktrace; int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index d76f406d3b2e..f92f108840f5 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -475,6 +475,7 @@ static int befs_symlink_read_folio(struct file *unused, struct folio *folio) befs_data_stream *data = &befs_ino->i_data.ds; befs_off_t len = data->size; char *link = folio_address(folio); + int err = -EIO; if (len == 0 || len > PAGE_SIZE) { befs_error(sb, "Long symlink with illegal length"); @@ -487,13 +488,10 @@ static int befs_symlink_read_folio(struct file *unused, struct folio *folio) goto fail; } link[len - 1] = '\0'; - folio_mark_uptodate(folio); - folio_unlock(folio); - return 0; + err = 0; fail: - folio_set_error(folio); - folio_unlock(folio); - return -EIO; + folio_end_read(folio, err == 0); + return err; } /* diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index a43897b03ce9..6fdec541f8bf 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1216,7 +1216,6 @@ out_free_interp: } reloc_func_desc = interp_load_addr; - allow_write_access(interpreter); fput(interpreter); kfree(interp_elf_ex); @@ -1308,7 +1307,6 @@ out_free_dentry: kfree(interp_elf_ex); kfree(interp_elf_phdata); out_free_file: - allow_write_access(interpreter); if (interpreter) fput(interpreter); out_free_ph: diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index b799701454a9..28a3439f163a 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -394,7 +394,6 @@ static int load_elf_fdpic_binary(struct linux_binprm 
*bprm) goto error; } - allow_write_access(interpreter); fput(interpreter); interpreter = NULL; } @@ -466,10 +465,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) retval = 0; error: - if (interpreter) { - allow_write_access(interpreter); + if (interpreter) fput(interpreter); - } kfree(interpreter_name); kfree(exec_params.phdrs); kfree(exec_params.loadmap); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 68fa225f89e5..31660d8cc2c6 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -247,13 +247,10 @@ static int load_misc_binary(struct linux_binprm *bprm) if (retval < 0) goto ret; - if (fmt->flags & MISC_FMT_OPEN_FILE) { + if (fmt->flags & MISC_FMT_OPEN_FILE) interp_file = file_clone_open(fmt->interp_file); - if (!IS_ERR(interp_file)) - deny_write_access(interp_file); - } else { + else interp_file = open_exec(fmt->interpreter); - } retval = PTR_ERR(interp_file); if (IS_ERR(interp_file)) goto ret; @@ -1086,4 +1083,5 @@ static void __exit exit_misc_binfmt(void) core_initcall(init_misc_binfmt); module_exit(exit_misc_binfmt); +MODULE_DESCRIPTION("Kernel support for miscellaneous binaries"); MODULE_LICENSE("GPL"); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 1b6625e95958..637daf6e4d45 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -155,4 +155,5 @@ static void __exit exit_script_binfmt(void) core_initcall(init_script_binfmt); module_exit(exit_script_binfmt); +MODULE_DESCRIPTION("Kernel support for scripts starting with #!"); MODULE_LICENSE("GPL"); diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 477f350a8bd0..e3a57196b0ee 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -741,7 +741,9 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) ret = btrfs_bio_csum(bbio); if (ret) goto fail_put_bio; - } else if (use_append) { + } else if (use_append || + (btrfs_is_zoned(fs_info) && inode && + inode->flags & BTRFS_INODE_NODATASUM)) { ret = btrfs_alloc_dummy_sum(bbio); if (ret) goto fail_put_bio; diff 
--git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 1e09aeea69c2..60066822b532 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1785,6 +1785,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) container_of(work, struct btrfs_fs_info, reclaim_bgs_work); struct btrfs_block_group *bg; struct btrfs_space_info *space_info; + LIST_HEAD(retry_list); if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) return; @@ -1921,8 +1922,20 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) } next: - if (ret) - btrfs_mark_bg_to_reclaim(bg); + if (ret) { + /* Refcount held by the reclaim_bgs list after splice. */ + spin_lock(&fs_info->unused_bgs_lock); + /* + * This block group might be added to the unused list + * during the above process. Move it back to the + * reclaim list otherwise. + */ + if (list_empty(&bg->bg_list)) { + btrfs_get_block_group(bg); + list_add_tail(&bg->bg_list, &retry_list); + } + spin_unlock(&fs_info->unused_bgs_lock); + } btrfs_put_block_group(bg); mutex_unlock(&fs_info->reclaim_bgs_lock); @@ -1942,6 +1955,9 @@ next: spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); end: + spin_lock(&fs_info->unused_bgs_lock); + list_splice_tail(&retry_list, &fs_info->reclaim_bgs); + spin_unlock(&fs_info->unused_bgs_lock); btrfs_exclop_finish(fs_info); sb_end_write(fs_info->sb); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 38cdb8875e8e..cabb558dbdaa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2856,6 +2856,8 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block if (ret) return ret; + spin_lock_init(&fs_info->extent_map_shrinker_lock); + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) return ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f688fab55251..958155cc43a8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3553,7 +3553,7 @@ err: for (int i = 0; i < num_folios; i++) { if 
(eb->folios[i]) { detach_extent_buffer_folio(eb, eb->folios[i]); - __folio_put(eb->folios[i]); + folio_put(eb->folios[i]); } } __free_extent_buffer(eb); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 744e8952abb0..b4c9a6aa118c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1028,7 +1028,14 @@ out_free_pre: return ret; } -static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_to_scan) +struct btrfs_em_shrink_ctx { + long nr_to_scan; + long scanned; + u64 last_ino; + u64 last_root; +}; + +static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_ctx *ctx) { const u64 cur_fs_gen = btrfs_get_fs_generation(inode->root->fs_info); struct extent_map_tree *tree = &inode->extent_tree; @@ -1057,14 +1064,25 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t if (!down_read_trylock(&inode->i_mmap_lock)) return 0; - write_lock(&tree->lock); + /* + * We want to be fast because we can be called from any path trying to + * allocate memory, so if the lock is busy we don't want to spend time + * waiting for it - either some task is about to do IO for the inode or + * we may have another task shrinking extent maps, here in this code, so + * skip this inode. + */ + if (!write_trylock(&tree->lock)) { + up_read(&inode->i_mmap_lock); + return 0; + } + node = rb_first_cached(&tree->map); while (node) { struct extent_map *em; em = rb_entry(node, struct extent_map, rb_node); node = rb_next(node); - (*scanned)++; + ctx->scanned++; if (em->flags & EXTENT_FLAG_PINNED) goto next; @@ -1085,16 +1103,18 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t free_extent_map(em); nr_dropped++; next: - if (*scanned >= nr_to_scan) + if (ctx->scanned >= ctx->nr_to_scan) break; /* - * Restart if we had to reschedule, and any extent maps that were - * pinned before may have become unpinned after we released the - * lock and took it again. 
+ * Stop if we need to reschedule or there's contention on the + * lock. This is to avoid slowing other tasks trying to take the + * lock and because the shrinker might be called during a memory + * allocation path and we want to avoid taking a very long time + * and slowing down all sorts of tasks. */ - if (cond_resched_rwlock_write(&tree->lock)) - node = rb_first_cached(&tree->map); + if (need_resched() || rwlock_needbreak(&tree->lock)) + break; } write_unlock(&tree->lock); up_read(&inode->i_mmap_lock); @@ -1102,25 +1122,30 @@ next: return nr_dropped; } -static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_scan) +static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx) { - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_inode *inode; long nr_dropped = 0; - u64 min_ino = fs_info->extent_map_shrinker_last_ino + 1; + u64 min_ino = ctx->last_ino + 1; inode = btrfs_find_first_inode(root, min_ino); while (inode) { - nr_dropped += btrfs_scan_inode(inode, scanned, nr_to_scan); + nr_dropped += btrfs_scan_inode(inode, ctx); min_ino = btrfs_ino(inode) + 1; - fs_info->extent_map_shrinker_last_ino = btrfs_ino(inode); - iput(&inode->vfs_inode); + ctx->last_ino = btrfs_ino(inode); + btrfs_add_delayed_iput(inode); - if (*scanned >= nr_to_scan) + if (ctx->scanned >= ctx->nr_to_scan) + break; + + /* + * We may be called from memory allocation paths, so we don't + * want to take too much time and slowdown tasks. + */ + if (need_resched()) break; - cond_resched(); inode = btrfs_find_first_inode(root, min_ino); } @@ -1132,14 +1157,14 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_s * inode if there is one or we will find out this was the last * one and move to the next root. 
*/ - fs_info->extent_map_shrinker_last_root = btrfs_root_id(root); + ctx->last_root = btrfs_root_id(root); } else { /* * No more inodes in this root, set extent_map_shrinker_last_ino to 0 so * that when processing the next root we start from its first inode. */ - fs_info->extent_map_shrinker_last_ino = 0; - fs_info->extent_map_shrinker_last_root = btrfs_root_id(root) + 1; + ctx->last_ino = 0; + ctx->last_root = btrfs_root_id(root) + 1; } return nr_dropped; @@ -1147,19 +1172,41 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_s long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) { - const u64 start_root_id = fs_info->extent_map_shrinker_last_root; - u64 next_root_id = start_root_id; + struct btrfs_em_shrink_ctx ctx; + u64 start_root_id; + u64 next_root_id; bool cycled = false; long nr_dropped = 0; - long scanned = 0; + + ctx.scanned = 0; + ctx.nr_to_scan = nr_to_scan; + + /* + * In case we have multiple tasks running this shrinker, make the next + * one start from the next inode in case it starts before we finish. + */ + spin_lock(&fs_info->extent_map_shrinker_lock); + ctx.last_ino = fs_info->extent_map_shrinker_last_ino; + fs_info->extent_map_shrinker_last_ino++; + ctx.last_root = fs_info->extent_map_shrinker_last_root; + spin_unlock(&fs_info->extent_map_shrinker_lock); + + start_root_id = ctx.last_root; + next_root_id = ctx.last_root; if (trace_btrfs_extent_map_shrinker_scan_enter_enabled()) { s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps); - trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan, nr); + trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan, + nr, ctx.last_root, + ctx.last_ino); } - while (scanned < nr_to_scan) { + /* + * We may be called from memory allocation paths, so we don't want to + * take too much time and slowdown tasks, so stop if we need reschedule. 
+ */ + while (ctx.scanned < ctx.nr_to_scan && !need_resched()) { struct btrfs_root *root; unsigned long count; @@ -1171,8 +1218,8 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) spin_unlock(&fs_info->fs_roots_radix_lock); if (start_root_id > 0 && !cycled) { next_root_id = 0; - fs_info->extent_map_shrinker_last_root = 0; - fs_info->extent_map_shrinker_last_ino = 0; + ctx.last_root = 0; + ctx.last_ino = 0; cycled = true; continue; } @@ -1186,15 +1233,33 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) continue; if (is_fstree(btrfs_root_id(root))) - nr_dropped += btrfs_scan_root(root, &scanned, nr_to_scan); + nr_dropped += btrfs_scan_root(root, &ctx); btrfs_put_root(root); } + /* + * In case of multiple tasks running this extent map shrinking code this + * isn't perfect but it's simple and silences things like KCSAN. It's + * not possible to know which task made more progress because we can + * cycle back to the first root and first inode if it's not the first + * time the shrinker ran, see the above logic. Also a task that started + * later may finish ealier than another task and made less progress. So + * make this simple and update to the progress of the last task that + * finished, with the occasional possiblity of having two consecutive + * runs of the shrinker process the same inodes. 
+ */ + spin_lock(&fs_info->extent_map_shrinker_lock); + fs_info->extent_map_shrinker_last_ino = ctx.last_ino; + fs_info->extent_map_shrinker_last_root = ctx.last_root; + spin_unlock(&fs_info->extent_map_shrinker_lock); + if (trace_btrfs_extent_map_shrinker_scan_exit_enabled()) { s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps); - trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped, nr); + trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped, + nr, ctx.last_root, + ctx.last_ino); } return nr_dropped; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 3ab8dea5036b..dabc3d0793cf 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2697,7 +2697,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, u64 offset = bytenr - block_group->start; u64 to_free, to_unusable; int bg_reclaim_threshold = 0; - bool initial = (size == block_group->length); + bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); u64 reclaimable_unusable; WARN_ON(!initial && offset + size > block_group->zone_capacity); diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 89f0650631cd..833dc3fe0a38 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -630,6 +630,7 @@ struct btrfs_fs_info { s32 delalloc_batch; struct percpu_counter evictable_extent_maps; + spinlock_t extent_map_shrinker_lock; u64 extent_map_shrinker_last_root; u64 extent_map_shrinker_last_ino; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 753db965f7c0..d62c96f00ff8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5587,7 +5587,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino, args.ino = ino; args.root = root; - inode = iget5_locked(s, hashval, btrfs_find_actor, + inode = iget5_locked_rcu(s, hashval, btrfs_find_actor, btrfs_init_locked_inode, (void *)&args); return inode; @@ -10385,7 +10385,7 @@ out_unlock: out_folios: for (i = 0; i < nr_folios; i++) { if 
(folios[i]) - __folio_put(folios[i]); + folio_put(folios[i]); } kvfree(folios); out: diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index fc2a7ea26354..39a15cca58ca 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1351,7 +1351,7 @@ static int flush_reservations(struct btrfs_fs_info *fs_info) int btrfs_quota_disable(struct btrfs_fs_info *fs_info) { - struct btrfs_root *quota_root; + struct btrfs_root *quota_root = NULL; struct btrfs_trans_handle *trans = NULL; int ret = 0; @@ -1449,9 +1449,9 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) btrfs_free_tree_block(trans, btrfs_root_id(quota_root), quota_root->node, 0, 1); - btrfs_put_root(quota_root); out: + btrfs_put_root(quota_root); mutex_unlock(&fs_info->qgroup_ioctl_lock); if (ret && trans) btrfs_end_transaction(trans); @@ -3062,8 +3062,6 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info, struct btrfs_qgroup_inherit *inherit, size_t size) { - if (!btrfs_qgroup_enabled(fs_info)) - return 0; if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP) return -EOPNOTSUPP; if (size < sizeof(*inherit) || size > PAGE_SIZE) @@ -3085,6 +3083,14 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info, return -EINVAL; /* + * Skip the inherit source qgroups check if qgroup is not enabled. + * Qgroup can still be later enabled causing problems, but in that case + * btrfs_qgroup_inherit() would just ignore those invalid ones. 
+ */ + if (!btrfs_qgroup_enabled(fs_info)) + return 0; + + /* * Now check all the remaining qgroups, they should all: * * - Exist diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index cf531255ab76..9522a8b79d22 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -441,7 +441,8 @@ static int process_extent_item(struct btrfs_fs_info *fs_info, u32 item_size = btrfs_item_size(leaf, slot); unsigned long end, ptr; u64 offset, flags, count; - int type, ret; + int type; + int ret = 0; ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); flags = btrfs_extent_flags(leaf, ei); @@ -486,7 +487,11 @@ static int process_extent_item(struct btrfs_fs_info *fs_info, key->objectid, key->offset); break; case BTRFS_EXTENT_OWNER_REF_KEY: - WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)); + if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) { + btrfs_err(fs_info, + "found extent owner ref without simple quotas enabled"); + ret = -EINVAL; + } break; default: btrfs_err(fs_info, "invalid key type in iref"); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index afd6932f5e89..d7caa3732f07 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1688,20 +1688,24 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, (i << fs_info->sectorsize_bits); int err; - bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, - fs_info, scrub_read_endio, stripe); - bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT; - io_stripe.is_scrub = true; + stripe_len = (nr_sectors - i) << fs_info->sectorsize_bits; + /* + * For RST cases, we need to manually split the bbio to + * follow the RST boundary. 
+ */ err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, - &stripe_len, &bioc, &io_stripe, - &mirror); + &stripe_len, &bioc, &io_stripe, &mirror); btrfs_put_bioc(bioc); - if (err) { - btrfs_bio_end_io(bbio, - errno_to_blk_status(err)); - return; + if (err < 0) { + set_bit(i, &stripe->io_error_bitmap); + set_bit(i, &stripe->error_bitmap); + continue; } + + bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, + fs_info, scrub_read_endio, stripe); + bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT; } __bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index d620323d08ea..ae8c56442549 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -373,11 +373,18 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, * "optimal" chunk size based on the fs size. However when we actually * allocate the chunk we will strip this down further, making it no more * than 10% of the disk or 1G, whichever is smaller. + * + * On the zoned mode, we need to use zone_size (= + * data_sinfo->chunk_size) as it is. */ data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); - data_chunk_size = min(data_sinfo->chunk_size, - mult_perc(fs_info->fs_devices->total_rw_bytes, 10)); - data_chunk_size = min_t(u64, data_chunk_size, SZ_1G); + if (!btrfs_is_zoned(fs_info)) { + data_chunk_size = min(data_sinfo->chunk_size, + mult_perc(fs_info->fs_devices->total_rw_bytes, 10)); + data_chunk_size = min_t(u64, data_chunk_size, SZ_1G); + } else { + data_chunk_size = data_sinfo->chunk_size; + } /* * Since data allocations immediately use block groups as part of the @@ -405,6 +412,17 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, avail >>= 3; else avail >>= 1; + + /* + * On the zoned mode, we always allocate one zone as one chunk. 
+ * Returning non-zone size alingned bytes here will result in + * less pressure for the async metadata reclaim process, and it + * will over-commit too much leading to ENOSPC. Align down to the + * zone size to avoid that. + */ + if (btrfs_is_zoned(fs_info)) + avail = ALIGN_DOWN(avail, fs_info->zone_size); + return avail; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 26a2e5aa08e9..0bce1d45e252 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -138,6 +138,25 @@ static void wait_log_commit(struct btrfs_root *root, int transid); * and once to do all the other items. */ +static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root) +{ + unsigned int nofs_flag; + struct inode *inode; + + /* + * We're holding a transaction handle whether we are logging or + * replaying a log tree, so we must make sure NOFS semantics apply + * because btrfs_alloc_inode() may be triggered and it uses GFP_KERNEL + * to allocate an inode, which can recurse back into the filesystem and + * attempt a transaction commit, resulting in a deadlock. 
+ */ + nofs_flag = memalloc_nofs_save(); + inode = btrfs_iget(root->fs_info->sb, objectid, root); + memalloc_nofs_restore(nofs_flag); + + return inode; +} + /* * start a sub transaction and setup the log tree * this increments the log tree writer count to make the people @@ -600,7 +619,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root, { struct inode *inode; - inode = btrfs_iget(root->fs_info->sb, objectid, root); + inode = btrfs_iget_logging(objectid, root); if (IS_ERR(inode)) inode = NULL; return inode; @@ -5438,7 +5457,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx) { struct btrfs_root *root = start_inode->root; - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_path *path; LIST_HEAD(dir_list); struct btrfs_dir_list *dir_elem; @@ -5499,7 +5517,7 @@ again: continue; btrfs_release_path(path); - di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root); + di_inode = btrfs_iget_logging(di_key.objectid, root); if (IS_ERR(di_inode)) { ret = PTR_ERR(di_inode); goto out; @@ -5559,7 +5577,7 @@ again: btrfs_add_delayed_iput(curr_inode); curr_inode = NULL; - vfs_inode = btrfs_iget(fs_info->sb, ino, root); + vfs_inode = btrfs_iget_logging(ino, root); if (IS_ERR(vfs_inode)) { ret = PTR_ERR(vfs_inode); break; @@ -5654,7 +5672,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) return BTRFS_LOG_FORCE_COMMIT; - inode = btrfs_iget(root->fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); /* * If the other inode that had a conflicting dir entry was deleted in * the current transaction then we either: @@ -5755,7 +5773,6 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; /* @@ -5786,7 +5803,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, 
list_del(&curr->list); kfree(curr); - inode = btrfs_iget(fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); /* * If the other inode that had a conflicting dir entry was * deleted in the current transaction, we need to log its parent @@ -5797,7 +5814,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, if (ret != -ENOENT) break; - inode = btrfs_iget(fs_info->sb, parent, root); + inode = btrfs_iget_logging(parent, root); if (IS_ERR(inode)) { ret = PTR_ERR(inode); break; @@ -6319,7 +6336,6 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx) { const bool orig_log_new_dentries = ctx->log_new_dentries; - struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_item *item; int ret = 0; @@ -6345,7 +6361,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, if (key.type == BTRFS_ROOT_ITEM_KEY) continue; - di_inode = btrfs_iget(fs_info->sb, key.objectid, inode->root); + di_inode = btrfs_iget_logging(key.objectid, inode->root); if (IS_ERR(di_inode)) { ret = PTR_ERR(di_inode); break; @@ -6729,7 +6745,6 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_path *path; struct btrfs_key key; @@ -6794,8 +6809,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, cur_offset = item_size; } - dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid, - root); + dir_inode = btrfs_iget_logging(inode_key.objectid, root); /* * If the parent inode was deleted, return an error to * fallback to a transaction commit. 
This is to prevent @@ -6857,7 +6871,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); while (true) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *leaf; int slot; struct btrfs_key search_key; @@ -6872,7 +6885,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, search_key.objectid = found_key.offset; search_key.type = BTRFS_INODE_ITEM_KEY; search_key.offset = 0; - inode = btrfs_iget(fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); if (IS_ERR(inode)) return PTR_ERR(inode); diff --git a/fs/buffer.c b/fs/buffer.c index 8c19e705b9c3..dbe8f411ce52 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -258,7 +258,6 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) } else { clear_buffer_uptodate(bh); buffer_io_error(bh, ", async page read"); - folio_set_error(folio); } /* @@ -391,7 +390,6 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) buffer_io_error(bh, ", lost async page write"); mark_buffer_write_io_error(bh); clear_buffer_uptodate(bh); - folio_set_error(folio); } first = folio_buffers(folio); @@ -1960,7 +1958,6 @@ recover: clear_buffer_dirty(bh); } } while ((bh = bh->b_this_page) != head); - folio_set_error(folio); BUG_ON(folio_test_writeback(folio)); mapping_set_error(folio->mapping, err); folio_start_writeback(folio); @@ -2405,10 +2402,8 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) if (iblock < lblock) { WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); - if (err) { - folio_set_error(folio); + if (err) page_error = true; - } } if (!buffer_mapped(bh)) { folio_zero_range(folio, i * blocksize, diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index f449f7340aad..9fb06dc16520 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -8,6 +8,7 @@ #include <linux/slab.h> #include <linux/statfs.h> #include 
<linux/namei.h> +#include <trace/events/fscache.h> #include "internal.h" /* @@ -312,19 +313,59 @@ static void cachefiles_withdraw_objects(struct cachefiles_cache *cache) } /* - * Withdraw volumes. + * Withdraw fscache volumes. + */ +static void cachefiles_withdraw_fscache_volumes(struct cachefiles_cache *cache) +{ + struct list_head *cur; + struct cachefiles_volume *volume; + struct fscache_volume *vcookie; + + _enter(""); +retry: + spin_lock(&cache->object_list_lock); + list_for_each(cur, &cache->volumes) { + volume = list_entry(cur, struct cachefiles_volume, cache_link); + + if (atomic_read(&volume->vcookie->n_accesses) == 0) + continue; + + vcookie = fscache_try_get_volume(volume->vcookie, + fscache_volume_get_withdraw); + if (vcookie) { + spin_unlock(&cache->object_list_lock); + fscache_withdraw_volume(vcookie); + fscache_put_volume(vcookie, fscache_volume_put_withdraw); + goto retry; + } + } + spin_unlock(&cache->object_list_lock); + + _leave(""); +} + +/* + * Withdraw cachefiles volumes. 
*/ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache) { _enter(""); for (;;) { + struct fscache_volume *vcookie = NULL; struct cachefiles_volume *volume = NULL; spin_lock(&cache->object_list_lock); if (!list_empty(&cache->volumes)) { volume = list_first_entry(&cache->volumes, struct cachefiles_volume, cache_link); + vcookie = fscache_try_get_volume(volume->vcookie, + fscache_volume_get_withdraw); + if (!vcookie) { + spin_unlock(&cache->object_list_lock); + cpu_relax(); + continue; + } list_del_init(&volume->cache_link); } spin_unlock(&cache->object_list_lock); @@ -332,6 +373,7 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache) break; cachefiles_withdraw_volume(volume); + fscache_put_volume(vcookie, fscache_volume_put_withdraw); } _leave(""); @@ -371,6 +413,7 @@ void cachefiles_withdraw_cache(struct cachefiles_cache *cache) pr_info("File cache on %s unregistering\n", fscache->name); fscache_withdraw_cache(fscache); + cachefiles_withdraw_fscache_volumes(cache); /* we now have to destroy all the active objects pertaining to this * cache - which we do by passing them off to thread pool to be diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 06cdf1a8a16f..89b11336a836 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -366,14 +366,14 @@ static __poll_t cachefiles_daemon_poll(struct file *file, if (cachefiles_in_ondemand_mode(cache)) { if (!xa_empty(&cache->reqs)) { - rcu_read_lock(); + xas_lock(&xas); xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { if (!cachefiles_ondemand_is_reopening_read(req)) { mask |= EPOLLIN; break; } } - rcu_read_unlock(); + xas_unlock(&xas); } } else { if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 6845a90cdfcc..7b99bd98de75 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -48,6 +48,7 @@ enum cachefiles_object_state { 
CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */ CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */ CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */ + CACHEFILES_ONDEMAND_OBJSTATE_DROPPING, /* Object is being dropped. */ }; struct cachefiles_ondemand_info { @@ -128,6 +129,7 @@ struct cachefiles_cache { unsigned long req_id_next; struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ u32 ondemand_id_next; + u32 msg_id_next; }; static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) @@ -335,6 +337,7 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \ CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN); CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE); CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING); +CACHEFILES_OBJECT_STATE_FUNCS(dropping, DROPPING); static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req) { diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index bce005f2b456..470c96658385 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -517,7 +517,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, */ xas_lock(&xas); - if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + if (test_bit(CACHEFILES_DEAD, &cache->flags) || + cachefiles_ondemand_object_is_dropping(object)) { xas_unlock(&xas); ret = -EIO; goto out; @@ -527,20 +528,32 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, smp_mb(); if (opcode == CACHEFILES_OP_CLOSE && - !cachefiles_ondemand_object_is_open(object)) { + !cachefiles_ondemand_object_is_open(object)) { WARN_ON_ONCE(object->ondemand->ondemand_id == 0); xas_unlock(&xas); ret = -EIO; goto out; } - xas.xa_index = 0; + /* + * Cyclically find a free xas to avoid msg_id reuse that would + * cause the daemon to successfully copen a stale msg_id. 
+ */ + xas.xa_index = cache->msg_id_next; xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) { + xas.xa_index = 0; + xas_find_marked(&xas, cache->msg_id_next - 1, XA_FREE_MARK); + } if (xas.xa_node == XAS_RESTART) xas_set_err(&xas, -EBUSY); + xas_store(&xas, req); - xas_clear_mark(&xas, XA_FREE_MARK); - xas_set_mark(&xas, CACHEFILES_REQ_NEW); + if (xas_valid(&xas)) { + cache->msg_id_next = xas.xa_index + 1; + xas_clear_mark(&xas, XA_FREE_MARK); + xas_set_mark(&xas, CACHEFILES_REQ_NEW); + } xas_unlock(&xas); } while (xas_nomem(&xas, GFP_KERNEL)); @@ -568,7 +581,8 @@ out: * If error occurs after creating the anonymous fd, * cachefiles_ondemand_fd_release() will set object to close. */ - if (opcode == CACHEFILES_OP_OPEN) + if (opcode == CACHEFILES_OP_OPEN && + !cachefiles_ondemand_object_is_dropping(object)) cachefiles_ondemand_set_object_close(object); kfree(req); return ret; @@ -667,8 +681,34 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object) void cachefiles_ondemand_clean_object(struct cachefiles_object *object) { + unsigned long index; + struct cachefiles_req *req; + struct cachefiles_cache *cache; + + if (!object->ondemand) + return; + cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0, cachefiles_ondemand_init_close_req, NULL); + + if (!object->ondemand->ondemand_id) + return; + + /* Cancel all requests for the object that is being dropped. */ + cache = object->volume->cache; + xa_lock(&cache->reqs); + cachefiles_ondemand_set_object_dropping(object); + xa_for_each(&cache->reqs, index, req) { + if (req->object == object) { + req->error = -EIO; + complete(&req->done); + __xa_erase(&cache->reqs, index); + } + } + xa_unlock(&cache->reqs); + + /* Wait for ondemand_object_worker() to finish to avoid UAF. 
*/ + cancel_work_sync(&object->ondemand->ondemand_work); } int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c index 89df0ba8ba5e..781aac4ef274 100644 --- a/fs/cachefiles/volume.c +++ b/fs/cachefiles/volume.c @@ -133,7 +133,6 @@ void cachefiles_free_volume(struct fscache_volume *vcookie) void cachefiles_withdraw_volume(struct cachefiles_volume *volume) { - fscache_withdraw_volume(volume->vcookie); cachefiles_set_volume_xattr(volume); __cachefiles_free_volume(volume); } diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index bcb6173943ee..4dd8a993c60a 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -110,9 +110,11 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file if (xlen == 0) xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen); if (xlen != tlen) { - if (xlen < 0) + if (xlen < 0) { + ret = xlen; trace_cachefiles_vfs_error(object, file_inode(file), xlen, cachefiles_trace_getxattr_error); + } if (xlen == -EIO) cachefiles_io_error_obj( object, @@ -252,6 +254,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len); if (xlen != len) { if (xlen < 0) { + ret = xlen; trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen, cachefiles_trace_getxattr_error); if (xlen == -EIO) diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c index ccdbec388091..40f84d014524 100644 --- a/fs/coda/symlink.c +++ b/fs/coda/symlink.c @@ -31,15 +31,7 @@ static int coda_symlink_filler(struct file *file, struct folio *folio) cii = ITOC(inode); error = venus_readlink(inode->i_sb, &cii->c_fid, p, &len); - if (error) - goto fail; - folio_mark_uptodate(folio); - folio_unlock(folio); - return 0; - -fail: - folio_set_error(folio); - folio_unlock(folio); + folio_end_read(folio, error == 0); return error; } diff --git a/fs/cramfs/inode.c 
b/fs/cramfs/inode.c index 460690ca0174..b84d1747a020 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -811,19 +811,19 @@ out: static int cramfs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; u32 maxblock; int bytes_filled; void *pgdata; + bool success = false; maxblock = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; bytes_filled = 0; - pgdata = kmap_local_page(page); + pgdata = kmap_local_folio(folio, 0); - if (page->index < maxblock) { + if (folio->index < maxblock) { struct super_block *sb = inode->i_sb; - u32 blkptr_offset = OFFSET(inode) + page->index * 4; + u32 blkptr_offset = OFFSET(inode) + folio->index * 4; u32 block_ptr, block_start, block_len; bool uncompressed, direct; @@ -844,7 +844,7 @@ static int cramfs_read_folio(struct file *file, struct folio *folio) if (uncompressed) { block_len = PAGE_SIZE; /* if last block: cap to file length */ - if (page->index == maxblock - 1) + if (folio->index == maxblock - 1) block_len = offset_in_page(inode->i_size); } else { @@ -861,7 +861,7 @@ static int cramfs_read_folio(struct file *file, struct folio *folio) * from the previous block's pointer. */ block_start = OFFSET(inode) + maxblock * 4; - if (page->index) + if (folio->index) block_start = *(u32 *) cramfs_read(sb, blkptr_offset - 4, 4); /* Beware... 
previous ptr might be a direct ptr */ @@ -906,17 +906,12 @@ static int cramfs_read_folio(struct file *file, struct folio *folio) } memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled); - flush_dcache_page(page); - kunmap_local(pgdata); - SetPageUptodate(page); - unlock_page(page); - return 0; + flush_dcache_folio(folio); + success = true; err: kunmap_local(pgdata); - ClearPageUptodate(page); - SetPageError(page); - unlock_page(page); + folio_end_read(folio, success); return 0; } @@ -1003,4 +998,5 @@ static void __exit exit_cramfs_fs(void) module_init(init_cramfs_fs) module_exit(exit_cramfs_fs) +MODULE_DESCRIPTION("Compressed ROM file system support"); MODULE_LICENSE("GPL"); diff --git a/fs/dcache.c b/fs/dcache.c index 407095188f83..8bdc278a0205 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -35,6 +35,8 @@ #include "internal.h" #include "mount.h" +#include <asm/runtime-const.h> + /* * Usage: * dcache->d_inode->i_lock protects: @@ -100,9 +102,10 @@ static unsigned int d_hash_shift __ro_after_init; static struct hlist_bl_head *dentry_hashtable __ro_after_init; -static inline struct hlist_bl_head *d_hash(unsigned int hash) +static inline struct hlist_bl_head *d_hash(unsigned long hashlen) { - return dentry_hashtable + (hash >> d_hash_shift); + return runtime_const_ptr(dentry_hashtable) + + runtime_const_shift_right_32(hashlen, d_hash_shift); } #define IN_LOOKUP_SHIFT 10 @@ -355,7 +358,11 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) flags &= ~DCACHE_ENTRY_TYPE; WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; - if (flags & DCACHE_LRU_LIST) + /* + * The negative counter only tracks dentries on the LRU. Don't inc if + * d_lru is on another list. 
+ */ + if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) this_cpu_inc(nr_dentry_negative); } @@ -1548,7 +1555,7 @@ void shrink_dcache_for_umount(struct super_block *sb) { struct dentry *dentry; - WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked"); + rwsem_assert_held_write(&sb->s_umount); dentry = sb->s_root; sb->s_root = NULL; @@ -1844,9 +1851,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) spin_lock(&dentry->d_lock); /* - * Decrement negative dentry count if it was in the LRU list. + * The negative counter only tracks dentries on the LRU. Don't dec if + * d_lru is on another list. */ - if (dentry->d_flags & DCACHE_LRU_LIST) + if ((dentry->d_flags & + (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) this_cpu_dec(nr_dentry_negative); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); @@ -2104,7 +2113,7 @@ static noinline struct dentry *__d_lookup_rcu_op_compare( unsigned *seqp) { u64 hashlen = name->hash_len; - struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen)); + struct hlist_bl_head *b = d_hash(hashlen); struct hlist_bl_node *node; struct dentry *dentry; @@ -2171,7 +2180,7 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, { u64 hashlen = name->hash_len; const unsigned char *str = name->name; - struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen)); + struct hlist_bl_head *b = d_hash(hashlen); struct hlist_bl_node *node; struct dentry *dentry; @@ -3029,28 +3038,25 @@ EXPORT_SYMBOL(d_splice_alias); bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { - bool result; + bool subdir; unsigned seq; if (new_dentry == old_dentry) return true; - do { - /* for restarting inner loop in case of seq retry */ - seq = read_seqbegin(&rename_lock); - /* - * Need rcu_readlock to protect against the d_parent trashing - * due to d_move - */ - rcu_read_lock(); - if (d_ancestor(old_dentry, new_dentry)) - result = true; - 
else - result = false; - rcu_read_unlock(); - } while (read_seqretry(&rename_lock, seq)); - - return result; + /* Access d_parent under rcu as d_move() may change it. */ + rcu_read_lock(); + seq = read_seqbegin(&rename_lock); + subdir = d_ancestor(old_dentry, new_dentry); + /* Try lockless once... */ + if (read_seqretry(&rename_lock, seq)) { + /* ...else acquire lock for progress even on deep chains. */ + read_seqlock_excl(&rename_lock); + subdir = d_ancestor(old_dentry, new_dentry); + read_sequnlock_excl(&rename_lock); + } + rcu_read_unlock(); + return subdir; } EXPORT_SYMBOL(is_subdir); @@ -3100,6 +3106,34 @@ void d_tmpfile(struct file *file, struct inode *inode) } EXPORT_SYMBOL(d_tmpfile); +/* + * Obtain inode number of the parent dentry. + */ +ino_t d_parent_ino(struct dentry *dentry) +{ + struct dentry *parent; + struct inode *iparent; + unsigned seq; + ino_t ret; + + scoped_guard(rcu) { + seq = raw_seqcount_begin(&dentry->d_seq); + parent = READ_ONCE(dentry->d_parent); + iparent = d_inode_rcu(parent); + if (likely(iparent)) { + ret = iparent->i_ino; + if (!read_seqcount_retry(&dentry->d_seq, seq)) + return ret; + } + } + + spin_lock(&dentry->d_lock); + ret = dentry->d_parent->d_inode->i_ino; + spin_unlock(&dentry->d_lock); + return ret; +} +EXPORT_SYMBOL(d_parent_ino); + static __initdata unsigned long dhash_entries; static int __init set_dhash_entries(char *str) { @@ -3129,6 +3163,9 @@ static void __init dcache_init_early(void) 0, 0); d_hash_shift = 32 - d_hash_shift; + + runtime_const_init(shift, d_hash_shift); + runtime_const_init(ptr, dentry_hashtable); } static void __init dcache_init(void) @@ -3157,6 +3194,9 @@ static void __init dcache_init(void) 0, 0); d_hash_shift = 32 - d_hash_shift; + + runtime_const_init(shift, d_hash_shift); + runtime_const_init(ptr, dentry_hashtable); } /* SLAB cache for __getname() consumers */ diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 8fd928899a59..91521576f500 100644 --- a/fs/debugfs/inode.c +++ 
b/fs/debugfs/inode.c @@ -92,9 +92,9 @@ enum { }; static const struct fs_parameter_spec debugfs_param_specs[] = { - fsparam_u32 ("gid", Opt_gid), + fsparam_gid ("gid", Opt_gid), fsparam_u32oct ("mode", Opt_mode), - fsparam_u32 ("uid", Opt_uid), + fsparam_uid ("uid", Opt_uid), {} }; @@ -102,8 +102,6 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param { struct debugfs_fs_info *opts = fc->s_fs_info; struct fs_parse_result result; - kuid_t uid; - kgid_t gid; int opt; opt = fs_parse(fc, debugfs_param_specs, param, &result); @@ -120,16 +118,10 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param switch (opt) { case Opt_uid: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) - return invalf(fc, "Unknown uid"); - opts->uid = uid; + opts->uid = result.uid; break; case Opt_gid: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) - return invalf(fc, "Unknown gid"); - opts->gid = gid; + opts->gid = result.gid; break; case Opt_mode: opts->mode = result.uint_32 & S_IALLUGO; diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index bb14462f6d99..a929f1b613be 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -275,8 +275,8 @@ enum { }; static const struct fs_parameter_spec efivarfs_parameters[] = { - fsparam_u32("uid", Opt_uid), - fsparam_u32("gid", Opt_gid), + fsparam_uid("uid", Opt_uid), + fsparam_gid("gid", Opt_gid), {}, }; @@ -293,14 +293,10 @@ static int efivarfs_parse_param(struct fs_context *fc, struct fs_parameter *para switch (opt) { case Opt_uid: - opts->uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(opts->uid)) - return -EINVAL; + opts->uid = result.uid; break; case Opt_gid: - opts->gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(opts->gid)) - return -EINVAL; + opts->gid = result.gid; break; default: return -EINVAL; diff --git a/fs/efs/inode.c b/fs/efs/inode.c index 7844ab24b813..462619e59766 100644 --- 
a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -311,4 +311,5 @@ efs_block_t efs_map_block(struct inode *inode, efs_block_t block) { return 0; } +MODULE_DESCRIPTION("Extent File System (efs)"); MODULE_LICENSE("GPL"); diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c index 3b03a573cb1a..7749feded722 100644 --- a/fs/efs/symlink.c +++ b/fs/efs/symlink.c @@ -14,10 +14,9 @@ static int efs_symlink_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - char *link = page_address(page); - struct buffer_head * bh; - struct inode * inode = page->mapping->host; + char *link = folio_address(folio); + struct buffer_head *bh; + struct inode *inode = folio->mapping->host; efs_block_t size = inode->i_size; int err; @@ -40,12 +39,9 @@ static int efs_symlink_read_folio(struct file *file, struct folio *folio) brelse(bh); } link[size] = '\0'; - SetPageUptodate(page); - unlock_page(page); - return 0; + err = 0; fail: - SetPageError(page); - unlock_page(page); + folio_end_read(folio, err == 0); return err; } diff --git a/fs/erofs/super.c b/fs/erofs/super.c index c93bd24d2771..1b91d9513013 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -343,7 +343,7 @@ static int erofs_read_superblock(struct super_block *sb) sbi->build_time = le64_to_cpu(dsb->build_time); sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec); - memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid)); + super_set_uuid(sb, (void *)dsb->uuid, sizeof(dsb->uuid)); ret = strscpy(sbi->volume_name, dsb->volume_name, sizeof(dsb->volume_name)); diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 9b248ee5fef2..74d3d7bffcf3 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -711,6 +711,8 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, err = z_erofs_do_map_blocks(inode, map, flags); out: + if (err) + map->m_llen = 0; trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); return err; } diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index 
036024bce9f7..b80f612867c2 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -148,7 +148,7 @@ int __init z_erofs_gbuf_init(void) void z_erofs_gbuf_exit(void) { - int i; + int i, j; for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) { struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i]; @@ -161,9 +161,9 @@ void z_erofs_gbuf_exit(void) if (!gbuf->pages) continue; - for (i = 0; i < gbuf->nrpages; ++i) - if (gbuf->pages[i]) - put_page(gbuf->pages[i]); + for (j = 0; j < gbuf->nrpages; ++j) + if (gbuf->pages[j]) + put_page(gbuf->pages[j]); kfree(gbuf->pages); gbuf->pages = NULL; } diff --git a/fs/exec.c b/fs/exec.c index 40073142288f..4dee205452e2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -952,10 +952,6 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) path_noexec(&file->f_path))) goto exit; - err = deny_write_access(file); - if (err) - goto exit; - out: return file; @@ -971,8 +967,7 @@ exit: * * Returns ERR_PTR on failure or allocated struct file on success. * - * As this is a wrapper for the internal do_open_execat(), callers - * must call allow_write_access() before fput() on release. Also see + * As this is a wrapper for the internal do_open_execat(). Also see * do_close_execat(). 
*/ struct file *open_exec(const char *name) @@ -1524,10 +1519,8 @@ static int prepare_bprm_creds(struct linux_binprm *bprm) /* Matches do_open_execat() */ static void do_close_execat(struct file *file) { - if (!file) - return; - allow_write_access(file); - fput(file); + if (file) + fput(file); } static void free_bprm(struct linux_binprm *bprm) @@ -1846,7 +1839,6 @@ static int exec_binprm(struct linux_binprm *bprm) bprm->file = bprm->interpreter; bprm->interpreter = NULL; - allow_write_access(exec); if (unlikely(bprm->have_execfd)) { if (bprm->executable) { fput(exec); diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 3d5ea2cfad66..a3c7173ef693 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -225,8 +225,8 @@ static const struct constant_table exfat_param_enums[] = { }; static const struct fs_parameter_spec exfat_parameters[] = { - fsparam_u32("uid", Opt_uid), - fsparam_u32("gid", Opt_gid), + fsparam_uid("uid", Opt_uid), + fsparam_gid("gid", Opt_gid), fsparam_u32oct("umask", Opt_umask), fsparam_u32oct("dmask", Opt_dmask), fsparam_u32oct("fmask", Opt_fmask), @@ -262,10 +262,10 @@ static int exfat_parse_param(struct fs_context *fc, struct fs_parameter *param) switch (opt) { case Opt_uid: - opts->fs_uid = make_kuid(current_user_ns(), result.uint_32); + opts->fs_uid = result.uid; break; case Opt_gid: - opts->fs_gid = make_kgid(current_user_ns(), result.uint_32); + opts->fs_gid = result.gid; break; case Opt_umask: opts->fs_fmask = result.uint_32; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 07ea3d62b298..4f2dd4ab4486 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -427,7 +427,7 @@ EXPORT_SYMBOL_GPL(exportfs_encode_fh); struct dentry * exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, - int fileid_type, + int fileid_type, unsigned int flags, int (*acceptable)(void *, struct dentry *), void *context) { @@ -445,6 +445,11 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, if 
(IS_ERR_OR_NULL(result)) return result; + if ((flags & EXPORT_FH_DIR_ONLY) && !d_is_dir(result)) { + err = -ENOTDIR; + goto err_result; + } + /* * If no acceptance criteria was specified by caller, a disconnected * dentry is also accepatable. Callers may use this mode to query if @@ -581,7 +586,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, { struct dentry *ret; - ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, + ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, 0, acceptable, context); if (IS_ERR_OR_NULL(ret)) { if (ret == ERR_PTR(-ENOMEM)) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 7ae0b61258a7..0a056d97e640 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -31,11 +31,10 @@ int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, ext4_fname_from_fscrypt_name(fname, &name); -#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); if (err) ext4_fname_free_filename(fname); -#endif + return err; } @@ -51,11 +50,9 @@ int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry, ext4_fname_from_fscrypt_name(fname, &name); -#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); if (err) ext4_fname_free_filename(fname); -#endif return err; } @@ -70,10 +67,7 @@ void ext4_fname_free_filename(struct ext4_filename *fname) fname->usr_fname = NULL; fname->disk_name.name = NULL; -#if IS_ENABLED(CONFIG_UNICODE) - kfree(fname->cf_name.name); - fname->cf_name.name = NULL; -#endif + ext4_fname_free_ci_filename(fname); } static bool uuid_is_zero(__u8 u[16]) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 983dad8c07ec..8007abd4972d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2511,7 +2511,7 @@ struct ext4_filename { struct fscrypt_str crypto_buf; #endif #if IS_ENABLED(CONFIG_UNICODE) - struct fscrypt_str cf_name; + struct qstr cf_name; #endif }; @@ -2745,8 +2745,25 @@ ext4_fsblk_t 
ext4_inode_to_goal_block(struct inode *); #if IS_ENABLED(CONFIG_UNICODE) extern int ext4_fname_setup_ci_filename(struct inode *dir, - const struct qstr *iname, - struct ext4_filename *fname); + const struct qstr *iname, + struct ext4_filename *fname); + +static inline void ext4_fname_free_ci_filename(struct ext4_filename *fname) +{ + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +} +#else +static inline int ext4_fname_setup_ci_filename(struct inode *dir, + const struct qstr *iname, + struct ext4_filename *fname) +{ + return 0; +} + +static inline void ext4_fname_free_ci_filename(struct ext4_filename *fname) +{ +} #endif /* ext4 encryption related stuff goes here crypto.c */ @@ -2769,16 +2786,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir, int lookup, struct ext4_filename *fname) { - int err = 0; fname->usr_fname = iname; fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; -#if IS_ENABLED(CONFIG_UNICODE) - err = ext4_fname_setup_ci_filename(dir, iname, fname); -#endif - - return err; + return ext4_fname_setup_ci_filename(dir, iname, fname); } static inline int ext4_fname_prepare_lookup(struct inode *dir, @@ -2790,10 +2802,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, static inline void ext4_fname_free_filename(struct ext4_filename *fname) { -#if IS_ENABLED(CONFIG_UNICODE) - kfree(fname->cf_name.name); - fname->cf_name.name = NULL; -#endif + ext4_fname_free_ci_filename(fname); } static inline int ext4_ioctl_get_encryption_pwsalt(struct file *filp, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a630b27a4cc6..e6769b97a970 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1390,62 +1390,11 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) } #if IS_ENABLED(CONFIG_UNICODE) -/* - * Test whether a case-insensitive directory entry matches the filename - * being searched for. 
If quick is set, assume the name being looked up - * is already in the casefolded form. - * - * Returns: 0 if the directory entry matches, more than 0 if it - * doesn't match or less than zero on error. - */ -static int ext4_ci_compare(const struct inode *parent, const struct qstr *name, - u8 *de_name, size_t de_name_len, bool quick) -{ - const struct super_block *sb = parent->i_sb; - const struct unicode_map *um = sb->s_encoding; - struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); - struct qstr entry = QSTR_INIT(de_name, de_name_len); - int ret; - - if (IS_ENCRYPTED(parent)) { - const struct fscrypt_str encrypted_name = - FSTR_INIT(de_name, de_name_len); - - decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); - if (!decrypted_name.name) - return -ENOMEM; - ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name, - &decrypted_name); - if (ret < 0) - goto out; - entry.name = decrypted_name.name; - entry.len = decrypted_name.len; - } - - if (quick) - ret = utf8_strncasecmp_folded(um, name, &entry); - else - ret = utf8_strncasecmp(um, name, &entry); - if (ret < 0) { - /* Handle invalid character sequence as either an error - * or as an opaque byte sequence. 
- */ - if (sb_has_strict_encoding(sb)) - ret = -EINVAL; - else if (name->len != entry.len) - ret = 1; - else - ret = !!memcmp(name->name, entry.name, entry.len); - } -out: - kfree(decrypted_name.name); - return ret; -} - int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, struct ext4_filename *name) { - struct fscrypt_str *cf_name = &name->cf_name; + struct qstr *cf_name = &name->cf_name; + unsigned char *buf; struct dx_hash_info *hinfo = &name->hinfo; int len; @@ -1455,18 +1404,18 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, return 0; } - cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS); - if (!cf_name->name) + buf = kmalloc(EXT4_NAME_LEN, GFP_NOFS); + if (!buf) return -ENOMEM; - len = utf8_casefold(dir->i_sb->s_encoding, - iname, cf_name->name, - EXT4_NAME_LEN); + len = utf8_casefold(dir->i_sb->s_encoding, iname, buf, EXT4_NAME_LEN); if (len <= 0) { - kfree(cf_name->name); - cf_name->name = NULL; + kfree(buf); + buf = NULL; } + cf_name->name = buf; cf_name->len = (unsigned) len; + if (!IS_ENCRYPTED(dir)) return 0; @@ -1502,22 +1451,29 @@ static bool ext4_match(struct inode *parent, #if IS_ENABLED(CONFIG_UNICODE) if (IS_CASEFOLDED(parent) && (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) { - if (fname->cf_name.name) { - struct qstr cf = {.name = fname->cf_name.name, - .len = fname->cf_name.len}; - if (IS_ENCRYPTED(parent)) { - if (fname->hinfo.hash != EXT4_DIRENT_HASH(de) || - fname->hinfo.minor_hash != - EXT4_DIRENT_MINOR_HASH(de)) { - - return false; - } - } - return !ext4_ci_compare(parent, &cf, de->name, - de->name_len, true); - } - return !ext4_ci_compare(parent, fname->usr_fname, de->name, - de->name_len, false); + /* + * Just checking IS_ENCRYPTED(parent) below is not + * sufficient to decide whether one can use the hash for + * skipping the string comparison, because the key might + * have been added right after + * ext4_fname_setup_ci_filename(). 
In this case, a hash + * mismatch will be a false negative. Therefore, make + * sure cf_name was properly initialized before + * considering the calculated hash. + */ + if (IS_ENCRYPTED(parent) && fname->cf_name.name && + (fname->hinfo.hash != EXT4_DIRENT_HASH(de) || + fname->hinfo.minor_hash != EXT4_DIRENT_MINOR_HASH(de))) + return false; + /* + * Treat comparison errors as not a match. The + * only case where it happens is on a disk + * corruption or ENOMEM. + */ + + return generic_ci_match(parent, fname->usr_fname, + &fname->cf_name, de->name, + de->name_len) > 0; } #endif @@ -1869,8 +1825,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi } } -#if IS_ENABLED(CONFIG_UNICODE) - if (!inode && IS_CASEFOLDED(dir)) { + if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as * well. For now, prevent the negative dentry @@ -1878,7 +1833,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi */ return NULL; } -#endif + return d_splice_alias(inode, dentry); } @@ -3208,16 +3163,14 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) ext4_fc_track_unlink(handle, dentry); retval = ext4_mark_inode_dirty(handle, dir); -#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the * negative dentries at ext4_lookup(), when it is better * supported by the VFS for the CI case. 
*/ - if (IS_CASEFOLDED(dir)) + if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) d_invalidate(dentry); -#endif end_rmdir: brelse(bh); @@ -3319,16 +3272,15 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) goto out_trace; retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry); -#if IS_ENABLED(CONFIG_UNICODE) + /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the * negative dentries at ext4_lookup(), when it is better * supported by the VFS for the CI case. */ - if (IS_CASEFOLDED(dir)) + if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) d_invalidate(dentry); -#endif out_trace: trace_ext4_unlink_exit(dentry, retval); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c682fb927b64..eb899628e121 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1721,8 +1721,8 @@ static const struct fs_parameter_spec ext4_param_specs[] = { fsparam_flag ("bsdgroups", Opt_grpid), fsparam_flag ("nogrpid", Opt_nogrpid), fsparam_flag ("sysvgroups", Opt_nogrpid), - fsparam_u32 ("resgid", Opt_resgid), - fsparam_u32 ("resuid", Opt_resuid), + fsparam_gid ("resgid", Opt_resgid), + fsparam_uid ("resuid", Opt_resuid), fsparam_u32 ("sb", Opt_sb), fsparam_enum ("errors", Opt_errors, ext4_param_errors), fsparam_flag ("nouid32", Opt_nouid32), @@ -2127,8 +2127,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) struct fs_parse_result result; const struct mount_opts *m; int is_remount; - kuid_t uid; - kgid_t gid; int token; token = fs_parse(fc, ext4_param_specs, param, &result); @@ -2270,23 +2268,11 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->spec |= EXT4_SPEC_s_stripe; return 0; case Opt_resuid: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) { - ext4_msg(NULL, KERN_ERR, "Invalid uid value %d", - result.uint_32); - return -EINVAL; - } 
- ctx->s_resuid = uid; + ctx->s_resuid = result.uid; ctx->spec |= EXT4_SPEC_s_resuid; return 0; case Opt_resgid: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) { - ext4_msg(NULL, KERN_ERR, "Invalid gid value %d", - result.uint_32); - return -EINVAL; - } - ctx->s_resgid = gid; + ctx->s_resgid = result.gid; ctx->spec |= EXT4_SPEC_s_resgid; return 0; case Opt_journal_dev: @@ -3586,14 +3572,12 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#if !IS_ENABLED(CONFIG_UNICODE) - if (ext4_has_feature_casefold(sb)) { + if (!IS_ENABLED(CONFIG_UNICODE) && ext4_has_feature_casefold(sb)) { ext4_msg(sb, KERN_ERR, "Filesystem with casefold feature cannot be " "mounted without CONFIG_UNICODE"); return 0; } -#endif if (readonly) return 1; diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index ec2aeccb69a3..8bffdeccdbc3 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -219,8 +219,7 @@ static int f2fs_acl_update_mode(struct mnt_idmap *idmap, return error; if (error == 0) *acl = NULL; - if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) && - !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) + if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; *mode_p = mode; return 0; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 02c9355176d3..cbd7a5e96a37 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -42,35 +42,49 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } +#if IS_ENABLED(CONFIG_UNICODE) /* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. 
*/ int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname) { -#if IS_ENABLED(CONFIG_UNICODE) struct super_block *sb = dir->i_sb; + unsigned char *buf; + int len; if (IS_CASEFOLDED(dir) && !is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) { - fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab, - GFP_NOFS, false, F2FS_SB(sb)); - if (!fname->cf_name.name) + buf = f2fs_kmem_cache_alloc(f2fs_cf_name_slab, + GFP_NOFS, false, F2FS_SB(sb)); + if (!buf) return -ENOMEM; - fname->cf_name.len = utf8_casefold(sb->s_encoding, - fname->usr_fname, - fname->cf_name.name, - F2FS_NAME_LEN); - if ((int)fname->cf_name.len <= 0) { - kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); - fname->cf_name.name = NULL; + + len = utf8_casefold(sb->s_encoding, fname->usr_fname, + buf, F2FS_NAME_LEN); + if (len <= 0) { + kmem_cache_free(f2fs_cf_name_slab, buf); if (sb_has_strict_encoding(sb)) return -EINVAL; /* fall back to treating name as opaque byte sequence */ + return 0; } + fname->cf_name.name = buf; + fname->cf_name.len = len; } -#endif + return 0; } +void f2fs_free_casefolded_name(struct f2fs_filename *fname) +{ + unsigned char *buf = (unsigned char *)fname->cf_name.name; + + if (buf) { + kmem_cache_free(f2fs_cf_name_slab, buf); + fname->cf_name.name = NULL; + } +} +#endif /* CONFIG_UNICODE */ + static int __f2fs_setup_filename(const struct inode *dir, const struct fscrypt_name *crypt_name, struct f2fs_filename *fname) @@ -142,12 +156,7 @@ void f2fs_free_filename(struct f2fs_filename *fname) kfree(fname->crypto_buf.name); fname->crypto_buf.name = NULL; #endif -#if IS_ENABLED(CONFIG_UNICODE) - if (fname->cf_name.name) { - kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); - fname->cf_name.name = NULL; - } -#endif + f2fs_free_casefolded_name(fname); } static unsigned long dir_block_index(unsigned int level, @@ -176,58 +185,6 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, return f2fs_find_target_dentry(&d, 
fname, max_slots); } -#if IS_ENABLED(CONFIG_UNICODE) -/* - * Test whether a case-insensitive directory entry matches the filename - * being searched for. - * - * Returns 1 for a match, 0 for no match, and -errno on an error. - */ -static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, - const u8 *de_name, u32 de_name_len) -{ - const struct super_block *sb = dir->i_sb; - const struct unicode_map *um = sb->s_encoding; - struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); - struct qstr entry = QSTR_INIT(de_name, de_name_len); - int res; - - if (IS_ENCRYPTED(dir)) { - const struct fscrypt_str encrypted_name = - FSTR_INIT((u8 *)de_name, de_name_len); - - if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir))) - return -EINVAL; - - decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); - if (!decrypted_name.name) - return -ENOMEM; - res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name, - &decrypted_name); - if (res < 0) - goto out; - entry.name = decrypted_name.name; - entry.len = decrypted_name.len; - } - - res = utf8_strncasecmp_folded(um, name, &entry); - /* - * In strict mode, ignore invalid names. In non-strict mode, - * fall back to treating them as opaque byte sequences. 
- */ - if (res < 0 && !sb_has_strict_encoding(sb)) { - res = name->len == entry.len && - memcmp(name->name, entry.name, name->len) == 0; - } else { - /* utf8_strncasecmp_folded returns 0 on match */ - res = (res == 0); - } -out: - kfree(decrypted_name.name); - return res; -} -#endif /* CONFIG_UNICODE */ - static inline int f2fs_match_name(const struct inode *dir, const struct f2fs_filename *fname, const u8 *de_name, u32 de_name_len) @@ -235,11 +192,11 @@ static inline int f2fs_match_name(const struct inode *dir, struct fscrypt_name f; #if IS_ENABLED(CONFIG_UNICODE) - if (fname->cf_name.name) { - struct qstr cf = FSTR_TO_QSTR(&fname->cf_name); + if (fname->cf_name.name) + return generic_ci_match(dir, fname->usr_fname, + &fname->cf_name, + de_name, de_name_len); - return f2fs_match_ci_name(dir, &cf, de_name, de_name_len); - } #endif f.usr_fname = fname->usr_fname; f.disk_name = fname->disk_name; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1974b6aff397..8a9d910aa552 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -531,7 +531,7 @@ struct f2fs_filename { * internal operation where usr_fname is also NULL. In all these cases * we fall back to treating the name as an opaque byte sequence. 
*/ - struct fscrypt_str cf_name; + struct qstr cf_name; #endif }; @@ -3533,8 +3533,22 @@ int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir, /* * dir.c */ +#if IS_ENABLED(CONFIG_UNICODE) int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname); +void f2fs_free_casefolded_name(struct f2fs_filename *fname); +#else +static inline int f2fs_init_casefolded_name(const struct inode *dir, + struct f2fs_filename *fname) +{ + return 0; +} + +static inline void f2fs_free_casefolded_name(struct f2fs_filename *fname) +{ +} +#endif /* CONFIG_UNICODE */ + int f2fs_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct f2fs_filename *fname); int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5c0b281a70f3..c1ad9b278c47 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -185,7 +185,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) if (!dentry) return 0; - *pino = parent_ino(dentry); + *pino = d_parent_ino(dentry); dput(dentry); return 1; } @@ -923,10 +923,8 @@ static void __setattr_copy(struct mnt_idmap *idmap, inode_set_ctime_to_ts(inode, attr->ia_ctime); if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; - vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); - if (!vfsgid_in_group_p(vfsgid) && - !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) + if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; set_acl_inode(inode, mode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e54f8c08bda8..1ecde2b45e99 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -576,8 +576,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out_iput; } out_splice: -#if IS_ENABLED(CONFIG_UNICODE) - if (!inode && IS_CASEFOLDED(dir)) { + if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in 
the encoding case as * well. For now, prevent the negative dentry @@ -586,7 +585,7 @@ out_splice: trace_f2fs_lookup_end(dir, dentry, ino, err); return NULL; } -#endif + new = d_splice_alias(inode, dentry); trace_f2fs_lookup_end(dir, !IS_ERR_OR_NULL(new) ? new : dentry, ino, IS_ERR(new) ? PTR_ERR(new) : err); @@ -639,16 +638,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); -#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the * negative dentries at f2fs_lookup(), when it is better * supported by the VFS for the CI case. */ - if (IS_CASEFOLDED(dir)) + if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) d_invalidate(dentry); -#endif + if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); fail: diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 496aee53c38a..8712e264071f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -46,10 +46,6 @@ static struct kmem_cache *fsync_entry_slab; -#if IS_ENABLED(CONFIG_UNICODE) -extern struct kmem_cache *f2fs_cf_name_slab; -#endif - bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) { s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count); @@ -153,11 +149,8 @@ static int init_recovered_filename(const struct inode *dir, if (err) return err; f2fs_hash_filename(dir, fname); -#if IS_ENABLED(CONFIG_UNICODE) /* Case-sensitive match is fine for recovery */ - kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); - fname->cf_name.name = NULL; -#endif + f2fs_free_casefolded_name(fname); } else { f2fs_hash_filename(dir, fname); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1f1b3647a998..df4cf31f93df 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -321,7 +321,7 @@ struct kmem_cache *f2fs_cf_name_slab; static int __init f2fs_create_casefold_cache(void) { 
f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name", - F2FS_NAME_LEN); + F2FS_NAME_LEN); return f2fs_cf_name_slab ? 0 : -ENOMEM; } @@ -1326,13 +1326,13 @@ default_check: return -EINVAL; } #endif -#if !IS_ENABLED(CONFIG_UNICODE) - if (f2fs_sb_has_casefold(sbi)) { + + if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) { f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); return -EINVAL; } -#endif + /* * The BLKZONED feature indicates that the drive was formatted with * zone alignment optimization. This is optional for host-aware diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 66cf4778cf3b..d3e426de5f01 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -7,6 +7,8 @@ #include <linux/hash.h> #include <linux/ratelimit.h> #include <linux/msdos_fs.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> /* * vfat shortname flags @@ -51,7 +53,8 @@ struct fat_mount_options { tz_set:1, /* Filesystem timestamps' offset set */ rodir:1, /* allow ATTR_RO for directory */ discard:1, /* Issue discard requests on deletions */ - dos1xfloppy:1; /* Assume default BPB for DOS 1.x floppies */ + dos1xfloppy:1, /* Assume default BPB for DOS 1.x floppies */ + debug:1; /* Not currently used */ }; #define FAT_HASH_BITS 8 @@ -415,12 +418,21 @@ extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); extern struct inode *fat_build_inode(struct super_block *sb, struct msdos_dir_entry *de, loff_t i_pos); extern int fat_sync_inode(struct inode *inode); -extern int fat_fill_super(struct super_block *sb, void *data, int silent, - int isvfat, void (*setup)(struct super_block *)); +extern int fat_fill_super(struct super_block *sb, struct fs_context *fc, + void (*setup)(struct super_block *)); extern int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de); extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); + +extern const struct fs_parameter_spec fat_param_spec[]; +int 
fat_init_fs_context(struct fs_context *fc, bool is_vfat); +void fat_free_fc(struct fs_context *fc); + +int fat_parse_param(struct fs_context *fc, struct fs_parameter *param, + bool is_vfat); +int fat_reconfigure(struct fs_context *fc); + static inline unsigned long fat_dir_hash(int logstart) { return hash_32(logstart, FAT_HASH_BITS); diff --git a/fs/fat/fat_test.c b/fs/fat/fat_test.c index 2dab4ca1d0d8..1f0062659067 100644 --- a/fs/fat/fat_test.c +++ b/fs/fat/fat_test.c @@ -193,4 +193,5 @@ static struct kunit_suite fat_test_suite = { kunit_test_suites(&fat_test_suite); +MODULE_DESCRIPTION("KUnit tests for FAT filesystems"); MODULE_LICENSE("GPL v2"); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index d9e6fbb6f246..19115fd2d2a4 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -16,7 +16,6 @@ #include <linux/mpage.h> #include <linux/vfs.h> #include <linux/seq_file.h> -#include <linux/parser.h> #include <linux/uio.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> @@ -804,16 +803,17 @@ static void __exit fat_destroy_inodecache(void) kmem_cache_destroy(fat_inode_cachep); } -static int fat_remount(struct super_block *sb, int *flags, char *data) +int fat_reconfigure(struct fs_context *fc) { bool new_rdonly; + struct super_block *sb = fc->root->d_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); - *flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME); + fc->sb_flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME); sync_filesystem(sb); /* make sure we update state on remount. 
*/ - new_rdonly = *flags & SB_RDONLY; + new_rdonly = fc->sb_flags & SB_RDONLY; if (new_rdonly != sb_rdonly(sb)) { if (new_rdonly) fat_set_state(sb, 0, 0); @@ -822,6 +822,7 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) } return 0; } +EXPORT_SYMBOL_GPL(fat_reconfigure); static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) { @@ -939,8 +940,6 @@ static const struct super_operations fat_sops = { .evict_inode = fat_evict_inode, .put_super = fat_put_super, .statfs = fat_statfs, - .remount_fs = fat_remount, - .show_options = fat_show_options, }; @@ -1037,355 +1036,282 @@ static int fat_show_options(struct seq_file *m, struct dentry *root) } enum { - Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid, - Opt_umask, Opt_dmask, Opt_fmask, Opt_allow_utime, Opt_codepage, - Opt_usefree, Opt_nocase, Opt_quiet, Opt_showexec, Opt_debug, - Opt_immutable, Opt_dots, Opt_nodots, - Opt_charset, Opt_shortname_lower, Opt_shortname_win95, - Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, - Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, - Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset, - Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err, Opt_dos1xfloppy, + Opt_check, Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, + Opt_allow_utime, Opt_codepage, Opt_usefree, Opt_nocase, Opt_quiet, + Opt_showexec, Opt_debug, Opt_immutable, Opt_dots, Opt_dotsOK, + Opt_charset, Opt_shortname, Opt_utf8, Opt_utf8_bool, + Opt_uni_xl, Opt_uni_xl_bool, Opt_nonumtail, Opt_nonumtail_bool, + Opt_obsolete, Opt_flush, Opt_tz, Opt_rodir, Opt_errors, Opt_discard, + Opt_nfs, Opt_nfs_enum, Opt_time_offset, Opt_dos1xfloppy, }; -static const match_table_t fat_tokens = { - {Opt_check_r, "check=relaxed"}, - {Opt_check_s, "check=strict"}, - {Opt_check_n, "check=normal"}, - {Opt_check_r, "check=r"}, - {Opt_check_s, "check=s"}, - {Opt_check_n, "check=n"}, - 
{Opt_uid, "uid=%u"}, - {Opt_gid, "gid=%u"}, - {Opt_umask, "umask=%o"}, - {Opt_dmask, "dmask=%o"}, - {Opt_fmask, "fmask=%o"}, - {Opt_allow_utime, "allow_utime=%o"}, - {Opt_codepage, "codepage=%u"}, - {Opt_usefree, "usefree"}, - {Opt_nocase, "nocase"}, - {Opt_quiet, "quiet"}, - {Opt_showexec, "showexec"}, - {Opt_debug, "debug"}, - {Opt_immutable, "sys_immutable"}, - {Opt_flush, "flush"}, - {Opt_tz_utc, "tz=UTC"}, - {Opt_time_offset, "time_offset=%d"}, - {Opt_err_cont, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_discard, "discard"}, - {Opt_nfs_stale_rw, "nfs"}, - {Opt_nfs_stale_rw, "nfs=stale_rw"}, - {Opt_nfs_nostale_ro, "nfs=nostale_ro"}, - {Opt_dos1xfloppy, "dos1xfloppy"}, - {Opt_obsolete, "conv=binary"}, - {Opt_obsolete, "conv=text"}, - {Opt_obsolete, "conv=auto"}, - {Opt_obsolete, "conv=b"}, - {Opt_obsolete, "conv=t"}, - {Opt_obsolete, "conv=a"}, - {Opt_obsolete, "fat=%u"}, - {Opt_obsolete, "blocksize=%u"}, - {Opt_obsolete, "cvf_format=%20s"}, - {Opt_obsolete, "cvf_options=%100s"}, - {Opt_obsolete, "posix"}, - {Opt_err, NULL}, -}; -static const match_table_t msdos_tokens = { - {Opt_nodots, "nodots"}, - {Opt_nodots, "dotsOK=no"}, - {Opt_dots, "dots"}, - {Opt_dots, "dotsOK=yes"}, - {Opt_err, NULL} +static const struct constant_table fat_param_check[] = { + {"relaxed", 'r'}, + {"r", 'r'}, + {"strict", 's'}, + {"s", 's'}, + {"normal", 'n'}, + {"n", 'n'}, + {} }; -static const match_table_t vfat_tokens = { - {Opt_charset, "iocharset=%s"}, - {Opt_shortname_lower, "shortname=lower"}, - {Opt_shortname_win95, "shortname=win95"}, - {Opt_shortname_winnt, "shortname=winnt"}, - {Opt_shortname_mixed, "shortname=mixed"}, - {Opt_utf8_no, "utf8=0"}, /* 0 or no or false */ - {Opt_utf8_no, "utf8=no"}, - {Opt_utf8_no, "utf8=false"}, - {Opt_utf8_yes, "utf8=1"}, /* empty or 1 or yes or true */ - {Opt_utf8_yes, "utf8=yes"}, - {Opt_utf8_yes, "utf8=true"}, - {Opt_utf8_yes, "utf8"}, - {Opt_uni_xl_no, "uni_xlate=0"}, /* 0 or no or false 
*/ - {Opt_uni_xl_no, "uni_xlate=no"}, - {Opt_uni_xl_no, "uni_xlate=false"}, - {Opt_uni_xl_yes, "uni_xlate=1"}, /* empty or 1 or yes or true */ - {Opt_uni_xl_yes, "uni_xlate=yes"}, - {Opt_uni_xl_yes, "uni_xlate=true"}, - {Opt_uni_xl_yes, "uni_xlate"}, - {Opt_nonumtail_no, "nonumtail=0"}, /* 0 or no or false */ - {Opt_nonumtail_no, "nonumtail=no"}, - {Opt_nonumtail_no, "nonumtail=false"}, - {Opt_nonumtail_yes, "nonumtail=1"}, /* empty or 1 or yes or true */ - {Opt_nonumtail_yes, "nonumtail=yes"}, - {Opt_nonumtail_yes, "nonumtail=true"}, - {Opt_nonumtail_yes, "nonumtail"}, - {Opt_rodir, "rodir"}, - {Opt_err, NULL} + +static const struct constant_table fat_param_tz[] = { + {"UTC", 0}, + {} }; -static int parse_options(struct super_block *sb, char *options, int is_vfat, - int silent, int *debug, struct fat_mount_options *opts) -{ - char *p; - substring_t args[MAX_OPT_ARGS]; - int option; - char *iocharset; +static const struct constant_table fat_param_errors[] = { + {"continue", FAT_ERRORS_CONT}, + {"panic", FAT_ERRORS_PANIC}, + {"remount-ro", FAT_ERRORS_RO}, + {} +}; - opts->isvfat = is_vfat; - opts->fs_uid = current_uid(); - opts->fs_gid = current_gid(); - opts->fs_fmask = opts->fs_dmask = current_umask(); - opts->allow_utime = -1; - opts->codepage = fat_default_codepage; - fat_reset_iocharset(opts); - if (is_vfat) { - opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; - opts->rodir = 0; - } else { - opts->shortname = 0; - opts->rodir = 1; - } - opts->name_check = 'n'; - opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; - opts->unicode_xlate = 0; - opts->numtail = 1; - opts->usefree = opts->nocase = 0; - opts->tz_set = 0; - opts->nfs = 0; - opts->errors = FAT_ERRORS_RO; - *debug = 0; +static const struct constant_table fat_param_nfs[] = { + {"stale_rw", FAT_NFS_STALE_RW}, + {"nostale_ro", FAT_NFS_NOSTALE_RO}, + {} +}; - opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat; +/* + * These are all obsolete but we still reject 
invalid options. + * The corresponding values are therefore meaningless. + */ +static const struct constant_table fat_param_conv[] = { + {"binary", 0}, + {"text", 0}, + {"auto", 0}, + {"b", 0}, + {"t", 0}, + {"a", 0}, + {} +}; - if (!options) - goto out; +/* Core options. See below for vfat and msdos extras */ +const struct fs_parameter_spec fat_param_spec[] = { + fsparam_enum ("check", Opt_check, fat_param_check), + fsparam_uid ("uid", Opt_uid), + fsparam_gid ("gid", Opt_gid), + fsparam_u32oct ("umask", Opt_umask), + fsparam_u32oct ("dmask", Opt_dmask), + fsparam_u32oct ("fmask", Opt_fmask), + fsparam_u32oct ("allow_utime", Opt_allow_utime), + fsparam_u32 ("codepage", Opt_codepage), + fsparam_flag ("usefree", Opt_usefree), + fsparam_flag ("nocase", Opt_nocase), + fsparam_flag ("quiet", Opt_quiet), + fsparam_flag ("showexec", Opt_showexec), + fsparam_flag ("debug", Opt_debug), + fsparam_flag ("sys_immutable", Opt_immutable), + fsparam_flag ("flush", Opt_flush), + fsparam_enum ("tz", Opt_tz, fat_param_tz), + fsparam_s32 ("time_offset", Opt_time_offset), + fsparam_enum ("errors", Opt_errors, fat_param_errors), + fsparam_flag ("discard", Opt_discard), + fsparam_flag ("nfs", Opt_nfs), + fsparam_enum ("nfs", Opt_nfs_enum, fat_param_nfs), + fsparam_flag ("dos1xfloppy", Opt_dos1xfloppy), + __fsparam(fs_param_is_enum, "conv", + Opt_obsolete, fs_param_deprecated, fat_param_conv), + __fsparam(fs_param_is_u32, "fat", + Opt_obsolete, fs_param_deprecated, NULL), + __fsparam(fs_param_is_u32, "blocksize", + Opt_obsolete, fs_param_deprecated, NULL), + __fsparam(fs_param_is_string, "cvf_format", + Opt_obsolete, fs_param_deprecated, NULL), + __fsparam(fs_param_is_string, "cvf_options", + Opt_obsolete, fs_param_deprecated, NULL), + __fsparam(NULL, "posix", + Opt_obsolete, fs_param_deprecated, NULL), + {} +}; +EXPORT_SYMBOL_GPL(fat_param_spec); - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; +static const struct fs_parameter_spec msdos_param_spec[] 
= { + fsparam_flag_no ("dots", Opt_dots), + fsparam_bool ("dotsOK", Opt_dotsOK), + {} +}; - token = match_token(p, fat_tokens, args); - if (token == Opt_err) { - if (is_vfat) - token = match_token(p, vfat_tokens, args); - else - token = match_token(p, msdos_tokens, args); - } - switch (token) { - case Opt_check_s: - opts->name_check = 's'; - break; - case Opt_check_r: - opts->name_check = 'r'; - break; - case Opt_check_n: - opts->name_check = 'n'; - break; - case Opt_usefree: - opts->usefree = 1; - break; - case Opt_nocase: - if (!is_vfat) - opts->nocase = 1; - else { - /* for backward compatibility */ - opts->shortname = VFAT_SFN_DISPLAY_WIN95 - | VFAT_SFN_CREATE_WIN95; - } - break; - case Opt_quiet: - opts->quiet = 1; - break; - case Opt_showexec: - opts->showexec = 1; - break; - case Opt_debug: - *debug = 1; - break; - case Opt_immutable: - opts->sys_immutable = 1; - break; - case Opt_uid: - if (match_int(&args[0], &option)) - return -EINVAL; - opts->fs_uid = make_kuid(current_user_ns(), option); - if (!uid_valid(opts->fs_uid)) - return -EINVAL; - break; - case Opt_gid: - if (match_int(&args[0], &option)) - return -EINVAL; - opts->fs_gid = make_kgid(current_user_ns(), option); - if (!gid_valid(opts->fs_gid)) - return -EINVAL; - break; - case Opt_umask: - if (match_octal(&args[0], &option)) - return -EINVAL; - opts->fs_fmask = opts->fs_dmask = option; - break; - case Opt_dmask: - if (match_octal(&args[0], &option)) - return -EINVAL; - opts->fs_dmask = option; - break; - case Opt_fmask: - if (match_octal(&args[0], &option)) - return -EINVAL; - opts->fs_fmask = option; - break; - case Opt_allow_utime: - if (match_octal(&args[0], &option)) - return -EINVAL; - opts->allow_utime = option & (S_IWGRP | S_IWOTH); - break; - case Opt_codepage: - if (match_int(&args[0], &option)) - return -EINVAL; - opts->codepage = option; - break; - case Opt_flush: - opts->flush = 1; - break; - case Opt_time_offset: - if (match_int(&args[0], &option)) - return -EINVAL; - /* - * GMT+-12 
zones may have DST corrections so at least - * 13 hours difference is needed. Make the limit 24 - * just in case someone invents something unusual. - */ - if (option < -24 * 60 || option > 24 * 60) - return -EINVAL; - opts->tz_set = 1; - opts->time_offset = option; - break; - case Opt_tz_utc: - opts->tz_set = 1; - opts->time_offset = 0; - break; - case Opt_err_cont: - opts->errors = FAT_ERRORS_CONT; - break; - case Opt_err_panic: - opts->errors = FAT_ERRORS_PANIC; - break; - case Opt_err_ro: - opts->errors = FAT_ERRORS_RO; - break; - case Opt_nfs_stale_rw: - opts->nfs = FAT_NFS_STALE_RW; - break; - case Opt_nfs_nostale_ro: - opts->nfs = FAT_NFS_NOSTALE_RO; - break; - case Opt_dos1xfloppy: - opts->dos1xfloppy = 1; - break; +static const struct constant_table fat_param_shortname[] = { + {"lower", VFAT_SFN_DISPLAY_LOWER | VFAT_SFN_CREATE_WIN95}, + {"win95", VFAT_SFN_DISPLAY_WIN95 | VFAT_SFN_CREATE_WIN95}, + {"winnt", VFAT_SFN_DISPLAY_WINNT | VFAT_SFN_CREATE_WINNT}, + {"mixed", VFAT_SFN_DISPLAY_WINNT | VFAT_SFN_CREATE_WIN95}, + {} +}; - /* msdos specific */ - case Opt_dots: - opts->dotsOK = 1; - break; - case Opt_nodots: - opts->dotsOK = 0; - break; +static const struct fs_parameter_spec vfat_param_spec[] = { + fsparam_string ("iocharset", Opt_charset), + fsparam_enum ("shortname", Opt_shortname, fat_param_shortname), + fsparam_flag ("utf8", Opt_utf8), + fsparam_bool ("utf8", Opt_utf8_bool), + fsparam_flag ("uni_xlate", Opt_uni_xl), + fsparam_bool ("uni_xlate", Opt_uni_xl_bool), + fsparam_flag ("nonumtail", Opt_nonumtail), + fsparam_bool ("nonumtail", Opt_nonumtail_bool), + fsparam_flag ("rodir", Opt_rodir), + {} +}; - /* vfat specific */ - case Opt_charset: - fat_reset_iocharset(opts); - iocharset = match_strdup(&args[0]); - if (!iocharset) - return -ENOMEM; - opts->iocharset = iocharset; - break; - case Opt_shortname_lower: - opts->shortname = VFAT_SFN_DISPLAY_LOWER - | VFAT_SFN_CREATE_WIN95; - break; - case Opt_shortname_win95: - opts->shortname = 
VFAT_SFN_DISPLAY_WIN95 - | VFAT_SFN_CREATE_WIN95; - break; - case Opt_shortname_winnt: - opts->shortname = VFAT_SFN_DISPLAY_WINNT - | VFAT_SFN_CREATE_WINNT; - break; - case Opt_shortname_mixed: - opts->shortname = VFAT_SFN_DISPLAY_WINNT - | VFAT_SFN_CREATE_WIN95; - break; - case Opt_utf8_no: /* 0 or no or false */ - opts->utf8 = 0; - break; - case Opt_utf8_yes: /* empty or 1 or yes or true */ - opts->utf8 = 1; - break; - case Opt_uni_xl_no: /* 0 or no or false */ - opts->unicode_xlate = 0; - break; - case Opt_uni_xl_yes: /* empty or 1 or yes or true */ - opts->unicode_xlate = 1; - break; - case Opt_nonumtail_no: /* 0 or no or false */ - opts->numtail = 1; /* negated option */ - break; - case Opt_nonumtail_yes: /* empty or 1 or yes or true */ - opts->numtail = 0; /* negated option */ - break; - case Opt_rodir: - opts->rodir = 1; - break; - case Opt_discard: - opts->discard = 1; - break; +int fat_parse_param(struct fs_context *fc, struct fs_parameter *param, + bool is_vfat) +{ + struct fat_mount_options *opts = fc->fs_private; + struct fs_parse_result result; + int opt; - /* obsolete mount options */ - case Opt_obsolete: - fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, " - "not supported now", p); - break; - /* unknown option */ - default: - if (!silent) { - fat_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" " - "or missing value", p); - } - return -EINVAL; - } - } + /* remount options have traditionally been ignored */ + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) + return 0; -out: - /* UTF-8 doesn't provide FAT semantics */ - if (!strcmp(opts->iocharset, "utf8")) { - fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset" - " for FAT filesystems, filesystem will be " - "case sensitive!"); + opt = fs_parse(fc, fat_param_spec, param, &result); + /* If option not found in fat_param_spec, try vfat/msdos options */ + if (opt == -ENOPARAM) { + if (is_vfat) + opt = fs_parse(fc, vfat_param_spec, param, &result); + else + opt = fs_parse(fc, 
msdos_param_spec, param, &result); } - /* If user doesn't specify allow_utime, it's initialized from dmask. */ - if (opts->allow_utime == (unsigned short)-1) - opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH); - if (opts->unicode_xlate) - opts->utf8 = 0; - if (opts->nfs == FAT_NFS_NOSTALE_RO) { - sb->s_flags |= SB_RDONLY; - sb->s_export_op = &fat_export_ops_nostale; + if (opt < 0) + return opt; + + switch (opt) { + case Opt_check: + opts->name_check = result.uint_32; + break; + case Opt_usefree: + opts->usefree = 1; + break; + case Opt_nocase: + if (!is_vfat) + opts->nocase = 1; + else { + /* for backward compatibility */ + opts->shortname = VFAT_SFN_DISPLAY_WIN95 + | VFAT_SFN_CREATE_WIN95; + } + break; + case Opt_quiet: + opts->quiet = 1; + break; + case Opt_showexec: + opts->showexec = 1; + break; + case Opt_debug: + opts->debug = 1; + break; + case Opt_immutable: + opts->sys_immutable = 1; + break; + case Opt_uid: + opts->fs_uid = result.uid; + break; + case Opt_gid: + opts->fs_gid = result.gid; + break; + case Opt_umask: + opts->fs_fmask = opts->fs_dmask = result.uint_32; + break; + case Opt_dmask: + opts->fs_dmask = result.uint_32; + break; + case Opt_fmask: + opts->fs_fmask = result.uint_32; + break; + case Opt_allow_utime: + opts->allow_utime = result.uint_32 & (S_IWGRP | S_IWOTH); + break; + case Opt_codepage: + opts->codepage = result.uint_32; + break; + case Opt_flush: + opts->flush = 1; + break; + case Opt_time_offset: + /* + * GMT+-12 zones may have DST corrections so at least + * 13 hours difference is needed. Make the limit 24 + * just in case someone invents something unusual. 
+ */ + if (result.int_32 < -24 * 60 || result.int_32 > 24 * 60) + return -EINVAL; + opts->tz_set = 1; + opts->time_offset = result.int_32; + break; + case Opt_tz: + opts->tz_set = 1; + opts->time_offset = result.uint_32; + break; + case Opt_errors: + opts->errors = result.uint_32; + break; + case Opt_nfs: + opts->nfs = FAT_NFS_STALE_RW; + break; + case Opt_nfs_enum: + opts->nfs = result.uint_32; + break; + case Opt_dos1xfloppy: + opts->dos1xfloppy = 1; + break; + + /* msdos specific */ + case Opt_dots: /* dots / nodots */ + opts->dotsOK = !result.negated; + break; + case Opt_dotsOK: /* dotsOK = yes/no */ + opts->dotsOK = result.boolean; + break; + + /* vfat specific */ + case Opt_charset: + fat_reset_iocharset(opts); + opts->iocharset = param->string; + param->string = NULL; /* Steal string */ + break; + case Opt_shortname: + opts->shortname = result.uint_32; + break; + case Opt_utf8: + opts->utf8 = 1; + break; + case Opt_utf8_bool: + opts->utf8 = result.boolean; + break; + case Opt_uni_xl: + opts->unicode_xlate = 1; + break; + case Opt_uni_xl_bool: + opts->unicode_xlate = result.boolean; + break; + case Opt_nonumtail: + opts->numtail = 0; /* negated option */ + break; + case Opt_nonumtail_bool: + opts->numtail = !result.boolean; /* negated option */ + break; + case Opt_rodir: + opts->rodir = 1; + break; + case Opt_discard: + opts->discard = 1; + break; + + /* obsolete mount options */ + case Opt_obsolete: + printk(KERN_INFO "FAT-fs: \"%s\" option is obsolete, " + "not supported now", param->key); + break; + default: + return -EINVAL; } return 0; } +EXPORT_SYMBOL_GPL(fat_parse_param); static int fat_read_root(struct inode *inode) { @@ -1604,9 +1530,11 @@ out: /* * Read the super block of an MS-DOS FS. 
*/ -int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, +int fat_fill_super(struct super_block *sb, struct fs_context *fc, void (*setup)(struct super_block *)) { + struct fat_mount_options *opts = fc->fs_private; + int silent = fc->sb_flags & SB_SILENT; struct inode *root_inode = NULL, *fat_inode = NULL; struct inode *fsinfo_inode = NULL; struct buffer_head *bh; @@ -1614,7 +1542,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, struct msdos_sb_info *sbi; u16 logical_sector_size; u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors; - int debug; long error; char buf[50]; struct timespec64 ts; @@ -1643,9 +1570,27 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options); - if (error) - goto out_fail; + /* UTF-8 doesn't provide FAT semantics */ + if (!strcmp(opts->iocharset, "utf8")) { + fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset" + " for FAT filesystems, filesystem will be" + " case sensitive!"); + } + + /* If user doesn't specify allow_utime, it's initialized from dmask. 
*/ + if (opts->allow_utime == (unsigned short)-1) + opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH); + if (opts->unicode_xlate) + opts->utf8 = 0; + if (opts->nfs == FAT_NFS_NOSTALE_RO) { + sb->s_flags |= SB_RDONLY; + sb->s_export_op = &fat_export_ops_nostale; + } + + /* Apply parsed options to sbi (structure copy) */ + sbi->options = *opts; + /* Transfer ownership of iocharset to sbi->options */ + opts->iocharset = NULL; setup(sb); /* flavour-specific stuff that needs options */ @@ -1950,6 +1895,57 @@ int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2) } EXPORT_SYMBOL_GPL(fat_flush_inodes); +int fat_init_fs_context(struct fs_context *fc, bool is_vfat) +{ + struct fat_mount_options *opts; + + opts = kzalloc(sizeof(*opts), GFP_KERNEL); + if (!opts) + return -ENOMEM; + + opts->isvfat = is_vfat; + opts->fs_uid = current_uid(); + opts->fs_gid = current_gid(); + opts->fs_fmask = opts->fs_dmask = current_umask(); + opts->allow_utime = -1; + opts->codepage = fat_default_codepage; + fat_reset_iocharset(opts); + if (is_vfat) { + opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; + opts->rodir = 0; + } else { + opts->shortname = 0; + opts->rodir = 1; + } + opts->name_check = 'n'; + opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; + opts->unicode_xlate = 0; + opts->numtail = 1; + opts->usefree = opts->nocase = 0; + opts->tz_set = 0; + opts->nfs = 0; + opts->errors = FAT_ERRORS_RO; + opts->debug = 0; + + opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat; + + fc->fs_private = opts; + /* fc->ops assigned by caller */ + + return 0; +} +EXPORT_SYMBOL_GPL(fat_init_fs_context); + +void fat_free_fc(struct fs_context *fc) +{ + struct fat_mount_options *opts = fc->fs_private; + + if (opts->iocharset != fat_default_iocharset) + kfree(opts->iocharset); + kfree(fc->fs_private); +} +EXPORT_SYMBOL_GPL(fat_free_fc); + static int __init init_fat_fs(void) { int err; @@ -1978,4 +1974,5 @@ static void __exit 
exit_fat_fs(void) module_init(init_fat_fs) module_exit(exit_fat_fs) +MODULE_DESCRIPTION("Core FAT filesystem support"); MODULE_LICENSE("GPL"); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 2116c486843b..f06f6ba643cc 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -650,24 +650,48 @@ static void setup(struct super_block *sb) sb->s_flags |= SB_NOATIME; } -static int msdos_fill_super(struct super_block *sb, void *data, int silent) +static int msdos_fill_super(struct super_block *sb, struct fs_context *fc) { - return fat_fill_super(sb, data, silent, 0, setup); + return fat_fill_super(sb, fc, setup); } -static struct dentry *msdos_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int msdos_get_tree(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super); + return get_tree_bdev(fc, msdos_fill_super); +} + +static int msdos_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + return fat_parse_param(fc, param, false); +} + +static const struct fs_context_operations msdos_context_ops = { + .parse_param = msdos_parse_param, + .get_tree = msdos_get_tree, + .reconfigure = fat_reconfigure, + .free = fat_free_fc, +}; + +static int msdos_init_fs_context(struct fs_context *fc) +{ + int err; + + /* Initialize with is_vfat == false */ + err = fat_init_fs_context(fc, false); + if (err) + return err; + + fc->ops = &msdos_context_ops; + return 0; } static struct file_system_type msdos_fs_type = { .owner = THIS_MODULE, .name = "msdos", - .mount = msdos_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, + .init_fs_context = msdos_init_fs_context, + .parameters = fat_param_spec, }; MODULE_ALIAS_FS("msdos"); diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index c4d00999a433..6423e1dedf14 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1195,24 +1195,48 @@ static void setup(struct super_block *sb) sb->s_d_op 
= &vfat_dentry_ops; } -static int vfat_fill_super(struct super_block *sb, void *data, int silent) +static int vfat_fill_super(struct super_block *sb, struct fs_context *fc) { - return fat_fill_super(sb, data, silent, 1, setup); + return fat_fill_super(sb, fc, setup); } -static struct dentry *vfat_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int vfat_get_tree(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super); + return get_tree_bdev(fc, vfat_fill_super); +} + +static int vfat_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + return fat_parse_param(fc, param, true); +} + +static const struct fs_context_operations vfat_context_ops = { + .parse_param = vfat_parse_param, + .get_tree = vfat_get_tree, + .reconfigure = fat_reconfigure, + .free = fat_free_fc, +}; + +static int vfat_init_fs_context(struct fs_context *fc) +{ + int err; + + /* Initialize with is_vfat == true */ + err = fat_init_fs_context(fc, true); + if (err) + return err; + + fc->ops = &vfat_context_ops; + return 0; } static struct file_system_type vfat_fs_type = { .owner = THIS_MODULE, .name = "vfat", - .mount = vfat_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, + .init_fs_context = vfat_init_fs_context, + .parameters = fat_param_spec, }; MODULE_ALIAS_FS("vfat"); diff --git a/fs/fhandle.c b/fs/fhandle.c index 8a7f86c2139a..6e8cea16790e 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -115,88 +115,188 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, return err; } -static struct vfsmount *get_vfsmount_from_fd(int fd) +static int get_path_from_fd(int fd, struct path *root) { - struct vfsmount *mnt; - if (fd == AT_FDCWD) { struct fs_struct *fs = current->fs; spin_lock(&fs->lock); - mnt = mntget(fs->pwd.mnt); + *root = fs->pwd; + path_get(root); spin_unlock(&fs->lock); } else { struct fd f = fdget(fd); if (!f.file) - return ERR_PTR(-EBADF); 
- mnt = mntget(f.file->f_path.mnt); + return -EBADF; + *root = f.file->f_path; + path_get(root); fdput(f); } - return mnt; + + return 0; } +enum handle_to_path_flags { + HANDLE_CHECK_PERMS = (1 << 0), + HANDLE_CHECK_SUBTREE = (1 << 1), +}; + +struct handle_to_path_ctx { + struct path root; + enum handle_to_path_flags flags; + unsigned int fh_flags; +}; + static int vfs_dentry_acceptable(void *context, struct dentry *dentry) { - return 1; + struct handle_to_path_ctx *ctx = context; + struct user_namespace *user_ns = current_user_ns(); + struct dentry *d, *root = ctx->root.dentry; + struct mnt_idmap *idmap = mnt_idmap(ctx->root.mnt); + int retval = 0; + + if (!root) + return 1; + + /* Old permission model with global CAP_DAC_READ_SEARCH. */ + if (!ctx->flags) + return 1; + + /* + * It's racy as we're not taking rename_lock but we're able to ignore + * permissions and we just need an approximation whether we were able + * to follow a path to the file. + * + * It's also potentially expensive on some filesystems especially if + * there is a deep path. + */ + d = dget(dentry); + while (d != root && !IS_ROOT(d)) { + struct dentry *parent = dget_parent(d); + + /* + * We know that we have the ability to override DAC permissions + * as we've verified this earlier via CAP_DAC_READ_SEARCH. But + * we also need to make sure that there aren't any unmapped + * inodes in the path that would prevent us from reaching the + * file. 
+ */ + if (!privileged_wrt_inode_uidgid(user_ns, idmap, + d_inode(parent))) { + dput(d); + dput(parent); + return retval; + } + + dput(d); + d = parent; + } + + if (!(ctx->flags & HANDLE_CHECK_SUBTREE) || d == root) + retval = 1; + WARN_ON_ONCE(d != root && d != root->d_sb->s_root); + dput(d); + return retval; } -static int do_handle_to_path(int mountdirfd, struct file_handle *handle, - struct path *path) +static int do_handle_to_path(struct file_handle *handle, struct path *path, + struct handle_to_path_ctx *ctx) { - int retval = 0; int handle_dwords; + struct vfsmount *mnt = ctx->root.mnt; - path->mnt = get_vfsmount_from_fd(mountdirfd); - if (IS_ERR(path->mnt)) { - retval = PTR_ERR(path->mnt); - goto out_err; - } /* change the handle size to multiple of sizeof(u32) */ handle_dwords = handle->handle_bytes >> 2; - path->dentry = exportfs_decode_fh(path->mnt, + path->dentry = exportfs_decode_fh_raw(mnt, (struct fid *)handle->f_handle, handle_dwords, handle->handle_type, - vfs_dentry_acceptable, NULL); - if (IS_ERR(path->dentry)) { - retval = PTR_ERR(path->dentry); - goto out_mnt; + ctx->fh_flags, + vfs_dentry_acceptable, ctx); + if (IS_ERR_OR_NULL(path->dentry)) { + if (path->dentry == ERR_PTR(-ENOMEM)) + return -ENOMEM; + return -ESTALE; } + path->mnt = mntget(mnt); return 0; -out_mnt: - mntput(path->mnt); -out_err: - return retval; +} + +/* + * Allow relaxed permissions of file handles if the caller has the + * ability to mount the filesystem or create a bind-mount of the + * provided @mountdirfd. + * + * In both cases the caller may be able to get an unobstructed way to + * the encoded file handle. If the caller is only able to create a + * bind-mount we need to verify that there are no locked mounts on top + * of it that could prevent us from getting to the encoded file. + * + * In principle, locked mounts can prevent the caller from mounting the + * filesystem but that only applies to procfs and sysfs neither of which + * support decoding file handles. 
+ */ +static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, + unsigned int o_flags) +{ + struct path *root = &ctx->root; + + /* + * Restrict to O_DIRECTORY to provide a deterministic API that avoids a + * confusing api in the face of disconnected non-dir dentries. + * + * There's only one dentry for each directory inode (VFS rule)... + */ + if (!(o_flags & O_DIRECTORY)) + return false; + + if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) + ctx->flags = HANDLE_CHECK_PERMS; + else if (is_mounted(root->mnt) && + ns_capable(real_mount(root->mnt)->mnt_ns->user_ns, + CAP_SYS_ADMIN) && + !has_locked_children(real_mount(root->mnt), root->dentry)) + ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE; + else + return false; + + /* Are we able to override DAC permissions? */ + if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH)) + return false; + + ctx->fh_flags = EXPORT_FH_DIR_ONLY; + return true; } static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, - struct path *path) + struct path *path, unsigned int o_flags) { int retval = 0; struct file_handle f_handle; struct file_handle *handle = NULL; + struct handle_to_path_ctx ctx = {}; - /* - * With handle we don't look at the execute bit on the - * directory. Ideally we would like CAP_DAC_SEARCH. 
- * But we don't have that - */ - if (!capable(CAP_DAC_READ_SEARCH)) { - retval = -EPERM; + retval = get_path_from_fd(mountdirfd, &ctx.root); + if (retval) goto out_err; + + if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) { + retval = -EPERM; + goto out_path; } + if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { retval = -EFAULT; - goto out_err; + goto out_path; } if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || (f_handle.handle_bytes == 0)) { retval = -EINVAL; - goto out_err; + goto out_path; } handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) { retval = -ENOMEM; - goto out_err; + goto out_path; } /* copy the full handle */ *handle = f_handle; @@ -207,10 +307,12 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, goto out_handle; } - retval = do_handle_to_path(mountdirfd, handle, path); + retval = do_handle_to_path(handle, path, &ctx); out_handle: kfree(handle); +out_path: + path_put(&ctx.root); out_err: return retval; } @@ -223,7 +325,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, struct file *file; int fd; - retval = handle_to_path(mountdirfd, ufh, &path); + retval = handle_to_path(mountdirfd, ufh, &path, open_flag); if (retval) return retval; diff --git a/fs/fs_parser.c b/fs/fs_parser.c index a4d6ca0b8971..24727ec34e5a 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -308,6 +308,40 @@ int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p, } EXPORT_SYMBOL(fs_param_is_fd); +int fs_param_is_uid(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + kuid_t uid; + + if (fs_param_is_u32(log, p, param, result) != 0) + return fs_param_bad_value(log, param); + + uid = make_kuid(current_user_ns(), result->uint_32); + if (!uid_valid(uid)) + return inval_plog(log, "Invalid uid '%s'", param->string); + + result->uid = uid; + return 0; +} 
+EXPORT_SYMBOL(fs_param_is_uid); + +int fs_param_is_gid(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + kgid_t gid; + + if (fs_param_is_u32(log, p, param, result) != 0) + return fs_param_bad_value(log, param); + + gid = make_kgid(current_user_ns(), result->uint_32); + if (!gid_valid(gid)) + return inval_plog(log, "Invalid gid '%s'", param->string); + + result->gid = gid; + return 0; +} +EXPORT_SYMBOL(fs_param_is_gid); + int fs_param_is_blockdev(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { diff --git a/fs/fsopen.c b/fs/fsopen.c index 6593ae518115..ed2dd000622e 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -220,10 +220,6 @@ static int vfs_cmd_create(struct fs_context *fc, bool exclusive) if (!mount_capable(fc)) return -EPERM; - /* require the new mount api */ - if (exclusive && fc->ops == &legacy_fs_context_ops) - return -EOPNOTSUPP; - fc->phase = FS_CONTEXT_CREATING; fc->exclusive = exclusive; @@ -411,6 +407,7 @@ SYSCALL_DEFINE5(fsconfig, case FSCONFIG_SET_PATH: case FSCONFIG_SET_PATH_EMPTY: case FSCONFIG_SET_FD: + case FSCONFIG_CMD_CREATE_EXCL: ret = -EOPNOTSUPP; goto out_f; } @@ -451,7 +448,7 @@ SYSCALL_DEFINE5(fsconfig, fallthrough; case FSCONFIG_SET_PATH: param.type = fs_value_is_filename; - param.name = getname_flags(_value, lookup_flags, NULL); + param.name = getname_flags(_value, lookup_flags); if (IS_ERR(param.name)) { ret = PTR_ERR(param.name); goto out_key; diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 3d192b80a561..04cfd8fee992 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -146,8 +146,8 @@ int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, * be stripped. 
*/ if (fc->posix_acl && - !vfsgid_in_group_p(i_gid_into_vfsgid(&nop_mnt_idmap, inode)) && - !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID)) + !in_group_or_capable(&nop_mnt_idmap, inode, + i_gid_into_vfsgid(&nop_mnt_idmap, inode))) extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID; ret = fuse_setxattr(inode, name, value, size, 0, extra_flags); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 99e44ea7d875..d8ab4e93916f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -740,8 +740,8 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = { fsparam_string ("source", OPT_SOURCE), fsparam_u32 ("fd", OPT_FD), fsparam_u32oct ("rootmode", OPT_ROOTMODE), - fsparam_u32 ("user_id", OPT_USER_ID), - fsparam_u32 ("group_id", OPT_GROUP_ID), + fsparam_uid ("user_id", OPT_USER_ID), + fsparam_gid ("group_id", OPT_GROUP_ID), fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS), fsparam_flag ("allow_other", OPT_ALLOW_OTHER), fsparam_u32 ("max_read", OPT_MAX_READ), @@ -755,6 +755,8 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) struct fs_parse_result result; struct fuse_fs_context *ctx = fsc->fs_private; int opt; + kuid_t kuid; + kgid_t kgid; if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { /* @@ -799,16 +801,26 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) break; case OPT_USER_ID: - ctx->user_id = make_kuid(fsc->user_ns, result.uint_32); - if (!uid_valid(ctx->user_id)) + kuid = result.uid; + /* + * The requested uid must be representable in the + * filesystem's idmapping. + */ + if (!kuid_has_mapping(fsc->user_ns, kuid)) return invalfc(fsc, "Invalid user_id"); + ctx->user_id = kuid; ctx->user_id_present = true; break; case OPT_GROUP_ID: - ctx->group_id = make_kgid(fsc->user_ns, result.uint_32); - if (!gid_valid(ctx->group_id)) + kgid = result.gid; + /* + * The requested gid must be representable in the + * filesystem's idmapping. 
+ */ + if (!kgid_has_mapping(fsc->user_ns, kgid)) return invalfc(fsc, "Invalid group_id"); + ctx->group_id = kgid; ctx->group_id_present = true; break; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 8c34798a0715..744e10b46904 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -200,6 +200,7 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t HFS_I(inode)->flags = 0; HFS_I(inode)->rsrc_inode = NULL; HFS_I(inode)->fs_blocks = 0; + HFS_I(inode)->tz_secondswest = sys_tz.tz_minuteswest * 60; if (S_ISDIR(mode)) { inode->i_size = 2; HFS_SB(sb)->folder_count++; @@ -275,6 +276,8 @@ void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, for (count = 0, i = 0; i < 3; i++) count += be16_to_cpu(ext[i].count); HFS_I(inode)->first_blocks = count; + HFS_I(inode)->cached_start = 0; + HFS_I(inode)->cached_blocks = 0; inode->i_size = HFS_I(inode)->phys_size = log_size; HFS_I(inode)->fs_blocks = (log_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 6764afa98a6f..eeac99765f0d 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -28,6 +28,7 @@ static struct kmem_cache *hfs_inode_cachep; +MODULE_DESCRIPTION("Apple Macintosh file system support"); MODULE_LICENSE("GPL"); static int hfs_sync_fs(struct super_block *sb, int wait) diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index ca2ba8c9f82e..901e83d65d20 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c @@ -25,19 +25,8 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) fd->key = ptr + tree->max_key_len + 2; hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); - switch (tree->cnid) { - case HFSPLUS_CAT_CNID: - mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX); - break; - case HFSPLUS_EXT_CNID: - mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX); - break; - case HFSPLUS_ATTR_CNID: - mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX); - break; - default: - 
BUG(); - } + mutex_lock_nested(&tree->tree_lock, + hfsplus_btree_lock_class(tree)); return 0; } diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 3c572e44f2ad..9c51867dddc5 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -430,7 +430,8 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, hfsplus_free_extents(sb, ext_entry, total_blocks - start, total_blocks); total_blocks = start; - mutex_lock(&fd.tree->tree_lock); + mutex_lock_nested(&fd.tree->tree_lock, + hfsplus_btree_lock_class(fd.tree)); } while (total_blocks > blocks); hfs_find_exit(&fd); @@ -592,7 +593,8 @@ void hfsplus_file_truncate(struct inode *inode) alloc_cnt, alloc_cnt - blk_cnt); hfsplus_dump_extent(hip->first_extents); hip->first_blocks = blk_cnt; - mutex_lock(&fd.tree->tree_lock); + mutex_lock_nested(&fd.tree->tree_lock, + hfsplus_btree_lock_class(fd.tree)); break; } res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); @@ -606,7 +608,8 @@ void hfsplus_file_truncate(struct inode *inode) hfsplus_free_extents(sb, hip->cached_extents, alloc_cnt - start, alloc_cnt - blk_cnt); hfsplus_dump_extent(hip->cached_extents); - mutex_lock(&fd.tree->tree_lock); + mutex_lock_nested(&fd.tree->tree_lock, + hfsplus_btree_lock_class(fd.tree)); if (blk_cnt > start) { hip->extent_state |= HFSPLUS_EXT_DIRTY; break; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 012a3d003fbe..9e78f181c24f 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -553,6 +553,27 @@ static inline __be32 __hfsp_ut2mt(time64_t ut) return cpu_to_be32(lower_32_bits(ut) + HFSPLUS_UTC_OFFSET); } +static inline enum hfsplus_btree_mutex_classes +hfsplus_btree_lock_class(struct hfs_btree *tree) +{ + enum hfsplus_btree_mutex_classes class; + + switch (tree->cnid) { + case HFSPLUS_CAT_CNID: + class = CATALOG_BTREE_MUTEX; + break; + case HFSPLUS_EXT_CNID: + class = EXTENTS_BTREE_MUTEX; + break; + case HFSPLUS_ATTR_CNID: + class = ATTR_BTREE_MUTEX; + break; + default: + BUG(); + } + 
return class; +} + /* compatibility */ #define hfsp_mt2ut(t) (struct timespec64){ .tv_sec = __hfsp_mt2ut(t) } #define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec) diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index 5661a2e24d03..40d04dba13ac 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -40,7 +40,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags) /* Directory containing the bootable system */ vh->finder_info[0] = bvh->finder_info[0] = - cpu_to_be32(parent_ino(dentry)); + cpu_to_be32(d_parent_ino(dentry)); /* * Bootloader. Just using the inode here breaks in the case of @@ -51,7 +51,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags) /* Per spec, the OS X system folder - same as finder_info[0] here */ vh->finder_info[5] = bvh->finder_info[5] = - cpu_to_be32(parent_ino(dentry)); + cpu_to_be32(d_parent_ino(dentry)); mutex_unlock(&sbi->vh_mutex); return 0; diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 5a400259ae74..9a1a93e3888b 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -696,7 +696,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) return err; } - strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + + strbuf = kzalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL); if (!strbuf) { res = -ENOMEM; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index a73d27c4dd58..3eb747d26924 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -16,11 +16,16 @@ #include <linux/seq_file.h> #include <linux/writeback.h> #include <linux/mount.h> +#include <linux/fs_context.h> #include <linux/namei.h> #include "hostfs.h" #include <init.h> #include <kern.h> +struct hostfs_fs_info { + char *host_root_path; +}; + struct hostfs_inode_info { int fd; fmode_t mode; @@ -90,8 +95,10 @@ static char *__dentry_name(struct dentry *dentry, char *name) char *p = dentry_path_raw(dentry, name, 
PATH_MAX); char *root; size_t len; + struct hostfs_fs_info *fsi; - root = dentry->d_sb->s_fs_info; + fsi = dentry->d_sb->s_fs_info; + root = fsi->host_root_path; len = strlen(root); if (IS_ERR(p)) { __putname(name); @@ -196,8 +203,10 @@ static int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) long long f_bavail; long long f_files; long long f_ffree; + struct hostfs_fs_info *fsi; - err = do_statfs(dentry->d_sb->s_fs_info, + fsi = dentry->d_sb->s_fs_info; + err = do_statfs(fsi->host_root_path, &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), &sf->f_namelen); @@ -245,7 +254,11 @@ static void hostfs_free_inode(struct inode *inode) static int hostfs_show_options(struct seq_file *seq, struct dentry *root) { - const char *root_path = root->d_sb->s_fs_info; + struct hostfs_fs_info *fsi; + const char *root_path; + + fsi = root->d_sb->s_fs_info; + root_path = fsi->host_root_path; size_t offset = strlen(root_ino) + 1; if (strlen(root_path) > offset) @@ -432,31 +445,20 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc) static int hostfs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; char *buffer; - loff_t start = page_offset(page); + loff_t start = folio_pos(folio); int bytes_read, ret = 0; - buffer = kmap_local_page(page); + buffer = kmap_local_folio(folio, 0); bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, PAGE_SIZE); - if (bytes_read < 0) { - ClearPageUptodate(page); - SetPageError(page); + if (bytes_read < 0) ret = bytes_read; - goto out; - } - - memset(buffer + bytes_read, 0, PAGE_SIZE - bytes_read); - - ClearPageError(page); - SetPageUptodate(page); - - out: - flush_dcache_page(page); + else + buffer = folio_zero_tail(folio, bytes_read, buffer); kunmap_local(buffer); - unlock_page(page); + folio_end_read(folio, ret == 0); return ret; } @@ -922,10 +924,11 @@ static const struct inode_operations hostfs_link_iops = { .get_link 
= hostfs_get_link, }; -static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) +static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) { + struct hostfs_fs_info *fsi = sb->s_fs_info; + const char *host_root = fc->source; struct inode *root_inode; - char *host_root_path, *req_root = d; int err; sb->s_blocksize = 1024; @@ -939,15 +942,15 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) return err; /* NULL is printed as '(null)' by printf(): avoid that. */ - if (req_root == NULL) - req_root = ""; + if (fc->source == NULL) + host_root = ""; - sb->s_fs_info = host_root_path = - kasprintf(GFP_KERNEL, "%s/%s", root_ino, req_root); - if (host_root_path == NULL) + fsi->host_root_path = + kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); + if (fsi->host_root_path == NULL) return -ENOMEM; - root_inode = hostfs_iget(sb, host_root_path); + root_inode = hostfs_iget(sb, fsi->host_root_path); if (IS_ERR(root_inode)) return PTR_ERR(root_inode); @@ -955,7 +958,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) char *name; iput(root_inode); - name = follow_link(host_root_path); + name = follow_link(fsi->host_root_path); if (IS_ERR(name)) return PTR_ERR(name); @@ -972,11 +975,38 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) return 0; } -static struct dentry *hostfs_read_sb(struct file_system_type *type, - int flags, const char *dev_name, - void *data) +static int hostfs_fc_get_tree(struct fs_context *fc) +{ + return get_tree_nodev(fc, hostfs_fill_super); +} + +static void hostfs_fc_free(struct fs_context *fc) { - return mount_nodev(type, flags, data, hostfs_fill_sb_common); + struct hostfs_fs_info *fsi = fc->s_fs_info; + + if (!fsi) + return; + + kfree(fsi->host_root_path); + kfree(fsi); +} + +static const struct fs_context_operations hostfs_context_ops = { + .get_tree = hostfs_fc_get_tree, + .free = hostfs_fc_free, +}; + +static int 
hostfs_init_fs_context(struct fs_context *fc) +{ + struct hostfs_fs_info *fsi; + + fsi = kzalloc(sizeof(*fsi), GFP_KERNEL); + if (!fsi) + return -ENOMEM; + + fc->s_fs_info = fsi; + fc->ops = &hostfs_context_ops; + return 0; } static void hostfs_kill_sb(struct super_block *s) @@ -986,11 +1016,11 @@ static void hostfs_kill_sb(struct super_block *s) } static struct file_system_type hostfs_type = { - .owner = THIS_MODULE, - .name = "hostfs", - .mount = hostfs_read_sb, - .kill_sb = hostfs_kill_sb, - .fs_flags = 0, + .owner = THIS_MODULE, + .name = "hostfs", + .init_fs_context = hostfs_init_fs_context, + .kill_sb = hostfs_kill_sb, + .fs_flags = 0, }; MODULE_ALIAS_FS("hostfs"); diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 9184b4584b01..d0edf9ed33b6 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -472,9 +472,8 @@ out: static int hpfs_symlink_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - char *link = page_address(page); - struct inode *i = page->mapping->host; + char *link = folio_address(folio); + struct inode *i = folio->mapping->host; struct fnode *fnode; struct buffer_head *bh; int err; @@ -485,17 +484,9 @@ static int hpfs_symlink_read_folio(struct file *file, struct folio *folio) goto fail; err = hpfs_read_ea(i->i_sb, fnode, "SYMLINK", link, PAGE_SIZE); brelse(bh); - if (err) - goto fail; - hpfs_unlock(i->i_sb); - SetPageUptodate(page); - unlock_page(page); - return 0; - fail: hpfs_unlock(i->i_sb); - SetPageError(page); - unlock_page(page); + folio_end_read(folio, err == 0); return err; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 314834a078e9..e73717daa5f9 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -793,4 +793,5 @@ static void __exit exit_hpfs_fs(void) module_init(init_hpfs_fs) module_exit(exit_hpfs_fs) +MODULE_DESCRIPTION("OS/2 HPFS file system support"); MODULE_LICENSE("GPL"); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 412f295acebe..81dab95f67ed 100644 --- 
a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -73,13 +73,13 @@ enum hugetlb_param { }; static const struct fs_parameter_spec hugetlb_fs_parameters[] = { - fsparam_u32 ("gid", Opt_gid), + fsparam_gid ("gid", Opt_gid), fsparam_string("min_size", Opt_min_size), fsparam_u32oct("mode", Opt_mode), fsparam_string("nr_inodes", Opt_nr_inodes), fsparam_string("pagesize", Opt_pagesize), fsparam_string("size", Opt_size), - fsparam_u32 ("uid", Opt_uid), + fsparam_uid ("uid", Opt_uid), {} }; @@ -1376,15 +1376,11 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par switch (opt) { case Opt_uid: - ctx->uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(ctx->uid)) - goto bad_val; + ctx->uid = result.uid; return 0; case Opt_gid: - ctx->gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(ctx->gid)) - goto bad_val; + ctx->gid = result.gid; return 0; case Opt_mode: diff --git a/fs/inode.c b/fs/inode.c index 3a41f83a4ba5..f356fe2ec2b6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -162,6 +162,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; + inode->i_state = 0; atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; @@ -231,6 +232,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) if (unlikely(security_inode_alloc(inode))) return -ENOMEM; + this_cpu_inc(nr_inodes); return 0; @@ -886,36 +888,45 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc) return freed; } -static void __wait_on_freeing_inode(struct inode *inode); +static void __wait_on_freeing_inode(struct inode *inode, bool locked); /* * Called with the inode lock held. 
*/ static struct inode *find_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), - void *data) + void *data, bool locked) { struct inode *inode = NULL; + if (locked) + lockdep_assert_held(&inode_hash_lock); + else + lockdep_assert_not_held(&inode_hash_lock); + + rcu_read_lock(); repeat: - hlist_for_each_entry(inode, head, i_hash) { + hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_sb != sb) continue; if (!test(inode, data)) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode); + __wait_on_freeing_inode(inode, locked); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { spin_unlock(&inode->i_lock); + rcu_read_unlock(); return ERR_PTR(-ESTALE); } __iget(inode); spin_unlock(&inode->i_lock); + rcu_read_unlock(); return inode; } + rcu_read_unlock(); return NULL; } @@ -924,29 +935,39 @@ repeat: * iget_locked for details. */ static struct inode *find_inode_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) + struct hlist_head *head, unsigned long ino, + bool locked) { struct inode *inode = NULL; + if (locked) + lockdep_assert_held(&inode_hash_lock); + else + lockdep_assert_not_held(&inode_hash_lock); + + rcu_read_lock(); repeat: - hlist_for_each_entry(inode, head, i_hash) { + hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_ino != ino) continue; if (inode->i_sb != sb) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode); + __wait_on_freeing_inode(inode, locked); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { spin_unlock(&inode->i_lock); + rcu_read_unlock(); return ERR_PTR(-ESTALE); } __iget(inode); spin_unlock(&inode->i_lock); + rcu_read_unlock(); return inode; } + rcu_read_unlock(); return NULL; } @@ -1004,14 +1025,7 @@ EXPORT_SYMBOL(get_next_ino); */ struct inode *new_inode_pseudo(struct super_block *sb) { - struct inode *inode = 
alloc_inode(sb); - - if (inode) { - spin_lock(&inode->i_lock); - inode->i_state = 0; - spin_unlock(&inode->i_lock); - } - return inode; + return alloc_inode(sb); } /** @@ -1161,7 +1175,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, again: spin_lock(&inode_hash_lock); - old = find_inode(inode->i_sb, head, test, data); + old = find_inode(inode->i_sb, head, test, data, true); if (unlikely(old)) { /* * Uhhuh, somebody else created the same inode under us. @@ -1235,7 +1249,6 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, struct inode *new = alloc_inode(sb); if (new) { - new->i_state = 0; inode = inode_insert5(new, hashval, test, set, data); if (unlikely(inode != new)) destroy_inode(new); @@ -1246,6 +1259,47 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, EXPORT_SYMBOL(iget5_locked); /** + * iget5_locked_rcu - obtain an inode from a mounted file system + * @sb: super block of file system + * @hashval: hash value (usually inode number) to get + * @test: callback used for comparisons between inodes + * @set: callback used to initialize a new struct inode + * @data: opaque data pointer to pass to @test and @set + * + * This is equivalent to iget5_locked, except the @test callback must + * tolerate the inode not being stable, including being mid-teardown. 
+ */ +struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *data) +{ + struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode *inode, *new; + +again: + inode = find_inode(sb, head, test, data, false); + if (inode) { + if (IS_ERR(inode)) + return NULL; + wait_on_inode(inode); + if (unlikely(inode_unhashed(inode))) { + iput(inode); + goto again; + } + return inode; + } + + new = alloc_inode(sb); + if (new) { + inode = inode_insert5(new, hashval, test, set, data); + if (unlikely(inode != new)) + destroy_inode(new); + } + return inode; +} +EXPORT_SYMBOL_GPL(iget5_locked_rcu); + +/** * iget_locked - obtain an inode from a mounted file system * @sb: super block of file system * @ino: inode number to get @@ -1263,9 +1317,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; again: - spin_lock(&inode_hash_lock); - inode = find_inode_fast(sb, head, ino); - spin_unlock(&inode_hash_lock); + inode = find_inode_fast(sb, head, ino, false); if (inode) { if (IS_ERR(inode)) return NULL; @@ -1283,7 +1335,7 @@ again: spin_lock(&inode_hash_lock); /* We released the lock, so.. */ - old = find_inode_fast(sb, head, ino); + old = find_inode_fast(sb, head, ino, true); if (!old) { inode->i_ino = ino; spin_lock(&inode->i_lock); @@ -1419,7 +1471,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, struct inode *inode; spin_lock(&inode_hash_lock); - inode = find_inode(sb, head, test, data); + inode = find_inode(sb, head, test, data, true); spin_unlock(&inode_hash_lock); return IS_ERR(inode) ? 
NULL : inode; @@ -1474,7 +1526,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) struct inode *inode; again: spin_lock(&inode_hash_lock); - inode = find_inode_fast(sb, head, ino); + inode = find_inode_fast(sb, head, ino, true); spin_unlock(&inode_hash_lock); if (inode) { @@ -2235,17 +2287,21 @@ EXPORT_SYMBOL(inode_needs_sync); * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list * will DTRT. */ -static void __wait_on_freeing_inode(struct inode *inode) +static void __wait_on_freeing_inode(struct inode *inode, bool locked) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); - spin_unlock(&inode_hash_lock); + rcu_read_unlock(); + if (locked) + spin_unlock(&inode_hash_lock); schedule(); finish_wait(wq, &wait.wq_entry); - spin_lock(&inode_hash_lock); + if (locked) + spin_lock(&inode_hash_lock); + rcu_read_lock(); } static __initdata unsigned long ihash_entries; @@ -2538,6 +2594,7 @@ bool in_group_or_capable(struct mnt_idmap *idmap, return true; return false; } +EXPORT_SYMBOL(in_group_or_capable); /** * mode_strip_sgid - handle the sgid bit for non-directories diff --git a/fs/internal.h b/fs/internal.h index ab2225136f60..cdd73209eecb 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -17,6 +17,7 @@ struct fs_context; struct pipe_inode_info; struct iov_iter; struct mnt_idmap; +struct ns_common; /* * block/bdev.c @@ -239,6 +240,7 @@ extern void mnt_pin_kill(struct mount *m); * fs/nsfs.c */ extern const struct dentry_operations ns_dentry_operations; +int open_namespace(struct ns_common *ns); /* * fs/stat.c: @@ -247,6 +249,8 @@ extern const struct dentry_operations ns_dentry_operations; int getname_statx_lookup_flags(int flags); int do_statx(int dfd, struct filename *filename, unsigned int flags, unsigned int mask, struct statx __user *buffer); +int do_statx_fd(int fd, unsigned int 
flags, unsigned int mask, + struct statx __user *buffer); /* * fs/splice.c: @@ -321,3 +325,15 @@ struct stashed_operations { int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data, struct path *path); void stashed_dentry_prune(struct dentry *dentry); +/** + * path_mounted - check whether path is mounted + * @path: path to check + * + * Determine whether @path refers to the root of a mount. + * + * Return: true if @path is the root of a mount, false if not. + */ +static inline bool path_mounted(const struct path *path) +{ + return path->mnt->mnt_root == path->dentry; +} diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d46558990279..f420c53d86ac 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -307,8 +307,6 @@ static void iomap_finish_folio_read(struct folio *folio, size_t off, spin_unlock_irqrestore(&ifs->state_lock, flags); } - if (error) - folio_set_error(folio); if (finished) folio_end_read(folio, uptodate); } @@ -444,6 +442,24 @@ done: return pos - orig_pos + plen; } +static loff_t iomap_read_folio_iter(const struct iomap_iter *iter, + struct iomap_readpage_ctx *ctx) +{ + struct folio *folio = ctx->cur_folio; + size_t offset = offset_in_folio(folio, iter->pos); + loff_t length = min_t(loff_t, folio_size(folio) - offset, + iomap_length(iter)); + loff_t done, ret; + + for (done = 0; done < length; done += ret) { + ret = iomap_readpage_iter(iter, ctx, done); + if (ret <= 0) + return ret; + } + + return done; +} + int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) { struct iomap_iter iter = { @@ -459,10 +475,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.processed = iomap_readpage_iter(&iter, &ctx, 0); - - if (ret < 0) - folio_set_error(folio); + iter.processed = iomap_read_folio_iter(&iter, &ctx); if (ctx.bio) { submit_bio(ctx.bio); @@ -698,7 +711,6 @@ static int 
__iomap_write_begin(const struct iomap_iter *iter, loff_t pos, if (folio_test_uptodate(folio)) return 0; - folio_clear_error(folio); do { iomap_adjust_read_range(iter->inode, folio, &block_start, @@ -878,37 +890,22 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); - loff_t old_size = iter->inode->i_size; - size_t written; if (srcmap->type == IOMAP_INLINE) { iomap_write_end_inline(iter, folio, pos, copied); - written = copied; - } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { - written = block_write_end(NULL, iter->inode->i_mapping, pos, - len, copied, &folio->page, NULL); - WARN_ON_ONCE(written != copied && written != 0); - } else { - written = __iomap_write_end(iter->inode, pos, len, copied, - folio) ? copied : 0; + return true; } - /* - * Update the in-memory inode size after copying the data into the page - * cache. It's up to the file system to write the updated size to disk, - * preferably after I/O completion so that no stale data is exposed. - * Only once that's done can we unlock and release the folio. 
- */ - if (pos + written > old_size) { - i_size_write(iter->inode, pos + written); - iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; - } - __iomap_put_folio(iter, pos, written, folio); + if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { + size_t bh_written; - if (old_size < pos) - pagecache_isize_extended(iter->inode, old_size, pos); + bh_written = block_write_end(NULL, iter->inode->i_mapping, pos, + len, copied, &folio->page, NULL); + WARN_ON_ONCE(bh_written != copied && bh_written != 0); + return bh_written == copied; + } - return written == copied; + return __iomap_write_end(iter->inode, pos, len, copied, folio); } static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) @@ -923,6 +920,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) do { struct folio *folio; + loff_t old_size; size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ @@ -974,6 +972,23 @@ retry: written = iomap_write_end(iter, pos, bytes, copied, folio) ? copied : 0; + /* + * Update the in-memory inode size after copying the data into + * the page cache. It's up to the file system to write the + * updated size to disk, preferably after I/O completion so that + * no stale data is exposed. Only once that's done can we + * unlock and release the folio. 
+ */ + old_size = iter->inode->i_size; + if (pos + written > old_size) { + i_size_write(iter->inode, pos + written); + iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; + } + __iomap_put_folio(iter, pos, written, folio); + + if (old_size < pos) + pagecache_isize_extended(iter->inode, old_size, pos); + cond_resched(); if (unlikely(written == 0)) { /* @@ -1344,6 +1359,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) bytes = folio_size(folio) - offset; ret = iomap_write_end(iter, pos, bytes, bytes, folio); + __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; @@ -1409,6 +1425,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_mark_accessed(folio); ret = iomap_write_end(iter, pos, bytes, bytes, folio); + __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; @@ -1539,8 +1556,6 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error) /* walk all folios in bio, ending page IO on them */ bio_for_each_folio_all(fi, bio) { - if (error) - folio_set_error(fi.folio); iomap_finish_folio_write(inode, fi.folio, fi.length); folio_count++; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 93b1077a380a..ed548efdd9bb 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -326,8 +326,8 @@ static const struct fs_parameter_spec isofs_param_spec[] = { fsparam_u32 ("session", Opt_session), fsparam_u32 ("sbsector", Opt_sb), fsparam_enum ("check", Opt_check, isofs_param_check), - fsparam_u32 ("uid", Opt_uid), - fsparam_u32 ("gid", Opt_gid), + fsparam_uid ("uid", Opt_uid), + fsparam_gid ("gid", Opt_gid), /* Note: mode/dmode historically accepted %u not strictly %o */ fsparam_u32 ("mode", Opt_mode), fsparam_u32 ("dmode", Opt_dmode), @@ -344,8 +344,6 @@ static int isofs_parse_param(struct fs_context *fc, struct isofs_options *popt = fc->fs_private; struct fs_parse_result result; int opt; - kuid_t uid; - kgid_t gid; unsigned int n; /* There are no remountable options */ @@ -409,17 +407,11 @@ 
static int isofs_parse_param(struct fs_context *fc, case Opt_ignore: break; case Opt_uid: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) - return -EINVAL; - popt->uid = uid; + popt->uid = result.uid; popt->uid_set = 1; break; case Opt_gid: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) - return -EINVAL; - popt->gid = gid; + popt->gid = result.gid; popt->gid_set = 1; break; case Opt_mode: diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index d6c17ad69dee..dbf911126e61 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -688,11 +688,10 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode, */ static int rock_ridge_symlink_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct iso_inode_info *ei = ISOFS_I(inode); struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); - char *link = page_address(page); + char *link = folio_address(folio); unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); struct buffer_head *bh; char *rpnt = link; @@ -779,9 +778,10 @@ repeat: goto fail; brelse(bh); *rpnt = '\0'; - SetPageUptodate(page); - unlock_page(page); - return 0; + ret = 0; +end: + folio_end_read(folio, ret == 0); + return ret; /* error exit from macro */ out: @@ -795,9 +795,8 @@ out_bad_span: fail: brelse(bh); error: - SetPageError(page); - unlock_page(page); - return -EIO; + ret = -EIO; + goto end; } const struct address_space_operations isofs_symlink_aops = { diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 62ea76da7fdf..e12cb145147e 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -95,13 +95,8 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_SHIFT, PAGE_SIZE); - if (ret) { - ClearPageUptodate(pg); - SetPageError(pg); - } else { + if (!ret) SetPageUptodate(pg); - 
ClearPageError(pg); - } flush_dcache_page(pg); kunmap(pg); @@ -304,10 +299,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, kunmap(pg); - if (ret) { - /* There was an error writing. */ - SetPageError(pg); - } + if (ret) + mapping_set_error(mapping, ret); /* Adjust writtenlen for the padding we did, so we don't confuse our caller */ writtenlen -= min(writtenlen, (start - aligned_start)); @@ -330,7 +323,6 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, it gets reread */ jffs2_dbg(1, "%s(): Not all bytes written. Marking page !uptodate\n", __func__); - SetPageError(pg); ClearPageUptodate(pg); } diff --git a/fs/libfs.c b/fs/libfs.c index b635ee5adbcc..8aa34870449f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1854,6 +1854,80 @@ static const struct dentry_operations generic_ci_dentry_ops = { .d_revalidate = fscrypt_d_revalidate, #endif }; + +/** + * generic_ci_match() - Match a name (case-insensitively) with a dirent. + * This is a filesystem helper for comparison with directory entries. + * generic_ci_d_compare should be used in VFS' ->d_compare instead. + * + * @parent: Inode of the parent of the dirent under comparison + * @name: name under lookup. + * @folded_name: Optional pre-folded name under lookup + * @de_name: Dirent name. + * @de_name_len: dirent name length. + * + * Test whether a case-insensitive directory entry matches the filename + * being searched. If @folded_name is provided, it is used instead of + * recalculating the casefold of @name. + * + * Return: > 0 if the directory entry matches, 0 if it doesn't match, or + * < 0 on error. 
+ */ +int generic_ci_match(const struct inode *parent, + const struct qstr *name, + const struct qstr *folded_name, + const u8 *de_name, u32 de_name_len) +{ + const struct super_block *sb = parent->i_sb; + const struct unicode_map *um = sb->s_encoding; + struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); + struct qstr dirent = QSTR_INIT(de_name, de_name_len); + int res = 0; + + if (IS_ENCRYPTED(parent)) { + const struct fscrypt_str encrypted_name = + FSTR_INIT((u8 *) de_name, de_name_len); + + if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent))) + return -EINVAL; + + decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); + if (!decrypted_name.name) + return -ENOMEM; + res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name, + &decrypted_name); + if (res < 0) { + kfree(decrypted_name.name); + return res; + } + dirent.name = decrypted_name.name; + dirent.len = decrypted_name.len; + } + + /* + * Attempt a case-sensitive match first. It is cheaper and + * should cover most lookups, including all the sane + * applications that expect a case-sensitive filesystem. + */ + + if (dirent.len == name->len && + !memcmp(name->name, dirent.name, dirent.len)) + goto out; + + if (folded_name->name) + res = utf8_strncasecmp_folded(um, folded_name, &dirent); + else + res = utf8_strncasecmp(um, name, &dirent); + +out: + kfree(decrypted_name.name); + if (res < 0 && sb_has_strict_encoding(sb)) { + pr_err_ratelimited("Directory contains filename that is invalid UTF-8"); + return 0; + } + return !res; +} +EXPORT_SYMBOL(generic_ci_match); #endif #ifdef CONFIG_FS_ENCRYPTION diff --git a/fs/locks.c b/fs/locks.c index 90c8746874de..bdd94c32256f 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1367,9 +1367,9 @@ retry: locks_wake_up_blocks(&left->c); } out: + trace_posix_lock_inode(inode, request, error); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); - trace_posix_lock_inode(inode, request, error); /* * Free any unused locks. 
*/ @@ -2448,8 +2448,9 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, error = do_lock_file_wait(filp, cmd, file_lock); /* - * Attempt to detect a close/fcntl race and recover by releasing the - * lock that was just acquired. There is no need to do that when we're + * Detect close/fcntl races and recover by zapping all POSIX locks + * associated with this file and our files_struct, just like on + * filp_flush(). There is no need to do that when we're * unlocking though, or for OFD locks. */ if (!error && file_lock->c.flc_type != F_UNLCK && @@ -2464,9 +2465,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, f = files_lookup_fd_locked(files, fd); spin_unlock(&files->file_lock); if (f != filp) { - file_lock->c.flc_type = F_UNLCK; - error = do_lock_file_wait(filp, cmd, file_lock); - WARN_ON_ONCE(error); + locks_remove_posix(filp, files); error = -EBADF; } } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 7f9a2d8aa420..1c3df63162ef 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -730,5 +730,6 @@ static void __exit exit_minix_fs(void) module_init(init_minix_fs) module_exit(exit_minix_fs) +MODULE_DESCRIPTION("Minix file system"); MODULE_LICENSE("GPL"); diff --git a/fs/minix/namei.c b/fs/minix/namei.c index d6031acc34f0..a944a0f17b53 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -213,8 +213,7 @@ static int minix_rename(struct mnt_idmap *idmap, if (!new_de) goto out_dir; err = minix_set_link(new_de, new_page, old_inode); - kunmap(new_page); - put_page(new_page); + unmap_and_put_page(new_page, new_de); if (err) goto out_dir; inode_set_ctime_current(new_inode); diff --git a/fs/mount.h b/fs/mount.h index 4a42fc68f4cc..ad4b1ddebb54 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -16,6 +16,8 @@ struct mnt_namespace { u64 event; unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; + struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ + refcount_t passive; /* number 
references not pinning @mounts */ } __randomize_layout; struct mnt_pcp { @@ -152,3 +154,4 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list) } extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor); +bool has_locked_children(struct mount *mnt, struct dentry *dentry); diff --git a/fs/mpage.c b/fs/mpage.c index fa8b99a199fa..b5b5ddf9d513 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -48,13 +48,8 @@ static void mpage_read_end_io(struct bio *bio) struct folio_iter fi; int err = blk_status_to_errno(bio->bi_status); - bio_for_each_folio_all(fi, bio) { - if (err) - folio_set_error(fi.folio); - else - folio_mark_uptodate(fi.folio); - folio_unlock(fi.folio); - } + bio_for_each_folio_all(fi, bio) + folio_end_read(fi.folio, err == 0); bio_put(bio); } @@ -65,10 +60,8 @@ static void mpage_write_end_io(struct bio *bio) int err = blk_status_to_errno(bio->bi_status); bio_for_each_folio_all(fi, bio) { - if (err) { - folio_set_error(fi.folio); + if (err) mapping_set_error(fi.folio->mapping, err); - } folio_end_writeback(fi.folio); } diff --git a/fs/namei.c b/fs/namei.c index 37fb0a8aa09a..3a4c40e12f78 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -126,7 +126,7 @@ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) struct filename * -getname_flags(const char __user *filename, int flags, int *empty) +getname_flags(const char __user *filename, int flags) { struct filename *result; char *kname; @@ -148,9 +148,20 @@ getname_flags(const char __user *filename, int flags, int *empty) result->name = kname; len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); - if (unlikely(len < 0)) { - __putname(result); - return ERR_PTR(len); + /* + * Handle both empty path and copy failure in one go. + */ + if (unlikely(len <= 0)) { + if (unlikely(len < 0)) { + __putname(result); + return ERR_PTR(len); + } + + /* The empty path is special. 
*/ + if (!(flags & LOOKUP_EMPTY)) { + __putname(result); + return ERR_PTR(-ENOENT); + } } /* @@ -180,6 +191,12 @@ getname_flags(const char __user *filename, int flags, int *empty) kfree(result); return ERR_PTR(len); } + /* The empty path is special. */ + if (unlikely(!len) && !(flags & LOOKUP_EMPTY)) { + __putname(kname); + kfree(result); + return ERR_PTR(-ENOENT); + } if (unlikely(len == PATH_MAX)) { __putname(kname); kfree(result); @@ -188,16 +205,6 @@ getname_flags(const char __user *filename, int flags, int *empty) } atomic_set(&result->refcnt, 1); - /* The empty path is special. */ - if (unlikely(!len)) { - if (empty) - *empty = 1; - if (!(flags & LOOKUP_EMPTY)) { - putname(result); - return ERR_PTR(-ENOENT); - } - } - result->uptr = filename; result->aname = NULL; audit_getname(result); @@ -209,13 +216,13 @@ getname_uflags(const char __user *filename, int uflags) { int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; - return getname_flags(filename, flags, NULL); + return getname_flags(filename, flags); } struct filename * getname(const char __user * filename) { - return getname_flags(filename, 0, NULL); + return getname_flags(filename, 0); } struct filename * @@ -1233,29 +1240,48 @@ int may_linkat(struct mnt_idmap *idmap, const struct path *link) * * Returns 0 if the open is allowed, -ve on error. 
*/ -static int may_create_in_sticky(struct mnt_idmap *idmap, - struct nameidata *nd, struct inode *const inode) +static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd, + struct inode *const inode) { umode_t dir_mode = nd->dir_mode; - vfsuid_t dir_vfsuid = nd->dir_vfsuid; + vfsuid_t dir_vfsuid = nd->dir_vfsuid, i_vfsuid; - if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) || - (!sysctl_protected_regular && S_ISREG(inode->i_mode)) || - likely(!(dir_mode & S_ISVTX)) || - vfsuid_eq(i_uid_into_vfsuid(idmap, inode), dir_vfsuid) || - vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) + if (likely(!(dir_mode & S_ISVTX))) return 0; - if (likely(dir_mode & 0002) || - (dir_mode & 0020 && - ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) || - (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) { - const char *operation = S_ISFIFO(inode->i_mode) ? - "sticky_create_fifo" : - "sticky_create_regular"; - audit_log_path_denied(AUDIT_ANOM_CREAT, operation); + if (S_ISREG(inode->i_mode) && !sysctl_protected_regular) + return 0; + + if (S_ISFIFO(inode->i_mode) && !sysctl_protected_fifos) + return 0; + + i_vfsuid = i_uid_into_vfsuid(idmap, inode); + + if (vfsuid_eq(i_vfsuid, dir_vfsuid)) + return 0; + + if (vfsuid_eq_kuid(i_vfsuid, current_fsuid())) + return 0; + + if (likely(dir_mode & 0002)) { + audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create"); return -EACCES; } + + if (dir_mode & 0020) { + if (sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) { + audit_log_path_denied(AUDIT_ANOM_CREAT, + "sticky_create_fifo"); + return -EACCES; + } + + if (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode)) { + audit_log_path_denied(AUDIT_ANOM_CREAT, + "sticky_create_regular"); + return -EACCES; + } + } + return 0; } @@ -1712,17 +1738,26 @@ static struct dentry *lookup_slow(const struct qstr *name, } static inline int may_lookup(struct mnt_idmap *idmap, - struct nameidata *nd) + struct nameidata *restrict nd) { - if 
(nd->flags & LOOKUP_RCU) { - int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); - if (!err) // success, keep going - return 0; - if (!try_to_unlazy(nd)) - return -ECHILD; // redo it all non-lazy - if (err != -ECHILD) // hard error - return err; - } + int err, mask; + + mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0; + err = inode_permission(idmap, nd->inode, mask | MAY_EXEC); + if (likely(!err)) + return 0; + + // If we failed, and we weren't in LOOKUP_RCU, it's final + if (!(nd->flags & LOOKUP_RCU)) + return err; + + // Drop out of RCU mode to make sure it wasn't transient + if (!try_to_unlazy(nd)) + return -ECHILD; // redo it all non-lazy + + if (err != -ECHILD) // hard error + return err; + return inode_permission(idmap, nd->inode, MAY_EXEC); } @@ -2163,21 +2198,39 @@ EXPORT_SYMBOL(hashlen_string); /* * Calculate the length and hash of the path component, and - * return the "hash_len" as the result. + * return the length as the result. */ -static inline u64 hash_name(const void *salt, const char *name) +static inline const char *hash_name(struct nameidata *nd, + const char *name, + unsigned long *lastword) { - unsigned long a = 0, b, x = 0, y = (unsigned long)salt; + unsigned long a, b, x, y = (unsigned long)nd->path.dentry; unsigned long adata, bdata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; - len = 0; - goto inside; + /* + * The first iteration is special, because it can result in + * '.' and '..' and has no mixing other than the final fold. 
+ */ + a = load_unaligned_zeropad(name); + b = a ^ REPEAT_BYTE('/'); + if (has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)) { + adata = prep_zero_mask(a, adata, &constants); + bdata = prep_zero_mask(b, bdata, &constants); + mask = create_zero_mask(adata | bdata); + a &= zero_bytemask(mask); + *lastword = a; + len = find_zero(mask); + nd->last.hash = fold_hash(a, y); + nd->last.len = len; + return name + len; + } + len = 0; + x = 0; do { HASH_MIX(x, y, a); len += sizeof(unsigned long); -inside: a = load_unaligned_zeropad(name+len); b = a ^ REPEAT_BYTE('/'); } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); @@ -2185,11 +2238,25 @@ inside: adata = prep_zero_mask(a, adata, &constants); bdata = prep_zero_mask(b, bdata, &constants); mask = create_zero_mask(adata | bdata); - x ^= a & zero_bytemask(mask); + a &= zero_bytemask(mask); + x ^= a; + len += find_zero(mask); + *lastword = 0; // Multi-word components cannot be DOT or DOTDOT - return hashlen_create(fold_hash(x, y), len + find_zero(mask)); + nd->last.hash = fold_hash(x, y); + nd->last.len = len; + return name + len; } +/* + * Note that the 'last' word is always zero-masked, but + * was loaded as a possibly big-endian word. + */ +#ifdef __BIG_ENDIAN + #define LAST_WORD_IS_DOT (0x2eul << (BITS_PER_LONG-8)) + #define LAST_WORD_IS_DOTDOT (0x2e2eul << (BITS_PER_LONG-16)) +#endif + #else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */ /* Return the hash of a string of known length */ @@ -2222,22 +2289,35 @@ EXPORT_SYMBOL(hashlen_string); * We know there's a real path component here of at least * one character. 
*/ -static inline u64 hash_name(const void *salt, const char *name) +static inline const char *hash_name(struct nameidata *nd, const char *name, unsigned long *lastword) { - unsigned long hash = init_name_hash(salt); - unsigned long len = 0, c; + unsigned long hash = init_name_hash(nd->path.dentry); + unsigned long len = 0, c, last = 0; c = (unsigned char)*name; do { + last = (last << 8) + c; len++; hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } while (c && c != '/'); - return hashlen_create(end_name_hash(hash), len); + + // This is reliable for DOT or DOTDOT, since the component + // cannot contain NUL characters - top bits being zero means + // we cannot have had any other pathnames. + *lastword = last; + nd->last.hash = end_name_hash(hash); + nd->last.len = len; + return name + len; } #endif +#ifndef LAST_WORD_IS_DOT + #define LAST_WORD_IS_DOT 0x2e + #define LAST_WORD_IS_DOTDOT 0x2e2e +#endif + /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -2266,45 +2346,38 @@ static int link_path_walk(const char *name, struct nameidata *nd) for(;;) { struct mnt_idmap *idmap; const char *link; - u64 hash_len; - int type; + unsigned long lastword; idmap = mnt_idmap(nd->path.mnt); err = may_lookup(idmap, nd); if (err) return err; - hash_len = hash_name(nd->path.dentry, name); + nd->last.name = name; + name = hash_name(nd, name, &lastword); - type = LAST_NORM; - if (name[0] == '.') switch (hashlen_len(hash_len)) { - case 2: - if (name[1] == '.') { - type = LAST_DOTDOT; - nd->state |= ND_JUMPED; - } - break; - case 1: - type = LAST_DOT; - } - if (likely(type == LAST_NORM)) { - struct dentry *parent = nd->path.dentry; + switch(lastword) { + case LAST_WORD_IS_DOTDOT: + nd->last_type = LAST_DOTDOT; + nd->state |= ND_JUMPED; + break; + + case LAST_WORD_IS_DOT: + nd->last_type = LAST_DOT; + break; + + default: + nd->last_type = LAST_NORM; nd->state &= ~ND_JUMPED; + + struct dentry *parent = nd->path.dentry; if 
(unlikely(parent->d_flags & DCACHE_OP_HASH)) { - struct qstr this = { { .hash_len = hash_len }, .name = name }; - err = parent->d_op->d_hash(parent, &this); + err = parent->d_op->d_hash(parent, &nd->last); if (err < 0) return err; - hash_len = this.hash_len; - name = this.name; } } - nd->last.hash_len = hash_len; - nd->last.name = name; - nd->last_type = type; - - name += hashlen_len(hash_len); if (!*name) goto OK; /* @@ -2922,16 +2995,16 @@ int path_pts(struct path *path) } #endif -int user_path_at_empty(int dfd, const char __user *name, unsigned flags, - struct path *path, int *empty) +int user_path_at(int dfd, const char __user *name, unsigned flags, + struct path *path) { - struct filename *filename = getname_flags(name, flags, empty); + struct filename *filename = getname_flags(name, flags); int ret = filename_lookup(dfd, filename, flags, path, NULL); putname(filename); return ret; } -EXPORT_SYMBOL(user_path_at_empty); +EXPORT_SYMBOL(user_path_at); int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) @@ -3572,8 +3645,12 @@ static const char *open_last_lookups(struct nameidata *nd, else inode_lock_shared(dir->d_inode); dentry = lookup_open(nd, file, op, got_write); - if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED)) - fsnotify_create(dir->d_inode, dentry); + if (!IS_ERR(dentry)) { + if (file->f_mode & FMODE_CREATED) + fsnotify_create(dir->d_inode, dentry); + if (file->f_mode & FMODE_OPENED) + fsnotify_open(file); + } if (open_flag & O_CREAT) inode_unlock(dir->d_inode); else @@ -3700,6 +3777,8 @@ int vfs_tmpfile(struct mnt_idmap *idmap, mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = dir->i_op->tmpfile(idmap, dir, file, mode); dput(child); + if (file->f_mode & FMODE_OPENED) + fsnotify_open(file); if (error) return error; /* Don't check for other permissions, the inode was just created */ diff --git a/fs/namespace.c b/fs/namespace.c index 5a51315c6678..221db9de4729 100644 --- a/fs/namespace.c +++ b/fs/namespace.c 
@@ -70,7 +70,8 @@ static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); /* Don't allow confusion with old 32bit mount ID */ -static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32); +#define MNT_UNIQUE_ID_OFFSET (1ULL << 32) +static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET); static struct hlist_head *mount_hashtable __ro_after_init; static struct hlist_head *mountpoint_hashtable __ro_after_init; @@ -78,6 +79,8 @@ static struct kmem_cache *mnt_cache __ro_after_init; static DECLARE_RWSEM(namespace_sem); static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ +static DEFINE_RWLOCK(mnt_ns_tree_lock); +static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */ struct mount_kattr { unsigned int attr_set; @@ -103,6 +106,109 @@ EXPORT_SYMBOL_GPL(fs_kobj); */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); +static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns) +{ + u64 seq_b = ns->seq; + + if (seq < seq_b) + return -1; + if (seq > seq_b) + return 1; + return 0; +} + +static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node) +{ + if (!node) + return NULL; + return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node); +} + +static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b) +{ + struct mnt_namespace *ns_a = node_to_mnt_ns(a); + struct mnt_namespace *ns_b = node_to_mnt_ns(b); + u64 seq_a = ns_a->seq; + + return mnt_ns_cmp(seq_a, ns_b) < 0; +} + +static void mnt_ns_tree_add(struct mnt_namespace *ns) +{ + guard(write_lock)(&mnt_ns_tree_lock); + rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less); +} + +static void mnt_ns_release(struct mnt_namespace *ns) +{ + lockdep_assert_not_held(&mnt_ns_tree_lock); + + /* keep alive for {list,stat}mount() */ + if (refcount_dec_and_test(&ns->passive)) { + put_user_ns(ns->user_ns); + kfree(ns); + } +} +DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if 
(_T) mnt_ns_release(_T)) + +static void mnt_ns_tree_remove(struct mnt_namespace *ns) +{ + /* remove from global mount namespace list */ + if (!is_anon_ns(ns)) { + guard(write_lock)(&mnt_ns_tree_lock); + rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree); + } + + mnt_ns_release(ns); +} + +/* + * Returns the mount namespace which either has the specified id, or has the + * next smallest id after the specified one. + */ +static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id) +{ + struct rb_node *node = mnt_ns_tree.rb_node; + struct mnt_namespace *ret = NULL; + + lockdep_assert_held(&mnt_ns_tree_lock); + + while (node) { + struct mnt_namespace *n = node_to_mnt_ns(node); + + if (mnt_ns_id <= n->seq) { + ret = node_to_mnt_ns(node); + if (mnt_ns_id == n->seq) + break; + node = node->rb_left; + } else { + node = node->rb_right; + } + } + return ret; +} + +/* + * Lookup a mount namespace by id and take a passive reference count. Taking a + * passive reference means the mount namespace can be emptied if e.g., the last + * task holding an active reference exits. To access the mounts of the + * namespace the @namespace_sem must first be acquired. If the namespace has + * already shut down before acquiring @namespace_sem, {list,stat}mount() will + * see that the mount rbtree of the namespace is empty. + */ +static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id) +{ + struct mnt_namespace *ns; + + guard(read_lock)(&mnt_ns_tree_lock); + ns = mnt_ns_find_id_at(mnt_ns_id); + if (!ns || ns->seq != mnt_ns_id) + return NULL; + + refcount_inc(&ns->passive); + return ns; +} + static inline void lock_mount_hash(void) { write_seqlock(&mount_lock); @@ -1448,6 +1554,30 @@ static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id) return ret; } +/* + * Returns the mount which either has the specified mnt_id, or has the next + * greater id before the specified one. 
+ */ +static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id) +{ + struct rb_node *node = ns->mounts.rb_node; + struct mount *ret = NULL; + + while (node) { + struct mount *m = node_to_mount(node); + + if (mnt_id >= m->mnt_id_unique) { + ret = node_to_mount(node); + if (mnt_id == m->mnt_id_unique) + break; + node = node->rb_right; + } else { + node = node->rb_left; + } + } + return ret; +} + #ifdef CONFIG_PROC_FS /* iterator; we want it to have access to namespace_sem, thus here... */ @@ -1846,19 +1976,6 @@ bool may_mount(void) return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN); } -/** - * path_mounted - check whether path is mounted - * @path: path to check - * - * Determine whether @path refers to the root of a mount. - * - * Return: true if @path is the root of a mount, false if not. - */ -static inline bool path_mounted(const struct path *path) -{ - return path->mnt->mnt_root == path->dentry; -} - static void warn_mandlock(void) { pr_warn_once("=======================================================\n" @@ -1966,69 +2083,72 @@ static bool mnt_ns_loop(struct dentry *dentry) return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; } -struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, +struct mount *copy_tree(struct mount *src_root, struct dentry *dentry, int flag) { - struct mount *res, *p, *q, *r, *parent; + struct mount *res, *src_parent, *src_root_child, *src_mnt, + *dst_parent, *dst_mnt; - if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt)) + if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(src_root)) return ERR_PTR(-EINVAL); if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry)) return ERR_PTR(-EINVAL); - res = q = clone_mnt(mnt, dentry, flag); - if (IS_ERR(q)) - return q; + res = dst_mnt = clone_mnt(src_root, dentry, flag); + if (IS_ERR(dst_mnt)) + return dst_mnt; - q->mnt_mountpoint = mnt->mnt_mountpoint; + src_parent = src_root; + dst_mnt->mnt_mountpoint = src_root->mnt_mountpoint; - 
p = mnt; - list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { - struct mount *s; - if (!is_subdir(r->mnt_mountpoint, dentry)) + list_for_each_entry(src_root_child, &src_root->mnt_mounts, mnt_child) { + if (!is_subdir(src_root_child->mnt_mountpoint, dentry)) continue; - for (s = r; s; s = next_mnt(s, r)) { + for (src_mnt = src_root_child; src_mnt; + src_mnt = next_mnt(src_mnt, src_root_child)) { if (!(flag & CL_COPY_UNBINDABLE) && - IS_MNT_UNBINDABLE(s)) { - if (s->mnt.mnt_flags & MNT_LOCKED) { + IS_MNT_UNBINDABLE(src_mnt)) { + if (src_mnt->mnt.mnt_flags & MNT_LOCKED) { /* Both unbindable and locked. */ - q = ERR_PTR(-EPERM); + dst_mnt = ERR_PTR(-EPERM); goto out; } else { - s = skip_mnt_tree(s); + src_mnt = skip_mnt_tree(src_mnt); continue; } } if (!(flag & CL_COPY_MNT_NS_FILE) && - is_mnt_ns_file(s->mnt.mnt_root)) { - s = skip_mnt_tree(s); + is_mnt_ns_file(src_mnt->mnt.mnt_root)) { + src_mnt = skip_mnt_tree(src_mnt); continue; } - while (p != s->mnt_parent) { - p = p->mnt_parent; - q = q->mnt_parent; + while (src_parent != src_mnt->mnt_parent) { + src_parent = src_parent->mnt_parent; + dst_mnt = dst_mnt->mnt_parent; } - p = s; - parent = q; - q = clone_mnt(p, p->mnt.mnt_root, flag); - if (IS_ERR(q)) + + src_parent = src_mnt; + dst_parent = dst_mnt; + dst_mnt = clone_mnt(src_mnt, src_mnt->mnt.mnt_root, flag); + if (IS_ERR(dst_mnt)) goto out; lock_mount_hash(); - list_add_tail(&q->mnt_list, &res->mnt_list); - attach_mnt(q, parent, p->mnt_mp, false); + list_add_tail(&dst_mnt->mnt_list, &res->mnt_list); + attach_mnt(dst_mnt, dst_parent, src_parent->mnt_mp, false); unlock_mount_hash(); } } return res; + out: if (res) { lock_mount_hash(); umount_tree(res, UMOUNT_SYNC); unlock_mount_hash(); } - return q; + return dst_mnt; } /* Caller should check returned pointer for errors */ @@ -2078,7 +2198,7 @@ void drop_collected_mounts(struct vfsmount *mnt) namespace_unlock(); } -static bool has_locked_children(struct mount *mnt, struct dentry *dentry) +bool 
has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; @@ -3709,8 +3829,7 @@ static void free_mnt_ns(struct mnt_namespace *ns) if (!is_anon_ns(ns)) ns_free_inum(&ns->ns); dec_mnt_namespaces(ns->ucounts); - put_user_ns(ns->user_ns); - kfree(ns); + mnt_ns_tree_remove(ns); } /* @@ -3749,7 +3868,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a if (!anon) new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); refcount_set(&new_ns->ns.count, 1); + refcount_set(&new_ns->passive, 1); new_ns->mounts = RB_ROOT; + RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node); init_waitqueue_head(&new_ns->poll); new_ns->user_ns = get_user_ns(user_ns); new_ns->ucounts = ucounts; @@ -3826,6 +3947,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, while (p->mnt.mnt_root != q->mnt.mnt_root) p = next_mnt(skip_mnt_tree(p), old); } + mnt_ns_tree_add(new_ns); namespace_unlock(); if (rootmnt) @@ -4843,6 +4965,40 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq) return 0; } +static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns) +{ + s->sm.mask |= STATMOUNT_MNT_NS_ID; + s->sm.mnt_ns_id = ns->seq; +} + +static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq) +{ + struct vfsmount *mnt = s->mnt; + struct super_block *sb = mnt->mnt_sb; + int err; + + if (sb->s_op->show_options) { + size_t start = seq->count; + + err = sb->s_op->show_options(seq, mnt->mnt_root); + if (err) + return err; + + if (unlikely(seq_has_overflowed(seq))) + return -EAGAIN; + + if (seq->count == start) + return 0; + + /* skip leading comma */ + memmove(seq->buf + start, seq->buf + start + 1, + seq->count - start - 1); + seq->count--; + } + + return 0; +} + static int statmount_string(struct kstatmount *s, u64 flag) { int ret; @@ -4863,6 +5019,10 @@ static int statmount_string(struct kstatmount *s, u64 flag) sm->mnt_point = seq->count; ret = statmount_mnt_point(s, seq); break; 
+ case STATMOUNT_MNT_OPTS: + sm->mnt_opts = seq->count; + ret = statmount_mnt_opts(s, seq); + break; default: WARN_ON_ONCE(true); return -EINVAL; @@ -4903,23 +5063,84 @@ static int copy_statmount_to_user(struct kstatmount *s) return 0; } -static int do_statmount(struct kstatmount *s) +static struct mount *listmnt_next(struct mount *curr, bool reverse) { - struct mount *m = real_mount(s->mnt); + struct rb_node *node; + + if (reverse) + node = rb_prev(&curr->mnt_node); + else + node = rb_next(&curr->mnt_node); + + return node_to_mount(node); +} + +static int grab_requested_root(struct mnt_namespace *ns, struct path *root) +{ + struct mount *first, *child; + + rwsem_assert_held(&namespace_sem); + + /* We're looking at our own ns, just use get_fs_root. */ + if (ns == current->nsproxy->mnt_ns) { + get_fs_root(current->fs, root); + return 0; + } + + /* + * We have to find the first mount in our ns and use that, however it + * may not exist, so handle that properly. + */ + if (RB_EMPTY_ROOT(&ns->mounts)) + return -ENOENT; + + first = child = ns->root; + for (;;) { + child = listmnt_next(child, false); + if (!child) + return -ENOENT; + if (child->mnt_parent == first) + break; + } + + root->mnt = mntget(&child->mnt); + root->dentry = dget(root->mnt->mnt_root); + return 0; +} + +static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, + struct mnt_namespace *ns) +{ + struct path root __free(path_put) = {}; + struct mount *m; int err; + /* Has the namespace already been emptied? */ + if (mnt_ns_id && RB_EMPTY_ROOT(&ns->mounts)) + return -ENOENT; + + s->mnt = lookup_mnt_in_ns(mnt_id, ns); + if (!s->mnt) + return -ENOENT; + + err = grab_requested_root(ns, &root); + if (err) + return err; + /* * Don't trigger audit denials. We just want to determine what * mounts to show users. 
*/ - if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) && - !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) + m = real_mount(s->mnt); + if (!is_path_reachable(m, m->mnt.mnt_root, &root) && + !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; err = security_sb_statfs(s->mnt->mnt_root); if (err) return err; + s->root = root; if (s->mask & STATMOUNT_SB_BASIC) statmount_sb_basic(s); @@ -4938,6 +5159,12 @@ static int do_statmount(struct kstatmount *s) if (!err && s->mask & STATMOUNT_MNT_POINT) err = statmount_string(s, STATMOUNT_MNT_POINT); + if (!err && s->mask & STATMOUNT_MNT_OPTS) + err = statmount_string(s, STATMOUNT_MNT_OPTS); + + if (!err && s->mask & STATMOUNT_MNT_NS_ID) + statmount_mnt_ns_id(s, ns); + if (err) return err; @@ -4955,6 +5182,9 @@ static inline bool retry_statmount(const long ret, size_t *seq_size) return true; } +#define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \ + STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS) + static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, struct statmount __user *buf, size_t bufsize, size_t seq_size) @@ -4966,10 +5196,18 @@ static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, ks->mask = kreq->param; ks->buf = buf; ks->bufsize = bufsize; - ks->seq.size = seq_size; - ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT); - if (!ks->seq.buf) - return -ENOMEM; + + if (ks->mask & STATMOUNT_STRING_REQ) { + if (bufsize == sizeof(ks->sm)) + return -EOVERFLOW; + + ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT); + if (!ks->seq.buf) + return -ENOMEM; + + ks->seq.size = seq_size; + } + return 0; } @@ -4979,7 +5217,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req, int ret; size_t usize; - BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER0); + BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER1); ret = get_user(usize, &req->size); if (ret) @@ -4994,16 +5232,32 @@ static int copy_mnt_id_req(const struct 
mnt_id_req __user *req, return ret; if (kreq->spare != 0) return -EINVAL; + /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */ + if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET) + return -EINVAL; return 0; } +/* + * If the user requested a specific mount namespace id, look that up and return + * that, or if not simply grab a passive reference on our mount namespace and + * return that. + */ +static struct mnt_namespace *grab_requested_mnt_ns(u64 mnt_ns_id) +{ + if (mnt_ns_id) + return lookup_mnt_ns(mnt_ns_id); + refcount_inc(&current->nsproxy->mnt_ns->passive); + return current->nsproxy->mnt_ns; +} + SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, struct statmount __user *, buf, size_t, bufsize, unsigned int, flags) { - struct vfsmount *mnt; + struct mnt_namespace *ns __free(mnt_ns_release) = NULL; + struct kstatmount *ks __free(kfree) = NULL; struct mnt_id_req kreq; - struct kstatmount ks; /* We currently support retrieval of 3 strings. */ size_t seq_size = 3 * PATH_MAX; int ret; @@ -5015,64 +5269,88 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, if (ret) return ret; + ns = grab_requested_mnt_ns(kreq.mnt_ns_id); + if (!ns) + return -ENOENT; + + if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) && + !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) + return -ENOENT; + + ks = kmalloc(sizeof(*ks), GFP_KERNEL_ACCOUNT); + if (!ks) + return -ENOMEM; + retry: - ret = prepare_kstatmount(&ks, &kreq, buf, bufsize, seq_size); + ret = prepare_kstatmount(ks, &kreq, buf, bufsize, seq_size); if (ret) return ret; - down_read(&namespace_sem); - mnt = lookup_mnt_in_ns(kreq.mnt_id, current->nsproxy->mnt_ns); - if (!mnt) { - up_read(&namespace_sem); - kvfree(ks.seq.buf); - return -ENOENT; - } - - ks.mnt = mnt; - get_fs_root(current->fs, &ks.root); - ret = do_statmount(&ks); - path_put(&ks.root); - up_read(&namespace_sem); + scoped_guard(rwsem_read, &namespace_sem) + ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns); if (!ret) - ret 
= copy_statmount_to_user(&ks); - kvfree(ks.seq.buf); + ret = copy_statmount_to_user(ks); + kvfree(ks->seq.buf); if (retry_statmount(ret, &seq_size)) goto retry; return ret; } -static struct mount *listmnt_next(struct mount *curr) +static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id, + u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids, + bool reverse) { - return node_to_mount(rb_next(&curr->mnt_node)); -} - -static ssize_t do_listmount(struct mount *first, struct path *orig, - u64 mnt_parent_id, u64 __user *mnt_ids, - size_t nr_mnt_ids, const struct path *root) -{ - struct mount *r; + struct path root __free(path_put) = {}; + struct path orig; + struct mount *r, *first; ssize_t ret; + rwsem_assert_held(&namespace_sem); + + ret = grab_requested_root(ns, &root); + if (ret) + return ret; + + if (mnt_parent_id == LSMT_ROOT) { + orig = root; + } else { + orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns); + if (!orig.mnt) + return -ENOENT; + orig.dentry = orig.mnt->mnt_root; + } + /* * Don't trigger audit denials. We just want to determine what * mounts to show users. 
*/ - if (!is_path_reachable(real_mount(orig->mnt), orig->dentry, root) && - !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) + if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &root) && + !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; - ret = security_sb_statfs(orig->dentry); + ret = security_sb_statfs(orig.dentry); if (ret) return ret; - for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r)) { + if (!last_mnt_id) { + if (reverse) + first = node_to_mount(rb_last(&ns->mounts)); + else + first = node_to_mount(rb_first(&ns->mounts)); + } else { + if (reverse) + first = mnt_find_id_at_reverse(ns, last_mnt_id - 1); + else + first = mnt_find_id_at(ns, last_mnt_id + 1); + } + + for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r, reverse)) { if (r->mnt_id_unique == mnt_parent_id) continue; - if (!is_path_reachable(r, r->mnt.mnt_root, orig)) + if (!is_path_reachable(r, r->mnt.mnt_root, &orig)) continue; - if (put_user(r->mnt_id_unique, mnt_ids)) - return -EFAULT; + *mnt_ids = r->mnt_id_unique; mnt_ids++; nr_mnt_ids--; ret++; @@ -5080,22 +5358,26 @@ static ssize_t do_listmount(struct mount *first, struct path *orig, return ret; } -SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *, - mnt_ids, size_t, nr_mnt_ids, unsigned int, flags) +SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, + u64 __user *, mnt_ids, size_t, nr_mnt_ids, unsigned int, flags) { - struct mnt_namespace *ns = current->nsproxy->mnt_ns; + u64 *kmnt_ids __free(kvfree) = NULL; + const size_t maxcount = 1000000; + struct mnt_namespace *ns __free(mnt_ns_release) = NULL; struct mnt_id_req kreq; - struct mount *first; - struct path root, orig; - u64 mnt_parent_id, last_mnt_id; - const size_t maxcount = (size_t)-1 >> 3; + u64 last_mnt_id; ssize_t ret; - if (flags) + if (flags & ~LISTMOUNT_REVERSE) return -EINVAL; + /* + * If the mount namespace really has more than 1 million mounts the + * caller must iterate over the mount 
namespace (and reconsider their + * system design...). + */ if (unlikely(nr_mnt_ids > maxcount)) - return -EFAULT; + return -EOVERFLOW; if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids))) return -EFAULT; @@ -5103,33 +5385,37 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *, ret = copy_mnt_id_req(req, &kreq); if (ret) return ret; - mnt_parent_id = kreq.mnt_id; + last_mnt_id = kreq.param; + /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */ + if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET) + return -EINVAL; - down_read(&namespace_sem); - get_fs_root(current->fs, &root); - if (mnt_parent_id == LSMT_ROOT) { - orig = root; - } else { - ret = -ENOENT; - orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns); - if (!orig.mnt) - goto err; - orig.dentry = orig.mnt->mnt_root; - } - if (!last_mnt_id) - first = node_to_mount(rb_first(&ns->mounts)); - else - first = mnt_find_id_at(ns, last_mnt_id + 1); + kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kmnt_ids), + GFP_KERNEL_ACCOUNT); + if (!kmnt_ids) + return -ENOMEM; + + ns = grab_requested_mnt_ns(kreq.mnt_ns_id); + if (!ns) + return -ENOENT; + + if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) && + !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) + return -ENOENT; + + scoped_guard(rwsem_read, &namespace_sem) + ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids, + nr_mnt_ids, (flags & LISTMOUNT_REVERSE)); + if (ret <= 0) + return ret; + + if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids))) + return -EFAULT; - ret = do_listmount(first, &orig, mnt_parent_id, mnt_ids, nr_mnt_ids, &root); -err: - path_put(&root); - up_read(&namespace_sem); return ret; } - static void __init init_mount_tree(void) { struct vfsmount *mnt; @@ -5157,6 +5443,8 @@ static void __init init_mount_tree(void) set_fs_pwd(current->fs, &root); set_fs_root(current->fs, &root); + + mnt_ns_tree_add(ns); } void __init mnt_init(void) diff --git a/fs/netfs/buffered_read.c 
b/fs/netfs/buffered_read.c index a6bb03bea920..4c0401dbbfcf 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -117,7 +117,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { if (folio->index == rreq->no_unlock_folio && test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) - _debug("no unlock"); + kdebug("no unlock"); else folio_unlock(folio); } @@ -204,7 +204,7 @@ void netfs_readahead(struct readahead_control *ractl) struct netfs_inode *ctx = netfs_inode(ractl->mapping->host); int ret; - _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); + kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); if (readahead_count(ractl) == 0) return; @@ -268,7 +268,7 @@ int netfs_read_folio(struct file *file, struct folio *folio) struct folio *sink = NULL; int ret; - _enter("%lx", folio->index); + kenter("%lx", folio->index); rreq = netfs_alloc_request(mapping, file, folio_file_pos(folio), folio_size(folio), @@ -508,7 +508,7 @@ retry: have_folio: *_folio = folio; - _leave(" = 0"); + kleave(" = 0"); return 0; error_put: @@ -518,7 +518,7 @@ error: folio_unlock(folio); folio_put(folio); } - _leave(" = %d", ret); + kleave(" = %d", ret); return ret; } EXPORT_SYMBOL(netfs_write_begin); @@ -536,7 +536,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t flen = folio_size(folio); int ret; - _enter("%zx @%llx", flen, start); + kenter("%zx @%llx", flen, start); ret = -ENOMEM; @@ -567,7 +567,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, error_put: netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); error: - _leave(" = %d", ret); + kleave(" = %d", ret); return ret; } diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 07bc1fd43530..ecbc99ec7d36 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -56,7 +56,7 @@ static enum netfs_how_to_modify 
netfs_how_to_modify(struct netfs_inode *ctx, struct netfs_group *group = netfs_folio_group(folio); loff_t pos = folio_file_pos(folio); - _enter(""); + kenter(""); if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) return NETFS_FLUSH_CONTENT; @@ -272,12 +272,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, */ howto = netfs_how_to_modify(ctx, file, folio, netfs_group, flen, offset, part, maybe_trouble); - _debug("howto %u", howto); + kdebug("howto %u", howto); switch (howto) { case NETFS_JUST_PREFETCH: ret = netfs_prefetch_for_write(file, folio, offset, part); if (ret < 0) { - _debug("prefetch = %zd", ret); + kdebug("prefetch = %zd", ret); goto error_folio_unlock; } break; @@ -418,7 +418,7 @@ out: } iocb->ki_pos += written; - _leave(" = %zd [%zd]", written, ret); + kleave(" = %zd [%zd]", written, ret); return written ? written : ret; error_folio_unlock: @@ -491,7 +491,7 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct netfs_inode *ictx = netfs_inode(inode); ssize_t ret; - _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); + kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); if (!iov_iter_count(from)) return 0; @@ -523,17 +523,23 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr struct netfs_group *group; struct folio *folio = page_folio(vmf->page); struct file *file = vmf->vma->vm_file; + struct address_space *mapping = file->f_mapping; struct inode *inode = file_inode(file); struct netfs_inode *ictx = netfs_inode(inode); vm_fault_t ret = VM_FAULT_RETRY; int err; - _enter("%lx", folio->index); + kenter("%lx", folio->index); sb_start_pagefault(inode->i_sb); if (folio_lock_killable(folio) < 0) goto out; + if (folio->mapping != mapping) { + folio_unlock(folio); + ret = VM_FAULT_NOPAGE; + goto out; + } if (folio_wait_writeback_killable(folio)) { ret = VM_FAULT_LOCKED; @@ -549,9 +555,9 @@ vm_fault_t 
netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr group = netfs_folio_group(folio); if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) { folio_unlock(folio); - err = filemap_fdatawait_range(inode->i_mapping, - folio_pos(folio), - folio_pos(folio) + folio_size(folio)); + err = filemap_fdatawrite_range(mapping, + folio_pos(folio), + folio_pos(folio) + folio_size(folio)); switch (err) { case 0: ret = VM_FAULT_RETRY; diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index 10a1e4da6bda..b6debac6205f 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -33,7 +33,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i size_t orig_count = iov_iter_count(iter); bool async = !is_sync_kiocb(iocb); - _enter(""); + kenter(""); if (!orig_count) return 0; /* Don't update atime */ diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index e14cd53ac9fd..792ef17bae21 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -37,7 +37,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * size_t len = iov_iter_count(iter); bool async = !is_sync_kiocb(iocb); - _enter(""); + kenter(""); /* We're going to need a bounce buffer if what we transmit is going to * be different in some way to the source buffer, e.g. because it gets @@ -45,7 +45,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * */ // TODO - _debug("uw %llx-%llx", start, end); + kdebug("uw %llx-%llx", start, end); wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start, iocb->ki_flags & IOCB_DIRECT ? 
@@ -92,10 +92,11 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); if (async) wreq->iocb = iocb; + wreq->len = iov_iter_count(&wreq->io_iter); wreq->cleanup = netfs_cleanup_dio_write; - ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), iov_iter_count(&wreq->io_iter)); + ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); if (ret < 0) { - _debug("begin = %zd", ret); + kdebug("begin = %zd", ret); goto out; } @@ -142,7 +143,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) loff_t pos = iocb->ki_pos; unsigned long long end = pos + iov_iter_count(from) - 1; - _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); + kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); if (!iov_iter_count(from)) return 0; diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c index 9397ed39b0b4..288a73c3072d 100644 --- a/fs/netfs/fscache_cache.c +++ b/fs/netfs/fscache_cache.c @@ -237,7 +237,7 @@ int fscache_add_cache(struct fscache_cache *cache, { int n_accesses; - _enter("{%s,%s}", ops->name, cache->name); + kenter("{%s,%s}", ops->name, cache->name); BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING); @@ -257,7 +257,7 @@ int fscache_add_cache(struct fscache_cache *cache, up_write(&fscache_addremove_sem); pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name); - _leave(" = 0 [%s]", cache->name); + kleave(" = 0 [%s]", cache->name); return 0; } EXPORT_SYMBOL(fscache_add_cache); diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c index bce2492186d0..4d1e8bf4c615 100644 --- a/fs/netfs/fscache_cookie.c +++ b/fs/netfs/fscache_cookie.c @@ -456,7 +456,7 @@ struct fscache_cookie *__fscache_acquire_cookie( { struct fscache_cookie *cookie; - _enter("V=%x", volume->debug_id); + kenter("V=%x", volume->debug_id); if (!index_key || !index_key_len || index_key_len > 255 || 
aux_data_len > 255) return NULL; @@ -484,7 +484,7 @@ struct fscache_cookie *__fscache_acquire_cookie( trace_fscache_acquire(cookie); fscache_stat(&fscache_n_acquires_ok); - _leave(" = c=%08x", cookie->debug_id); + kleave(" = c=%08x", cookie->debug_id); return cookie; } EXPORT_SYMBOL(__fscache_acquire_cookie); @@ -505,7 +505,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie) enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed; bool need_withdraw = false; - _enter(""); + kenter(""); if (!cookie->volume->cache_priv) { fscache_create_volume(cookie->volume, true); @@ -519,7 +519,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie) if (cookie->state != FSCACHE_COOKIE_STATE_FAILED) fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT); need_withdraw = true; - _leave(" [fail]"); + kleave(" [fail]"); goto out; } @@ -572,7 +572,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify) bool queue = false; int n_active; - _enter("c=%08x", cookie->debug_id); + kenter("c=%08x", cookie->debug_id); if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), "Trying to use relinquished cookie\n")) @@ -636,7 +636,7 @@ again: spin_unlock(&cookie->lock); if (queue) fscache_queue_cookie(cookie, fscache_cookie_get_use_work); - _leave(""); + kleave(""); } EXPORT_SYMBOL(__fscache_use_cookie); @@ -702,7 +702,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie) enum fscache_cookie_state state; bool wake = false; - _enter("c=%x", cookie->debug_id); + kenter("c=%x", cookie->debug_id); again: spin_lock(&cookie->lock); @@ -820,7 +820,7 @@ out: spin_unlock(&cookie->lock); if (wake) wake_up_cookie_state(cookie); - _leave(""); + kleave(""); } static void fscache_cookie_worker(struct work_struct *work) @@ -867,7 +867,7 @@ static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie) set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags); spin_unlock(&cookie->lock); 
fscache_stat(&fscache_n_cookies_lru_expired); - _debug("lru c=%x", cookie->debug_id); + kdebug("lru c=%x", cookie->debug_id); __fscache_withdraw_cookie(cookie); } @@ -971,7 +971,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) if (retire) fscache_stat(&fscache_n_relinquishes_retire); - _enter("c=%08x{%d},%d", + kenter("c=%08x{%d},%d", cookie->debug_id, atomic_read(&cookie->n_active), retire); if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), @@ -1050,7 +1050,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, { bool is_caching; - _enter("c=%x", cookie->debug_id); + kenter("c=%x", cookie->debug_id); fscache_stat(&fscache_n_invalidates); @@ -1072,7 +1072,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */ default: spin_unlock(&cookie->lock); - _leave(" [no %u]", cookie->state); + kleave(" [no %u]", cookie->state); return; case FSCACHE_COOKIE_STATE_LOOKING_UP: @@ -1081,7 +1081,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, fallthrough; case FSCACHE_COOKIE_STATE_CREATING: spin_unlock(&cookie->lock); - _leave(" [look %x]", cookie->inval_counter); + kleave(" [look %x]", cookie->inval_counter); return; case FSCACHE_COOKIE_STATE_ACTIVE: @@ -1094,7 +1094,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, if (is_caching) fscache_queue_cookie(cookie, fscache_cookie_get_inval_work); - _leave(" [inv]"); + kleave(" [inv]"); return; } } diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c index 38637e5c9b57..bf4eaeec44fb 100644 --- a/fs/netfs/fscache_io.c +++ b/fs/netfs/fscache_io.c @@ -28,12 +28,12 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres, again: if (!fscache_cache_is_live(cookie->volume->cache)) { - _leave(" [broken]"); + kleave(" [broken]"); return false; } state = fscache_cookie_state(cookie); - _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state); + 
kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state); switch (state) { case FSCACHE_COOKIE_STATE_CREATING: @@ -52,7 +52,7 @@ again: case FSCACHE_COOKIE_STATE_DROPPED: case FSCACHE_COOKIE_STATE_RELINQUISHING: default: - _leave(" [not live]"); + kleave(" [not live]"); return false; } @@ -92,7 +92,7 @@ again: spin_lock(&cookie->lock); state = fscache_cookie_state(cookie); - _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state); + kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state); switch (state) { case FSCACHE_COOKIE_STATE_LOOKING_UP: @@ -140,7 +140,7 @@ failed: cres->cache_priv = NULL; cres->ops = NULL; fscache_end_cookie_access(cookie, fscache_access_io_not_live); - _leave(" = -ENOBUFS"); + kleave(" = -ENOBUFS"); return -ENOBUFS; } @@ -224,7 +224,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, if (len == 0) goto abandon; - _enter("%llx,%zx", start, len); + kenter("%llx,%zx", start, len); wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS); if (!wreq) diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c index 42e98bb523e3..bf9b33d26e31 100644 --- a/fs/netfs/fscache_main.c +++ b/fs/netfs/fscache_main.c @@ -99,7 +99,7 @@ error_wq: */ void __exit fscache_exit(void) { - _enter(""); + kenter(""); kmem_cache_destroy(fscache_cookie_jar); fscache_proc_cleanup(); diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c index cdf991bdd9de..2e2a405ca9b0 100644 --- a/fs/netfs/fscache_volume.c +++ b/fs/netfs/fscache_volume.c @@ -27,6 +27,19 @@ struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, return volume; } +struct fscache_volume *fscache_try_get_volume(struct fscache_volume *volume, + enum fscache_volume_trace where) +{ + int ref; + + if (!__refcount_inc_not_zero(&volume->ref, &ref)) + return NULL; + + trace_fscache_volume(volume->debug_id, ref + 1, where); + return volume; +} +EXPORT_SYMBOL(fscache_try_get_volume); + static void fscache_see_volume(struct fscache_volume *volume, 
enum fscache_volume_trace where) { @@ -251,7 +264,7 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, fscache_see_volume(volume, fscache_volume_new_acquire); fscache_stat(&fscache_n_volumes); up_write(&fscache_addremove_sem); - _leave(" = v=%x", volume->debug_id); + kleave(" = v=%x", volume->debug_id); return volume; err_vol: @@ -420,6 +433,7 @@ void fscache_put_volume(struct fscache_volume *volume, fscache_free_volume(volume); } } +EXPORT_SYMBOL(fscache_put_volume); /* * Relinquish a volume representation cookie. @@ -452,7 +466,7 @@ void fscache_withdraw_volume(struct fscache_volume *volume) { int n_accesses; - _debug("withdraw V=%x", volume->debug_id); + kdebug("withdraw V=%x", volume->debug_id); /* Allow wakeups on dec-to-0 */ n_accesses = atomic_dec_return(&volume->n_accesses); diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 95e281a8af78..21e46bc9aa49 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -34,7 +34,6 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync); /* * main.c */ -extern unsigned int netfs_debug; extern struct list_head netfs_io_requests; extern spinlock_t netfs_proc_lock; extern mempool_t netfs_request_pool; @@ -63,15 +62,6 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {} /* * misc.c */ -#define NETFS_FLAG_PUT_MARK BIT(0) -#define NETFS_FLAG_PAGECACHE_MARK BIT(1) -int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index, - struct folio *folio, unsigned int flags, - gfp_t gfp_mask); -int netfs_add_folios_to_buffer(struct xarray *buffer, - struct address_space *mapping, - pgoff_t index, pgoff_t to, gfp_t gfp_mask); -void netfs_clear_buffer(struct xarray *buffer); /* * objects.c @@ -353,8 +343,6 @@ extern const struct seq_operations fscache_volumes_seq_ops; struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, enum fscache_volume_trace where); -void fscache_put_volume(struct fscache_volume *volume, - enum fscache_volume_trace 
where); bool fscache_begin_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, enum fscache_access_trace why); @@ -365,42 +353,12 @@ void fscache_create_volume(struct fscache_volume *volume, bool wait); * debug tracing */ #define dbgprintk(FMT, ...) \ - printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) + pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) -#ifdef __KDEBUG -#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) -#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) -#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) - -#elif defined(CONFIG_NETFS_DEBUG) -#define _enter(FMT, ...) \ -do { \ - if (netfs_debug) \ - kenter(FMT, ##__VA_ARGS__); \ -} while (0) - -#define _leave(FMT, ...) \ -do { \ - if (netfs_debug) \ - kleave(FMT, ##__VA_ARGS__); \ -} while (0) - -#define _debug(FMT, ...) \ -do { \ - if (netfs_debug) \ - kdebug(FMT, ##__VA_ARGS__); \ -} while (0) - -#else -#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) -#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) -#define _debug(FMT, ...) 
no_printk(FMT, ##__VA_ARGS__) -#endif - /* * assertions */ diff --git a/fs/netfs/io.c b/fs/netfs/io.c index c93851b98368..c7576481c321 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -130,7 +130,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq, if (count == remaining) return; - _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", + kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", rreq->debug_id, subreq->debug_index, iov_iter_count(&subreq->io_iter), subreq->transferred, subreq->len, rreq->i_size, @@ -326,7 +326,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq, struct netfs_io_request *rreq = subreq->rreq; int u; - _enter("R=%x[%x]{%llx,%lx},%zd", + kenter("R=%x[%x]{%llx,%lx},%zd", rreq->debug_id, subreq->debug_index, subreq->start, subreq->flags, transferred_or_error); @@ -435,7 +435,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, struct netfs_inode *ictx = netfs_inode(rreq->inode); size_t lsize; - _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); + kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); if (rreq->origin != NETFS_DIO_READ) { source = netfs_cache_prepare_read(subreq, rreq->i_size); @@ -518,7 +518,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, subreq->start = rreq->start + rreq->submitted; subreq->len = io_iter->count; - _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); + kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); list_add_tail(&subreq->rreq_link, &rreq->subrequests); /* Call out to the cache to find out what it can do with the remaining @@ -570,7 +570,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) struct iov_iter io_iter; int ret; - _enter("R=%x %llx-%llx", + kenter("R=%x %llx-%llx", rreq->debug_id, rreq->start, rreq->start + rreq->len - 1); if (rreq->len == 0) { @@ -593,7 +593,7 @@ int 
netfs_begin_read(struct netfs_io_request *rreq, bool sync) atomic_set(&rreq->nr_outstanding, 1); io_iter = rreq->io_iter; do { - _debug("submit %llx + %llx >= %llx", + kdebug("submit %llx + %llx >= %llx", rreq->start, rreq->submitted, rreq->i_size); if (rreq->origin == NETFS_DIO_READ && rreq->start + rreq->submitted >= rreq->i_size) diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 5f0f438e5d21..db824c372842 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -20,10 +20,6 @@ MODULE_LICENSE("GPL"); EXPORT_TRACEPOINT_SYMBOL(netfs_sreq); -unsigned netfs_debug; -module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask"); - static struct kmem_cache *netfs_request_slab; static struct kmem_cache *netfs_subrequest_slab; mempool_t netfs_request_pool; diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index bc1fc54fb724..172808e83ca8 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -8,87 +8,6 @@ #include <linux/swap.h> #include "internal.h" -/* - * Attach a folio to the buffer and maybe set marks on it to say that we need - * to put the folio later and twiddle the pagecache flags. - */ -int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index, - struct folio *folio, unsigned int flags, - gfp_t gfp_mask) -{ - XA_STATE_ORDER(xas, xa, index, folio_order(folio)); - -retry: - xas_lock(&xas); - for (;;) { - xas_store(&xas, folio); - if (!xas_error(&xas)) - break; - xas_unlock(&xas); - if (!xas_nomem(&xas, gfp_mask)) - return xas_error(&xas); - goto retry; - } - - if (flags & NETFS_FLAG_PUT_MARK) - xas_set_mark(&xas, NETFS_BUF_PUT_MARK); - if (flags & NETFS_FLAG_PAGECACHE_MARK) - xas_set_mark(&xas, NETFS_BUF_PAGECACHE_MARK); - xas_unlock(&xas); - return xas_error(&xas); -} - -/* - * Create the specified range of folios in the buffer attached to the read - * request. The folios are marked with NETFS_BUF_PUT_MARK so that we know that - * these need freeing later. 
- */ -int netfs_add_folios_to_buffer(struct xarray *buffer, - struct address_space *mapping, - pgoff_t index, pgoff_t to, gfp_t gfp_mask) -{ - struct folio *folio; - int ret; - - if (to + 1 == index) /* Page range is inclusive */ - return 0; - - do { - /* TODO: Figure out what order folio can be allocated here */ - folio = filemap_alloc_folio(readahead_gfp_mask(mapping), 0); - if (!folio) - return -ENOMEM; - folio->index = index; - ret = netfs_xa_store_and_mark(buffer, index, folio, - NETFS_FLAG_PUT_MARK, gfp_mask); - if (ret < 0) { - folio_put(folio); - return ret; - } - - index += folio_nr_pages(folio); - } while (index <= to && index != 0); - - return 0; -} - -/* - * Clear an xarray buffer, putting a ref on the folios that have - * NETFS_BUF_PUT_MARK set. - */ -void netfs_clear_buffer(struct xarray *buffer) -{ - struct folio *folio; - XA_STATE(xas, buffer, 0); - - rcu_read_lock(); - xas_for_each_marked(&xas, folio, ULONG_MAX, NETFS_BUF_PUT_MARK) { - folio_put(folio); - } - rcu_read_unlock(); - xa_destroy(buffer); -} - /** * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback * @mapping: The mapping the folio belongs to. 
@@ -107,7 +26,7 @@ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio) struct fscache_cookie *cookie = netfs_i_cookie(ictx); bool need_use = false; - _enter(""); + kenter(""); if (!filemap_dirty_folio(mapping, folio)) return false; @@ -180,7 +99,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) struct netfs_folio *finfo; size_t flen = folio_size(folio); - _enter("{%lx},%zx,%zx", folio->index, offset, length); + kenter("{%lx},%zx,%zx", folio->index, offset, length); if (!folio_test_private(folio)) return; diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 426cf87aaf2e..488147439fe0 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -161,7 +161,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, { struct list_head *next; - _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); + kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); if (list_empty(&stream->subrequests)) return; @@ -374,7 +374,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) unsigned int notes; int s; - _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); + kenter("%llx-%llx", wreq->start, wreq->start + wreq->len); trace_netfs_collect(wreq); trace_netfs_rreq(wreq, netfs_rreq_trace_collect); @@ -409,7 +409,7 @@ reassess_streams: front = stream->front; while (front) { trace_netfs_collect_sreq(wreq, front); - //_debug("sreq [%x] %llx %zx/%zx", + //kdebug("sreq [%x] %llx %zx/%zx", // front->debug_index, front->start, front->transferred, front->len); /* Stall if there may be a discontinuity. */ @@ -598,7 +598,7 @@ reassess_streams: out: netfs_put_group_many(wreq->group, wreq->nr_group_rel); wreq->nr_group_rel = 0; - _leave(" = %x", notes); + kleave(" = %x", notes); return; need_retry: @@ -606,7 +606,7 @@ need_retry: * that any partially completed op will have had any wholly transferred * folios removed from it. 
*/ - _debug("retry"); + kdebug("retry"); netfs_retry_writes(wreq); goto out; } @@ -621,7 +621,7 @@ void netfs_write_collection_worker(struct work_struct *work) size_t transferred; int s; - _enter("R=%x", wreq->debug_id); + kenter("R=%x", wreq->debug_id); netfs_see_request(wreq, netfs_rreq_trace_see_work); if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { @@ -684,7 +684,7 @@ void netfs_write_collection_worker(struct work_struct *work) if (wreq->origin == NETFS_DIO_WRITE) inode_dio_end(wreq->inode); - _debug("finished"); + kdebug("finished"); trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags); wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS); @@ -744,7 +744,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, struct netfs_io_request *wreq = subreq->rreq; struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; - _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); + kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); switch (subreq->source) { case NETFS_UPLOAD_TO_SERVER: diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 3aa86e268f40..d7c971df8866 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -99,7 +99,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, if (IS_ERR(wreq)) return wreq; - _enter("R=%x", wreq->debug_id); + kenter("R=%x", wreq->debug_id); ictx = netfs_inode(wreq->inode); if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) @@ -159,7 +159,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq, subreq->max_nr_segs = INT_MAX; subreq->stream_nr = stream->stream_nr; - _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); + kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index); trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), @@ -215,7 +215,7 @@ static void 
netfs_do_issue_write(struct netfs_io_stream *stream, { struct netfs_io_request *wreq = subreq->rreq; - _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); + kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) return netfs_write_subrequest_terminated(subreq, subreq->error, false); @@ -272,11 +272,11 @@ int netfs_advance_write(struct netfs_io_request *wreq, size_t part; if (!stream->avail) { - _leave("no write"); + kleave("no write"); return len; } - _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); + kenter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); if (subreq && start != subreq->start + subreq->len) { netfs_issue_write(wreq, stream); @@ -288,7 +288,7 @@ int netfs_advance_write(struct netfs_io_request *wreq, subreq = stream->construct; part = min(subreq->max_len - subreq->len, len); - _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); + kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); subreq->len += part; subreq->nr_segs++; @@ -319,7 +319,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, bool to_eof = false, streamw = false; bool debug = false; - _enter(""); + kenter(""); /* netfs_perform_write() may shift i_size around the page or from out * of the page to beyond it, but cannot move i_size into or through the @@ -329,7 +329,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, if (fpos >= i_size) { /* mmap beyond eof. */ - _debug("beyond eof"); + kdebug("beyond eof"); folio_start_writeback(folio); folio_unlock(folio); wreq->nr_group_rel += netfs_folio_written_back(folio); @@ -363,7 +363,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, } flen -= foff; - _debug("folio %zx %zx %zx", foff, flen, fsize); + kdebug("folio %zx %zx %zx", foff, flen, fsize); /* Deal with discontinuities in the stream of dirty pages. 
These can * arise from a number of sources: @@ -483,11 +483,11 @@ static int netfs_write_folio(struct netfs_io_request *wreq, if (!debug) kdebug("R=%x: No submit", wreq->debug_id); - if (flen < fsize) + if (foff + flen < fsize) for (int s = 0; s < NR_IO_STREAMS; s++) netfs_issue_write(wreq, &wreq->io_streams[s]); - _leave(" = 0"); + kleave(" = 0"); return 0; } @@ -522,7 +522,7 @@ int netfs_writepages(struct address_space *mapping, netfs_stat(&netfs_n_wh_writepages); do { - _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); + kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); /* It appears we don't have to handle cyclic writeback wrapping. */ WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted); @@ -546,14 +546,14 @@ int netfs_writepages(struct address_space *mapping, mutex_unlock(&ictx->wb_lock); netfs_put_request(wreq, false, netfs_rreq_trace_put_return); - _leave(" = %d", error); + kleave(" = %d", error); return error; couldnt_start: netfs_kill_dirty_pages(mapping, wbc, folio); out: mutex_unlock(&ictx->wb_lock); - _leave(" = %d", error); + kleave(" = %d", error); return error; } EXPORT_SYMBOL(netfs_writepages); @@ -590,7 +590,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c struct folio *folio, size_t copied, bool to_page_end, struct folio **writethrough_cache) { - _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u", + kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u", wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end); if (!*writethrough_cache) { @@ -624,7 +624,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr struct netfs_inode *ictx = netfs_inode(wreq->inode); int ret; - _enter("R=%x", wreq->debug_id); + kenter("R=%x", wreq->debug_id); if (writethrough_cache) netfs_write_folio(wreq, wbc, writethrough_cache); @@ -657,7 +657,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t loff_t start = wreq->start; 
int error = 0; - _enter("%zx", len); + kenter("%zx", len); if (wreq->origin == NETFS_DIO_WRITE) inode_dio_begin(wreq->inode); @@ -665,7 +665,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t while (len) { // TODO: Prepare content encryption - _debug("unbuffered %zx", len); + kdebug("unbuffered %zx", len); part = netfs_advance_write(wreq, upload, start, len, false); start += part; len -= part; @@ -684,6 +684,6 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t if (list_empty(&upload->subrequests)) netfs_wake_write_collector(wreq, false); - _leave(" = %d", error); + kleave(" = %d", error); return error; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bb2f583eb28b..90079ca134dd 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -141,8 +141,6 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter) { ssize_t ret; - VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); - if (iov_iter_rw(iter) == READ) ret = nfs_file_direct_read(iocb, iter, true); else diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a142287d86f6..cca80b5f54e0 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -122,8 +122,6 @@ static void nfs_readpage_release(struct nfs_page *req, int error) { struct folio *folio = nfs_page_to_folio(req); - if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT) - folio_set_error(folio); if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) if (nfs_netfs_folio_unlock(folio)) folio_unlock(folio); diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 13818129d268..1c62a5a9f51d 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -32,15 +32,7 @@ static int nfs_symlink_filler(struct file *file, struct folio *folio) int error; error = NFS_PROTO(inode)->readlink(inode, &folio->page, 0, PAGE_SIZE); - if (error < 0) - goto error; - folio_mark_uptodate(folio); - folio_unlock(folio); - return 0; - -error: - folio_set_error(folio); - folio_unlock(folio); + folio_end_read(folio, error == 0); return 
error; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2329cbb0e446..a91463ab87a0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -311,7 +311,6 @@ static void nfs_mapping_set_error(struct folio *folio, int error) { struct address_space *mapping = folio_file_mapping(folio); - folio_set_error(folio); filemap_set_wb_err(mapping, error); if (mapping->host) errseq_set(&mapping->host->i_sb->s_wb_err, diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c index 62d2586d9902..529a75ecf22e 100644 --- a/fs/nfsd/netlink.c +++ b/fs/nfsd/netlink.c @@ -44,9 +44,7 @@ static const struct nla_policy nfsd_listener_set_nl_policy[NFSD_A_SERVER_SOCK_AD static const struct genl_split_ops nfsd_nl_ops[] = { { .cmd = NFSD_CMD_RPC_STATUS_GET, - .start = nfsd_nl_rpc_status_get_start, .dumpit = nfsd_nl_rpc_status_get_dumpit, - .done = nfsd_nl_rpc_status_get_done, .flags = GENL_CMD_CAP_DUMP, }, { diff --git a/fs/nfsd/netlink.h b/fs/nfsd/netlink.h index e3724637d64d..2e132ef328f8 100644 --- a/fs/nfsd/netlink.h +++ b/fs/nfsd/netlink.h @@ -15,9 +15,6 @@ extern const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1]; extern const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1]; -int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb); -int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb); - int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 202140df8f82..c848ebe5d08f 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1460,28 +1460,6 @@ static int create_proc_exports_entry(void) unsigned int nfsd_net_id; -/** - * nfsd_nl_rpc_status_get_start - Prepare rpc_status_get dumpit - * @cb: netlink metadata and command arguments - * - * Return values: - * %0: The rpc_status_get command may proceed - * %-ENODEV: There is no NFSD running in this namespace - */ -int 
nfsd_nl_rpc_status_get_start(struct netlink_callback *cb) -{ - struct nfsd_net *nn = net_generic(sock_net(cb->skb->sk), nfsd_net_id); - int ret = -ENODEV; - - mutex_lock(&nfsd_mutex); - if (nn->nfsd_serv) - ret = 0; - else - mutex_unlock(&nfsd_mutex); - - return ret; -} - static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, struct netlink_callback *cb, struct nfsd_genl_rqstp *rqstp) @@ -1558,8 +1536,16 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { - struct nfsd_net *nn = net_generic(sock_net(skb->sk), nfsd_net_id); int i, ret, rqstp_index = 0; + struct nfsd_net *nn; + + mutex_lock(&nfsd_mutex); + + nn = net_generic(sock_net(skb->sk), nfsd_net_id); + if (!nn->nfsd_serv) { + ret = -ENODEV; + goto out_unlock; + } rcu_read_lock(); @@ -1636,22 +1622,10 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, ret = skb->len; out: rcu_read_unlock(); - - return ret; -} - -/** - * nfsd_nl_rpc_status_get_done - rpc_status_get dumpit post-processing - * @cb: netlink metadata and command arguments - * - * Return values: - * %0: Success - */ -int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb) -{ +out_unlock: mutex_unlock(&nfsd_mutex); - return 0; + return ret; } /** @@ -2195,6 +2169,8 @@ static __net_init int nfsd_net_init(struct net *net) nn->nfsd_svcstats.program = &nfsd_program; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; + nn->nfsd_info.mutex = &nfsd_mutex; + nn->nfsd_serv = NULL; nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 0b75305fb5f5..dd4e11a703aa 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -247,7 +247,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) dentry = dget(exp->ex_path.dentry); else { dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid, - 
data_left, fileid_type, + data_left, fileid_type, 0, nfsd_acceptable, exp); if (IS_ERR_OR_NULL(dentry)) { trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index cd9a6a1a9fc8..89d7918de7b1 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -672,7 +672,6 @@ int nfsd_create_serv(struct net *net) return error; } spin_lock(&nfsd_notifier_lock); - nn->nfsd_info.mutex = &nfsd_mutex; nn->nfsd_serv = serv; spin_unlock(&nfsd_notifier_lock); diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 89caef7513db..ba50388ee4bf 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -377,11 +377,12 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, * @target: offset number of an entry in the group (start point) * @bsize: size in bits * @lock: spin lock protecting @bitmap + * @wrap: whether to wrap around */ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, unsigned long target, unsigned int bsize, - spinlock_t *lock) + spinlock_t *lock, bool wrap) { int pos, end = bsize; @@ -397,6 +398,8 @@ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, end = target; } + if (!wrap) + return -ENOSPC; /* wrap around */ for (pos = 0; pos < end; pos++) { @@ -495,9 +498,10 @@ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the allocation + * @wrap: whether to wrap around */ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, - struct nilfs_palloc_req *req) + struct nilfs_palloc_req *req, bool wrap) { struct buffer_head *desc_bh, *bitmap_bh; struct nilfs_palloc_group_desc *desc; @@ -516,7 +520,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, entries_per_group = nilfs_palloc_entries_per_group(inode); for (i = 0; i < ngroups; i += n) { - if (group >= ngroups) { + 
if (group >= ngroups && wrap) { /* wrap around */ group = 0; maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr, @@ -550,7 +554,14 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, bitmap_kaddr = kmap_local_page(bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(bitmap_bh); pos = nilfs_palloc_find_available_slot( - bitmap, group_offset, entries_per_group, lock); + bitmap, group_offset, entries_per_group, lock, + wrap); + /* + * Since the search for a free slot in the second and + * subsequent bitmap blocks always starts from the + * beginning, the wrap flag only has an effect on the + * first search. + */ kunmap_local(bitmap_kaddr); if (pos >= 0) goto found; diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index b667e869ac07..d825a9faca6d 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -50,8 +50,8 @@ struct nilfs_palloc_req { struct buffer_head *pr_entry_bh; }; -int nilfs_palloc_prepare_alloc_entry(struct inode *, - struct nilfs_palloc_req *); +int nilfs_palloc_prepare_alloc_entry(struct inode *inode, + struct nilfs_palloc_req *req, bool wrap); void nilfs_palloc_commit_alloc_entry(struct inode *, struct nilfs_palloc_req *); void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *); diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 180fc8d36213..fc1caf63a42a 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -75,7 +75,7 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) { int ret; - ret = nilfs_palloc_prepare_alloc_entry(dat, req); + ret = nilfs_palloc_prepare_alloc_entry(dat, req, true); if (ret < 0) return ret; diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 52e50b1b7f22..4a29b0138d75 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -135,6 +135,9 @@ static bool nilfs_check_folio(struct folio *folio, char *kaddr) goto Enamelen; if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) goto Espan; + if (unlikely(p->inode && + NILFS_PRIVATE_INODE(le64_to_cpu(p->inode)))) + goto 
Einumber; } if (offs != limit) goto Eend; @@ -160,6 +163,9 @@ Enamelen: goto bad_entry; Espan: error = "directory entry across blocks"; + goto bad_entry; +Einumber: + error = "disallowed inode number"; bad_entry: nilfs_error(sb, "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d", @@ -377,11 +383,39 @@ found: struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop) { - struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop); + struct folio *folio; + struct nilfs_dir_entry *de, *next_de; + size_t limit; + char *msg; + de = nilfs_get_folio(dir, 0, &folio); if (IS_ERR(de)) return NULL; - return nilfs_next_entry(de); + + limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */ + if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino || + !nilfs_match(1, ".", de))) { + msg = "missing '.'"; + goto fail; + } + + next_de = nilfs_next_entry(de); + /* + * If "next_de" has not reached the end of the chunk, there is + * at least one more record. Check whether it matches "..". + */ + if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) || + !nilfs_match(2, "..", next_de))) { + msg = "missing '..'"; + goto fail; + } + *foliop = folio; + return next_de; + +fail: + nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg); + folio_release_kmap(folio, de); + return NULL; } ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 612e609158b5..1e86b9303b7c 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -56,13 +56,10 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, struct nilfs_palloc_req req; int ret; - req.pr_entry_nr = 0; /* - * 0 says find free inode from beginning - * of a group. dull code!! 
- */ + req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb); req.pr_entry_bh = NULL; - ret = nilfs_palloc_prepare_alloc_entry(ifile, &req); + ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false); if (!ret) { ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1, &req.pr_entry_bh); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 728e90be3570..4017f7856440 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -116,9 +116,15 @@ enum { #define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) #define NILFS_MDT_INODE(sb, ino) \ - ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & BIT(ino))) + ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino))) #define NILFS_VALID_INODE(sb, ino) \ - ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & BIT(ino))) + ((ino) >= NILFS_FIRST_INO(sb) || \ + ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino)))) + +#define NILFS_PRIVATE_INODE(ino) ({ \ + ino_t __ino = (ino); \ + ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \ + (__ino) != NILFS_SKETCH_INO); }) /** * struct nilfs_transaction_info: context information for synchronization diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index f41d7b6d432c..e44dde57ab65 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -452,6 +452,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, } nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); + if (nilfs->ns_first_ino < NILFS_USER_INO) { + nilfs_err(nilfs->ns_sb, + "too small lower limit for non-reserved inode numbers: %u", + nilfs->ns_first_ino); + return -EINVAL; + } nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 85da0629415d..1e829ed7b0ef 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -182,7 +182,7 @@ struct the_nilfs { unsigned long ns_nrsvsegs; unsigned long 
ns_first_data_block; int ns_inode_size; - int ns_first_ino; + unsigned int ns_first_ino; u32 ns_crc_seed; /* /sys/fs/<nilfs>/<device> */ diff --git a/fs/nls/mac-celtic.c b/fs/nls/mac-celtic.c index 266c2d7d50bd..2963f3299d7e 100644 --- a/fs/nls/mac-celtic.c +++ b/fs/nls/mac-celtic.c @@ -598,4 +598,5 @@ static void __exit exit_nls_macceltic(void) module_init(init_nls_macceltic) module_exit(exit_nls_macceltic) +MODULE_DESCRIPTION("NLS Codepage macceltic"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/mac-centeuro.c b/fs/nls/mac-centeuro.c index 9789c6057551..43b20f4bdb67 100644 --- a/fs/nls/mac-centeuro.c +++ b/fs/nls/mac-centeuro.c @@ -528,4 +528,5 @@ static void __exit exit_nls_maccenteuro(void) module_init(init_nls_maccenteuro) module_exit(exit_nls_maccenteuro) +MODULE_DESCRIPTION("NLS Codepage maccenteuro"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/mac-croatian.c b/fs/nls/mac-croatian.c index bb19e7a07d43..62730d6a64e5 100644 --- a/fs/nls/mac-croatian.c +++ b/fs/nls/mac-croatian.c @@ -598,4 +598,5 @@ static void __exit exit_nls_maccroatian(void) module_init(init_nls_maccroatian) module_exit(exit_nls_maccroatian) +MODULE_DESCRIPTION("NLS Codepage maccroatian"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/mac-cyrillic.c b/fs/nls/mac-cyrillic.c index 2a7dea36acba..7a5c4d16aac8 100644 --- a/fs/nls/mac-cyrillic.c +++ b/fs/nls/mac-cyrillic.c @@ -493,4 +493,5 @@ static void __exit exit_nls_maccyrillic(void) module_init(init_nls_maccyrillic) module_exit(exit_nls_maccyrillic) +MODULE_DESCRIPTION("NLS Codepage maccyrillic"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/mac-gaelic.c b/fs/nls/mac-gaelic.c index 77b001653588..3d22f03a90b6 100644 --- a/fs/nls/mac-gaelic.c +++ b/fs/nls/mac-gaelic.c @@ -563,4 +563,5 @@ static void __exit exit_nls_macgaelic(void) module_init(init_nls_macgaelic) module_exit(exit_nls_macgaelic) +MODULE_DESCRIPTION("NLS Codepage macgaelic"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/mac-greek.c 
b/fs/nls/mac-greek.c index 1eccf499e2eb..de3aa9ddb5b1 100644 --- a/fs/nls/mac-greek.c +++ b/fs/nls/mac-greek.c @@ -493,4 +493,5 @@ static void __exit exit_nls_macgreek(void) module_init(init_nls_macgreek) module_exit(exit_nls_macgreek) +MODULE_DESCRIPTION("NLS Codepage macgreek"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/mac-iceland.c b/fs/nls/mac-iceland.c index cbd0875c6d69..0bba83f9d415 100644 --- a/fs/nls/mac-iceland.c +++ b/fs/nls/mac-iceland.c @@ -598,4 +598,5 @@ static void __exit exit_nls_maciceland(void) module_init(init_nls_maciceland) module_exit(exit_nls_maciceland) +MODULE_DESCRIPTION("NLS Codepage maciceland"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/mac-inuit.c b/fs/nls/mac-inuit.c index fba8357aaf03..493386832dfd 100644 --- a/fs/nls/mac-inuit.c +++ b/fs/nls/mac-inuit.c @@ -528,4 +528,5 @@ static void __exit exit_nls_macinuit(void) module_init(init_nls_macinuit) module_exit(exit_nls_macinuit) +MODULE_DESCRIPTION("NLS Codepage macinuit"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/mac-roman.c b/fs/nls/mac-roman.c index b6a98a5208cd..d3c082173c20 100644 --- a/fs/nls/mac-roman.c +++ b/fs/nls/mac-roman.c @@ -633,4 +633,5 @@ static void __exit exit_nls_macroman(void) module_init(init_nls_macroman) module_exit(exit_nls_macroman) +MODULE_DESCRIPTION("NLS Codepage macroman"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/mac-romanian.c b/fs/nls/mac-romanian.c index 25547f023638..a7735852f2d5 100644 --- a/fs/nls/mac-romanian.c +++ b/fs/nls/mac-romanian.c @@ -598,4 +598,5 @@ static void __exit exit_nls_macromanian(void) module_init(init_nls_macromanian) module_exit(exit_nls_macromanian) +MODULE_DESCRIPTION("NLS Codepage macromanian"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/mac-turkish.c b/fs/nls/mac-turkish.c index b5454bc7b7fa..d77e9b6b7d7c 100644 --- a/fs/nls/mac-turkish.c +++ b/fs/nls/mac-turkish.c @@ -598,4 +598,5 @@ static void __exit exit_nls_macturkish(void) module_init(init_nls_macturkish) 
module_exit(exit_nls_macturkish) +MODULE_DESCRIPTION("NLS Codepage macturkish"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_ascii.c b/fs/nls/nls_ascii.c index a2620650d5e4..068143d71284 100644 --- a/fs/nls/nls_ascii.c +++ b/fs/nls/nls_ascii.c @@ -163,4 +163,5 @@ static void __exit exit_nls_ascii(void) module_init(init_nls_ascii) module_exit(exit_nls_ascii) +MODULE_DESCRIPTION("NLS ASCII (United States)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index a026dbd3593f..18d597e49a19 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -545,4 +545,5 @@ EXPORT_SYMBOL(unload_nls); EXPORT_SYMBOL(load_nls); EXPORT_SYMBOL(load_nls_default); +MODULE_DESCRIPTION("Base file system native language support"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp1250.c b/fs/nls/nls_cp1250.c index ace3e19d3407..e22a57a4b828 100644 --- a/fs/nls/nls_cp1250.c +++ b/fs/nls/nls_cp1250.c @@ -343,4 +343,5 @@ static void __exit exit_nls_cp1250(void) module_init(init_nls_cp1250) module_exit(exit_nls_cp1250) +MODULE_DESCRIPTION("NLS Windows CP1250 (Slavic/Central European Languages)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp1251.c b/fs/nls/nls_cp1251.c index 9273ddfd08a1..6f46d339f23c 100644 --- a/fs/nls/nls_cp1251.c +++ b/fs/nls/nls_cp1251.c @@ -298,4 +298,5 @@ static void __exit exit_nls_cp1251(void) module_init(init_nls_cp1251) module_exit(exit_nls_cp1251) +MODULE_DESCRIPTION("NLS Windows CP1251 (Bulgarian, Belarusian)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp1255.c b/fs/nls/nls_cp1255.c index 1caf5dfed85b..299e089d4301 100644 --- a/fs/nls/nls_cp1255.c +++ b/fs/nls/nls_cp1255.c @@ -380,5 +380,6 @@ static void __exit exit_nls_cp1255(void) module_init(init_nls_cp1255) module_exit(exit_nls_cp1255) +MODULE_DESCRIPTION("NLS Hebrew charsets (ISO-8859-8, CP1255)"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_NLS(iso8859-8); diff --git a/fs/nls/nls_cp437.c b/fs/nls/nls_cp437.c index 
7ddb830da3fd..ab880499ea32 100644 --- a/fs/nls/nls_cp437.c +++ b/fs/nls/nls_cp437.c @@ -384,4 +384,5 @@ static void __exit exit_nls_cp437(void) module_init(init_nls_cp437) module_exit(exit_nls_cp437) +MODULE_DESCRIPTION("NLS Codepage 437 (United States, Canada)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp737.c b/fs/nls/nls_cp737.c index c593f683a0cd..5c37618296e9 100644 --- a/fs/nls/nls_cp737.c +++ b/fs/nls/nls_cp737.c @@ -347,4 +347,5 @@ static void __exit exit_nls_cp737(void) module_init(init_nls_cp737) module_exit(exit_nls_cp737) +MODULE_DESCRIPTION("NLS Codepage 737 (Greek)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp775.c b/fs/nls/nls_cp775.c index 554c863745f2..51ccc908901f 100644 --- a/fs/nls/nls_cp775.c +++ b/fs/nls/nls_cp775.c @@ -316,4 +316,5 @@ static void __exit exit_nls_cp775(void) module_init(init_nls_cp775) module_exit(exit_nls_cp775) +MODULE_DESCRIPTION("NLS Codepage 775 (Baltic Rim)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp850.c b/fs/nls/nls_cp850.c index 56cccd14b40b..5f9b9507a8b6 100644 --- a/fs/nls/nls_cp850.c +++ b/fs/nls/nls_cp850.c @@ -312,4 +312,5 @@ static void __exit exit_nls_cp850(void) module_init(init_nls_cp850) module_exit(exit_nls_cp850) +MODULE_DESCRIPTION("NLS Codepage 850 (Europe)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp852.c b/fs/nls/nls_cp852.c index 7cdc05ac1d40..fc513a5e8358 100644 --- a/fs/nls/nls_cp852.c +++ b/fs/nls/nls_cp852.c @@ -334,4 +334,5 @@ static void __exit exit_nls_cp852(void) module_init(init_nls_cp852) module_exit(exit_nls_cp852) +MODULE_DESCRIPTION("NLS Codepage 852 (Central/Eastern Europe)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp855.c b/fs/nls/nls_cp855.c index 7426eea05663..a43be58adb36 100644 --- a/fs/nls/nls_cp855.c +++ b/fs/nls/nls_cp855.c @@ -296,4 +296,5 @@ static void __exit exit_nls_cp855(void) module_init(init_nls_cp855) module_exit(exit_nls_cp855) +MODULE_DESCRIPTION("NLS Codepage 855 (Cyrillic)"); 
MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp857.c b/fs/nls/nls_cp857.c index 098309733ebd..772cd4195bad 100644 --- a/fs/nls/nls_cp857.c +++ b/fs/nls/nls_cp857.c @@ -298,4 +298,5 @@ static void __exit exit_nls_cp857(void) module_init(init_nls_cp857) module_exit(exit_nls_cp857) +MODULE_DESCRIPTION("NLS Codepage 857 (Turkish)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp860.c b/fs/nls/nls_cp860.c index 84224478e731..36cf4ca11966 100644 --- a/fs/nls/nls_cp860.c +++ b/fs/nls/nls_cp860.c @@ -361,4 +361,5 @@ static void __exit exit_nls_cp860(void) module_init(init_nls_cp860) module_exit(exit_nls_cp860) +MODULE_DESCRIPTION("NLS Codepage 860 (Portuguese)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp861.c b/fs/nls/nls_cp861.c index dc873e4be092..b7397d079f8f 100644 --- a/fs/nls/nls_cp861.c +++ b/fs/nls/nls_cp861.c @@ -384,4 +384,5 @@ static void __exit exit_nls_cp861(void) module_init(init_nls_cp861) module_exit(exit_nls_cp861) +MODULE_DESCRIPTION("NLS Codepage 861 (Icelandic)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp862.c b/fs/nls/nls_cp862.c index d5263e3c5566..fd3b95d1e95d 100644 --- a/fs/nls/nls_cp862.c +++ b/fs/nls/nls_cp862.c @@ -418,4 +418,5 @@ static void __exit exit_nls_cp862(void) module_init(init_nls_cp862) module_exit(exit_nls_cp862) +MODULE_DESCRIPTION("NLS Codepage 862 (Hebrew)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp863.c b/fs/nls/nls_cp863.c index 051c9832e36a..813ae7944249 100644 --- a/fs/nls/nls_cp863.c +++ b/fs/nls/nls_cp863.c @@ -378,4 +378,5 @@ static void __exit exit_nls_cp863(void) module_init(init_nls_cp863) module_exit(exit_nls_cp863) +MODULE_DESCRIPTION("NLS Codepage 863 (Canadian French)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp864.c b/fs/nls/nls_cp864.c index 97eb1273b2f7..d9eb6d5cd47a 100644 --- a/fs/nls/nls_cp864.c +++ b/fs/nls/nls_cp864.c @@ -404,4 +404,5 @@ static void __exit exit_nls_cp864(void) module_init(init_nls_cp864) 
module_exit(exit_nls_cp864) +MODULE_DESCRIPTION("NLS Codepage 864 (Arabic)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp865.c b/fs/nls/nls_cp865.c index 111214228525..2678ffd98bb6 100644 --- a/fs/nls/nls_cp865.c +++ b/fs/nls/nls_cp865.c @@ -384,4 +384,5 @@ static void __exit exit_nls_cp865(void) module_init(init_nls_cp865) module_exit(exit_nls_cp865) +MODULE_DESCRIPTION("NLS Codepage 865 (Norwegian, Danish)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp866.c b/fs/nls/nls_cp866.c index ffdcbc3fc38d..7e93d0a3802a 100644 --- a/fs/nls/nls_cp866.c +++ b/fs/nls/nls_cp866.c @@ -302,4 +302,5 @@ static void __exit exit_nls_cp866(void) module_init(init_nls_cp866) module_exit(exit_nls_cp866) +MODULE_DESCRIPTION("NLS Codepage 866 (Cyrillic/Russian)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp869.c b/fs/nls/nls_cp869.c index 3b5a34589354..4491737dd5cb 100644 --- a/fs/nls/nls_cp869.c +++ b/fs/nls/nls_cp869.c @@ -312,4 +312,5 @@ static void __exit exit_nls_cp869(void) module_init(init_nls_cp869) module_exit(exit_nls_cp869) +MODULE_DESCRIPTION("NLS Codepage 869 (Greek)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_cp874.c b/fs/nls/nls_cp874.c index 8dfaa10710fa..4fcfbf8ca72c 100644 --- a/fs/nls/nls_cp874.c +++ b/fs/nls/nls_cp874.c @@ -271,5 +271,6 @@ static void __exit exit_nls_cp874(void) module_init(init_nls_cp874) module_exit(exit_nls_cp874) +MODULE_DESCRIPTION("NLS Thai charset (CP874, TIS-620)"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_NLS(tis-620); diff --git a/fs/nls/nls_cp932.c b/fs/nls/nls_cp932.c index 67b7398e8483..e5e6270fcca6 100644 --- a/fs/nls/nls_cp932.c +++ b/fs/nls/nls_cp932.c @@ -7929,5 +7929,6 @@ static void __exit exit_nls_cp932(void) module_init(init_nls_cp932) module_exit(exit_nls_cp932) +MODULE_DESCRIPTION("NLS Japanese charset (Shift-JIS)"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_NLS(sjis); diff --git a/fs/nls/nls_cp936.c b/fs/nls/nls_cp936.c index c96546cfec9f..91d0a15fd7f9 100644 --- 
a/fs/nls/nls_cp936.c +++ b/fs/nls/nls_cp936.c @@ -11107,5 +11107,6 @@ static void __exit exit_nls_cp936(void) module_init(init_nls_cp936) module_exit(exit_nls_cp936) +MODULE_DESCRIPTION("NLS Simplified Chinese charset (CP936, GB2312)"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_NLS(gb2312); diff --git a/fs/nls/nls_cp949.c b/fs/nls/nls_cp949.c index 199171e97aa4..3ae03c76d59c 100644 --- a/fs/nls/nls_cp949.c +++ b/fs/nls/nls_cp949.c @@ -13942,5 +13942,6 @@ static void __exit exit_nls_cp949(void) module_init(init_nls_cp949) module_exit(exit_nls_cp949) +MODULE_DESCRIPTION("NLS Korean charset (CP949, EUC-KR)"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_NLS(euc-kr); diff --git a/fs/nls/nls_cp950.c b/fs/nls/nls_cp950.c index 8e1418708209..e968aa80198d 100644 --- a/fs/nls/nls_cp950.c +++ b/fs/nls/nls_cp950.c @@ -9478,5 +9478,6 @@ static void __exit exit_nls_cp950(void) module_init(init_nls_cp950) module_exit(exit_nls_cp950) +MODULE_DESCRIPTION("NLS Traditional Chinese charset (Big5)"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_NLS(big5); diff --git a/fs/nls/nls_euc-jp.c b/fs/nls/nls_euc-jp.c index 162b3f160353..0191cc9d955e 100644 --- a/fs/nls/nls_euc-jp.c +++ b/fs/nls/nls_euc-jp.c @@ -577,4 +577,5 @@ static void __exit exit_nls_euc_jp(void) module_init(init_nls_euc_jp) module_exit(exit_nls_euc_jp) +MODULE_DESCRIPTION("NLS Japanese charset (EUC-JP)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-1.c b/fs/nls/nls_iso8859-1.c index 69ac020d43b1..a181be488f7d 100644 --- a/fs/nls/nls_iso8859-1.c +++ b/fs/nls/nls_iso8859-1.c @@ -254,4 +254,5 @@ static void __exit exit_nls_iso8859_1(void) module_init(init_nls_iso8859_1) module_exit(exit_nls_iso8859_1) +MODULE_DESCRIPTION("NLS ISO 8859-1 (Latin 1; Western European Languages)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-13.c b/fs/nls/nls_iso8859-13.c index afb3f8f275f0..8e2be5bfeaf1 100644 --- a/fs/nls/nls_iso8859-13.c +++ b/fs/nls/nls_iso8859-13.c @@ -282,4 +282,5 @@ static void 
__exit exit_nls_iso8859_13(void) module_init(init_nls_iso8859_13) module_exit(exit_nls_iso8859_13) +MODULE_DESCRIPTION("NLS ISO 8859-13 (Latin 7; Baltic)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-14.c b/fs/nls/nls_iso8859-14.c index 046370f0b6f0..c789eccb8a69 100644 --- a/fs/nls/nls_iso8859-14.c +++ b/fs/nls/nls_iso8859-14.c @@ -338,4 +338,5 @@ static void __exit exit_nls_iso8859_14(void) module_init(init_nls_iso8859_14) module_exit(exit_nls_iso8859_14) +MODULE_DESCRIPTION("NLS ISO 8859-14 (Latin 8; Celtic)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-15.c b/fs/nls/nls_iso8859-15.c index 7e34a841a056..ffec649176fb 100644 --- a/fs/nls/nls_iso8859-15.c +++ b/fs/nls/nls_iso8859-15.c @@ -304,4 +304,5 @@ static void __exit exit_nls_iso8859_15(void) module_init(init_nls_iso8859_15) module_exit(exit_nls_iso8859_15) +MODULE_DESCRIPTION("NLS ISO 8859-15 (Latin 9; Western European Languages with Euro)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-2.c b/fs/nls/nls_iso8859-2.c index 7dd571181741..d352334d0314 100644 --- a/fs/nls/nls_iso8859-2.c +++ b/fs/nls/nls_iso8859-2.c @@ -305,4 +305,5 @@ static void __exit exit_nls_iso8859_2(void) module_init(init_nls_iso8859_2) module_exit(exit_nls_iso8859_2) +MODULE_DESCRIPTION("NLS ISO 8859-2 (Latin 2; Slavic/Central European Languages)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-3.c b/fs/nls/nls_iso8859-3.c index 740b75ec4493..09990e6634d2 100644 --- a/fs/nls/nls_iso8859-3.c +++ b/fs/nls/nls_iso8859-3.c @@ -305,4 +305,5 @@ static void __exit exit_nls_iso8859_3(void) module_init(init_nls_iso8859_3) module_exit(exit_nls_iso8859_3) +MODULE_DESCRIPTION("NLS ISO 8859-3 (Latin 3; Esperanto, Galician, Maltese, Turkish)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-4.c b/fs/nls/nls_iso8859-4.c index 8826021e32f5..92795224912e 100644 --- a/fs/nls/nls_iso8859-4.c +++ b/fs/nls/nls_iso8859-4.c @@ -305,4 +305,5 @@ static void __exit 
exit_nls_iso8859_4(void) module_init(init_nls_iso8859_4) module_exit(exit_nls_iso8859_4) +MODULE_DESCRIPTION("NLS ISO 8859-4 (Latin 4; old Baltic charset)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-5.c b/fs/nls/nls_iso8859-5.c index 7c04057a1ad8..32309315307a 100644 --- a/fs/nls/nls_iso8859-5.c +++ b/fs/nls/nls_iso8859-5.c @@ -269,4 +269,5 @@ static void __exit exit_nls_iso8859_5(void) module_init(init_nls_iso8859_5) module_exit(exit_nls_iso8859_5) +MODULE_DESCRIPTION("NLS ISO 8859-5 (Cyrillic)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-6.c b/fs/nls/nls_iso8859-6.c index d4a881400d74..c18183469d2a 100644 --- a/fs/nls/nls_iso8859-6.c +++ b/fs/nls/nls_iso8859-6.c @@ -260,4 +260,5 @@ static void __exit exit_nls_iso8859_6(void) module_init(init_nls_iso8859_6) module_exit(exit_nls_iso8859_6) +MODULE_DESCRIPTION("NLS ISO 8859-6 (Arabic)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-7.c b/fs/nls/nls_iso8859-7.c index 37b75d825a75..3652d6832864 100644 --- a/fs/nls/nls_iso8859-7.c +++ b/fs/nls/nls_iso8859-7.c @@ -314,4 +314,5 @@ static void __exit exit_nls_iso8859_7(void) module_init(init_nls_iso8859_7) module_exit(exit_nls_iso8859_7) +MODULE_DESCRIPTION("NLS ISO 8859-7 (Modern Greek)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_iso8859-9.c b/fs/nls/nls_iso8859-9.c index 557b98250d37..11a67834b855 100644 --- a/fs/nls/nls_iso8859-9.c +++ b/fs/nls/nls_iso8859-9.c @@ -269,4 +269,5 @@ static void __exit exit_nls_iso8859_9(void) module_init(init_nls_iso8859_9) module_exit(exit_nls_iso8859_9) +MODULE_DESCRIPTION("NLS ISO 8859-9 (Latin 5; Turkish)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_koi8-r.c b/fs/nls/nls_koi8-r.c index 811f232fccfb..e3dca27a3803 100644 --- a/fs/nls/nls_koi8-r.c +++ b/fs/nls/nls_koi8-r.c @@ -320,4 +320,5 @@ static void __exit exit_nls_koi8_r(void) module_init(init_nls_koi8_r) module_exit(exit_nls_koi8_r) +MODULE_DESCRIPTION("NLS KOI8-R (Russian)"); 
MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_koi8-ru.c b/fs/nls/nls_koi8-ru.c index a80a741a8676..07afcd9e58c0 100644 --- a/fs/nls/nls_koi8-ru.c +++ b/fs/nls/nls_koi8-ru.c @@ -79,4 +79,5 @@ static void __exit exit_nls_koi8_ru(void) module_init(init_nls_koi8_ru) module_exit(exit_nls_koi8_ru) +MODULE_DESCRIPTION("NLS KOI8-RU (Belarusian)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_koi8-u.c b/fs/nls/nls_koi8-u.c index 7e029e4c188a..f60645758c1a 100644 --- a/fs/nls/nls_koi8-u.c +++ b/fs/nls/nls_koi8-u.c @@ -327,4 +327,5 @@ static void __exit exit_nls_koi8_u(void) module_init(init_nls_koi8_u) module_exit(exit_nls_koi8_u) +MODULE_DESCRIPTION("NLS KOI8-U (Ukrainian)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_ucs2_utils.c b/fs/nls/nls_ucs2_utils.c index a69781c54dd8..d4564b79d7bf 100644 --- a/fs/nls/nls_ucs2_utils.c +++ b/fs/nls/nls_ucs2_utils.c @@ -16,6 +16,7 @@ #include <asm/unaligned.h> #include "nls_ucs2_utils.h" +MODULE_DESCRIPTION("NLS UCS-2"); MODULE_LICENSE("GPL"); /* diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c index afcfbc4a14db..a0fa0610eaac 100644 --- a/fs/nls/nls_utf8.c +++ b/fs/nls/nls_utf8.c @@ -64,4 +64,5 @@ static void __exit exit_nls_utf8(void) module_init(init_nls_utf8) module_exit(exit_nls_utf8) +MODULE_DESCRIPTION("NLS UTF-8"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nsfs.c b/fs/nsfs.c index 07e22a15ef02..a4a925dce331 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -8,10 +8,12 @@ #include <linux/magic.h> #include <linux/ktime.h> #include <linux/seq_file.h> +#include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/nsfs.h> #include <linux/uaccess.h> +#include "mount.h" #include "internal.h" static struct vfsmount *nsfs_mnt; @@ -82,40 +84,47 @@ int ns_get_path(struct path *path, struct task_struct *task, return ns_get_path_cb(path, ns_get_path_task, &args); } -int open_related_ns(struct ns_common *ns, - struct ns_common *(*get_ns)(struct ns_common *ns)) +/** + * open_namespace - 
open a namespace + * @ns: the namespace to open + * + * This will consume a reference to @ns indendent of success or failure. + * + * Return: A file descriptor on success or a negative error code on failure. + */ +int open_namespace(struct ns_common *ns) { - struct path path = {}; - struct ns_common *relative; + struct path path __free(path_put) = {}; struct file *f; int err; - int fd; - fd = get_unused_fd_flags(O_CLOEXEC); + /* call first to consume reference */ + err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); + if (err < 0) + return err; + + CLASS(get_unused_fd, fd)(O_CLOEXEC); if (fd < 0) return fd; - relative = get_ns(ns); - if (IS_ERR(relative)) { - put_unused_fd(fd); - return PTR_ERR(relative); - } + f = dentry_open(&path, O_RDONLY, current_cred()); + if (IS_ERR(f)) + return PTR_ERR(f); - err = path_from_stashed(&relative->stashed, nsfs_mnt, relative, &path); - if (err < 0) { - put_unused_fd(fd); - return err; - } + fd_install(fd, f); + return take_fd(fd); +} - f = dentry_open(&path, O_RDONLY, current_cred()); - path_put(&path); - if (IS_ERR(f)) { - put_unused_fd(fd); - fd = PTR_ERR(f); - } else - fd_install(fd, f); +int open_related_ns(struct ns_common *ns, + struct ns_common *(*get_ns)(struct ns_common *ns)) +{ + struct ns_common *relative; + + relative = get_ns(ns); + if (IS_ERR(relative)) + return PTR_ERR(relative); - return fd; + return open_namespace(relative); } EXPORT_SYMBOL_GPL(open_related_ns); @@ -123,9 +132,12 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct user_namespace *user_ns; + struct pid_namespace *pid_ns; + struct task_struct *tsk; struct ns_common *ns = get_proc_ns(file_inode(filp)); uid_t __user *argp; uid_t uid; + int ret; switch (ioctl) { case NS_GET_USERNS: @@ -143,9 +155,69 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, argp = (uid_t __user *) arg; uid = from_kuid_munged(current_user_ns(), user_ns->owner); return put_user(uid, argp); + case NS_GET_MNTNS_ID: { + 
struct mnt_namespace *mnt_ns; + __u64 __user *idp; + __u64 id; + + if (ns->ops->type != CLONE_NEWNS) + return -EINVAL; + + mnt_ns = container_of(ns, struct mnt_namespace, ns); + idp = (__u64 __user *)arg; + id = mnt_ns->seq; + return put_user(id, idp); + } + case NS_GET_PID_FROM_PIDNS: + fallthrough; + case NS_GET_TGID_FROM_PIDNS: + fallthrough; + case NS_GET_PID_IN_PIDNS: + fallthrough; + case NS_GET_TGID_IN_PIDNS: + if (ns->ops->type != CLONE_NEWPID) + return -EINVAL; + + ret = -ESRCH; + pid_ns = container_of(ns, struct pid_namespace, ns); + + rcu_read_lock(); + + if (ioctl == NS_GET_PID_IN_PIDNS || + ioctl == NS_GET_TGID_IN_PIDNS) + tsk = find_task_by_vpid(arg); + else + tsk = find_task_by_pid_ns(arg, pid_ns); + if (!tsk) + break; + + switch (ioctl) { + case NS_GET_PID_FROM_PIDNS: + ret = task_pid_vnr(tsk); + break; + case NS_GET_TGID_FROM_PIDNS: + ret = task_tgid_vnr(tsk); + break; + case NS_GET_PID_IN_PIDNS: + ret = task_pid_nr_ns(tsk, pid_ns); + break; + case NS_GET_TGID_IN_PIDNS: + ret = task_tgid_nr_ns(tsk, pid_ns); + break; + default: + ret = 0; + break; + } + rcu_read_unlock(); + + if (!ret) + ret = -ESRCH; + break; default: - return -ENOTTY; + ret = -ENOTTY; } + + return ret; } int ns_get_name(char *buf, size_t size, struct task_struct *task, diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 27fbde2701b6..c5b688c5f984 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -259,8 +259,8 @@ enum Opt { // clang-format off static const struct fs_parameter_spec ntfs_fs_parameters[] = { - fsparam_u32("uid", Opt_uid), - fsparam_u32("gid", Opt_gid), + fsparam_uid("uid", Opt_uid), + fsparam_gid("gid", Opt_gid), fsparam_u32oct("umask", Opt_umask), fsparam_u32oct("dmask", Opt_dmask), fsparam_u32oct("fmask", Opt_fmask), @@ -319,14 +319,10 @@ static int ntfs_fs_parse_param(struct fs_context *fc, switch (opt) { case Opt_uid: - opts->fs_uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(opts->fs_uid)) - return invalf(fc, "ntfs3: Invalid value 
for uid."); + opts->fs_uid = result.uid; break; case Opt_gid: - opts->fs_gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(opts->fs_gid)) - return invalf(fc, "ntfs3: Invalid value for gid."); + opts->fs_gid = result.gid; break; case Opt_umask: if (result.uint_32 & ~07777) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index f0467d3b3c88..6be175a1ab3c 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2366,6 +2366,11 @@ static int ocfs2_dio_end_io_write(struct inode *inode, } list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) { + ret = ocfs2_assure_trans_credits(handle, credits); + if (ret < 0) { + mlog_errno(ret); + break; + } ret = ocfs2_mark_extent_written(inode, &et, handle, ue->ue_cpos, 1, ue->ue_phys, diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 604fea3a26ff..530fba34f6d3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -446,6 +446,23 @@ bail: } /* + * Make sure handle has at least 'nblocks' credits available. If it does not + * have that many credits available, we will try to extend the handle to have + * enough credits. If that fails, we will restart transaction to have enough + * credits. Similar notes regarding data consistency and locking implications + * as for ocfs2_extend_trans() apply here. + */ +int ocfs2_assure_trans_credits(handle_t *handle, int nblocks) +{ + int old_nblks = jbd2_handle_buffer_credits(handle); + + trace_ocfs2_assure_trans_credits(old_nblks); + if (old_nblks >= nblocks) + return 0; + return ocfs2_extend_trans(handle, nblocks - old_nblks); +} + +/* * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA. * If that fails, restart the transaction & regain write access for the * buffer head which is used for metadata modifications. 
@@ -479,12 +496,6 @@ bail: return status; } - -struct ocfs2_triggers { - struct jbd2_buffer_trigger_type ot_triggers; - int ot_offset; -}; - static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers) { return container_of(triggers, struct ocfs2_triggers, ot_triggers); @@ -548,85 +559,76 @@ static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh) { + struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers); + mlog(ML_ERROR, "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, " "bh->b_blocknr = %llu\n", (unsigned long)bh, (unsigned long long)bh->b_blocknr); - ocfs2_error(bh->b_assoc_map->host->i_sb, + ocfs2_error(ot->sb, "JBD2 has aborted our journal, ocfs2 cannot continue\n"); } -static struct ocfs2_triggers di_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_dinode, i_check), -}; - -static struct ocfs2_triggers eb_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_extent_block, h_check), -}; - -static struct ocfs2_triggers rb_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), -}; - -static struct ocfs2_triggers gd_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), -}; - -static struct ocfs2_triggers db_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_db_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, -}; +static void ocfs2_setup_csum_triggers(struct super_block *sb, + enum ocfs2_journal_trigger_type type, + struct ocfs2_triggers *ot) +{ + BUG_ON(type >= OCFS2_JOURNAL_TRIGGER_COUNT); 
-static struct ocfs2_triggers xb_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), -}; + switch (type) { + case OCFS2_JTR_DI: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_dinode, i_check); + break; + case OCFS2_JTR_EB: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_extent_block, h_check); + break; + case OCFS2_JTR_RB: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_refcount_block, rf_check); + break; + case OCFS2_JTR_GD: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_group_desc, bg_check); + break; + case OCFS2_JTR_DB: + ot->ot_triggers.t_frozen = ocfs2_db_frozen_trigger; + break; + case OCFS2_JTR_XB: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_xattr_block, xb_check); + break; + case OCFS2_JTR_DQ: + ot->ot_triggers.t_frozen = ocfs2_dq_frozen_trigger; + break; + case OCFS2_JTR_DR: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check); + break; + case OCFS2_JTR_DL: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check); + break; + case OCFS2_JTR_NONE: + /* To make compiler happy... 
*/ + return; + } -static struct ocfs2_triggers dq_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_dq_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, -}; + ot->ot_triggers.t_abort = ocfs2_abort_trigger; + ot->sb = sb; +} -static struct ocfs2_triggers dr_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), -}; +void ocfs2_initialize_journal_triggers(struct super_block *sb, + struct ocfs2_triggers triggers[]) +{ + enum ocfs2_journal_trigger_type type; -static struct ocfs2_triggers dl_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), -}; + for (type = OCFS2_JTR_DI; type < OCFS2_JOURNAL_TRIGGER_COUNT; type++) + ocfs2_setup_csum_triggers(sb, type, &triggers[type]); +} static int __ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci, @@ -708,56 +710,91 @@ static int __ocfs2_journal_access(handle_t *handle, int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DI], + type); } int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_EB], + type); } int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &rb_triggers, + struct ocfs2_super *osb = 
OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_RB], type); } int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_GD], + type); } int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DB], + type); } int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_XB], + type); } int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DQ], + type); } int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DR], + type); } int ocfs2_journal_access_dl(handle_t *handle, struct 
ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DL], + type); } int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci, @@ -778,13 +815,15 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh) if (!is_handle_aborted(handle)) { journal_t *journal = handle->h_transaction->t_journal; - mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. " - "Aborting transaction and journal.\n"); + mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed: " + "handle type %u started at line %u, credits %u/%u " + "errcode %d. Aborting transaction and journal.\n", + handle->h_type, handle->h_line_no, + handle->h_requested_credits, + jbd2_handle_buffer_credits(handle), status); handle->h_err = status; jbd2_journal_abort_handle(handle); jbd2_journal_abort(journal, status); - ocfs2_abort(bh->b_assoc_map->host->i_sb, - "Journal already aborted.\n"); } } } diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 41c9fe7e62f9..e3c3a35dc5e0 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -243,6 +243,8 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int ocfs2_commit_trans(struct ocfs2_super *osb, handle_t *handle); int ocfs2_extend_trans(handle_t *handle, int nblocks); +int ocfs2_assure_trans_credits(handle_t *handle, + int nblocks); int ocfs2_allocate_extend_trans(handle_t *handle, int thresh); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index a503c553bab2..8fe826143d7b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -284,6 +284,30 @@ enum ocfs2_mount_options #define OCFS2_OSB_ERROR_FS 0x0004 #define OCFS2_DEFAULT_ATIME_QUANTUM 60 +struct ocfs2_triggers { + struct jbd2_buffer_trigger_type ot_triggers; + int ot_offset; + struct super_block *sb; +}; + +enum ocfs2_journal_trigger_type { + 
OCFS2_JTR_DI, + OCFS2_JTR_EB, + OCFS2_JTR_RB, + OCFS2_JTR_GD, + OCFS2_JTR_DB, + OCFS2_JTR_XB, + OCFS2_JTR_DQ, + OCFS2_JTR_DR, + OCFS2_JTR_DL, + OCFS2_JTR_NONE /* This must be the last entry */ +}; + +#define OCFS2_JOURNAL_TRIGGER_COUNT OCFS2_JTR_NONE + +void ocfs2_initialize_journal_triggers(struct super_block *sb, + struct ocfs2_triggers triggers[]); + struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; @@ -351,6 +375,9 @@ struct ocfs2_super struct ocfs2_journal *journal; unsigned long osb_commit_interval; + /* Journal triggers for checksum */ + struct ocfs2_triggers s_journal_triggers[OCFS2_JOURNAL_TRIGGER_COUNT]; + struct delayed_work la_enable_wq; /* diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 60e208b01c8d..0511c69c9fde 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -2577,6 +2577,8 @@ DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_commit_cache_end); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans); +DEFINE_OCFS2_INT_EVENT(ocfs2_assure_trans_credits); + DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8aabaed2c1cb..afee70125ae3 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1075,9 +1075,11 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root, osb, &ocfs2_osb_debug_fops); - if (ocfs2_meta_ecc(osb)) + if (ocfs2_meta_ecc(osb)) { + ocfs2_initialize_journal_triggers(sb, osb->s_journal_triggers); ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats, osb->osb_debug_root); + } status = ocfs2_mount_volume(sb); if (status < 0) diff --git a/fs/open.c b/fs/open.c index 89cafb572061..22adbef7ecc2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -202,13 +202,13 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small) return error; } -SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length) 
+SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length) { return do_sys_ftruncate(fd, length, 1); } #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length) +COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length) { return do_sys_ftruncate(fd, length, 1); } @@ -247,6 +247,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); long ret; + loff_t sum; if (offset < 0 || len <= 0) return -EINVAL; @@ -319,8 +320,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) return -ENODEV; - /* Check for wrap through zero too */ - if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) + /* Check for wraparound */ + if (check_add_overflow(offset, len, &sum)) + return -EFBIG; + + if (sum > inode->i_sb->s_maxbytes) return -EFBIG; if (!file->f_op->fallocate) @@ -982,12 +986,11 @@ static int do_dentry_open(struct file *f, */ if (f->f_mode & FMODE_WRITE) { /* - * Paired with smp_mb() in collapse_file() to ensure nr_thps - * is up to date and the update to i_writecount by - * get_write_access() is visible. Ensures subsequent insertion - * of THPs into the page cache will fail. + * Depends on full fence from get_write_access() to synchronize + * against collapse_file() regarding i_writecount and nr_thps + * updates. Ensures subsequent insertion of THPs into the page + * cache will fail. */ - smp_mb(); if (filemap_nr_thps(inode->i_mapping)) { struct address_space *mapping = inode->i_mapping; @@ -1004,11 +1007,6 @@ static int do_dentry_open(struct file *f, } } - /* - * Once we return a file with FMODE_OPENED, __fput() will call - * fsnotify_close(), so we need fsnotify_open() here for symmetry. 
- */ - fsnotify_open(f); return 0; cleanup_all: @@ -1085,8 +1083,19 @@ EXPORT_SYMBOL(file_path); */ int vfs_open(const struct path *path, struct file *file) { + int ret; + file->f_path = *path; - return do_dentry_open(file, NULL); + ret = do_dentry_open(file, NULL); + if (!ret) { + /* + * Once we return a file with FMODE_OPENED, __fput() will call + * fsnotify_close(), so we need fsnotify_open() here for + * symmetry. + */ + fsnotify_open(file); + } + return ret; } struct file *dentry_open(const struct path *path, int flags, @@ -1177,8 +1186,10 @@ struct file *kernel_file_open(const struct path *path, int flags, error = do_dentry_open(f, NULL); if (error) { fput(f); - f = ERR_PTR(error); + return ERR_PTR(error); } + + fsnotify_open(f); return f; } EXPORT_SYMBOL_GPL(kernel_file_open); diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index a7b527ea50d3..26ecda0e4d19 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -471,4 +471,5 @@ static void __exit exit_openprom_fs(void) module_init(init_openprom_fs) module_exit(exit_openprom_fs) +MODULE_DESCRIPTION("OpenPROM filesystem support"); MODULE_LICENSE("GPL"); diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 085912268442..fdb9b65db1de 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -56,7 +56,6 @@ static int orangefs_writepage_locked(struct page *page, ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen, len, wr, NULL, NULL); if (ret < 0) { - SetPageError(page); mapping_set_error(page->mapping, ret); } else { ret = 0; @@ -119,7 +118,6 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow, 0, &wr, NULL, NULL); if (ret < 0) { for (i = 0; i < ow->npages; i++) { - SetPageError(ow->pages[i]); mapping_set_error(ow->pages[i]->mapping, ret); if (PagePrivate(ow->pages[i])) { wrp = (struct orangefs_write_range *) @@ -303,15 +301,10 @@ static int orangefs_read_folio(struct file *file, struct folio *folio) iov_iter_zero(~0U, &iter); /* takes care of 
potential aliasing */ flush_dcache_folio(folio); - if (ret < 0) { - folio_set_error(folio); - } else { - folio_mark_uptodate(folio); + if (ret > 0) ret = 0; - } - /* unlock the folio after the ->read_folio() routine completes */ - folio_unlock(folio); - return ret; + folio_end_read(folio, ret == 0); + return ret; } static int orangefs_write_begin(struct file *file, diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index b501dc07f922..edcca4beb765 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -274,10 +274,8 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap, gossip_err("orangefs error: asked for %d pages, only got %d.\n", bufmap->page_count, ret); - for (i = 0; i < ret; i++) { - SetPageError(bufmap->page_array[i]); + for (i = 0; i < ret; i++) unpin_user_page(bufmap->page_array[i]); - } return -ENOMEM; } diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 116f542442dd..ab65e98a1def 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1314,10 +1314,6 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry, int flags = file->f_flags | OVL_OPEN_FLAGS; int err; - err = ovl_copy_up(dentry->d_parent); - if (err) - return err; - old_cred = ovl_override_creds(dentry->d_sb); err = ovl_setup_cred_for_create(dentry, inode, mode, old_cred); if (err) @@ -1360,6 +1356,10 @@ static int ovl_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (!OVL_FS(dentry->d_sb)->tmpfile) return -EOPNOTSUPP; + err = ovl_copy_up(dentry->d_parent); + if (err) + return err; + err = ovl_want_write(dentry); if (err) return err; diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 063409069f56..5868cb222955 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -181,6 +181,10 @@ static int ovl_check_encode_origin(struct dentry *dentry) struct ovl_fs *ofs = OVL_FS(dentry->d_sb); bool decodable = ofs->config.nfs_export; + /* No upper layer? 
*/ + if (!ovl_upper_mnt(ofs)) + return 1; + /* Lower file handle for non-upper non-decodable */ if (!ovl_dentry_upper(dentry) && !decodable) return 1; @@ -209,7 +213,7 @@ static int ovl_check_encode_origin(struct dentry *dentry) * ovl_connect_layer() will try to make origin's layer "connected" by * copying up a "connectable" ancestor. */ - if (d_is_dir(dentry) && ovl_upper_mnt(ofs) && decodable) + if (d_is_dir(dentry) && decodable) return ovl_connect_layer(dentry); /* Lower file handle for indexed and non-upper dir/non-dir */ diff --git a/fs/pidfs.c b/fs/pidfs.c index dbb9d854d1c5..c9cb14181def 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -11,10 +11,16 @@ #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/pseudo_fs.h> +#include <linux/ptrace.h> #include <linux/seq_file.h> #include <uapi/linux/pidfd.h> +#include <linux/ipc_namespace.h> +#include <linux/time_namespace.h> +#include <linux/utsname.h> +#include <net/net_namespace.h> #include "internal.h" +#include "mount.h" #ifdef CONFIG_PROC_FS /** @@ -108,11 +114,95 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) return poll_flags; } +static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct task_struct *task __free(put_task) = NULL; + struct nsproxy *nsp __free(put_nsproxy) = NULL; + struct pid *pid = pidfd_pid(file); + struct ns_common *ns_common; + + if (arg) + return -EINVAL; + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) + return -ESRCH; + + scoped_guard(task_lock, task) { + nsp = task->nsproxy; + if (nsp) + get_nsproxy(nsp); + } + if (!nsp) + return -ESRCH; /* just pretend it didn't exist */ + + /* + * We're trying to open a file descriptor to the namespace so perform a + * filesystem cred ptrace check. Also, we mirror nsfs behavior. + */ + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) + return -EACCES; + + switch (cmd) { + /* Namespaces that hang off nsproxy. 
*/ + case PIDFD_GET_CGROUP_NAMESPACE: + get_cgroup_ns(nsp->cgroup_ns); + ns_common = to_ns_common(nsp->cgroup_ns); + break; + case PIDFD_GET_IPC_NAMESPACE: + get_ipc_ns(nsp->ipc_ns); + ns_common = to_ns_common(nsp->ipc_ns); + break; + case PIDFD_GET_MNT_NAMESPACE: + get_mnt_ns(nsp->mnt_ns); + ns_common = to_ns_common(nsp->mnt_ns); + break; + case PIDFD_GET_NET_NAMESPACE: + ns_common = to_ns_common(nsp->net_ns); + get_net_ns(ns_common); + break; + case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: + get_pid_ns(nsp->pid_ns_for_children); + ns_common = to_ns_common(nsp->pid_ns_for_children); + break; + case PIDFD_GET_TIME_NAMESPACE: + get_time_ns(nsp->time_ns); + ns_common = to_ns_common(nsp->time_ns); + break; + case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: + get_time_ns(nsp->time_ns_for_children); + ns_common = to_ns_common(nsp->time_ns_for_children); + break; + case PIDFD_GET_UTS_NAMESPACE: + get_uts_ns(nsp->uts_ns); + ns_common = to_ns_common(nsp->uts_ns); + break; + /* Namespaces that don't hang off nsproxy. 
*/ + case PIDFD_GET_USER_NAMESPACE: + rcu_read_lock(); + ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns))); + rcu_read_unlock(); + break; + case PIDFD_GET_PID_NAMESPACE: + rcu_read_lock(); + ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task))); + rcu_read_unlock(); + break; + default: + return -ENOIOCTLCMD; + } + + /* open_namespace() unconditionally consumes the reference */ + return open_namespace(ns_common); +} + static const struct file_operations pidfs_file_operations = { .poll = pidfd_poll, #ifdef CONFIG_PROC_FS .show_fdinfo = pidfd_show_fdinfo, #endif + .unlocked_ioctl = pidfd_ioctl, + .compat_ioctl = compat_ptr_ioctl, }; struct pid *pidfd_pid(const struct file *file) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 775ce0bcf08c..c02f1e63f82d 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -202,8 +202,8 @@ int proc_alloc_inum(unsigned int *inum) { int i; - i = ida_simple_get(&proc_inum_ida, 0, UINT_MAX - PROC_DYNAMIC_FIRST + 1, - GFP_KERNEL); + i = ida_alloc_max(&proc_inum_ida, UINT_MAX - PROC_DYNAMIC_FIRST, + GFP_KERNEL); if (i < 0) return i; @@ -213,7 +213,7 @@ int proc_alloc_inum(unsigned int *inum) void proc_free_inum(unsigned int inum) { - ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); + ida_free(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); } static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f8d35f993fe5..71e5039d940d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -707,6 +707,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) #ifdef CONFIG_X86_USER_SHADOW_STACK [ilog2(VM_SHADOW_STACK)] = "ss", #endif +#ifdef CONFIG_64BIT + [ilog2(VM_SEALED)] = "sl", +#endif }; size_t i; diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 0a808951b7d3..e133b507ddf3 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -61,7 +61,7 @@ static int show_sb_opts(struct 
seq_file *m, struct super_block *sb) return security_sb_show_options(m, sb); } -static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) +static void show_vfsmnt_opts(struct seq_file *m, struct vfsmount *mnt) { static const struct proc_fs_opts mnt_opts[] = { { MNT_NOSUID, ",nosuid" }, @@ -124,7 +124,7 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) err = show_sb_opts(m, sb); if (err) goto out; - show_mnt_opts(m, mnt); + show_vfsmnt_opts(m, mnt); if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt_path.dentry); seq_puts(m, " 0 0\n"); @@ -153,7 +153,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) goto out; seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw"); - show_mnt_opts(m, mnt); + show_vfsmnt_opts(m, mnt); /* Tagged fields ("foo:X" or "bar") */ if (IS_MNT_SHARED(r)) diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index d79841e94428..e399e2dd3a12 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -430,5 +430,6 @@ static void __exit exit_qnx4_fs(void) module_init(init_qnx4_fs) module_exit(exit_qnx4_fs) +MODULE_DESCRIPTION("QNX4 file system"); MODULE_LICENSE("GPL"); diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index d62fbef838b6..4f1735b882b1 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -694,4 +694,5 @@ static void __exit exit_qnx6_fs(void) module_init(init_qnx6_fs) module_exit(exit_qnx6_fs) +MODULE_DESCRIPTION("QNX6 file system"); MODULE_LICENSE("GPL"); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 627eb2f72ef3..a2b256dac36e 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2246,9 +2246,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) int cnt; struct quota_info *dqopt = sb_dqopt(sb); - /* s_umount should be held in exclusive mode */ - if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) - up_read(&sb->s_umount); + rwsem_assert_held_write(&sb->s_umount); /* Cannot turn off usage accounting without turning off limits, or * suspend quotas 
and simultaneously turn quotas off. */ @@ -2510,9 +2508,7 @@ int dquot_resume(struct super_block *sb, int type) int ret = 0, cnt; unsigned int flags; - /* s_umount should be held in exclusive mode */ - if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) - up_read(&sb->s_umount); + rwsem_assert_held_write(&sb->s_umount); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) diff --git a/fs/readdir.c b/fs/readdir.c index 278bc0254732..d6c82421902a 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -22,8 +22,6 @@ #include <linux/compat.h> #include <linux/uaccess.h> -#include <asm/unaligned.h> - /* * Some filesystems were never converted to '->iterate_shared()' * and their directory iterators want the inode lock held for @@ -72,7 +70,7 @@ int wrap_directory_iterator(struct file *file, EXPORT_SYMBOL(wrap_directory_iterator); /* - * Note the "unsafe_put_user() semantics: we goto a + * Note the "unsafe_put_user()" semantics: we goto a * label for errors. */ #define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \ diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index c1daedc50f4c..9b43a81a6488 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2699,7 +2699,6 @@ fail: } bh = bh->b_this_page; } while (bh != head); - folio_set_error(folio); BUG_ON(folio_test_writeback(folio)); folio_start_writeback(folio); folio_unlock(folio); diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 2cbb92462074..68758b6fed94 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -101,19 +101,15 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos); */ static int romfs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; loff_t offset, size; unsigned long fillsize, pos; void *buf; int ret; - buf = kmap(page); - if (!buf) - return -ENOMEM; + buf = kmap_local_folio(folio, 0); - /* 32 bit warning -- but not 
for us :) */ - offset = page_offset(page); + offset = folio_pos(folio); size = i_size_read(inode); fillsize = 0; ret = 0; @@ -125,20 +121,14 @@ static int romfs_read_folio(struct file *file, struct folio *folio) ret = romfs_dev_read(inode->i_sb, pos, buf, fillsize); if (ret < 0) { - SetPageError(page); fillsize = 0; ret = -EIO; } } - if (fillsize < PAGE_SIZE) - memset(buf + fillsize, 0, PAGE_SIZE - fillsize); - if (ret == 0) - SetPageUptodate(page); - - flush_dcache_page(page); - kunmap(page); - unlock_page(page); + buf = folio_zero_tail(folio, fillsize, buf); + kunmap_local(buf); + folio_end_read(folio, ret == 0); return ret; } diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index bb86fc0641d8..6397fdefd876 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -134,7 +134,7 @@ module_param(enable_oplocks, bool, 0644); MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1"); module_param(enable_gcm_256, bool, 0644); -MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: n/N/0"); +MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/0"); module_param(require_gcm_256, bool, 0644); MODULE_PARM_DESC(require_gcm_256, "Require strongest (256 bit) GCM encryption. 
Default: n/N/0"); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 73482734a8d8..a865941724c0 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1494,6 +1494,8 @@ struct cifs_aio_ctx { struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; + struct TCP_Server_Info *server; + pid_t pid; }; /* asynchronous read support */ @@ -1504,7 +1506,6 @@ struct cifs_io_subrequest { struct cifs_io_request *req; }; ssize_t got_bytes; - pid_t pid; unsigned int xid; int result; bool have_xid; @@ -1917,8 +1918,8 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ -#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP) -#define CIFSSEC_MAX (CIFSSEC_MUST_NTLMV2) +#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL) +#define CIFSSEC_MAX (CIFSSEC_MAY_SIGN | CIFSSEC_MUST_KRB5 | CIFSSEC_MAY_SEAL) #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) /* ***************************************************************** diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 25e9ab947c17..595c4b673707 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1345,8 +1345,8 @@ cifs_async_readv(struct cifs_io_subrequest *rdata) if (rc) return rc; - smb->hdr.Pid = cpu_to_le16((__u16)rdata->pid); - smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); + smb->hdr.Pid = cpu_to_le16((__u16)rdata->req->pid); + smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->req->pid >> 16)); smb->AndXCommand = 0xFF; /* none */ smb->Fid = rdata->req->cfile->fid.netfid; @@ -1689,8 +1689,8 @@ cifs_async_writev(struct cifs_io_subrequest *wdata) if (rc) goto async_writev_out; - smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid); - smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); + smb->hdr.Pid = 
cpu_to_le16((__u16)wdata->req->pid); + smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->req->pid >> 16)); smb->AndXCommand = 0xFF; /* none */ smb->Fid = wdata->req->cfile->fid.netfid; diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9d5c2440abfc..1374635e89fa 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -134,17 +134,15 @@ fail: static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; - struct TCP_Server_Info *server; struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); + struct TCP_Server_Info *server = req->server; struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); size_t rsize = 0; int rc; rdata->xid = get_xid(); rdata->have_xid = true; - - server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; if (cifs_sb->ctx->rsize == 0) @@ -179,15 +177,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) struct netfs_io_request *rreq = subreq->rreq; struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); - struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); - pid_t pid; int rc = 0; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) - pid = req->cfile->pid; - else - pid = current->tgid; // Ummm... 
This may be a workqueue - cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n", __func__, rreq->debug_id, subreq->debug_index, rreq->mapping, subreq->transferred, subreq->len); @@ -201,16 +192,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) } __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); - rdata->pid = pid; - - rc = adjust_credits(rdata->server, &rdata->credits, rdata->subreq.len); - if (!rc) { - if (rdata->req->cfile->invalidHandle) - rc = -EAGAIN; - else - rc = rdata->server->ops->async_readv(rdata); - } + rc = rdata->server->ops->async_readv(rdata); out: if (rc) netfs_subreq_terminated(subreq, rc, false); @@ -245,11 +228,15 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) rreq->rsize = cifs_sb->ctx->rsize; rreq->wsize = cifs_sb->ctx->wsize; + req->pid = current->tgid; // Ummm... This may be a workqueue if (file) { open_file = file->private_data; rreq->netfs_priv = file->private_data; req->cfile = cifsFileInfo_get(open_file); + req->server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + req->pid = req->cfile->pid; } else if (rreq->origin != NETFS_WRITEBACK) { WARN_ON_ONCE(1); return -EIO; @@ -259,35 +246,6 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) } /* - * Expand the size of a readahead to the size of the rsize, if at least as - * large as a page, allowing for the possibility that rsize is not pow-2 - * aligned. 
- */ -static void cifs_expand_readahead(struct netfs_io_request *rreq) -{ - unsigned int rsize = rreq->rsize; - loff_t misalignment, i_size = i_size_read(rreq->inode); - - if (rsize < PAGE_SIZE) - return; - - if (rsize < INT_MAX) - rsize = roundup_pow_of_two(rsize); - else - rsize = ((unsigned int)INT_MAX + 1) / 2; - - misalignment = rreq->start & (rsize - 1); - if (misalignment) { - rreq->start -= misalignment; - rreq->len += misalignment; - } - - rreq->len = round_up(rreq->len, rsize); - if (rreq->start < i_size && rreq->len > i_size - rreq->start) - rreq->len = i_size - rreq->start; -} - -/* * Completion of a request operation. */ static void cifs_rreq_done(struct netfs_io_request *rreq) @@ -342,7 +300,6 @@ const struct netfs_request_ops cifs_req_ops = { .init_request = cifs_init_request, .free_request = cifs_free_request, .free_subrequest = cifs_free_subrequest, - .expand_readahead = cifs_expand_readahead, .clamp_length = cifs_clamp_length, .issue_read = cifs_req_issue_read, .done = cifs_rreq_done, @@ -3200,8 +3157,6 @@ static int cifs_swap_rw(struct kiocb *iocb, struct iov_iter *iter) { ssize_t ret; - WARN_ON_ONCE(iov_iter_count(iter) != PAGE_SIZE); - if (iov_iter_rw(iter) == READ) ret = netfs_unbuffered_read_iter_locked(iocb, iter); else diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 3bbac925d076..bc926ab2555b 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -128,12 +128,14 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_flag("compress", Opt_compress), fsparam_flag("witness", Opt_witness), + /* Mount options which take uid or gid */ + fsparam_uid("backupuid", Opt_backupuid), + fsparam_gid("backupgid", Opt_backupgid), + fsparam_uid("uid", Opt_uid), + fsparam_uid("cruid", Opt_cruid), + fsparam_gid("gid", Opt_gid), + /* Mount options which take numeric value */ - fsparam_u32("backupuid", Opt_backupuid), - fsparam_u32("backupgid", Opt_backupgid), - fsparam_u32("uid", Opt_uid), - 
fsparam_u32("cruid", Opt_cruid), - fsparam_u32("gid", Opt_gid), fsparam_u32("file_mode", Opt_file_mode), fsparam_u32("dirmode", Opt_dirmode), fsparam_u32("dir_mode", Opt_dirmode), @@ -951,8 +953,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, int i, opt; bool is_smb3 = !strcmp(fc->fs_type->name, "smb3"); bool skip_parsing = false; - kuid_t uid; - kgid_t gid; cifs_dbg(FYI, "CIFS: parsing cifs mount option '%s'\n", param->key); @@ -1083,38 +1083,23 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, } break; case Opt_uid: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) - goto cifs_parse_mount_err; - ctx->linux_uid = uid; + ctx->linux_uid = result.uid; ctx->uid_specified = true; break; case Opt_cruid: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) - goto cifs_parse_mount_err; - ctx->cred_uid = uid; + ctx->cred_uid = result.uid; ctx->cruid_specified = true; break; case Opt_backupuid: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) - goto cifs_parse_mount_err; - ctx->backupuid = uid; + ctx->backupuid = result.uid; ctx->backupuid_specified = true; break; case Opt_backupgid: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) - goto cifs_parse_mount_err; - ctx->backupgid = gid; + ctx->backupgid = result.gid; ctx->backupgid_specified = true; break; case Opt_gid: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) - goto cifs_parse_mount_err; - ctx->linux_gid = gid; + ctx->linux_gid = result.gid; ctx->gid_specified = true; break; case Opt_port: diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 38a06e8a0f90..2ae2dbb6202b 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4484,6 +4484,16 @@ smb2_new_read_req(void **buf, unsigned int *total_len, return rc; } +static void smb2_readv_worker(struct work_struct *work) +{ + struct cifs_io_subrequest *rdata = + 
container_of(work, struct cifs_io_subrequest, subreq.work); + + netfs_subreq_terminated(&rdata->subreq, + (rdata->result == 0 || rdata->result == -EAGAIN) ? + rdata->got_bytes : rdata->result, true); +} + static void smb2_readv_callback(struct mid_q_entry *mid) { @@ -4578,9 +4588,8 @@ smb2_readv_callback(struct mid_q_entry *mid) rdata->result = 0; } rdata->credits.value = 0; - netfs_subreq_terminated(&rdata->subreq, - (rdata->result == 0 || rdata->result == -EAGAIN) ? - rdata->got_bytes : rdata->result, true); + INIT_WORK(&rdata->subreq.work, smb2_readv_worker); + queue_work(cifsiod_wq, &rdata->subreq.work); release_mid(mid); add_credits(server, &credits, 0); } @@ -4612,7 +4621,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata) io_parms.length = rdata->subreq.len; io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid; io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid; - io_parms.pid = rdata->pid; + io_parms.pid = rdata->req->pid; rc = smb2_new_read_req( (void **) &buf, &total_len, &io_parms, rdata, 0, 0); @@ -4864,7 +4873,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) .length = wdata->subreq.len, .persistent_fid = wdata->req->cfile->fid.persistent_fid, .volatile_fid = wdata->req->cfile->fid.volatile_fid, - .pid = wdata->pid, + .pid = wdata->req->pid, }; io_parms = &_io_parms; diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 8d10be1fe18a..c3ee42188d25 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -917,6 +917,40 @@ struct smb2_query_directory_rsp { __u8 Buffer[]; } __packed; +/* DeviceType Flags */ +#define FILE_DEVICE_CD_ROM 0x00000002 +#define FILE_DEVICE_CD_ROM_FILE_SYSTEM 0x00000003 +#define FILE_DEVICE_DFS 0x00000006 +#define FILE_DEVICE_DISK 0x00000007 +#define FILE_DEVICE_DISK_FILE_SYSTEM 0x00000008 +#define FILE_DEVICE_FILE_SYSTEM 0x00000009 +#define FILE_DEVICE_NAMED_PIPE 0x00000011 +#define FILE_DEVICE_NETWORK 0x00000012 +#define FILE_DEVICE_NETWORK_FILE_SYSTEM 0x00000014 
+#define FILE_DEVICE_NULL 0x00000015 +#define FILE_DEVICE_PARALLEL_PORT 0x00000016 +#define FILE_DEVICE_PRINTER 0x00000018 +#define FILE_DEVICE_SERIAL_PORT 0x0000001b +#define FILE_DEVICE_STREAMS 0x0000001e +#define FILE_DEVICE_TAPE 0x0000001f +#define FILE_DEVICE_TAPE_FILE_SYSTEM 0x00000020 +#define FILE_DEVICE_VIRTUAL_DISK 0x00000024 +#define FILE_DEVICE_NETWORK_REDIRECTOR 0x00000028 + +/* Device Characteristics */ +#define FILE_REMOVABLE_MEDIA 0x00000001 +#define FILE_READ_ONLY_DEVICE 0x00000002 +#define FILE_FLOPPY_DISKETTE 0x00000004 +#define FILE_WRITE_ONCE_MEDIA 0x00000008 +#define FILE_REMOTE_DEVICE 0x00000010 +#define FILE_DEVICE_IS_MOUNTED 0x00000020 +#define FILE_VIRTUAL_VOLUME 0x00000040 +#define FILE_DEVICE_SECURE_OPEN 0x00000100 +#define FILE_CHARACTERISTIC_TS_DEVICE 0x00001000 +#define FILE_CHARACTERISTIC_WEBDAV_DEVICE 0x00002000 +#define FILE_PORTABLE_DEVICE 0x00004000 +#define FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL 0x00020000 + /* * Maximum number of iovs we need for a set-info request. 
* The largest one is rename/hardlink diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index e7e07891781b..840c71c66b30 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2051,15 +2051,22 @@ out_err1: * @access: file access flags * @disposition: file disposition flags * @may_flags: set with MAY_ flags + * @is_dir: is creating open flags for directory * * Return: file open flags */ static int smb2_create_open_flags(bool file_present, __le32 access, __le32 disposition, - int *may_flags) + int *may_flags, + bool is_dir) { int oflags = O_NONBLOCK | O_LARGEFILE; + if (is_dir) { + access &= ~FILE_WRITE_DESIRE_ACCESS_LE; + ksmbd_debug(SMB, "Discard write access to a directory\n"); + } + if (access & FILE_READ_DESIRED_ACCESS_LE && access & FILE_WRITE_DESIRE_ACCESS_LE) { oflags |= O_RDWR; @@ -3167,7 +3174,9 @@ int smb2_open(struct ksmbd_work *work) open_flags = smb2_create_open_flags(file_present, daccess, req->CreateDisposition, - &may_flags); + &may_flags, + req->CreateOptions & FILE_DIRECTORY_FILE_LE || + (file_present && S_ISDIR(d_inode(path.dentry)->i_mode))); if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { if (open_flags & (O_CREAT | O_TRUNC)) { @@ -5314,8 +5323,13 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info = (struct filesystem_device_info *)rsp->Buffer; - info->DeviceType = cpu_to_le32(stfs.f_type); - info->DeviceCharacteristics = cpu_to_le32(0x00000020); + info->DeviceType = cpu_to_le32(FILE_DEVICE_DISK); + info->DeviceCharacteristics = + cpu_to_le32(FILE_DEVICE_IS_MOUNTED); + if (!test_tree_conn_flag(work->tcon, + KSMBD_TREE_CONN_FLAG_WRITABLE)) + info->DeviceCharacteristics |= + cpu_to_le32(FILE_READ_ONLY_DEVICE); rsp->OutputBufferLength = cpu_to_le32(8); break; } diff --git a/fs/stat.c b/fs/stat.c index 70bd3e888cfa..6f65b3456cad 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -214,6 +214,43 @@ int getname_statx_lookup_flags(int flags) return lookup_flags; } +static int vfs_statx_path(struct 
path *path, int flags, struct kstat *stat, + u32 request_mask) +{ + int error = vfs_getattr(path, stat, request_mask, flags); + + if (request_mask & STATX_MNT_ID_UNIQUE) { + stat->mnt_id = real_mount(path->mnt)->mnt_id_unique; + stat->result_mask |= STATX_MNT_ID_UNIQUE; + } else { + stat->mnt_id = real_mount(path->mnt)->mnt_id; + stat->result_mask |= STATX_MNT_ID; + } + + if (path_mounted(path)) + stat->attributes |= STATX_ATTR_MOUNT_ROOT; + stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; + + /* Handle STATX_DIOALIGN for block devices. */ + if (request_mask & STATX_DIOALIGN) { + struct inode *inode = d_backing_inode(path->dentry); + + if (S_ISBLK(inode->i_mode)) + bdev_statx_dioalign(inode, stat); + } + + return error; +} + +static int vfs_statx_fd(int fd, int flags, struct kstat *stat, + u32 request_mask) +{ + CLASS(fd_raw, f)(fd); + if (!f.file) + return -EBADF; + return vfs_statx_path(&f.file->f_path, flags, stat, request_mask); +} + /** * vfs_statx - Get basic and extra attributes by filename * @dfd: A file descriptor representing the base dir for a relative filename @@ -243,36 +280,13 @@ static int vfs_statx(int dfd, struct filename *filename, int flags, retry: error = filename_lookup(dfd, filename, lookup_flags, &path, NULL); if (error) - goto out; - - error = vfs_getattr(&path, stat, request_mask, flags); - - if (request_mask & STATX_MNT_ID_UNIQUE) { - stat->mnt_id = real_mount(path.mnt)->mnt_id_unique; - stat->result_mask |= STATX_MNT_ID_UNIQUE; - } else { - stat->mnt_id = real_mount(path.mnt)->mnt_id; - stat->result_mask |= STATX_MNT_ID; - } - - if (path.mnt->mnt_root == path.dentry) - stat->attributes |= STATX_ATTR_MOUNT_ROOT; - stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; - - /* Handle STATX_DIOALIGN for block devices. 
*/ - if (request_mask & STATX_DIOALIGN) { - struct inode *inode = d_backing_inode(path.dentry); - - if (S_ISBLK(inode->i_mode)) - bdev_statx_dioalign(inode, stat); - } - + return error; + error = vfs_statx_path(&path, flags, stat, request_mask); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } -out: return error; } @@ -289,18 +303,10 @@ int vfs_fstatat(int dfd, const char __user *filename, * If AT_EMPTY_PATH is set, we expect the common case to be that * empty path, and avoid doing all the extra pathname work. */ - if (dfd >= 0 && flags == AT_EMPTY_PATH) { - char c; - - ret = get_user(c, filename); - if (unlikely(ret)) - return ret; - - if (likely(!c)) - return vfs_fstat(dfd, stat); - } + if (flags == AT_EMPTY_PATH && vfs_empty_path(dfd, filename)) + return vfs_fstat(dfd, stat); - name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL); + name = getname_flags(filename, getname_statx_lookup_flags(statx_flags)); ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS); putname(name); @@ -488,34 +494,39 @@ static int do_readlinkat(int dfd, const char __user *pathname, char __user *buf, int bufsiz) { struct path path; + struct filename *name; int error; - int empty = 0; unsigned int lookup_flags = LOOKUP_EMPTY; if (bufsiz <= 0) return -EINVAL; retry: - error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty); - if (!error) { - struct inode *inode = d_backing_inode(path.dentry); - - error = empty ? 
-ENOENT : -EINVAL; - /* - * AFS mountpoints allow readlink(2) but are not symlinks - */ - if (d_is_symlink(path.dentry) || inode->i_op->readlink) { - error = security_inode_readlink(path.dentry); - if (!error) { - touch_atime(&path); - error = vfs_readlink(path.dentry, buf, bufsiz); - } - } - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; + name = getname_flags(pathname, lookup_flags); + error = filename_lookup(dfd, name, lookup_flags, &path, NULL); + if (unlikely(error)) { + putname(name); + return error; + } + + /* + * AFS mountpoints allow readlink(2) but are not symlinks + */ + if (d_is_symlink(path.dentry) || + d_backing_inode(path.dentry)->i_op->readlink) { + error = security_inode_readlink(path.dentry); + if (!error) { + touch_atime(&path); + error = vfs_readlink(path.dentry, buf, bufsiz); } + } else { + error = (name->name[0] == '\0') ? -ENOENT : -EINVAL; + } + path_put(&path); + putname(name); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; } return error; } @@ -674,7 +685,8 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags, if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; - /* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests + /* + * STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests * from userland. */ mask &= ~STATX_CHANGE_COOKIE; @@ -686,16 +698,41 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags, return cp_statx(&stat, buffer); } +int do_statx_fd(int fd, unsigned int flags, unsigned int mask, + struct statx __user *buffer) +{ + struct kstat stat; + int error; + + if (mask & STATX__RESERVED) + return -EINVAL; + if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) + return -EINVAL; + + /* + * STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests + * from userland. 
+ */ + mask &= ~STATX_CHANGE_COOKIE; + + error = vfs_statx_fd(fd, flags, &stat, mask); + if (error) + return error; + + return cp_statx(&stat, buffer); +} + /** * sys_statx - System call to get enhanced stats * @dfd: Base directory to pathwalk from *or* fd to stat. - * @filename: File to stat or "" with AT_EMPTY_PATH + * @filename: File to stat or either NULL or "" with AT_EMPTY_PATH * @flags: AT_* flags to control pathwalk. * @mask: Parts of statx struct actually required. * @buffer: Result buffer. * * Note that fstat() can be emulated by setting dfd to the fd of interest, - * supplying "" as the filename and setting AT_EMPTY_PATH in the flags. + * supplying "" (or preferably NULL) as the filename and setting AT_EMPTY_PATH + * in the flags. */ SYSCALL_DEFINE5(statx, int, dfd, const char __user *, filename, unsigned, flags, @@ -703,9 +740,24 @@ SYSCALL_DEFINE5(statx, struct statx __user *, buffer) { int ret; + unsigned lflags; struct filename *name; - name = getname_flags(filename, getname_statx_lookup_flags(flags), NULL); + /* + * Short-circuit handling of NULL and "" paths. + * + * For a NULL path we require and accept only the AT_EMPTY_PATH flag + * (possibly |'d with AT_STATX flags). + * + * However, glibc on 32-bit architectures implements fstatat as statx + * with the "" pathname and AT_NO_AUTOMOUNT | AT_EMPTY_PATH flags. + * Supporting this results in the uglification below. 
+ */ + lflags = flags & ~(AT_NO_AUTOMOUNT | AT_STATX_SYNC_TYPE); + if (lflags == AT_EMPTY_PATH && vfs_empty_path(dfd, filename)) + return do_statx_fd(dfd, flags & ~AT_NO_AUTOMOUNT, mask, buffer); + + name = getname_flags(filename, getname_statx_lookup_flags(flags)); ret = do_statx(dfd, name, flags, mask, buffer); putname(name); diff --git a/fs/super.c b/fs/super.c index b72f1d288e95..095ba793e10c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1502,8 +1502,17 @@ static int fs_bdev_thaw(struct block_device *bdev) lockdep_assert_held(&bdev->bd_fsfreeze_mutex); + /* + * The block device may have been frozen before it was claimed by a + * filesystem. Concurrently another process might try to mount that + * frozen block device and has temporarily claimed the block device for + * that purpose causing a concurrent fs_bdev_thaw() to end up here. The + * mounter is already about to abort mounting because they still saw an + * elevanted bdev->bd_fsfreeze_count so get_bdev_super() will return + * NULL in that case. 
+ */ sb = get_bdev_super(bdev); - if (WARN_ON_ONCE(!sb)) + if (!sb) return -EINVAL; if (sb->s_op->thaw_super) diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 3365a30dc1e0..5c0d07ddbda2 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -591,4 +591,5 @@ static void __exit exit_sysv_fs(void) module_init(init_sysv_fs) module_exit(exit_sysv_fs) +MODULE_DESCRIPTION("SystemV Filesystem"); MODULE_LICENSE("GPL"); diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 7c29f4afc23d..1028ab6d9a74 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -296,9 +296,9 @@ enum { }; static const struct fs_parameter_spec tracefs_param_specs[] = { - fsparam_u32 ("gid", Opt_gid), + fsparam_gid ("gid", Opt_gid), fsparam_u32oct ("mode", Opt_mode), - fsparam_u32 ("uid", Opt_uid), + fsparam_uid ("uid", Opt_uid), {} }; @@ -306,8 +306,6 @@ static int tracefs_parse_param(struct fs_context *fc, struct fs_parameter *param { struct tracefs_fs_info *opts = fc->s_fs_info; struct fs_parse_result result; - kuid_t uid; - kgid_t gid; int opt; opt = fs_parse(fc, tracefs_param_specs, param, &result); @@ -316,16 +314,10 @@ static int tracefs_parse_param(struct fs_context *fc, struct fs_parameter *param switch (opt) { case Opt_uid: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) - return invalf(fc, "Unknown uid"); - opts->uid = uid; + opts->uid = result.uid; break; case Opt_gid: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) - return invalf(fc, "Unknown gid"); - opts->gid = gid; + opts->gid = result.gid; break; case Opt_mode: opts->mode = result.uint_32 & S_IALLUGO; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 27c85d92d1dc..61f25d3cf3f7 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -188,7 +188,6 @@ Eend: "offset=%lu", dir->i_ino, (page->index<<PAGE_SHIFT)+offs); fail: - SetPageError(page); return false; } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index eee7320ab0b0..17e409ceaa33 100644 --- a/fs/userfaultfd.c +++ 
b/fs/userfaultfd.c @@ -2057,7 +2057,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, goto out; features = uffdio_api.features; ret = -EINVAL; - if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) + if (uffdio_api.api != UFFD_API) goto err_out; ret = -EPERM; if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE)) @@ -2081,6 +2081,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; #endif + + ret = -EINVAL; + if (features & ~uffdio_api.features) + goto err_out; + uffdio_api.ioctls = UFFD_API_IOCTLS; ret = -EFAULT; if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c index 118dedef8ebe..fdb4da24d662 100644 --- a/fs/vboxsf/file.c +++ b/fs/vboxsf/file.c @@ -228,26 +228,19 @@ const struct inode_operations vboxsf_reg_iops = { static int vboxsf_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; struct vboxsf_handle *sf_handle = file->private_data; - loff_t off = page_offset(page); + loff_t off = folio_pos(folio); u32 nread = PAGE_SIZE; u8 *buf; int err; - buf = kmap(page); + buf = kmap_local_folio(folio, 0); err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf); - if (err == 0) { - memset(&buf[nread], 0, PAGE_SIZE - nread); - flush_dcache_page(page); - SetPageUptodate(page); - } else { - SetPageError(page); - } + buf = folio_zero_tail(folio, nread, buf + nread); - kunmap(page); - unlock_page(page); + kunmap_local(buf); + folio_end_read(folio, err == 0); return err; } @@ -295,7 +288,6 @@ static int vboxsf_writepage(struct page *page, struct writeback_control *wbc) kref_put(&sf_handle->refcount, vboxsf_handle_release); if (err == 0) { - ClearPageError(page); /* mtime changed */ sf_i->force_restat = 1; } else { diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index ffb1d565da39..e95b8a48d8a0 100644 --- 
a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -41,8 +41,8 @@ enum { opt_nls, opt_uid, opt_gid, opt_ttl, opt_dmode, opt_fmode, static const struct fs_parameter_spec vboxsf_fs_parameters[] = { fsparam_string ("nls", opt_nls), - fsparam_u32 ("uid", opt_uid), - fsparam_u32 ("gid", opt_gid), + fsparam_uid ("uid", opt_uid), + fsparam_gid ("gid", opt_gid), fsparam_u32 ("ttl", opt_ttl), fsparam_u32oct ("dmode", opt_dmode), fsparam_u32oct ("fmode", opt_fmode), @@ -55,8 +55,6 @@ static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct vboxsf_fs_context *ctx = fc->fs_private; struct fs_parse_result result; - kuid_t uid; - kgid_t gid; int opt; opt = fs_parse(fc, vboxsf_fs_parameters, param, &result); @@ -73,16 +71,10 @@ static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param) param->string = NULL; break; case opt_uid: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) - return -EINVAL; - ctx->o.uid = uid; + ctx->o.uid = result.uid; break; case opt_gid: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) - return -EINVAL; - ctx->o.gid = gid; + ctx->o.gid = result.gid; break; case opt_ttl: ctx->o.ttl = msecs_to_jiffies(result.uint_32); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index c101cf266bc4..6af6f744fdd6 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4058,20 +4058,32 @@ xfs_bmapi_reserve_delalloc( xfs_extlen_t indlen; uint64_t fdblocks; int error; - xfs_fileoff_t aoff = off; + xfs_fileoff_t aoff; + bool use_cowextszhint = + whichfork == XFS_COW_FORK && !prealloc; +retry: /* * Cap the alloc length. Keep track of prealloc so we know whether to * tag the inode before we return. 
*/ + aoff = off; alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN); if (!eof) alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); if (prealloc && alen >= len) prealloc = alen - len; - /* Figure out the extent size, adjust alen */ - if (whichfork == XFS_COW_FORK) { + /* + * If we're targetting the COW fork but aren't creating a speculative + * posteof preallocation, try to expand the reservation to align with + * the COW extent size hint if there's sufficient free space. + * + * Unlike the data fork, the CoW cancellation functions will free all + * the reservations at inactivation, so we don't require that every + * delalloc reservation have a dirty pagecache. + */ + if (use_cowextszhint) { struct xfs_bmbt_irec prev; xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); @@ -4090,7 +4102,7 @@ xfs_bmapi_reserve_delalloc( */ error = xfs_quota_reserve_blkres(ip, alen); if (error) - return error; + goto out; /* * Split changing sb for alen and indlen since they could be coming @@ -4140,6 +4152,17 @@ out_unreserve_frextents: out_unreserve_quota: if (XFS_IS_QUOTA_ON(mp)) xfs_quota_unreserve_blkres(ip, alen); +out: + if (error == -ENOSPC || error == -EDQUOT) { + trace_xfs_delalloc_enospc(ip, off, len); + + if (prealloc || use_cowextszhint) { + /* retry without any preallocation */ + use_cowextszhint = false; + prealloc = 0; + goto retry; + } + } return error; } diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 97996cb79aaa..454b63ef7201 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -996,7 +996,7 @@ struct xfs_getparents_by_handle { #define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom) #define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) #define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) -#define XFS_IOC_EXCHANGE_RANGE _IOWR('X', 129, struct xfs_exchange_range) +#define XFS_IOC_EXCHANGE_RANGE _IOW ('X', 129, struct xfs_exchange_range) /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ diff 
--git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index e7a7bfbe75b4..513b50da6215 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -379,10 +379,13 @@ xfs_dinode_verify_fork( /* * A directory small enough to fit in the inode must be stored * in local format. The directory sf <-> extents conversion - * code updates the directory size accordingly. + * code updates the directory size accordingly. Directories + * being truncated have zero size and are not subject to this + * check. */ if (S_ISDIR(mode)) { - if (be64_to_cpu(dip->di_size) <= fork_size && + if (dip->di_size && + be64_to_cpu(dip->di_size) <= fork_size && fork_format != XFS_DINODE_FMT_LOCAL) return __this_address; } @@ -528,9 +531,19 @@ xfs_dinode_verify( if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) return __this_address; - /* No zero-length symlinks/dirs. */ - if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) - return __this_address; + /* + * No zero-length symlinks/dirs unless they're unlinked and hence being + * inactivated. + */ + if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) { + if (dip->di_version > 1) { + if (dip->di_nlink) + return __this_address; + } else { + if (dip->di_onlink) + return __this_address; + } + } fa = xfs_dinode_verify_nrext64(mp, dip); if (fa) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index ac2e77ebb54c..a4d9fbc21b83 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -486,13 +486,11 @@ out_unlock: /* * Test whether it is appropriate to check an inode for and free post EOF - * blocks. The 'force' parameter determines whether we should also consider - * regular files that are marked preallocated or append-only. + * blocks. 
*/ bool xfs_can_free_eofblocks( - struct xfs_inode *ip, - bool force) + struct xfs_inode *ip) { struct xfs_bmbt_irec imap; struct xfs_mount *mp = ip->i_mount; @@ -526,11 +524,11 @@ xfs_can_free_eofblocks( return false; /* - * Do not free real preallocated or append-only files unless the file - * has delalloc blocks and we are forced to remove them. + * Only free real extents for inodes with persistent preallocations or + * the append-only flag. */ if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) - if (!force || ip->i_delayed_blks == 0) + if (ip->i_delayed_blks == 0) return false; /* @@ -584,6 +582,22 @@ xfs_free_eofblocks( /* Wait on dio to ensure i_size has settled. */ inode_dio_wait(VFS_I(ip)); + /* + * For preallocated files only free delayed allocations. + * + * Note that this means we also leave speculative preallocations in + * place for preallocated files. + */ + if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) { + if (ip->i_delayed_blks) { + xfs_bmap_punch_delalloc_range(ip, + round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize), + LLONG_MAX); + } + xfs_inode_clear_eofblocks_tag(ip); + return 0; + } + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { ASSERT(xfs_is_shutdown(mp)); @@ -891,7 +905,7 @@ xfs_prepare_shift( * Trim eofblocks to avoid shifting uninitialized post-eof preallocation * into the accessible region of the file. 
*/ - if (xfs_can_free_eofblocks(ip, true)) { + if (xfs_can_free_eofblocks(ip)) { error = xfs_free_eofblocks(ip); if (error) return error; diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 51f84d8ff372..eb0895bfb9da 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -63,7 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, xfs_off_t len); /* EOF block manipulation functions */ -bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); +bool xfs_can_free_eofblocks(struct xfs_inode *ip); int xfs_free_eofblocks(struct xfs_inode *ip); int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 0953163a2d84..cf629302d48e 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -86,9 +86,8 @@ xfs_inode_alloc( return NULL; } - /* VFS doesn't initialise i_mode or i_state! */ + /* VFS doesn't initialise i_mode! */ VFS_I(ip)->i_mode = 0; - VFS_I(ip)->i_state = 0; mapping_set_large_folios(VFS_I(ip)->i_mapping); XFS_STATS_INC(mp, vn_active); @@ -314,6 +313,7 @@ xfs_reinit_inode( dev_t dev = inode->i_rdev; kuid_t uid = inode->i_uid; kgid_t gid = inode->i_gid; + unsigned long state = inode->i_state; error = inode_init_always(mp->m_super, inode); @@ -324,6 +324,7 @@ xfs_reinit_inode( inode->i_rdev = dev; inode->i_uid = uid; inode->i_gid = gid; + inode->i_state = state; mapping_set_large_folios(inode->i_mapping); return error; } @@ -1155,7 +1156,7 @@ xfs_inode_free_eofblocks( } *lockflags |= XFS_IOLOCK_EXCL; - if (xfs_can_free_eofblocks(ip, false)) + if (xfs_can_free_eofblocks(ip)) return xfs_free_eofblocks(ip); /* inode could be preallocated or append-only */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 58fb7a5062e1..a4e3cd8971fc 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -42,6 +42,7 @@ #include "xfs_pnfs.h" #include "xfs_parent.h" #include "xfs_xattr.h" +#include "xfs_sb.h" struct kmem_cache *xfs_inode_cache; @@ -870,9 
+871,16 @@ xfs_init_new_inode( * this saves us from needing to run a separate transaction to set the * fork offset in the immediate future. */ - if (init_xattrs && xfs_has_attr(mp)) { + if (init_xattrs) { ip->i_forkoff = xfs_default_attroffset(ip) >> 3; xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); + + if (!xfs_has_attr(mp)) { + spin_lock(&mp->m_sb_lock); + xfs_add_attr(mp); + spin_unlock(&mp->m_sb_lock); + xfs_log_sb(tp); + } } /* @@ -1595,7 +1603,7 @@ xfs_release( if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) return 0; - if (xfs_can_free_eofblocks(ip, false)) { + if (xfs_can_free_eofblocks(ip)) { /* * Check if the inode is being opened, written and closed * frequently and we have delayed allocation blocks outstanding @@ -1856,15 +1864,13 @@ xfs_inode_needs_inactive( /* * This file isn't being freed, so check if there are post-eof blocks - * to free. @force is true because we are evicting an inode from the - * cache. Post-eof blocks must be freed, lest we end up with broken - * free space accounting. + * to free. * * Note: don't bother with iolock here since lockdep complains about * acquiring it in reclaim context. We have the only reference to the * inode at this point anyways. */ - return xfs_can_free_eofblocks(ip, true); + return xfs_can_free_eofblocks(ip); } /* @@ -1947,15 +1953,11 @@ xfs_inactive( if (VFS_I(ip)->i_nlink != 0) { /* - * force is true because we are evicting an inode from the - * cache. Post-eof blocks must be freed, lest we end up with - * broken free space accounting. - * * Note: don't bother with iolock here since lockdep complains * about acquiring it in reclaim context. We have the only * reference to the inode at this point anyways. */ - if (xfs_can_free_eofblocks(ip, true)) + if (xfs_can_free_eofblocks(ip)) error = xfs_free_eofblocks(ip); goto out; @@ -2548,11 +2550,26 @@ xfs_ifree_cluster( * This buffer may not have been correctly initialised as we * didn't read it from disk. 
That's not important because we are * only using to mark the buffer as stale in the log, and to - * attach stale cached inodes on it. That means it will never be - * dispatched for IO. If it is, we want to know about it, and we - * want it to fail. We can acheive this by adding a write - * verifier to the buffer. + * attach stale cached inodes on it. + * + * For the inode that triggered the cluster freeing, this + * attachment may occur in xfs_inode_item_precommit() after we + * have marked this buffer stale. If this buffer was not in + * memory before xfs_ifree_cluster() started, it will not be + * marked XBF_DONE and this will cause problems later in + * xfs_inode_item_precommit() when we trip over a (stale, !done) + * buffer to attached to the transaction. + * + * Hence we have to mark the buffer as XFS_DONE here. This is + * safe because we are also marking the buffer as XBF_STALE and + * XFS_BLI_STALE. That means it will never be dispatched for + * IO and it won't be unlocked until the cluster freeing has + * been committed to the journal and the buffer unpinned. If it + * is written, we want to know about it, and we want it to + * fail. We can acheive this by adding a write verifier to the + * buffer. */ + bp->b_flags |= XBF_DONE; bp->b_ops = &xfs_inode_buf_ops; /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 378342673925..414903885ab9 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1148,33 +1148,23 @@ xfs_buffered_write_iomap_begin( } } -retry: - error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, - end_fsb - offset_fsb, prealloc_blocks, - allocfork == XFS_DATA_FORK ? &imap : &cmap, - allocfork == XFS_DATA_FORK ? &icur : &ccur, - allocfork == XFS_DATA_FORK ? 
eof : cow_eof); - switch (error) { - case 0: - break; - case -ENOSPC: - case -EDQUOT: - /* retry without any preallocation */ - trace_xfs_delalloc_enospc(ip, offset, count); - if (prealloc_blocks) { - prealloc_blocks = 0; - goto retry; - } - fallthrough; - default: - goto out_unlock; - } - if (allocfork == XFS_COW_FORK) { + error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, + end_fsb - offset_fsb, prealloc_blocks, &cmap, + &ccur, cow_eof); + if (error) + goto out_unlock; + trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap); goto found_cow; } + error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, + end_fsb - offset_fsb, prealloc_blocks, &imap, &icur, + eof); + if (error) + goto out_unlock; + /* * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch * them out if the write happens to fail. diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ff222827e550..a00dcbc77e12 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -17,6 +17,8 @@ #include "xfs_da_btree.h" #include "xfs_attr.h" #include "xfs_trans.h" +#include "xfs_trans_space.h" +#include "xfs_bmap_btree.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" @@ -811,6 +813,7 @@ xfs_setattr_size( struct xfs_trans *tp; int error; uint lock_flags = 0; + uint resblks = 0; bool did_zeroing = false; xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); @@ -917,7 +920,17 @@ xfs_setattr_size( return error; } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + /* + * For realtime inode with more than one block rtextsize, we need the + * block reservation for bmap btree block allocations/splits that can + * happen since it could split the tail written extent and convert the + * right beyond EOF one to unwritten. 
+ */ + if (xfs_inode_has_bigrtalloc(ip)) + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, + 0, 0, &tp); if (error) return error; diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index b20fa25a7e8d..052e5c98c105 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -46,6 +46,7 @@ mandatory-y += pci.h mandatory-y += percpu.h mandatory-y += pgalloc.h mandatory-y += preempt.h +mandatory-y += runtime-const.h mandatory-y += rwonce.h mandatory-y += sections.h mandatory-y += serial.h diff --git a/include/asm-generic/runtime-const.h b/include/asm-generic/runtime-const.h new file mode 100644 index 000000000000..670499459514 --- /dev/null +++ b/include/asm-generic/runtime-const.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +/* + * This is the fallback for when the architecture doesn't + * support the runtime const operations. + * + * We just use the actual symbols as-is. 
+ */ +#define runtime_const_ptr(sym) (sym) +#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym)) +#define runtime_const_init(type,sym) do { } while (0) + +#endif diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index 933ca6581aba..fabcefe8a80a 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -19,7 +19,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, #ifndef sys_mmap asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long fd, off_t pgoff); + unsigned long fd, unsigned long off); #endif #ifndef sys_rt_sigreturn diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5703526d6ebf..389a78415b9b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -944,6 +944,14 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end) +#define RUNTIME_NAME(t,x) runtime_##t##_##x + +#define RUNTIME_CONST(t,x) \ + . = ALIGN(8); \ + RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \ + *(RUNTIME_NAME(t,x)); \ + } + /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ . 
= ALIGN(8); \ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 50aa87f8d77f..e4070fb02b11 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -746,6 +746,8 @@ struct bpf_verifier_env { /* Same as scratched_regs but for stack slots */ u64 scratched_stack_slots; u64 prev_log_pos, prev_insn_print_pos; + /* buffer used to temporary hold constants as scalar registers */ + struct bpf_reg_state fake_reg[2]; /* buffer used to generate temporary string representations, * e.g., in reg_type_str() to generate reg_type string */ diff --git a/include/linux/btf.h b/include/linux/btf.h index f9e56fd12a9f..7c3e40c3295e 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -82,7 +82,7 @@ * as to avoid issues such as the compiler inlining or eliding either a static * kfunc, or a global kfunc in an LTO build. */ -#define __bpf_kfunc __used noinline +#define __bpf_kfunc __used __retain noinline #define __bpf_kfunc_start_defs() \ __diag_push(); \ diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index c2d09bc4f976..80c4181e194a 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -63,17 +63,20 @@ #define __free(_name) __cleanup(__free_##_name) -#define __get_and_null_ptr(p) \ - ({ __auto_type __ptr = &(p); \ - __auto_type __val = *__ptr; \ - *__ptr = NULL; __val; }) +#define __get_and_null(p, nullvalue) \ + ({ \ + __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = nullvalue; \ + __val; \ + }) static inline __must_check const volatile void * __must_check_fn(const volatile void *val) { return val; } #define no_free_ptr(p) \ - ((typeof(p)) __must_check_fn(__get_and_null_ptr(p))) + ((typeof(p)) __must_check_fn(__get_and_null(p, NULL))) #define return_ptr(p) return no_free_ptr(p) diff --git a/include/linux/closure.h b/include/linux/closure.h index 99155df162d0..2af44427107d 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -159,6 +159,7 @@ struct closure { 
#ifdef CONFIG_DEBUG_CLOSURES #define CLOSURE_MAGIC_DEAD 0xc054dead #define CLOSURE_MAGIC_ALIVE 0xc054a11e +#define CLOSURE_MAGIC_STACK 0xc05451cc unsigned int magic; struct list_head all; @@ -285,6 +286,21 @@ static inline void closure_get(struct closure *cl) } /** + * closure_get_not_zero + */ +static inline bool closure_get_not_zero(struct closure *cl) +{ + unsigned old = atomic_read(&cl->remaining); + do { + if (!(old & CLOSURE_REMAINING_MASK)) + return false; + + } while (!atomic_try_cmpxchg_acquire(&cl->remaining, &old, old + 1)); + + return true; +} + +/** * closure_init - Initialize a closure, setting the refcount to 1 * @cl: closure to initialize * @parent: parent of the new closure. cl will take a refcount on it for its @@ -308,6 +324,18 @@ static inline void closure_init_stack(struct closure *cl) { memset(cl, 0, sizeof(struct closure)); atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +#ifdef CONFIG_DEBUG_CLOSURES + cl->magic = CLOSURE_MAGIC_STACK; +#endif +} + +static inline void closure_init_stack_release(struct closure *cl) +{ + memset(cl, 0, sizeof(struct closure)); + atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +#ifdef CONFIG_DEBUG_CLOSURES + cl->magic = CLOSURE_MAGIC_STACK; +#endif } /** @@ -355,6 +383,8 @@ do { \ */ #define closure_return(_cl) continue_at((_cl), NULL, NULL) +void closure_return_sync(struct closure *cl); + /** * continue_at_nobarrier - jump to another function without barrier * diff --git a/include/linux/compat.h b/include/linux/compat.h index 233f61ec8afc..56cebaff0c91 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -608,7 +608,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf); asmlinkage long compat_sys_truncate(const char __user *, compat_off_t); -asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t); +asmlinkage long compat_sys_ftruncate(unsigned int, 
compat_off_t); /* No generic prototype for truncate64, ftruncate64, fallocate */ asmlinkage long compat_sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 93600de3800b..f14c275950b5 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __preserve_most #endif +/* + * Annotating a function/variable with __retain tells the compiler to place + * the object in its own section and set the flag SHF_GNU_RETAIN. This flag + * instructs the linker to retain the object during garbage-cleanup or LTO + * phases. + * + * Note that the __used macro is also used to prevent functions or data + * being optimized out, but operates at the compiler/IR-level and may still + * allow unintended removal of objects during linking. + * + * Optional: only supported since gcc >= 11, clang >= 13 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#retain + */ +#if __has_attribute(__retain__) && \ + (defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \ + defined(CONFIG_LTO_CLANG)) +# define __retain __attribute__((__retain__)) +#else +# define __retain +#endif + /* Compiler specific macros. 
*/ #ifdef __clang__ #include <linux/compiler-clang.h> diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bf53e3894aae..bff956f7b2b9 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -71,7 +71,7 @@ extern const struct qstr dotdot_name; # define DNAME_INLINE_LEN 40 /* 192 bytes */ #else # ifdef CONFIG_SMP -# define DNAME_INLINE_LEN 40 /* 128 bytes */ +# define DNAME_INLINE_LEN 36 /* 128 bytes */ # else # define DNAME_INLINE_LEN 44 /* 128 bytes */ # endif @@ -89,13 +89,18 @@ struct dentry { struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */ /* Ref lookup also touches following */ - struct lockref d_lockref; /* per-dentry lock and refcount */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ void *d_fsdata; /* fs-specific data */ + /* --- cacheline 2 boundary (128 bytes) --- */ + struct lockref d_lockref; /* per-dentry lock and refcount + * keep separate from RCU lookup area if + * possible! 
+ */ union { struct list_head d_lru; /* LRU list */ @@ -278,6 +283,8 @@ static inline unsigned d_count(const struct dentry *dentry) return dentry->d_lockref.count; } +ino_t d_parent_ino(struct dentry *dentry); + /* * helper function for dentry_operations.d_dname() members */ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index bb37ad5cc954..893a1d21dc1c 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -158,6 +158,7 @@ struct fid { #define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */ #define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */ +#define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */ /** * struct export_operations - for nfsd to communicate with file systems @@ -305,6 +306,7 @@ static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid, extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, + unsigned int flags, int (*acceptable)(void *, struct dentry *), void *context); extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, diff --git a/include/linux/file.h b/include/linux/file.h index 45d0f4800abd..237931f20739 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -97,6 +97,26 @@ extern void put_unused_fd(unsigned int fd); DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), get_unused_fd_flags(flags), unsigned flags) +/* + * take_fd() will take care to set @fd to -EBADF ensuring that + * CLASS(get_unused_fd) won't call put_unused_fd(). 
This makes it + * easier to rely on CLASS(get_unused_fd): + * + * struct file *f; + * + * CLASS(get_unused_fd, fd)(O_CLOEXEC); + * if (fd < 0) + * return fd; + * + * f = dentry_open(&path, O_RDONLY, current_cred()); + * if (IS_ERR(f)) + * return PTR_ERR(fd); + * + * fd_install(fd, f); + * return take_fd(fd); + */ +#define take_fd(fd) __get_and_null(fd, -EBADF) + extern void fd_install(unsigned int fd, struct file *file); int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags); diff --git a/include/linux/filter.h b/include/linux/filter.h index 0f12cf01070e..5669da513cd7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1208,18 +1208,18 @@ static inline bool bpf_jit_kallsyms_enabled(void) return false; } -const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, +int __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym); bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); -static inline const char * +static inline int bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { - const char *ret = __bpf_address_lookup(addr, size, off, sym); + int ret = __bpf_address_lookup(addr, size, off, sym); if (ret && modname) *modname = NULL; @@ -1263,11 +1263,11 @@ static inline bool bpf_jit_kallsyms_enabled(void) return false; } -static inline const char * +static inline int __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym) { - return NULL; + return 0; } static inline bool is_bpf_text_address(unsigned long addr) @@ -1286,11 +1286,11 @@ static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) return NULL; } -static inline const char * +static inline int bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, 
char *sym) { - return NULL; + return 0; } static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..dc9f9c4b2572 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -660,9 +660,13 @@ struct inode { }; dev_t i_rdev; loff_t i_size; - struct timespec64 __i_atime; - struct timespec64 __i_mtime; - struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */ + time64_t i_atime_sec; + time64_t i_mtime_sec; + time64_t i_ctime_sec; + u32 i_atime_nsec; + u32 i_mtime_nsec; + u32 i_ctime_nsec; + u32 i_generation; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -719,10 +723,10 @@ struct inode { unsigned i_dir_seq; }; - __u32 i_generation; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ + /* 32-bit hole reserved for expanding i_fsnotify_mask */ struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif @@ -1538,23 +1542,27 @@ struct timespec64 inode_set_ctime_current(struct inode *inode); static inline time64_t inode_get_atime_sec(const struct inode *inode) { - return inode->__i_atime.tv_sec; + return inode->i_atime_sec; } static inline long inode_get_atime_nsec(const struct inode *inode) { - return inode->__i_atime.tv_nsec; + return inode->i_atime_nsec; } static inline struct timespec64 inode_get_atime(const struct inode *inode) { - return inode->__i_atime; + struct timespec64 ts = { .tv_sec = inode_get_atime_sec(inode), + .tv_nsec = inode_get_atime_nsec(inode) }; + + return ts; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_atime = ts; + inode->i_atime_sec = ts.tv_sec; + inode->i_atime_nsec = ts.tv_nsec; return ts; } @@ -1563,28 +1571,32 @@ static inline struct timespec64 inode_set_atime(struct inode *inode, { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; + return inode_set_atime_to_ts(inode, ts); } static inline time64_t 
inode_get_mtime_sec(const struct inode *inode) { - return inode->__i_mtime.tv_sec; + return inode->i_mtime_sec; } static inline long inode_get_mtime_nsec(const struct inode *inode) { - return inode->__i_mtime.tv_nsec; + return inode->i_mtime_nsec; } static inline struct timespec64 inode_get_mtime(const struct inode *inode) { - return inode->__i_mtime; + struct timespec64 ts = { .tv_sec = inode_get_mtime_sec(inode), + .tv_nsec = inode_get_mtime_nsec(inode) }; + return ts; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_mtime = ts; + inode->i_mtime_sec = ts.tv_sec; + inode->i_mtime_nsec = ts.tv_nsec; return ts; } @@ -1598,23 +1610,27 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode, static inline time64_t inode_get_ctime_sec(const struct inode *inode) { - return inode->__i_ctime.tv_sec; + return inode->i_ctime_sec; } static inline long inode_get_ctime_nsec(const struct inode *inode) { - return inode->__i_ctime.tv_nsec; + return inode->i_ctime_nsec; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - return inode->__i_ctime; + struct timespec64 ts = { .tv_sec = inode_get_ctime_sec(inode), + .tv_nsec = inode_get_ctime_nsec(inode) }; + + return ts; } static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_ctime = ts; + inode->i_ctime_sec = ts.tv_sec; + inode->i_ctime_nsec = ts.tv_nsec; return ts; } @@ -1926,6 +1942,8 @@ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, extern bool may_open_dev(const struct path *path); umode_t mode_strip_sgid(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode); +bool in_group_or_capable(struct mnt_idmap *idmap, + const struct inode *inode, vfsgid_t vfsgid); /* * This is the "filldir" function type, used by readdir() to let @@ -2685,7 +2703,7 @@ static inline struct file *file_clone_open(struct file *file) } extern int filp_close(struct 
file *, fl_owner_t id); -extern struct filename *getname_flags(const char __user *, int, int *); +extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); @@ -3029,7 +3047,12 @@ extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data); -extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); +struct inode *iget5_locked(struct super_block *, unsigned long, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *); +struct inode *iget5_locked_rcu(struct super_block *, unsigned long, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); extern struct inode *find_inode_nowait(struct super_block *, unsigned long, @@ -3351,6 +3374,10 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); extern void generic_set_sb_d_ops(struct super_block *sb); +extern int generic_ci_match(const struct inode *parent, + const struct qstr *name, + const struct qstr *folded_name, + const u8 *de_name, u32 de_name_len); static inline bool sb_has_encoding(const struct super_block *sb) { @@ -3436,20 +3463,6 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) return 0; } -static inline ino_t parent_ino(struct dentry *dentry) -{ - ino_t res; - - /* - * Don't strictly need d_lock here? If the parent ino could change - * then surely we'd have a deeper race in the caller? 
- */ - spin_lock(&dentry->d_lock); - res = dentry->d_parent->d_inode->i_ino; - spin_unlock(&dentry->d_lock); - return res; -} - /* Transaction based IO helpers */ /* @@ -3574,7 +3587,7 @@ static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, "..", 2, ctx->pos, - parent_ino(file->f_path.dentry), DT_DIR); + d_parent_ino(file->f_path.dentry), DT_DIR); } static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) { @@ -3613,4 +3626,21 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); +static inline bool vfs_empty_path(int dfd, const char __user *path) +{ + char c; + + if (dfd < 0) + return false; + + /* We now allow NULL to be used for empty path. */ + if (!path) + return true; + + if (unlikely(get_user(c, path))) + return false; + + return !c; +} + #endif /* _LINUX_FS_H */ diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index d3350979115f..6cf713a7e6c6 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -28,7 +28,7 @@ typedef int fs_param_type(struct p_log *, */ fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64, fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev, - fs_param_is_path, fs_param_is_fd; + fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid; /* * Specification of the type of value a parameter wants. 
@@ -57,6 +57,8 @@ struct fs_parse_result { int int_32; /* For spec_s32/spec_enum */ unsigned int uint_32; /* For spec_u32{,_octal,_hex}/spec_enum */ u64 uint_64; /* For spec_u64 */ + kuid_t uid; + kgid_t gid; }; }; @@ -131,6 +133,8 @@ static inline bool fs_validate_description(const char *name, #define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL) #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) +#define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL) +#define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL) /* String parameter that allows empty argument */ #define fsparam_string_empty(NAME, OPT) \ diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index bdf7f3eddf0a..4c91a019972b 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -19,6 +19,7 @@ enum fscache_cache_trace; enum fscache_cookie_trace; enum fscache_access_trace; +enum fscache_volume_trace; enum fscache_cache_state { FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */ @@ -97,6 +98,11 @@ extern void fscache_withdraw_cookie(struct fscache_cookie *cookie); extern void fscache_io_error(struct fscache_cache *cache); +extern struct fscache_volume * +fscache_try_get_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); +extern void fscache_put_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); extern void fscache_end_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, enum fscache_access_trace why); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 4da80e92f804..278620e063ab 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -112,7 +112,13 @@ static inline int fsnotify_file(struct file *file, __u32 mask) { const struct path *path; - if (file->f_mode & 
FMODE_NONOTIFY) + /* + * FMODE_NONOTIFY are fds generated by fanotify itself which should not + * generate new events. We also don't want to generate events for + * FMODE_PATH fds (involves open & close events) as they are just + * handle creation / destruction events and not "real" file events. + */ + if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH)) return 0; path = &file->f_path; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 800995c425e0..b792274189a3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -86,15 +86,15 @@ struct ftrace_hash; #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \ defined(CONFIG_DYNAMIC_FTRACE) -const char * +int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym); #else -static inline const char * +static inline int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { - return NULL; + return 0; } #endif diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 9709537370ee..424acb98c7c2 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -960,8 +960,6 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr); #define builtin_i2c_driver(__i2c_driver) \ builtin_driver(__i2c_driver, i2c_add_driver) -#endif /* I2C */ - /* must call put_device() when done with returned i2c_client device */ struct i2c_client *i2c_find_device_by_fwnode(struct fwnode_handle *fwnode); @@ -971,6 +969,28 @@ struct i2c_adapter *i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode); /* must call i2c_put_adapter() when done with returned i2c_adapter device */ struct i2c_adapter *i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode); +#else /* I2C */ + +static inline struct i2c_client * +i2c_find_device_by_fwnode(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline struct i2c_adapter * +i2c_find_adapter_by_fwnode(struct fwnode_handle 
*fwnode) +{ + return NULL; +} + +static inline struct i2c_adapter * +i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode) +{ + return NULL; +} + +#endif /* !I2C */ + #if IS_ENABLED(CONFIG_OF) /* must call put_device() when done with returned i2c_client device */ static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index ede42dce1506..3bb6198d1523 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -207,7 +207,6 @@ struct io_submit_state { bool need_plug; bool cq_flush; unsigned short submit_nr; - unsigned int cqes_count; struct blk_plug plug; }; diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 1068a7318d89..75a2fb8b16c3 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -21,6 +21,8 @@ enum kcov_mode { KCOV_MODE_TRACE_PC = 2, /* Collecting comparison operands mode. */ KCOV_MODE_TRACE_CMP = 3, + /* The process owns a KCOV remote reference. 
*/ + KCOV_MODE_REMOTE = 4, }; #define KCOV_IN_CTXSW (1 << 30) diff --git a/include/linux/libata.h b/include/linux/libata.h index 13fb41d25da6..7d3bd7c9664a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1249,6 +1249,7 @@ extern int ata_slave_link_init(struct ata_port *ap); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_port_probe(struct ata_port *ap); +extern void ata_port_free(struct ata_port *ap); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim, diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index f804b76cde44..44488b1ab9a9 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -413,7 +413,7 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, #ifdef CONFIG_AUDIT LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule) LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule) LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6..d45bfb7cf81d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2029,7 +2029,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 pcc_ifa2[0x1]; u8 reserved_at_3f1[0xf]; - u8 reserved_at_400[0x400]; + u8 reserved_at_400[0x40]; + + u8 reserved_at_440[0x8]; + u8 max_num_eqs_24b[0x18]; + u8 reserved_at_460[0x3a0]; }; enum mlx5_ifc_flow_destination_type { diff --git a/include/linux/mm.h b/include/linux/mm.h index 9849dfda44d4..eb7c96d24ac0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -406,6 +406,11 @@ extern unsigned int 
kobjsize(const void *objp); #define VM_ALLOW_ANY_UNCACHED VM_NONE #endif +#ifdef CONFIG_64BIT +/* VM is sealed, in vm_flags */ +#define VM_SEALED _BITUL(63) +#endif + /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) @@ -3776,14 +3781,7 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); static inline bool want_init_on_free(void) { return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, - &init_on_free); -} - -DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free); -static inline bool want_init_mlocked_on_free(void) -{ - return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, - &init_mlocked_on_free); + &init_on_free); } extern bool _debug_pagealloc_enabled_early; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8f9c9590a42c..1dc6248feb83 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -654,13 +654,12 @@ enum zone_watermarks { }; /* - * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list - * for THP which will usually be GFP_MOVABLE. Even if it is another type, - * it should not contribute to serious fragmentation causing THP allocation - * failures. + * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists + * are added for THP. One PCP list is used by GPF_MOVABLE, and the other PCP list + * is used by GFP_UNMOVABLE and GFP_RECLAIMABLE. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define NR_PCP_THP 1 +#define NR_PCP_THP 2 #else #define NR_PCP_THP 0 #endif @@ -1980,8 +1979,9 @@ static inline int subsection_map_index(unsigned long pfn) static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); + struct mem_section_usage *usage = READ_ONCE(ms->usage); - return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); + return usage ? 
test_bit(idx, usage->subsection_map) : 0; } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) diff --git a/include/linux/module.h b/include/linux/module.h index ffa1c603163c..330ffb59efe5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -931,11 +931,11 @@ int module_kallsyms_on_each_symbol(const char *modname, * least KSYM_NAME_LEN long: a pointer to namebuf is returned if * found, otherwise NULL. */ -const char *module_address_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, const unsigned char **modbuildid, - char *namebuf); +int module_address_lookup(unsigned long addr, + unsigned long *symbolsize, + unsigned long *offset, + char **modname, const unsigned char **modbuildid, + char *namebuf); int lookup_module_symbol_name(unsigned long addr, char *symname); int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, @@ -964,14 +964,14 @@ static inline int module_kallsyms_on_each_symbol(const char *modname, } /* For kallsyms to ask for address resolution. NULL means not found. 
*/ -static inline const char *module_address_lookup(unsigned long addr, +static inline int module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, const unsigned char **modbuildid, char *namebuf) { - return NULL; + return 0; } static inline int lookup_module_symbol_name(unsigned long addr, char *symname) diff --git a/include/linux/namei.h b/include/linux/namei.h index 967aa9ea9f96..8ec8fed3bce8 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -50,13 +50,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; extern int path_pts(struct path *path); -extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); - -static inline int user_path_at(int dfd, const char __user *name, unsigned flags, - struct path *path) -{ - return user_path_at_empty(dfd, name, flags, path, NULL); -} +extern int user_path_at(int, const char __user *, unsigned, struct path *); struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 5601d14e2886..dab6a1734a22 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -42,6 +42,17 @@ struct nsproxy { }; extern struct nsproxy init_nsproxy; +#define to_ns_common(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(__ns->ns), \ + struct ipc_namespace *: &(__ns->ns), \ + struct net *: &(__ns->ns), \ + struct pid_namespace *: &(__ns->ns), \ + struct mnt_namespace *: &(__ns->ns), \ + struct time_namespace *: &(__ns->ns), \ + struct user_namespace *: &(__ns->ns), \ + struct uts_namespace *: &(__ns->ns)) + /* * A structure to encompass all bits needed to install * a partial or complete new set of namespaces. 
@@ -112,4 +123,6 @@ static inline void get_nsproxy(struct nsproxy *ns) refcount_inc(&ns->count); } +DEFINE_FREE(put_nsproxy, struct nsproxy *, if (_T) put_nsproxy(_T)) + #endif diff --git a/include/linux/numa.h b/include/linux/numa.h index 1d43371fafd2..eb19503604fe 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -15,6 +15,11 @@ #define NUMA_NO_NODE (-1) #define NUMA_NO_MEMBLK (-1) +static inline bool numa_valid_node(int nid) +{ + return nid >= 0 && nid < MAX_NUMNODES; +} + /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO #define __initdata_or_meminfo diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 425573202295..c693ac344ec0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -85,10 +85,11 @@ enum { enum { NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */ NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */ + NVMF_RDMA_QPTYPE_INVALID = 0xff, }; -/* RDMA QP Service Type codes for Discovery Log Page entry TSAS - * RDMA_QPTYPE field +/* RDMA Provider Type codes for Discovery Log Page entry TSAS + * RDMA_PRTYPE field */ enum { NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */ @@ -110,6 +111,7 @@ enum { NVMF_TCP_SECTYPE_NONE = 0, /* No Security */ NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ + NVMF_TCP_SECTYPE_INVALID = 0xff, }; #define NVME_AQ_DEPTH 32 diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 104078afe0b1..b9e914e1face 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -944,15 +944,18 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) * mistaken for a page type value. 
*/ -#define PAGE_TYPE_BASE 0xf0000000 -/* Reserve 0x0000007f to catch underflows of _mapcount */ -#define PAGE_MAPCOUNT_RESERVE -128 -#define PG_buddy 0x00000080 -#define PG_offline 0x00000100 -#define PG_table 0x00000200 -#define PG_guard 0x00000400 -#define PG_hugetlb 0x00000800 -#define PG_slab 0x00001000 +enum pagetype { + PG_buddy = 0x00000080, + PG_offline = 0x00000100, + PG_table = 0x00000200, + PG_guard = 0x00000400, + PG_hugetlb = 0x00000800, + PG_slab = 0x00001000, + + PAGE_TYPE_BASE = 0xf0000000, + /* Reserve 0x0000007f to catch underflows of _mapcount */ + PAGE_MAPCOUNT_RESERVE = -128, +}; #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 1acf5bac7f50..8c236c651d1d 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio) static inline bool page_ref_add_unless(struct page *page, int nr, int u) { - bool ret = atomic_add_unless(&page->_refcount, nr, u); + bool ret = false; + + rcu_read_lock(); + /* avoid writing to the vmemmap area being remapped */ + if (!page_is_fake_head(page) && page_ref_count(page) != u) + ret = atomic_add_unless(&page->_refcount, nr, u); + rcu_read_unlock(); if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); @@ -258,54 +264,9 @@ static inline bool folio_try_get(struct folio *folio) return folio_ref_add_unless(folio, 1, 0); } -static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) -{ -#ifdef CONFIG_TINY_RCU - /* - * The caller guarantees the folio will not be freed from interrupt - * context, so (on !SMP) we only need preemption to be disabled - * and TINY_RCU does that for us. 
- */ -# ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic() && !irqs_disabled()); -# endif - VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio); - folio_ref_add(folio, count); -#else - if (unlikely(!folio_ref_add_unless(folio, count, 0))) { - /* Either the folio has been freed, or will be freed. */ - return false; - } -#endif - return true; -} - -/** - * folio_try_get_rcu - Attempt to increase the refcount on a folio. - * @folio: The folio. - * - * This is a version of folio_try_get() optimised for non-SMP kernels. - * If you are still holding the rcu_read_lock() after looking up the - * page and know that the page cannot have its refcount decreased to - * zero in interrupt context, you can use this instead of folio_try_get(). - * - * Example users include get_user_pages_fast() (as pages are not unmapped - * from interrupt context) and the page cache lookups (as pages are not - * truncated from interrupt context). We also know that pages are not - * frozen in interrupt context for the purposes of splitting or migration. - * - * You can also use this function if you're holding a lock that prevents - * pages being frozen & removed; eg the i_pages lock for the page cache - * or the mmap_lock or page table lock for page tables. In this case, - * it will always succeed, and you could have used a plain folio_get(), - * but it's sometimes more convenient to have a common function called - * from both locked and RCU-protected contexts. - * - * Return: True if the reference count was successfully incremented. 
- */ -static inline bool folio_try_get_rcu(struct folio *folio) +static inline bool folio_ref_try_add(struct folio *folio, int count) { - return folio_ref_try_add_rcu(folio, 1); + return folio_ref_add_unless(folio, count, 0); } static inline int page_ref_freeze(struct page *page, int count) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ee633712bba0..a0a026d2d244 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) * a good order (that's 1MB if you're using 4kB pages) */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER +#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER #else -#define MAX_PAGECACHE_ORDER 8 +#define PREFERRED_MAX_PAGECACHE_ORDER 8 #endif +/* + * xas_split_alloc() does not support arbitrary orders. This implies no + * 512MB THP on ARM64 with 64KB base page size. + */ +#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) +#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) + /** * mapping_set_large_folios() - Indicate the file supports large folios. * @mapping: The file. @@ -381,6 +388,10 @@ static inline void mapping_set_large_folios(struct address_space *mapping) */ static inline bool mapping_large_folio_support(struct address_space *mapping) { + /* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */ + VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, + "Anonymous mapping always supports large folio"); + return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } diff --git a/include/linux/path.h b/include/linux/path.h index 475225a03d0d..ca073e70decd 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -24,4 +24,13 @@ static inline void path_put_init(struct path *path) *path = (struct path) { }; } +/* + * Cleanup macro for use with __free(path_put). 
Avoids dereference and + * copying @path unlike DEFINE_FREE(). path_put() will handle the empty + * path correctly just ensure @path is initialized: + * + * struct path path __free(path_put) = {}; + */ +#define __free_path_put path_put + #endif /* _LINUX_PATH_H */ diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 86ba5d33e43b..9cacadbd61f8 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -37,6 +37,9 @@ static inline union codetag_ref *get_page_tag_ref(struct page *page) static inline void put_page_tag_ref(union codetag_ref *ref) { + if (WARN_ON(!ref)) + return; + page_ext_put(page_ext_from_codetag_ref(ref)); } @@ -102,9 +105,11 @@ static inline struct alloc_tag *pgalloc_tag_get(struct page *page) union codetag_ref *ref = get_page_tag_ref(page); alloc_tag_sub_check(ref); - if (ref && ref->ct) - tag = ct_to_alloc_tag(ref->ct); - put_page_tag_ref(ref); + if (ref) { + if (ref->ct) + tag = ct_to_alloc_tag(ref->ct); + put_page_tag_ref(ref); + } } return tag; diff --git a/include/linux/phy.h b/include/linux/phy.h index e6e83304558e..3be430cf3132 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1122,7 +1122,7 @@ struct phy_driver { u8 index, enum led_brightness value); /** - * @led_blink_set: Set a PHY LED brightness. Index indicates + * @led_blink_set: Set a PHY LED blinking. Index indicates * which of the PHYs led should be configured to blink. Delays * are in milliseconds and if both are zero then a sensible * default should be chosen. 
The call should adjust the diff --git a/include/linux/printk.h b/include/linux/printk.h index 40afab23881a..65c5184470f1 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -60,9 +60,6 @@ static inline const char *printk_skip_headers(const char *buffer) #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET -int add_preferred_console_match(const char *match, const char *name, - const short idx); - extern int console_printk[]; #define console_loglevel (console_printk[0]) diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..a5f4b48fca18 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2192,13 +2192,13 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } extern void sched_set_stop_task(int cpu, struct task_struct *stop); #ifdef CONFIG_MEM_ALLOC_PROFILING -static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) +static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) { swap(current->alloc_tag, tag); return tag; } -static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) +static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) { #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n"); diff --git a/include/linux/security.h b/include/linux/security.h index 21cf70346b33..de3af33e6ff5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2048,7 +2048,8 @@ static inline void security_key_post_create_or_update(struct key *keyring, #ifdef CONFIG_AUDIT #ifdef CONFIG_SECURITY -int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule); +int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, + gfp_t gfp); int security_audit_rule_known(struct audit_krule *krule); int security_audit_rule_match(u32 secid, u32 field, u32 
op, void *lsmrule); void security_audit_rule_free(void *lsmrule); @@ -2056,7 +2057,7 @@ void security_audit_rule_free(void *lsmrule); #else static inline int security_audit_rule_init(u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) { return 0; } diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8cb65f50e830..aea25eef9a1a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -811,8 +811,7 @@ enum UART_TX_FLAGS { if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (!((flags) & UART_TX_NOSTOP) && pending == 0 && \ - __port->ops->tx_empty(__port)) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ @@ -852,6 +851,24 @@ enum UART_TX_FLAGS { }) /** + * uart_port_tx_limited_flags -- transmit helper for uart_port with count limiting with flags + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @flags: %UART_TX_NOSTOP or similar + * @count: a limit of characters to send + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * @tx_done: function to call after the loop is done + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx_limited_flags(port, ch, flags, count, tx_ready, put_char, tx_done) ({ \ + unsigned int __count = (count); \ + __uart_port_tx(port, ch, flags, tx_ready, put_char, tx_done, __count, \ + __count--); \ +}) + +/** * uart_port_tx -- transmit helper for uart_port * @port: uart port * @ch: variable to store a character to be written to the HW diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e8e1e798924f..67b9a15a5330 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -533,6 +533,9 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. 
* @must_async: disable all fast paths in the core + * @defer_optimize_message: set to true if controller cannot pre-optimize messages + * and needs to defer the optimization step until the message is actually + * being transferred * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -776,6 +779,7 @@ struct spi_controller { /* Flag for enabling opportunistic skipping of the queue in spi_sync */ bool queue_empty; bool must_async; + bool defer_optimize_message; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) @@ -1085,12 +1089,13 @@ struct spi_transfer { unsigned dummy_data:1; unsigned cs_off:1; unsigned cs_change:1; - unsigned tx_nbits:3; - unsigned rx_nbits:3; + unsigned tx_nbits:4; + unsigned rx_nbits:4; unsigned timestamped:1; #define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ #define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ +#define SPI_NBITS_OCTAL 0x08 /* 8-bit transfer */ u8 bits_per_word; struct spi_delay delay; struct spi_delay cs_change_delay; diff --git a/include/linux/string.h b/include/linux/string.h index 60168aa2af07..9edace076ddb 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -289,7 +289,7 @@ extern void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) __realloc_si extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); -extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp) +extern void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp) __realloc_size(2, 3); /* lib/argv_split.c */ diff --git a/include/linux/swap.h b/include/linux/swap.h index bd450023b9a4..e685e93ba354 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page) } 
/* linux/mm/workingset.c */ -bool workingset_test_recent(void *shadow, bool file, bool *workingset); +bool workingset_test_recent(void *shadow, bool file, bool *workingset, + bool flush); void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 9104952d323d..fff820c3e93e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -322,13 +322,13 @@ asmlinkage long sys_io_pgetevents(aio_context_t ctx_id, long nr, struct io_event __user *events, struct __kernel_timespec __user *timeout, - const struct __aio_sigset *sig); + const struct __aio_sigset __user *sig); asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id, long min_nr, long nr, struct io_event __user *events, struct old_timespec32 __user *timeout, - const struct __aio_sigset *sig); + const struct __aio_sigset __user *sig); asmlinkage long sys_io_uring_setup(u32 entries, struct io_uring_params __user *p); asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit, @@ -418,7 +418,7 @@ asmlinkage long sys_listmount(const struct mnt_id_req __user *req, u64 __user *mnt_ids, size_t nr_mnt_ids, unsigned int flags); asmlinkage long sys_truncate(const char __user *path, long length); -asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); +asmlinkage long sys_ftruncate(unsigned int fd, off_t length); #if BITS_PER_LONG == 32 asmlinkage long sys_truncate64(const char __user *path, loff_t length); asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); @@ -441,7 +441,7 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); asmlinkage long sys_openat2(int dfd, const char __user *filename, - struct open_how *how, size_t size); + 
struct open_how __user *how, size_t size); asmlinkage long sys_close(unsigned int fd); asmlinkage long sys_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); @@ -555,7 +555,7 @@ asmlinkage long sys_get_robust_list(int pid, asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); -asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, +asmlinkage long sys_futex_waitv(struct futex_waitv __user *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); @@ -859,9 +859,15 @@ asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource, const struct rlimit64 __user *new_rlim, struct rlimit64 __user *old_rlim); asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags); +#if defined(CONFIG_ARCH_SPLIT_ARG64) +asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, + unsigned int mask_1, unsigned int mask_2, + int dfd, const char __user * pathname); +#else asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int fd, const char __user *pathname); +#endif asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, int __user *mnt_id, int flag); @@ -907,7 +913,7 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); -asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); +asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size); asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); @@ -960,11 +966,11 @@ asmlinkage long sys_cachestat(unsigned int fd, struct cachestat_range __user *cstat_range, struct cachestat __user *cstat, 
unsigned int flags); asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); -asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, - u32 *size, u32 flags); -asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, +asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx __user *ctx, + u32 __user *size, u32 flags); +asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx __user *ctx, u32 size, u32 flags); -asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags); +asmlinkage long sys_lsm_list_modules(u64 __user *ids, u32 __user *size, u32 flags); /* * Architecture-specific system calls diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 21a67dc9efe8..e93ee8d936a9 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -490,9 +490,16 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle) { } #endif + +static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip) +{ #ifdef CONFIG_TCG_TPM2_HMAC + return chip->auth; +#else + return NULL; +#endif +} -int tpm2_start_auth_session(struct tpm_chip *chip); void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, u32 handle, u8 *name); void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, @@ -504,9 +511,27 @@ static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, u8 *passphrase, int passphraselen) { - tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, - passphraselen); + struct tpm_header *head; + int offset; + + if (tpm2_chip_auth(chip)) { + tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen); + } else { + offset = buf->handles * 4 + TPM_HEADER_SIZE; + head = (struct tpm_header *)buf->data; + + /* + * If the only sessions are optional, the command tag must change to + * TPM2_ST_NO_SESSIONS. 
+ */ + if (tpm_buf_length(buf) == offset) + head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); + } } + +#ifdef CONFIG_TCG_TPM2_HMAC + +int tpm2_start_auth_session(struct tpm_chip *chip); void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf); int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, int rc); @@ -521,56 +546,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip) static inline void tpm2_end_auth_session(struct tpm_chip *chip) { } -static inline void tpm_buf_append_name(struct tpm_chip *chip, - struct tpm_buf *buf, - u32 handle, u8 *name) -{ - tpm_buf_append_u32(buf, handle); - /* count the number of handles in the upper bits of flags */ - buf->handles++; -} -static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip, - struct tpm_buf *buf, - u8 attributes, u8 *passphrase, - int passphraselen) -{ - /* offset tells us where the sessions area begins */ - int offset = buf->handles * 4 + TPM_HEADER_SIZE; - u32 len = 9 + passphraselen; - - if (tpm_buf_length(buf) != offset) { - /* not the first session so update the existing length */ - len += get_unaligned_be32(&buf->data[offset]); - put_unaligned_be32(len, &buf->data[offset]); - } else { - tpm_buf_append_u32(buf, len); - } - /* auth handle */ - tpm_buf_append_u32(buf, TPM2_RS_PW); - /* nonce */ - tpm_buf_append_u16(buf, 0); - /* attributes */ - tpm_buf_append_u8(buf, 0); - /* passphrase */ - tpm_buf_append_u16(buf, passphraselen); - tpm_buf_append(buf, passphrase, passphraselen); -} -static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, - struct tpm_buf *buf, - u8 attributes, - u8 *passphrase, - int passphraselen) -{ - int offset = buf->handles * 4 + TPM_HEADER_SIZE; - struct tpm_header *head = (struct tpm_header *) buf->data; - - /* - * if the only sessions are optional, the command tag - * must change to TPM2_ST_NO_SESSIONS - */ - if (tpm_buf_length(buf) == offset) - head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); -} static 
inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) { diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fb3993894536..d9968bfc8eac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -95,7 +95,7 @@ enum wq_misc_consts { WORK_BUSY_RUNNING = 1 << 1, /* maximum string length for set_worker_desc() */ - WORKER_DESC_LEN = 24, + WORKER_DESC_LEN = 32, }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fe932ca3bc8c..e372a88e8c3f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -324,6 +324,17 @@ enum { * claim to support it. */ HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, + + /* + * When this quirk is set, the reserved bits of Primary/Secondary_PHY + * inside the LE Extended Advertising Report events are discarded. + * This is required for some Apple/Broadcom controllers which + * abuse these reserved bits for unrelated flags. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. 
+ */ + HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, }; /* HCI device flags */ diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 6a9d063e9f47..534c3386e714 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -38,6 +38,8 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout, struct sock *sk); +int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 7d6b1254c92d..c0deaafebfdc 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -263,7 +263,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, struct sock *child); -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout); struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, struct request_sock *req, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cafc664ee531..45ad37adbe32 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -395,7 +395,7 @@ enum ieee80211_bss_change { BSS_CHANGED_HE_OBSS_PD = 1<<28, BSS_CHANGED_HE_BSS_COLOR = 1<<29, BSS_CHANGED_FILS_DISCOVERY = 1<<30, - BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, + BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = BIT_ULL(31), BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2796153b03da..188d41da1a40 100644 --- 
a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -619,6 +619,11 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } +static inline enum nft_data_types nft_set_datatype(const struct nft_set *set) +{ + return set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; +} + static inline bool nft_set_gc_is_pending(const struct nft_set *s) { return refcount_read(&s->refs) != 1; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 02bbdc577f8e..a6a0bf4a247e 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -15,6 +15,9 @@ struct netns_nf { const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; +#ifdef CONFIG_LWTUNNEL + struct ctl_table_header *nf_lwtnl_dir_header; +#endif #endif struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS]; struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS]; diff --git a/include/net/tcx.h b/include/net/tcx.h index 72a3e75e539f..5ce0ce9e0c02 100644 --- a/include/net/tcx.h +++ b/include/net/tcx.h @@ -13,7 +13,7 @@ struct mini_Qdisc; struct tcx_entry { struct mini_Qdisc __rcu *miniq; struct bpf_mprog_bundle bundle; - bool miniq_active; + u32 miniq_active; struct rcu_head rcu; }; @@ -125,11 +125,16 @@ static inline void tcx_skeys_dec(bool ingress) tcx_dec(); } -static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry, - const bool active) +static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); - tcx_entry(entry)->miniq_active = active; + tcx_entry(entry)->miniq_active++; +} + +static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry) +{ + ASSERT_RTNL(); + tcx_entry(entry)->miniq_active--; } static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry) diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index 6b548dc2c496..1d79a3b536ce 100644 --- 
a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -69,8 +69,10 @@ #define BLIST_RETRY_ITF ((__force blist_flags_t)(1ULL << 32)) /* Always retry ABORTED_COMMAND with ASC 0xc1 */ #define BLIST_RETRY_ASC_C1 ((__force blist_flags_t)(1ULL << 33)) +/* Do not query the IO Advice Hints Grouping mode page */ +#define BLIST_SKIP_IO_HINTS ((__force blist_flags_t)(1ULL << 34)) -#define __BLIST_LAST_USED BLIST_RETRY_ASC_C1 +#define __BLIST_LAST_USED BLIST_SKIP_IO_HINTS #define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \ (__force blist_flags_t) \ diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index c11aaf8079fb..f6baa9a01868 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -36,6 +36,7 @@ snd_pcm_uframes_t snd_dmaengine_pcm_pointer_no_residue(struct snd_pcm_substream int snd_dmaengine_pcm_open(struct snd_pcm_substream *substream, struct dma_chan *chan); int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream); +int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream); int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream, dma_filter_fn filter_fn, void *filter_data); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index fadf406b5260..c978fa2893a5 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2556,9 +2556,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_count, TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, - TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr), + TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr, + u64 last_root_id, u64 last_ino), - TP_ARGS(fs_info, nr_to_scan, nr), + TP_ARGS(fs_info, nr_to_scan, nr, last_root_id, last_ino), TP_STRUCT__entry_btrfs( __field( long, nr_to_scan ) @@ -2570,8 +2571,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, TP_fast_assign_btrfs(fs_info, __entry->nr_to_scan = nr_to_scan; __entry->nr = nr; - __entry->last_root_id 
= fs_info->extent_map_shrinker_last_root; - __entry->last_ino = fs_info->extent_map_shrinker_last_ino; + __entry->last_root_id = last_root_id; + __entry->last_ino = last_ino; ), TP_printk_btrfs("nr_to_scan=%ld nr=%ld last_root=%llu(%s) last_ino=%llu", @@ -2581,9 +2582,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit, - TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr), + TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr, + u64 last_root_id, u64 last_ino), - TP_ARGS(fs_info, nr_dropped, nr), + TP_ARGS(fs_info, nr_dropped, nr, last_root_id, last_ino), TP_STRUCT__entry_btrfs( __field( long, nr_dropped ) @@ -2595,8 +2597,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit, TP_fast_assign_btrfs(fs_info, __entry->nr_dropped = nr_dropped; __entry->nr = nr; - __entry->last_root_id = fs_info->extent_map_shrinker_last_root; - __entry->last_ino = fs_info->extent_map_shrinker_last_ino; + __entry->last_root_id = last_root_id; + __entry->last_ino = last_ino; ), TP_printk_btrfs("nr_dropped=%ld nr=%ld last_root=%llu(%s) last_ino=%llu", diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index a6190aa1b406..f1a73aa83fbb 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -35,12 +35,14 @@ enum fscache_volume_trace { fscache_volume_get_cookie, fscache_volume_get_create_work, fscache_volume_get_hash_collision, + fscache_volume_get_withdraw, fscache_volume_free, fscache_volume_new_acquire, fscache_volume_put_cookie, fscache_volume_put_create_work, fscache_volume_put_hash_collision, fscache_volume_put_relinquish, + fscache_volume_put_withdraw, fscache_volume_see_create_work, fscache_volume_see_hash_wake, fscache_volume_wait_create_work, @@ -120,12 +122,14 @@ enum fscache_access_trace { EM(fscache_volume_get_cookie, "GET cook ") \ EM(fscache_volume_get_create_work, "GET creat") \ EM(fscache_volume_get_hash_collision, "GET hcoll") \ + 
EM(fscache_volume_get_withdraw, "GET withd") \ EM(fscache_volume_free, "FREE ") \ EM(fscache_volume_new_acquire, "NEW acq ") \ EM(fscache_volume_put_cookie, "PUT cook ") \ EM(fscache_volume_put_create_work, "PUT creat") \ EM(fscache_volume_put_hash_collision, "PUT hcoll") \ EM(fscache_volume_put_relinquish, "PUT relnq") \ + EM(fscache_volume_put_withdraw, "PUT withd") \ EM(fscache_volume_see_create_work, "SEE creat") \ EM(fscache_volume_see_hash_wake, "SEE hwake") \ E_(fscache_volume_wait_create_work, "WAIT crea") diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index f1b5e816e7e5..ff33f41a9db7 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -81,7 +81,7 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q)->name ) + __string( dev, qdisc_dev(q) ? qdisc_dev(q)->name : "(null)" ) __string( kind, q->ops->id ) __field( u32, parent ) __field( u32, handle ) diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index d983c48a3b6a..d4cc26932ff4 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index aaed8e12ad0b..926b1deb1116 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -802,6 +802,9 @@ struct drm_panthor_queue_submit { * Must be 64-bit/8-byte aligned (the size of a CS instruction) * * Can be zero 
if stream_addr is zero too. + * + * When the stream size is zero, the queue submit serves as a + * synchronization point. */ __u32 stream_size; @@ -822,6 +825,8 @@ struct drm_panthor_queue_submit { * ensure the GPU doesn't get garbage when reading the indirect command * stream buffers. If you want the cache flush to happen * unconditionally, pass a zero here. + * + * Ignored when stream_size is zero. */ __u32 latest_flush; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1446c3bae515..d425b83181df 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -776,7 +776,13 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CPU_CACHING_WC 2 /** * @cpu_caching: The CPU caching mode to select for this object. If - * mmaping the object the mode selected here will also be used. + * mmaping the object the mode selected here will also be used. The + * exception is when mapping system memory (including data evicted + * to system) on discrete GPUs. The caching mode selected will + * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency + * between GPU- and CPU is guaranteed. The caching mode of + * existing CPU-mappings will be updated transparently to + * user-space clients. 
*/ __u16 cpu_caching; /** @pad: MBZ */ diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index ad5478dbad00..225bc366ffcb 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -188,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. 
*/ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) @@ -202,10 +205,13 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index a0c8552b64ee..b133211331f6 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -15,5 +15,15 @@ #define NS_GET_NSTYPE _IO(NSIO, 0x3) /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Get the id for a mount namespace */ +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) +/* Translate pid from target pid namespace into the caller's pid namespace. */ +#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) +/* Return thread-group leader id of pid in the callers pid namespace. */ +#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int) +/* Translate pid from caller's pid namespace into a target pid namespace. */ +#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int) +/* Return thread-group leader id of pid in the target pid namespace. 
*/ +#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) #endif /* __LINUX_NSFS_H */ diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 72ec000a97cd..565fc0629fff 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <linux/fcntl.h> +#include <linux/ioctl.h> /* Flags for pidfd_open(). */ #define PIDFD_NONBLOCK O_NONBLOCK @@ -15,4 +16,17 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +#define PIDFS_IOCTL_MAGIC 0xFF + +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) + #endif /* _UAPI_LINUX_PIDFD_H */ diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h index bd1066754220..c102ef35d11e 100644 --- a/include/uapi/linux/trace_mmap.h +++ b/include/uapi/linux/trace_mmap.h @@ -43,6 +43,6 @@ struct trace_buffer_meta { __u64 Reserved2; }; -#define TRACE_MMAP_IOCTL_GET_READER _IO('T', 0x1) +#define TRACE_MMAP_IOCTL_GET_READER _IO('R', 0x20) #endif /* _TRACE_MMAP_H_ */ diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index f33d914d8f46..91583690bddc 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,11 +8,14 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct 
fastrpc_invoke) +/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) +/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) +/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) diff --git a/init/Kconfig b/init/Kconfig index 72404c1f2157..febdea2afc3b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -883,7 +883,7 @@ config GCC10_NO_ARRAY_BOUNDS config CC_NO_ARRAY_BOUNDS bool - default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS + default y if CC_IS_GCC && GCC_VERSION >= 90000 && GCC10_NO_ARRAY_BOUNDS # Currently, disable -Wstringop-overflow for GCC globally. 
config GCC_NO_STRINGOP_OVERFLOW diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 42139bb85fff..8e6faa942a6f 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1213,8 +1213,8 @@ static void io_req_normal_work_add(struct io_kiocb *req) if (ctx->flags & IORING_SETUP_SQPOLL) { struct io_sq_data *sqd = ctx->sq_data; - if (wq_has_sleeper(&sqd->wait)) - wake_up(&sqd->wait); + if (sqd->thread) + __set_notify_signal(sqd->thread); return; } diff --git a/io_uring/net.c b/io_uring/net.c index 7ebbeab05fea..7b75da2e7826 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1137,16 +1137,18 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) flags |= MSG_DONTWAIT; retry_multishot: - kmsg->msg.msg_inq = -1; - kmsg->msg.msg_flags = 0; - if (io_do_buffer_select(req)) { ret = io_recv_buf_select(req, kmsg, &len, issue_flags); - if (unlikely(ret)) + if (unlikely(ret)) { + kmsg->msg.msg_inq = -1; goto out_free; + } sr->buf = NULL; } + kmsg->msg.msg_flags = 0; + kmsg->msg.msg_inq = -1; + if (flags & MSG_WAITALL) min_ret = iov_iter_count(&kmsg->msg.msg_iter); diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 60c00144471a..a860516bf448 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1049,7 +1049,6 @@ int io_import_fixed(int ddir, struct iov_iter *iter, * branch doesn't expect non PAGE_SIZE'd chunks. 
*/ iter->bvec = bvec; - iter->nr_segs = bvec->bv_len; iter->count -= offset; iter->iov_offset = offset; } else { diff --git a/io_uring/statx.c b/io_uring/statx.c index abb874209caa..f7f9b202eec0 100644 --- a/io_uring/statx.c +++ b/io_uring/statx.c @@ -37,8 +37,7 @@ int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sx->flags = READ_ONCE(sqe->statx_flags); sx->filename = getname_flags(path, - getname_statx_lookup_flags(sx->flags), - NULL); + getname_statx_lookup_flags(sx->flags)); if (IS_ERR(sx->filename)) { int ret = PTR_ERR(sx->filename); diff --git a/io_uring/xattr.c b/io_uring/xattr.c index 44905b82eea8..6cf41c3bc369 100644 --- a/io_uring/xattr.c +++ b/io_uring/xattr.c @@ -96,7 +96,7 @@ int io_getxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); - ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); + ix->filename = getname_flags(path, LOOKUP_FOLLOW); if (IS_ERR(ix->filename)) { ret = PTR_ERR(ix->filename); ix->filename = NULL; @@ -189,7 +189,7 @@ int io_setxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); - ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); + ix->filename = getname_flags(path, LOOKUP_FOLLOW); if (IS_ERR(ix->filename)) { ret = PTR_ERR(ix->filename); ix->filename = NULL; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 5eea4dc0509e..a7cbd69efbef 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -903,7 +903,8 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, audit_mq_open(oflag, mode, attr); - if (IS_ERR(name = getname(u_name))) + name = getname(u_name); + if (IS_ERR(name)) return PTR_ERR(name); fd = get_unused_fd_flags(O_CLOEXEC); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index be8c680121e4..d6ef4f4f9cba 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -529,7 +529,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data 
*data, entry->rule.buflen += f_val; f->lsm_str = str; err = security_audit_rule_init(f->type, f->op, str, - (void **)&f->lsm_rule); + (void **)&f->lsm_rule, + GFP_KERNEL); /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (err == -EINVAL) { @@ -799,7 +800,7 @@ static inline int audit_dupe_lsm_field(struct audit_field *df, /* our own (refreshed) copy of lsm_rule */ ret = security_audit_rule_init(df->type, df->op, df->lsm_str, - (void **)&df->lsm_rule); + (void **)&df->lsm_rule, GFP_KERNEL); /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (ret == -EINVAL) { diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 583ee4fe48ef..e52b3ad231b9 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -212,6 +212,7 @@ static u64 arena_map_mem_usage(const struct bpf_map *map) struct vma_list { struct vm_area_struct *vma; struct list_head head; + atomic_t mmap_count; }; static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma) @@ -221,20 +222,30 @@ static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma) vml = kmalloc(sizeof(*vml), GFP_KERNEL); if (!vml) return -ENOMEM; + atomic_set(&vml->mmap_count, 1); vma->vm_private_data = vml; vml->vma = vma; list_add(&vml->head, &arena->vma_list); return 0; } +static void arena_vm_open(struct vm_area_struct *vma) +{ + struct vma_list *vml = vma->vm_private_data; + + atomic_inc(&vml->mmap_count); +} + static void arena_vm_close(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; struct bpf_arena *arena = container_of(map, struct bpf_arena, map); - struct vma_list *vml; + struct vma_list *vml = vma->vm_private_data; + if (!atomic_dec_and_test(&vml->mmap_count)) + return; guard(mutex)(&arena->lock); - vml = vma->vm_private_data; + /* update link list under lock */ list_del(&vml->head); vma->vm_private_data = NULL; kfree(vml); @@ -287,6 +298,7 @@ out: } static const struct 
vm_operations_struct arena_vm_ops = { + .open = arena_vm_open, .close = arena_vm_close, .fault = arena_vm_fault, }; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 976cb258a0ed..c938dea5ddbf 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -782,8 +782,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, nbuckets = max_t(u32, 2, nbuckets); smap->bucket_log = ilog2(nbuckets); - smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), - nbuckets, GFP_USER | __GFP_NOWARN); + smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets, + sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN); if (!smap->buckets) { err = -ENOMEM; goto free_smap; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1a6c3faa6e4a..695a0fb2cd4d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -736,11 +736,11 @@ static struct bpf_ksym *bpf_ksym_find(unsigned long addr) return n ? container_of(n, struct bpf_ksym, tnode) : NULL; } -const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, +int __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym) { struct bpf_ksym *ksym; - char *ret = NULL; + int ret = 0; rcu_read_lock(); ksym = bpf_ksym_find(addr); @@ -748,9 +748,8 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long symbol_start = ksym->start; unsigned long symbol_end = ksym->end; - strscpy(sym, ksym->name, KSYM_NAME_LEN); + ret = strscpy(sym, ksym->name, KSYM_NAME_LEN); - ret = sym; if (size) *size = symbol_end - symbol_start; if (off) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 2a69a9a36c0f..3243c83ef3e3 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1084,7 +1084,10 @@ struct bpf_async_cb { struct bpf_prog *prog; void __rcu *callback_fn; void *value; - struct rcu_head rcu; + union { + struct rcu_head rcu; + struct work_struct delete_work; + }; u64 flags; }; @@ -1107,6 +1110,7 @@ 
struct bpf_async_cb { struct bpf_hrtimer { struct bpf_async_cb cb; struct hrtimer timer; + atomic_t cancelling; }; struct bpf_work { @@ -1219,6 +1223,21 @@ static void bpf_wq_delete_work(struct work_struct *work) kfree_rcu(w, cb.rcu); } +static void bpf_timer_delete_work(struct work_struct *work) +{ + struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work); + + /* Cancel the timer and wait for callback to complete if it was running. + * If hrtimer_cancel() can be safely called it's safe to call + * kfree_rcu(t) right after for both preallocated and non-preallocated + * maps. The async->cb = NULL was already done and no code path can see + * address 't' anymore. Timer if armed for existing bpf_hrtimer before + * bpf_timer_cancel_and_free will have been cancelled. + */ + hrtimer_cancel(&t->timer); + kfree_rcu(t, cb.rcu); +} + static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags, enum bpf_async_type type) { @@ -1262,6 +1281,8 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u clockid = flags & (MAX_CLOCKS - 1); t = (struct bpf_hrtimer *)cb; + atomic_set(&t->cancelling, 0); + INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work); hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); t->timer.function = bpf_timer_cb; cb->value = (void *)async - map->record->timer_off; @@ -1440,7 +1461,8 @@ static void drop_prog_refcnt(struct bpf_async_cb *async) BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer) { - struct bpf_hrtimer *t; + struct bpf_hrtimer *t, *cur_t; + bool inc = false; int ret = 0; if (in_nmi()) @@ -1452,14 +1474,41 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer) ret = -EINVAL; goto out; } - if (this_cpu_read(hrtimer_running) == t) { + + cur_t = this_cpu_read(hrtimer_running); + if (cur_t == t) { /* If bpf callback_fn is trying to bpf_timer_cancel() * its own timer the hrtimer_cancel() will deadlock - * since it waits for callback_fn to finish + * 
since it waits for callback_fn to finish. */ ret = -EDEADLK; goto out; } + + /* Only account in-flight cancellations when invoked from a timer + * callback, since we want to avoid waiting only if other _callbacks_ + * are waiting on us, to avoid introducing lockups. Non-callback paths + * are ok, since nobody would synchronously wait for their completion. + */ + if (!cur_t) + goto drop; + atomic_inc(&t->cancelling); + /* Need full barrier after relaxed atomic_inc */ + smp_mb__after_atomic(); + inc = true; + if (atomic_read(&cur_t->cancelling)) { + /* We're cancelling timer t, while some other timer callback is + * attempting to cancel us. In such a case, it might be possible + * that timer t belongs to the other callback, or some other + * callback waiting upon it (creating transitive dependencies + * upon us), and we will enter a deadlock if we continue + * cancelling and waiting for it synchronously, since it might + * do the same. Bail! + */ + ret = -EDEADLK; + goto out; + } +drop: drop_prog_refcnt(&t->cb); out: __bpf_spin_unlock_irqrestore(&timer->lock); @@ -1467,6 +1516,8 @@ out: * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + if (inc) + atomic_dec(&t->cancelling); rcu_read_unlock(); return ret; } @@ -1512,25 +1563,39 @@ void bpf_timer_cancel_and_free(void *val) if (!t) return; - /* Cancel the timer and wait for callback to complete if it was running. - * If hrtimer_cancel() can be safely called it's safe to call kfree(t) - * right after for both preallocated and non-preallocated maps. - * The async->cb = NULL was already done and no code path can - * see address 't' anymore. - * - * Check that bpf_map_delete/update_elem() wasn't called from timer - * callback_fn. In such case don't call hrtimer_cancel() (since it will - * deadlock) and don't call hrtimer_try_to_cancel() (since it will just - * return -1). 
Though callback_fn is still running on this cpu it's + /* We check that bpf_map_delete/update_elem() was called from timer + * callback_fn. In such case we don't call hrtimer_cancel() (since it + * will deadlock) and don't call hrtimer_try_to_cancel() (since it will + * just return -1). Though callback_fn is still running on this cpu it's * safe to do kfree(t) because bpf_timer_cb() read everything it needed * from 't'. The bpf subprog callback_fn won't be able to access 't', * since async->cb = NULL was already done. The timer will be * effectively cancelled because bpf_timer_cb() will return * HRTIMER_NORESTART. + * + * However, it is possible the timer callback_fn calling us armed the + * timer _before_ calling us, such that failing to cancel it here will + * cause it to possibly use struct hrtimer after freeing bpf_hrtimer. + * Therefore, we _need_ to cancel any outstanding timers before we do + * kfree_rcu, even though no more timers can be armed. + * + * Moreover, we need to schedule work even if timer does not belong to + * the calling callback_fn, as on two different CPUs, we can end up in a + * situation where both sides run in parallel, try to cancel one + * another, and we end up waiting on both sides in hrtimer_cancel + * without making forward progress, since timer1 depends on time2 + * callback to finish, and vice versa. + * + * CPU 1 (timer1_cb) CPU 2 (timer2_cb) + * bpf_timer_cancel_and_free(timer2) bpf_timer_cancel_and_free(timer1) + * + * To avoid these issues, punt to workqueue context when we are in a + * timer callback. 
*/ - if (this_cpu_read(hrtimer_running) != t) - hrtimer_cancel(&t->timer); - kfree_rcu(t, cb.rcu); + if (this_cpu_read(hrtimer_running)) + queue_work(system_unbound_wq, &t->cb.delete_work); + else + bpf_timer_delete_work(&t->cb.delete_work); } /* This function is called by map_delete/update_elem for individual element and diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 0ee653a936ea..e20b90c36131 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -51,7 +51,8 @@ struct bpf_ringbuf { * This prevents a user-space application from modifying the * position and ruining in-kernel tracking. The permissions of the * pages depend on who is producing samples: user-space or the - * kernel. + * kernel. Note that the pending counter is placed in the same + * page as the producer, so that it shares the same cache line. * * Kernel-producer * --------------- @@ -70,6 +71,7 @@ struct bpf_ringbuf { */ unsigned long consumer_pos __aligned(PAGE_SIZE); unsigned long producer_pos __aligned(PAGE_SIZE); + unsigned long pending_pos; char data[] __aligned(PAGE_SIZE); }; @@ -179,6 +181,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) rb->mask = data_sz - 1; rb->consumer_pos = 0; rb->producer_pos = 0; + rb->pending_pos = 0; return rb; } @@ -404,9 +407,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr) static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) { - unsigned long cons_pos, prod_pos, new_prod_pos, flags; - u32 len, pg_off; + unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags; struct bpf_ringbuf_hdr *hdr; + u32 len, pg_off, tmp_size, hdr_len; if (unlikely(size > RINGBUF_MAX_RECORD_SZ)) return NULL; @@ -424,13 +427,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) spin_lock_irqsave(&rb->spinlock, flags); } + pend_pos = rb->pending_pos; prod_pos = rb->producer_pos; new_prod_pos = prod_pos + len; - /* check for out of ringbuf space by ensuring producer position - * 
doesn't advance more than (ringbuf_size - 1) ahead + while (pend_pos < prod_pos) { + hdr = (void *)rb->data + (pend_pos & rb->mask); + hdr_len = READ_ONCE(hdr->len); + if (hdr_len & BPF_RINGBUF_BUSY_BIT) + break; + tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT; + tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8); + pend_pos += tmp_size; + } + rb->pending_pos = pend_pos; + + /* check for out of ringbuf space: + * - by ensuring producer position doesn't advance more than + * (ringbuf_size - 1) ahead + * - by ensuring oldest not yet committed record until newest + * record does not span more than (ringbuf_size - 1) */ - if (new_prod_pos - cons_pos > rb->mask) { + if (new_prod_pos - cons_pos > rb->mask || + new_prod_pos - pend_pos > rb->mask) { spin_unlock_irqrestore(&rb->spinlock, flags); return NULL; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 36ef8e96787e..214a9fa8c6fb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4549,11 +4549,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, state->stack[spi].spilled_ptr.id = 0; } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && env->bpf_capable) { - struct bpf_reg_state fake_reg = {}; + struct bpf_reg_state *tmp_reg = &env->fake_reg[0]; - __mark_reg_known(&fake_reg, insn->imm); - fake_reg.type = SCALAR_VALUE; - save_register_state(env, state, spi, &fake_reg, size); + memset(tmp_reg, 0, sizeof(*tmp_reg)); + __mark_reg_known(tmp_reg, insn->imm); + tmp_reg->type = SCALAR_VALUE; + save_register_state(env, state, spi, tmp_reg, size); } else if (reg && is_spillable_regtype(reg->type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@ -6235,6 +6236,7 @@ static void set_sext32_default_val(struct bpf_reg_state *reg, int size) } reg->u32_min_value = 0; reg->u32_max_value = U32_MAX; + reg->var_off = tnum_subreg(tnum_unknown); } static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size) @@ 
-6279,6 +6281,7 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size) reg->s32_max_value = s32_max; reg->u32_min_value = (u32)s32_min; reg->u32_max_value = (u32)s32_max; + reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max)); return; } @@ -12718,6 +12721,16 @@ static bool signed_add32_overflows(s32 a, s32 b) return res < a; } +static bool signed_add16_overflows(s16 a, s16 b) +{ + /* Do the add in u16, where overflow is well-defined */ + s16 res = (s16)((u16)a + (u16)b); + + if (b < 0) + return res > a; + return res < a; +} + static bool signed_sub_overflows(s64 a, s64 b) { /* Do the sub in u64, where overflow is well-defined */ @@ -15113,7 +15126,6 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; struct bpf_reg_state *eq_branch_regs; - struct bpf_reg_state fake_reg = {}; u8 opcode = BPF_OP(insn->code); bool is_jmp32; int pred = -1; @@ -15179,7 +15191,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } - src_reg = &fake_reg; + src_reg = &env->fake_reg[0]; + memset(src_reg, 0, sizeof(*src_reg)); src_reg->type = SCALAR_VALUE; __mark_reg_known(src_reg, insn->imm); } @@ -15239,10 +15252,16 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, &other_branch_regs[insn->src_reg], dst_reg, src_reg, opcode, is_jmp32); } else /* BPF_SRC(insn->code) == BPF_K */ { + /* reg_set_min_max() can mangle the fake_reg. Make a copy + * so that these are two different memory locations. The + * src_reg is not used beyond here in context of K. 
+ */ + memcpy(&env->fake_reg[1], &env->fake_reg[0], + sizeof(env->fake_reg[0])); err = reg_set_min_max(env, &other_branch_regs[insn->dst_reg], - src_reg /* fake one */, - dst_reg, src_reg /* same fake one */, + &env->fake_reg[0], + dst_reg, &env->fake_reg[1], opcode, is_jmp32); } if (err) @@ -17441,11 +17460,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) goto skip_inf_loop_check; } if (is_may_goto_insn_at(env, insn_idx)) { - if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) { + if (sl->state.may_goto_depth != cur->may_goto_depth && + states_equal(env, &sl->state, cur, RANGE_WITHIN)) { update_loop_entry(cur, &sl->state); goto hit; } - goto skip_inf_loop_check; } if (calls_callback(env, insn_idx)) { if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) @@ -18723,6 +18742,39 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of return new_prog; } +/* + * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the + * jump offset by 'delta'. 
+ */ +static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta) +{ + struct bpf_insn *insn = prog->insnsi; + u32 insn_cnt = prog->len, i; + + for (i = 0; i < insn_cnt; i++, insn++) { + u8 code = insn->code; + + if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) || + BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT) + continue; + + if (insn->code == (BPF_JMP32 | BPF_JA)) { + if (i + 1 + insn->imm != tgt_idx) + continue; + if (signed_add32_overflows(insn->imm, delta)) + return -ERANGE; + insn->imm += delta; + } else { + if (i + 1 + insn->off != tgt_idx) + continue; + if (signed_add16_overflows(insn->imm, delta)) + return -ERANGE; + insn->off += delta; + } + } + return 0; +} + static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, u32 off, u32 cnt) { @@ -19997,7 +20049,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env) stack_depth_extra = 8; insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off); - insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2); + if (insn->off >= 0) + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2); + else + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off); cnt = 4; @@ -20313,7 +20368,7 @@ patch_map_ops_generic: goto next_insn; } -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) /* Implement bpf_get_smp_processor_id() inline. */ if (insn->imm == BPF_FUNC_get_smp_processor_id && prog->jit_requested && bpf_jit_supports_percpu_insn()) { @@ -20539,6 +20594,13 @@ next_insn: if (!new_prog) return -ENOMEM; env->prog = prog = new_prog; + /* + * If may_goto is a first insn of a prog there could be a jmp + * insn that points to it, hence adjust all such jmps to point + * to insn after BPF_ST that inits may_goto count. + * Adjustment will succeed because bpf_patch_insn_data() didn't fail. 
+ */ + WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1)); } /* Since poke tab is now finalized, publish aux to tracker. */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 563877d6c28b..3d2bf1d50a0c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1859,6 +1859,9 @@ static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return fals void __init bringup_nonboot_cpus(unsigned int max_cpus) { + if (!max_cpus) + return; + /* Try parallel bringup optimization if enabled */ if (cpuhp_bringup_cpus_parallel(max_cpus)) return; @@ -2446,7 +2449,7 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); * The caller needs to hold cpus read locked while calling this function. * Return: * On success: - * Positive state number if @state is CPUHP_AP_ONLINE_DYN; + * Positive state number if @state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN; * 0 for all other states * On failure: proper (negative) error code */ @@ -2469,7 +2472,7 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); - dynstate = state == CPUHP_AP_ONLINE_DYN; + dynstate = state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN; if (ret > 0 && dynstate) { state = ret; ret = 0; @@ -2500,8 +2503,8 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, out: mutex_unlock(&cpuhp_state_mutex); /* - * If the requested state is CPUHP_AP_ONLINE_DYN, return the - * dynamically allocated state in case of success. + * If the requested state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN, + * return the dynamically allocated state in case of success. */ if (!ret && dynstate) return state; diff --git a/kernel/exit.c b/kernel/exit.c index f95a2c1338a8..81fcee45d630 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -484,6 +484,8 @@ retry: * Search through everything else, we should not get here often. 
*/ for_each_process(g) { + if (atomic_read(&mm->mm_users) <= 1) + break; if (g->flags & PF_KTHREAD) continue; for_each_thread(g, c) { diff --git a/kernel/fork.c b/kernel/fork.c index 99076dbe27d8..763a042eef9c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -616,12 +616,6 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm) exe_file = get_mm_exe_file(oldmm); RCU_INIT_POINTER(mm->exe_file, exe_file); - /* - * We depend on the oldmm having properly denied write access to the - * exe_file already. - */ - if (exe_file && deny_write_access(exe_file)) - pr_warn_once("deny_write_access() failed in %s\n", __func__); } #ifdef CONFIG_MMU @@ -1412,20 +1406,11 @@ int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) */ old_exe_file = rcu_dereference_raw(mm->exe_file); - if (new_exe_file) { - /* - * We expect the caller (i.e., sys_execve) to already denied - * write access, so this is unlikely to fail. - */ - if (unlikely(deny_write_access(new_exe_file))) - return -EACCES; + if (new_exe_file) get_file(new_exe_file); - } rcu_assign_pointer(mm->exe_file, new_exe_file); - if (old_exe_file) { - allow_write_access(old_exe_file); + if (old_exe_file) fput(old_exe_file); - } return 0; } @@ -1464,9 +1449,6 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) return ret; } - ret = deny_write_access(new_exe_file); - if (ret) - return -EACCES; get_file(new_exe_file); /* set the new file */ @@ -1475,10 +1457,8 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) rcu_assign_pointer(mm->exe_file, new_exe_file); mmap_write_unlock(mm); - if (old_exe_file) { - allow_write_access(old_exe_file); + if (old_exe_file) fput(old_exe_file); - } return 0; } diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index 74a4ef1da9ad..fd75b4a484d7 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -18,7 +18,9 @@ #include <linux/mm.h> #include "gcov.h" -#if (__GNUC__ >= 10) +#if (__GNUC__ >= 14) +#define 
GCOV_COUNTERS 9 +#elif (__GNUC__ >= 10) #define GCOV_COUNTERS 8 #elif (__GNUC__ >= 7) #define GCOV_COUNTERS 9 diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 22ea19a36e6e..98b9622d372e 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -388,12 +388,12 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); } -static const char *kallsyms_lookup_buildid(unsigned long addr, +static int kallsyms_lookup_buildid(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, const unsigned char **modbuildid, char *namebuf) { - const char *ret; + int ret; namebuf[KSYM_NAME_LEN - 1] = 0; namebuf[0] = 0; @@ -410,7 +410,7 @@ static const char *kallsyms_lookup_buildid(unsigned long addr, if (modbuildid) *modbuildid = NULL; - ret = namebuf; + ret = strlen(namebuf); goto found; } @@ -442,8 +442,13 @@ const char *kallsyms_lookup(unsigned long addr, unsigned long *offset, char **modname, char *namebuf) { - return kallsyms_lookup_buildid(addr, symbolsize, offset, modname, - NULL, namebuf); + int ret = kallsyms_lookup_buildid(addr, symbolsize, offset, modname, + NULL, namebuf); + + if (!ret) + return NULL; + + return namebuf; } int lookup_symbol_name(unsigned long addr, char *symname) @@ -478,19 +483,15 @@ static int __sprint_symbol(char *buffer, unsigned long address, { char *modname; const unsigned char *buildid; - const char *name; unsigned long offset, size; int len; address += symbol_offset; - name = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid, + len = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid, buffer); - if (!name) + if (!len) return sprintf(buffer, "0x%lx", address - symbol_offset); - if (name != buffer) - strcpy(buffer, name); - len = strlen(buffer); offset -= symbol_offset; if (add_offset) diff --git a/kernel/kcov.c b/kernel/kcov.c index c3124f6d5536..f0a69d402066 100644 --- a/kernel/kcov.c +++ 
b/kernel/kcov.c @@ -632,6 +632,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, return -EINVAL; kcov->mode = mode; t->kcov = kcov; + t->kcov_mode = KCOV_MODE_REMOTE; kcov->t = t; kcov->remote = true; kcov->remote_size = remote_arg->area_size; diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 62fb57bb9f16..bf65e0c3c86f 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -321,14 +321,15 @@ void * __weak dereference_module_function_descriptor(struct module *mod, * For kallsyms to ask for address resolution. NULL means not found. Careful * not to lock to avoid deadlock on oopses, simply disable preemption. */ -const char *module_address_lookup(unsigned long addr, - unsigned long *size, - unsigned long *offset, - char **modname, - const unsigned char **modbuildid, - char *namebuf) +int module_address_lookup(unsigned long addr, + unsigned long *size, + unsigned long *offset, + char **modname, + const unsigned char **modbuildid, + char *namebuf) { - const char *ret = NULL; + const char *sym; + int ret = 0; struct module *mod; preempt_disable(); @@ -344,12 +345,10 @@ const char *module_address_lookup(unsigned long addr, #endif } - ret = find_kallsyms_symbol(mod, addr, size, offset); - } - /* Make a copy in here where it's safe */ - if (ret) { - strscpy(namebuf, ret, KSYM_NAME_LEN); - ret = namebuf; + sym = find_kallsyms_symbol(mod, addr, size, offset); + + if (sym) + ret = strscpy(namebuf, sym, KSYM_NAME_LEN); } preempt_enable(); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index dc48fecfa1dc..25f3cf679b35 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -218,6 +218,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) */ do { clear_thread_flag(TIF_SIGPENDING); + clear_thread_flag(TIF_NOTIFY_SIGNAL); rc = kernel_wait4(-1, NULL, __WALL, NULL); } while (rc != -ECHILD); diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 040fe7d1eda2..39a2b61c7232 100644 --- 
a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y = printk.o conopt.o +obj-y = printk.o obj-$(CONFIG_PRINTK) += printk_safe.o nbcon.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o obj-$(CONFIG_PRINTK_INDEX) += index.o diff --git a/kernel/printk/conopt.c b/kernel/printk/conopt.c deleted file mode 100644 index 9d507bac3657..000000000000 --- a/kernel/printk/conopt.c +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Kernel command line console options for hardware based addressing - * - * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/ - * Author: Tony Lindgren <tony@atomide.com> - */ - -#include <linux/console.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/types.h> - -#include <asm/errno.h> - -#include "console_cmdline.h" - -/* - * Allow longer DEVNAME:0.0 style console naming such as abcd0000.serial:0.0 - * in addition to the legacy ttyS0 style naming. - */ -#define CONSOLE_NAME_MAX 32 - -#define CONSOLE_OPT_MAX 16 -#define CONSOLE_BRL_OPT_MAX 16 - -struct console_option { - char name[CONSOLE_NAME_MAX]; - char opt[CONSOLE_OPT_MAX]; - char brl_opt[CONSOLE_BRL_OPT_MAX]; - u8 has_brl_opt:1; -}; - -/* Updated only at console_setup() time, no locking needed */ -static struct console_option conopt[MAX_CMDLINECONSOLES]; - -/** - * console_opt_save - Saves kernel command line console option for driver use - * @str: Kernel command line console name and option - * @brl_opt: Braille console options - * - * Saves a kernel command line console option for driver subsystems to use for - * adding a preferred console during init. Called from console_setup() only. - * - * Return: 0 on success, negative error code on failure. 
- */ -int __init console_opt_save(const char *str, const char *brl_opt) -{ - struct console_option *con; - size_t namelen, optlen; - const char *opt; - int i; - - namelen = strcspn(str, ","); - if (namelen == 0 || namelen >= CONSOLE_NAME_MAX) - return -EINVAL; - - opt = str + namelen; - if (*opt == ',') - opt++; - - optlen = strlen(opt); - if (optlen >= CONSOLE_OPT_MAX) - return -EINVAL; - - for (i = 0; i < MAX_CMDLINECONSOLES; i++) { - con = &conopt[i]; - - if (con->name[0]) { - if (!strncmp(str, con->name, namelen)) - return 0; - continue; - } - - /* - * The name isn't terminated, only opt is. Empty opt is fine, - * but brl_opt can be either empty or NULL. For more info, see - * _braille_console_setup(). - */ - strscpy(con->name, str, namelen + 1); - strscpy(con->opt, opt, CONSOLE_OPT_MAX); - if (brl_opt) { - strscpy(con->brl_opt, brl_opt, CONSOLE_BRL_OPT_MAX); - con->has_brl_opt = 1; - } - - return 0; - } - - return -ENOMEM; -} - -static struct console_option *console_opt_find(const char *name) -{ - struct console_option *con; - int i; - - for (i = 0; i < MAX_CMDLINECONSOLES; i++) { - con = &conopt[i]; - if (!strcmp(name, con->name)) - return con; - } - - return NULL; -} - -/** - * add_preferred_console_match - Adds a preferred console if a match is found - * @match: Expected console on kernel command line, such as console=DEVNAME:0.0 - * @name: Name of the console character device to add such as ttyS - * @idx: Index for the console - * - * Allows driver subsystems to add a console after translating the command - * line name to the character device name used for the console. Options are - * added automatically based on the kernel command line. Duplicate preferred - * consoles are ignored by __add_preferred_console(). - * - * Return: 0 on success, negative error code on failure. 
- */ -int add_preferred_console_match(const char *match, const char *name, - const short idx) -{ - struct console_option *con; - char *brl_opt = NULL; - - if (!match || !strlen(match) || !name || !strlen(name) || - idx < 0) - return -EINVAL; - - con = console_opt_find(match); - if (!con) - return -ENOENT; - - /* - * See __add_preferred_console(). It checks for NULL brl_options to set - * the preferred_console flag. Empty brl_opt instead of NULL leads into - * the preferred_console flag not set, and CON_CONSDEV not being set, - * and the boot console won't get disabled at the end of console_setup(). - */ - if (con->has_brl_opt) - brl_opt = con->brl_opt; - - console_opt_add_preferred_console(name, idx, con->opt, brl_opt); - - return 0; -} diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h index a125e0235589..3ca74ad391d6 100644 --- a/kernel/printk/console_cmdline.h +++ b/kernel/printk/console_cmdline.h @@ -2,12 +2,6 @@ #ifndef _CONSOLE_CMDLINE_H #define _CONSOLE_CMDLINE_H -#define MAX_CMDLINECONSOLES 8 - -int console_opt_save(const char *str, const char *brl_opt); -int console_opt_add_preferred_console(const char *name, const short idx, - char *options, char *brl_options); - struct console_cmdline { char name[16]; /* Name of the driver */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 420fd310129d..dddb15f48d59 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -383,6 +383,9 @@ static int console_locked; /* * Array of consoles built from command line options (console=) */ + +#define MAX_CMDLINECONSOLES 8 + static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int preferred_console = -1; @@ -2500,17 +2503,6 @@ static int __init console_setup(char *str) if (_braille_console_setup(&str, &brl_options)) return 1; - /* Save the console for driver subsystem use */ - if (console_opt_save(str, brl_options)) - return 1; - - /* Flag register_console() to not call try_enable_default_console() 
*/ - console_set_on_cmdline = 1; - - /* Don't attempt to parse a DEVNAME:0.0 style console */ - if (strchr(str, ':')) - return 1; - /* * Decode str into name, index, options. */ @@ -2541,13 +2533,6 @@ static int __init console_setup(char *str) } __setup("console=", console_setup); -/* Only called from add_preferred_console_match() */ -int console_opt_add_preferred_console(const char *name, const short idx, - char *options, char *brl_options) -{ - return __add_preferred_console(name, idx, options, brl_options, true); -} - /** * add_preferred_console - add a device to the list of preferred consoles. * @name: device name @@ -3522,7 +3507,7 @@ void register_console(struct console *newcon) * Note that a console with tty binding will have CON_CONSDEV * flag set and will be first in the list. */ - if (preferred_console < 0 && !console_set_on_cmdline) { + if (preferred_console < 0) { if (hlist_empty(&console_list) || !console_first()->device || console_first()->flags & CON_BOOT) { try_enable_default_console(newcon); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bcf2c4cc0522..59ce0841eb1f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -723,7 +723,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) rq->prev_irq_time += irq_delta; delta -= irq_delta; - psi_account_irqtime(rq->curr, irq_delta); delayacct_irq(rq->curr, irq_delta); #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING @@ -5665,7 +5664,7 @@ void sched_tick(void) { int cpu = smp_processor_id(); struct rq *rq = cpu_rq(cpu); - struct task_struct *curr = rq->curr; + struct task_struct *curr; struct rq_flags rf; unsigned long hw_pressure; u64 resched_latency; @@ -5677,6 +5676,9 @@ void sched_tick(void) rq_lock(rq, &rf); + curr = rq->curr; + psi_account_irqtime(rq, curr, NULL); + update_rq_clock(rq); hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure); @@ -6737,6 +6739,7 @@ static void __sched notrace __schedule(unsigned int 
sched_mode) ++*switch_count; migrate_disable_switch(rq, prev); + psi_account_irqtime(rq, prev, next); psi_sched_switch(prev, next, !task_on_rq_queued(prev)); trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index c75d1307d86d..9bedd148f007 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1804,8 +1804,13 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) * The replenish timer needs to be canceled. No * problem if it fires concurrently: boosted threads * are ignored in dl_task_timer(). + * + * If the timer callback was running (hrtimer_try_to_cancel == -1), + * it will eventually call put_task_struct(). */ - hrtimer_try_to_cancel(&p->dl.dl_timer); + if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 && + !dl_server(&p->dl)) + put_task_struct(p); p->dl.dl_throttled = 0; } } else if (!dl_prio(p->normal_prio)) { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8a5b1ae0aa55..24dda708b699 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9149,12 +9149,8 @@ static int detach_tasks(struct lb_env *env) break; env->loop++; - /* - * We've more or less seen every task there is, call it quits - * unless we haven't found any movable task yet. 
- */ - if (env->loop > env->loop_max && - !(env->flags & LBF_ALL_PINNED)) + /* We've more or less seen every task there is, call it quits */ + if (env->loop > env->loop_max) break; /* take a breather every nr_migrate tasks */ @@ -11393,9 +11389,7 @@ more_balance: if (env.flags & LBF_NEED_BREAK) { env.flags &= ~LBF_NEED_BREAK; - /* Stop if we tried all running tasks */ - if (env.loop < busiest->nr_running) - goto more_balance; + goto more_balance; } /* diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 7b4aa5809c0f..507d7b8d79af 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -773,6 +773,7 @@ static void psi_group_change(struct psi_group *group, int cpu, enum psi_states s; u32 state_mask; + lockdep_assert_rq_held(cpu_rq(cpu)); groupc = per_cpu_ptr(group->pcpu, cpu); /* @@ -991,22 +992,32 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, } #ifdef CONFIG_IRQ_TIME_ACCOUNTING -void psi_account_irqtime(struct task_struct *task, u32 delta) +void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev) { - int cpu = task_cpu(task); + int cpu = task_cpu(curr); struct psi_group *group; struct psi_group_cpu *groupc; - u64 now; + u64 now, irq; + s64 delta; if (static_branch_likely(&psi_disabled)) return; - if (!task->pid) + if (!curr->pid) + return; + + lockdep_assert_rq_held(rq); + group = task_psi_group(curr); + if (prev && task_psi_group(prev) == group) return; now = cpu_clock(cpu); + irq = irq_time_read(cpu); + delta = (s64)(irq - rq->psi_irq_time); + if (delta < 0) + return; + rq->psi_irq_time = irq; - group = task_psi_group(task); do { if (!group->enabled) continue; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a831af102070..ef20c61004eb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1126,6 +1126,7 @@ struct rq { #ifdef CONFIG_IRQ_TIME_ACCOUNTING u64 prev_irq_time; + u64 psi_irq_time; #endif #ifdef CONFIG_PARAVIRT u64 prev_steal_time; diff --git 
a/kernel/sched/stats.h b/kernel/sched/stats.h index 38f3698f5e5b..b02dfc322951 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -110,8 +110,12 @@ __schedstats_from_se(struct sched_entity *se) void psi_task_change(struct task_struct *task, int clear, int set); void psi_task_switch(struct task_struct *prev, struct task_struct *next, bool sleep); -void psi_account_irqtime(struct task_struct *task, u32 delta); - +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev); +#else +static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr, + struct task_struct *prev) {} +#endif /*CONFIG_IRQ_TIME_ACCOUNTING */ /* * PSI tracks state that persists across sleeps, such as iowaits and * memory stalls. As a result, it has to distinguish between sleeps, @@ -192,7 +196,8 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) {} static inline void psi_sched_switch(struct task_struct *prev, struct task_struct *next, bool sleep) {} -static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {} +static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr, + struct task_struct *prev) {} #endif /* CONFIG_PSI */ #ifdef CONFIG_SCHED_INFO diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index d7eee421d4bc..b696b85ac63e 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -46,8 +46,8 @@ COND_SYSCALL(io_getevents_time32); COND_SYSCALL(io_getevents); COND_SYSCALL(io_pgetevents_time32); COND_SYSCALL(io_pgetevents); -COND_SYSCALL_COMPAT(io_pgetevents_time32); COND_SYSCALL_COMPAT(io_pgetevents); +COND_SYSCALL_COMPAT(io_pgetevents_time64); COND_SYSCALL(io_uring_setup); COND_SYSCALL(io_uring_enter); COND_SYSCALL(io_uring_register); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 492c14aac642..b8ee320208d4 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1285,6 +1285,8 @@ void hrtimer_start_range_ns(struct hrtimer 
*timer, ktime_t tim, struct hrtimer_clock_base *base; unsigned long flags; + if (WARN_ON_ONCE(!timer->function)) + return; /* * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 166ad5444eea..721c3b221048 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -1136,7 +1136,7 @@ config PREEMPTIRQ_DELAY_TEST config SYNTH_EVENT_GEN_TEST tristate "Test module for in-kernel synthetic event generation" - depends on SYNTH_EVENTS + depends on SYNTH_EVENTS && m help This option creates a test module to check the base functionality of in-kernel synthetic event definition and @@ -1149,7 +1149,7 @@ config SYNTH_EVENT_GEN_TEST config KPROBE_EVENT_GEN_TEST tristate "Test module for in-kernel kprobe event generation" - depends on KPROBE_EVENTS + depends on KPROBE_EVENTS && m help This option creates a test module to check the base functionality of in-kernel kprobe event definition. 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 65208d3b5ed9..eacab4020508 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6969,7 +6969,7 @@ allocate_ftrace_mod_map(struct module *mod, return mod_map; } -static const char * +static int ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, unsigned long addr, unsigned long *size, unsigned long *off, char *sym) @@ -6990,21 +6990,18 @@ ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, *size = found_func->size; if (off) *off = addr - found_func->ip; - if (sym) - strscpy(sym, found_func->name, KSYM_NAME_LEN); - - return found_func->name; + return strscpy(sym, found_func->name, KSYM_NAME_LEN); } - return NULL; + return 0; } -const char * +int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { struct ftrace_mod_map *mod_map; - const char *ret = NULL; + int ret = 0; /* mod_map is freed via call_rcu() */ preempt_disable(); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 003474c9a77d..3fbaecfc88c2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -125,6 +125,7 @@ enum wq_internal_consts { HIGHPRI_NICE_LEVEL = MIN_NICE, WQ_NAME_LEN = 32, + WORKER_ID_LEN = 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */ }; /* @@ -2742,6 +2743,26 @@ static void worker_detach_from_pool(struct worker *worker) complete(detach_completion); } +static int format_worker_id(char *buf, size_t size, struct worker *worker, + struct worker_pool *pool) +{ + if (worker->rescue_wq) + return scnprintf(buf, size, "kworker/R-%s", + worker->rescue_wq->name); + + if (pool) { + if (pool->cpu >= 0) + return scnprintf(buf, size, "kworker/%d:%d%s", + pool->cpu, worker->id, + pool->attrs->nice < 0 ? 
"H" : ""); + else + return scnprintf(buf, size, "kworker/u%d:%d", + pool->id, worker->id); + } else { + return scnprintf(buf, size, "kworker/dying"); + } +} + /** * create_worker - create a new workqueue worker * @pool: pool the new worker will belong to @@ -2758,7 +2779,6 @@ static struct worker *create_worker(struct worker_pool *pool) { struct worker *worker; int id; - char id_buf[23]; /* ID is needed to determine kthread name */ id = ida_alloc(&pool->worker_ida, GFP_KERNEL); @@ -2777,17 +2797,14 @@ static struct worker *create_worker(struct worker_pool *pool) worker->id = id; if (!(pool->flags & POOL_BH)) { - if (pool->cpu >= 0) - snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id, - pool->attrs->nice < 0 ? "H" : ""); - else - snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id); + char id_buf[WORKER_ID_LEN]; + format_worker_id(id_buf, sizeof(id_buf), worker, pool); worker->task = kthread_create_on_node(worker_thread, worker, - pool->node, "kworker/%s", id_buf); + pool->node, "%s", id_buf); if (IS_ERR(worker->task)) { if (PTR_ERR(worker->task) == -EINTR) { - pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n", + pr_err("workqueue: Interrupted when creating a worker thread \"%s\"\n", id_buf); } else { pr_err_once("workqueue: Failed to create a worker thread: %pe", @@ -3350,7 +3367,6 @@ woke_up: raw_spin_unlock_irq(&pool->lock); set_pf_worker(false); - set_task_comm(worker->task, "kworker/dying"); ida_free(&pool->worker_ida, worker->id); worker_detach_from_pool(worker); WARN_ON_ONCE(!list_empty(&worker->entry)); @@ -5542,6 +5558,7 @@ static int wq_clamp_max_active(int max_active, unsigned int flags, static int init_rescuer(struct workqueue_struct *wq) { struct worker *rescuer; + char id_buf[WORKER_ID_LEN]; int ret; if (!(wq->flags & WQ_MEM_RECLAIM)) @@ -5555,7 +5572,9 @@ static int init_rescuer(struct workqueue_struct *wq) } rescuer->rescue_wq = wq; - rescuer->task = kthread_create(rescuer_thread, rescuer, "kworker/R-%s", 
wq->name); + format_worker_id(id_buf, sizeof(id_buf), rescuer, NULL); + + rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf); if (IS_ERR(rescuer->task)) { ret = PTR_ERR(rescuer->task); pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe", @@ -6384,19 +6403,15 @@ void show_freezable_workqueues(void) /* used to show worker information through /proc/PID/{comm,stat,status} */ void wq_worker_comm(char *buf, size_t size, struct task_struct *task) { - int off; - - /* always show the actual comm */ - off = strscpy(buf, task->comm, size); - if (off < 0) - return; - /* stabilize PF_WQ_WORKER and worker pool association */ mutex_lock(&wq_pool_attach_mutex); if (task->flags & PF_WQ_WORKER) { struct worker *worker = kthread_data(task); struct worker_pool *pool = worker->pool; + int off; + + off = format_worker_id(buf, size, worker, pool); if (pool) { raw_spin_lock_irq(&pool->lock); @@ -6415,6 +6430,8 @@ void wq_worker_comm(char *buf, size_t size, struct task_struct *task) } raw_spin_unlock_irq(&pool->lock); } + } else { + strscpy(buf, task->comm, size); } mutex_unlock(&wq_pool_attach_mutex); diff --git a/lib/Kconfig b/lib/Kconfig index d33a268bc256..b0a76dff5c18 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -539,13 +539,7 @@ config CPUMASK_OFFSTACK stack overflow. config FORCE_NR_CPUS - bool "Set number of CPUs at compile time" - depends on SMP && EXPERT && !COMPILE_TEST - help - Say Yes if you have NR_CPUS set to an actual number of possible - CPUs in your system, not to a default value. This forces the core - code to rely on compile-time value and optimize kernel routines - better. 
+ def_bool !SMP config CPU_RMAP bool diff --git a/lib/Makefile b/lib/Makefile index 3b1769045651..30337431d10e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -426,3 +426,7 @@ $(obj)/$(TEST_FORTIFY_LOG): $(addprefix $(obj)/, $(TEST_FORTIFY_LOGS)) FORCE ifeq ($(CONFIG_FORTIFY_SOURCE),y) $(obj)/string.o: $(obj)/$(TEST_FORTIFY_LOG) endif + +# Some architectures define __NO_FORTIFY if __SANITIZE_ADDRESS__ is undefined. +# Pass CFLAGS_KASAN to avoid warnings. +$(foreach x, $(patsubst %.log,%.o,$(TEST_FORTIFY_LOGS)), $(eval KASAN_SANITIZE_$(x) := y)) diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 11ed973ac359..c347b8b72d78 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -227,6 +227,7 @@ struct page_ext_operations page_alloc_tagging_ops = { }; EXPORT_SYMBOL(page_alloc_tagging_ops); +#ifdef CONFIG_SYSCTL static struct ctl_table memory_allocation_profiling_sysctls[] = { { .procname = "mem_profiling", @@ -241,6 +242,17 @@ static struct ctl_table memory_allocation_profiling_sysctls[] = { { } }; +static void __init sysctl_init(void) +{ + if (!mem_profiling_support) + memory_allocation_profiling_sysctls[0].mode = 0444; + + register_sysctl_init("vm", memory_allocation_profiling_sysctls); +} +#else /* CONFIG_SYSCTL */ +static inline void sysctl_init(void) {} +#endif /* CONFIG_SYSCTL */ + static int __init alloc_tag_init(void) { const struct codetag_type_desc desc = { @@ -253,9 +265,7 @@ static int __init alloc_tag_init(void) if (IS_ERR(alloc_tag_cttype)) return PTR_ERR(alloc_tag_cttype); - if (!mem_profiling_support) - memory_allocation_profiling_sysctls[0].mode = 0444; - register_sysctl_init("vm", memory_allocation_profiling_sysctls); + sysctl_init(); procfs_init(); return 0; diff --git a/lib/build_OID_registry b/lib/build_OID_registry index 56d8bafeb848..8267e8d71338 100755 --- a/lib/build_OID_registry +++ b/lib/build_OID_registry @@ -38,7 +38,9 @@ close IN_FILE || die; # open C_FILE, ">$ARGV[1]" or die; print C_FILE "/*\n"; -print C_FILE " * Automatically generated 
by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n"; +my $scriptname = $0; +$scriptname =~ s#^\Q$abs_srctree/\E##; +print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n"; print C_FILE " */\n"; # diff --git a/lib/closure.c b/lib/closure.c index 07409e9e35a5..116afae2eed9 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -13,14 +13,25 @@ #include <linux/seq_file.h> #include <linux/sched/debug.h> -static inline void closure_put_after_sub(struct closure *cl, int flags) +static inline void closure_put_after_sub_checks(int flags) { int r = flags & CLOSURE_REMAINING_MASK; - BUG_ON(flags & CLOSURE_GUARD_MASK); - BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); + if (WARN(flags & CLOSURE_GUARD_MASK, + "closure has guard bits set: %x (%u)", + flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r))) + r &= ~CLOSURE_GUARD_MASK; + + WARN(!r && (flags & ~CLOSURE_DESTRUCTOR), + "closure ref hit 0 with incorrect flags set: %x (%u)", + flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags)); +} + +static inline void closure_put_after_sub(struct closure *cl, int flags) +{ + closure_put_after_sub_checks(flags); - if (!r) { + if (!(flags & CLOSURE_REMAINING_MASK)) { smp_acquire__after_ctrl_dep(); cl->closure_get_happened = false; @@ -139,6 +150,41 @@ void __sched __closure_sync(struct closure *cl) } EXPORT_SYMBOL(__closure_sync); +/* + * closure_return_sync - finish running a closure, synchronously (i.e. waiting + * for outstanding get()s to finish) and returning once closure refcount is 0. + * + * Unlike closure_sync() this doesn't reinit the ref to 1; subsequent + * closure_get_not_zero() calls waill fail. 
+ */ +void __sched closure_return_sync(struct closure *cl) +{ + struct closure_syncer s = { .task = current }; + + cl->s = &s; + set_closure_fn(cl, closure_sync_fn, NULL); + + unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR, + &cl->remaining); + + closure_put_after_sub_checks(flags); + + if (unlikely(flags & CLOSURE_REMAINING_MASK)) { + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (s.done) + break; + schedule(); + } + + __set_current_state(TASK_RUNNING); + } + + if (cl->parent) + closure_put(cl->parent); +} +EXPORT_SYMBOL(closure_return_sync); + int __sched __closure_sync_timeout(struct closure *cl, unsigned long timeout) { struct closure_syncer s = { .task = current }; @@ -198,6 +244,9 @@ void closure_debug_destroy(struct closure *cl) { unsigned long flags; + if (cl->magic == CLOSURE_MAGIC_STACK) + return; + BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); cl->magic = CLOSURE_MAGIC_DEAD; diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c index f9cc467334ce..e17d520f532c 100644 --- a/lib/fortify_kunit.c +++ b/lib/fortify_kunit.c @@ -374,7 +374,7 @@ static const char * const test_strs[] = { for (i = 0; i < ARRAY_SIZE(test_strs); i++) { \ len = strlen(test_strs[i]); \ KUNIT_EXPECT_EQ(test, __builtin_constant_p(len), 0); \ - checker(len, kmemdup_array(test_strs[i], len, 1, gfp), \ + checker(len, kmemdup_array(test_strs[i], 1, len, gfp), \ kfree(p)); \ checker(len, kmemdup(test_strs[i], len, gfp), \ kfree(p)); \ diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index 4ef31b0bb74d..d305b0c054bb 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -1178,14 +1178,28 @@ struct foo { s16 array[] __counted_by(counter); }; +struct bar { + int a; + u32 counter; + s16 array[]; +}; + static void DEFINE_FLEX_test(struct kunit *test) { - DEFINE_RAW_FLEX(struct foo, two, array, 2); + /* Using _RAW_ on a __counted_by struct will initialize "counter" to zero */ + DEFINE_RAW_FLEX(struct foo, two_but_zero, array, 2); 
+#if __has_attribute(__counted_by__) + int expected_raw_size = sizeof(struct foo); +#else + int expected_raw_size = sizeof(struct foo) + 2 * sizeof(s16); +#endif + /* Without annotation, it will always be on-stack size. */ + DEFINE_RAW_FLEX(struct bar, two, array, 2); DEFINE_FLEX(struct foo, eight, array, counter, 8); DEFINE_FLEX(struct foo, empty, array, counter, 0); - KUNIT_EXPECT_EQ(test, __struct_size(two), - sizeof(struct foo) + sizeof(s16) + sizeof(s16)); + KUNIT_EXPECT_EQ(test, __struct_size(two_but_zero), expected_raw_size); + KUNIT_EXPECT_EQ(test, __struct_size(two), sizeof(struct bar) + 2 * sizeof(s16)); KUNIT_EXPECT_EQ(test, __struct_size(eight), 24); KUNIT_EXPECT_EQ(test, __struct_size(empty), sizeof(struct foo)); } diff --git a/lib/string_helpers_kunit.c b/lib/string_helpers_kunit.c index f88e39fd68d6..c853046183d2 100644 --- a/lib/string_helpers_kunit.c +++ b/lib/string_helpers_kunit.c @@ -625,4 +625,5 @@ static struct kunit_suite string_helpers_test_suite = { kunit_test_suites(&string_helpers_test_suite); +MODULE_DESCRIPTION("Test cases for string helpers module"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/string_kunit.c b/lib/string_kunit.c index 2a812decf14b..c919e3293da6 100644 --- a/lib/string_kunit.c +++ b/lib/string_kunit.c @@ -633,4 +633,5 @@ static struct kunit_suite string_test_suite = { kunit_test_suites(&string_test_suite); +MODULE_DESCRIPTION("Test cases for string functions"); MODULE_LICENSE("GPL v2"); diff --git a/mm/compaction.c b/mm/compaction.c index e731d45befc7..739b1bf3d637 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -79,6 +79,13 @@ static inline bool is_via_compact_memory(int order) { return false; } #define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT) #endif +static struct page *mark_allocated_noprof(struct page *page, unsigned int order, gfp_t gfp_flags) +{ + post_alloc_hook(page, order, __GFP_MOVABLE); + return page; +} +#define mark_allocated(...) 
alloc_hooks(mark_allocated_noprof(__VA_ARGS__)) + static void split_map_pages(struct list_head *freepages) { unsigned int i, order; @@ -93,7 +100,7 @@ static void split_map_pages(struct list_head *freepages) nr_pages = 1 << order; - post_alloc_hook(page, order, __GFP_MOVABLE); + mark_allocated(page, order, __GFP_MOVABLE); if (order) split_page(page, order); @@ -122,7 +129,7 @@ static unsigned long release_free_list(struct list_head *freepages) * Convert free pages into post allocation pages, so * that we can free them via __free_page. */ - post_alloc_hook(page, order, __GFP_MOVABLE); + mark_allocated(page, order, __GFP_MOVABLE); __free_pages(page, order); if (pfn > high_pfn) high_pfn = pfn; diff --git a/mm/damon/core.c b/mm/damon/core.c index 6392f1cc97a3..e66823d6b10b 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1358,14 +1358,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, * access frequencies are similar. This is for minimizing the monitoring * overhead under the dynamically changeable access pattern. If a merge was * unnecessarily made, later 'kdamond_split_regions()' will revert it. + * + * The total number of regions could be higher than the user-defined limit, + * max_nr_regions for some cases. For example, the user can update + * max_nr_regions to a number that lower than the current number of regions + * while DAMON is running. For such a case, repeat merging until the limit is + * met while increasing @threshold up to possible maximum level. */ static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, unsigned long sz_limit) { struct damon_target *t; - - damon_for_each_target(t, c) - damon_merge_regions_of(t, threshold, sz_limit); + unsigned int nr_regions; + unsigned int max_thres; + + max_thres = c->attrs.aggr_interval / + (c->attrs.sample_interval ? 
c->attrs.sample_interval : 1); + do { + nr_regions = 0; + damon_for_each_target(t, c) { + damon_merge_regions_of(t, threshold, sz_limit); + nr_regions += damon_nr_regions(t); + } + threshold = max(1, threshold * 2); + } while (nr_regions > c->attrs.max_nr_regions && + threshold / 2 < max_thres); } /* diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index b104a353b532..e4969fb54da3 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -40,22 +40,7 @@ * Please refer Documentation/mm/arch_pgtable_helpers.rst for the semantics * expectations that are being validated here. All future changes in here * or the documentation need to be in sync. - * - * On s390 platform, the lower 4 bits are used to identify given page table - * entry type. But these bits might affect the ability to clear entries with - * pxx_clear() because of how dynamic page table folding works on s390. So - * while loading up the entries do not change the lower 4 bits. It does not - * have affect any other platform. Also avoid the 62nd bit on ppc64 that is - * used to mark a pte entry. 
*/ -#define S390_SKIP_MASK GENMASK(3, 0) -#if __BITS_PER_LONG == 64 -#define PPC64_SKIP_MASK GENMASK(62, 62) -#else -#define PPC64_SKIP_MASK 0x0 -#endif -#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK) -#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK) #define RANDOM_NZVALUE GENMASK(7, 0) struct pgtable_debug_args { @@ -511,8 +496,7 @@ static void __init pud_clear_tests(struct pgtable_debug_args *args) return; pr_debug("Validating PUD clear\n"); - pud = __pud(pud_val(pud) | RANDOM_ORVALUE); - WRITE_ONCE(*args->pudp, pud); + WARN_ON(pud_none(pud)); pud_clear(args->pudp); pud = READ_ONCE(*args->pudp); WARN_ON(!pud_none(pud)); @@ -548,8 +532,7 @@ static void __init p4d_clear_tests(struct pgtable_debug_args *args) return; pr_debug("Validating P4D clear\n"); - p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE); - WRITE_ONCE(*args->p4dp, p4d); + WARN_ON(p4d_none(p4d)); p4d_clear(args->p4dp); p4d = READ_ONCE(*args->p4dp); WARN_ON(!p4d_none(p4d)); @@ -582,8 +565,7 @@ static void __init pgd_clear_tests(struct pgtable_debug_args *args) return; pr_debug("Validating PGD clear\n"); - pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE); - WRITE_ONCE(*args->pgdp, pgd); + WARN_ON(pgd_none(pgd)); pgd_clear(args->pgdp); pgd = READ_ONCE(*args->pgdp); WARN_ON(!pgd_none(pgd)); @@ -634,10 +616,8 @@ static void __init pte_clear_tests(struct pgtable_debug_args *args) if (WARN_ON(!args->ptep)) return; -#ifndef CONFIG_RISCV - pte = __pte(pte_val(pte) | RANDOM_ORVALUE); -#endif set_pte_at(args->mm, args->vaddr, args->ptep, pte); + WARN_ON(pte_none(pte)); flush_dcache_page(page); barrier(); ptep_clear(args->mm, args->vaddr, args->ptep); @@ -650,8 +630,7 @@ static void __init pmd_clear_tests(struct pgtable_debug_args *args) pmd_t pmd = READ_ONCE(*args->pmdp); pr_debug("Validating PMD clear\n"); - pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE); - WRITE_ONCE(*args->pmdp, pmd); + WARN_ON(pmd_none(pmd)); pmd_clear(args->pmdp); pmd = READ_ONCE(*args->pmdp); WARN_ON(!pmd_none(pmd)); diff 
--git a/mm/filemap.c b/mm/filemap.c index 876cc64aadd7..657bcd887fdb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1847,7 +1847,7 @@ repeat: if (!folio || xa_is_value(folio)) goto out; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto repeat; if (unlikely(folio != xas_reload(&xas))) { @@ -2001,7 +2001,7 @@ retry: if (!folio || xa_is_value(folio)) return folio; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto reset; if (unlikely(folio != xas_reload(xas))) { @@ -2181,7 +2181,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, if (xa_is_value(folio)) goto update_start; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto retry; if (unlikely(folio != xas_reload(&xas))) @@ -2313,7 +2313,7 @@ static void filemap_get_read_batch(struct address_space *mapping, break; if (xa_is_sibling(folio)) break; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto retry; if (unlikely(folio != xas_reload(&xas))) @@ -3124,7 +3124,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Use the readahead code, even if readahead is disabled */ - if (vm_flags & VM_HUGEPAGE) { + if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1); ra->size = HPAGE_PMD_NR; @@ -3231,7 +3231,8 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf) if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) return 0; - ptep = pte_offset_map(vmf->pmd, vmf->address); + ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address, + &vmf->ptl); if (unlikely(!ptep)) return VM_FAULT_NOPAGE; @@ -3472,7 +3473,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas, continue; if (folio_test_locked(folio)) continue; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) continue; /* Has the page moved or been split? 
*/ if (unlikely(folio != xas_reload(xas))) @@ -4248,6 +4249,9 @@ static void filemap_cachestat(struct address_space *mapping, XA_STATE(xas, &mapping->i_pages, first_index); struct folio *folio; + /* Flush stats (and potentially sleep) outside the RCU read section. */ + mem_cgroup_flush_stats_ratelimited(NULL); + rcu_read_lock(); xas_for_each(&xas, folio, last_index) { int order; @@ -4311,7 +4315,7 @@ static void filemap_cachestat(struct address_space *mapping, goto resched; } #endif - if (workingset_test_recent(shadow, true, &workingset)) + if (workingset_test_recent(shadow, true, &workingset, false)) cs->nr_recently_evicted += nr_pages; goto resched; @@ -76,7 +76,7 @@ retry: folio = page_folio(page); if (WARN_ON_ONCE(folio_ref_count(folio) < 0)) return NULL; - if (unlikely(!folio_ref_try_add_rcu(folio, refs))) + if (unlikely(!folio_ref_try_add(folio, refs))) return NULL; /* @@ -97,95 +97,6 @@ retry: return folio; } -/** - * try_grab_folio() - Attempt to get or pin a folio. - * @page: pointer to page to be grabbed - * @refs: the value to (effectively) add to the folio's refcount - * @flags: gup flags: these are the FOLL_* flag values. - * - * "grab" names in this file mean, "look at flags to decide whether to use - * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. - * - * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the - * same time. (That's true throughout the get_user_pages*() and - * pin_user_pages*() APIs.) Cases: - * - * FOLL_GET: folio's refcount will be incremented by @refs. - * - * FOLL_PIN on large folios: folio's refcount will be incremented by - * @refs, and its pincount will be incremented by @refs. - * - * FOLL_PIN on single-page folios: folio's refcount will be incremented by - * @refs * GUP_PIN_COUNTING_BIAS. - * - * Return: The folio containing @page (with refcount appropriately - * incremented) for success, or NULL upon failure. 
If neither FOLL_GET - * nor FOLL_PIN was set, that's considered failure, and furthermore, - * a likely bug in the caller, so a warning is also emitted. - */ -struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) -{ - struct folio *folio; - - if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) - return NULL; - - if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) - return NULL; - - if (flags & FOLL_GET) - return try_get_folio(page, refs); - - /* FOLL_PIN is set */ - - /* - * Don't take a pin on the zero page - it's not going anywhere - * and it is used in a *lot* of places. - */ - if (is_zero_page(page)) - return page_folio(page); - - folio = try_get_folio(page, refs); - if (!folio) - return NULL; - - /* - * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a - * right zone, so fail and let the caller fall back to the slow - * path. - */ - if (unlikely((flags & FOLL_LONGTERM) && - !folio_is_longterm_pinnable(folio))) { - if (!put_devmap_managed_folio_refs(folio, refs)) - folio_put_refs(folio, refs); - return NULL; - } - - /* - * When pinning a large folio, use an exact count to track it. - * - * However, be sure to *also* increment the normal folio - * refcount field at least once, so that the folio really - * is pinned. That's why the refcount from the earlier - * try_get_folio() is left intact. - */ - if (folio_test_large(folio)) - atomic_add(refs, &folio->_pincount); - else - folio_ref_add(folio, - refs * (GUP_PIN_COUNTING_BIAS - 1)); - /* - * Adjust the pincount before re-checking the PTE for changes. - * This is essentially a smp_mb() and is paired with a memory - * barrier in folio_try_share_anon_rmap_*(). 
- */ - smp_mb__after_atomic(); - - node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); - - return folio; -} - static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) { if (flags & FOLL_PIN) { @@ -203,58 +114,59 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) } /** - * try_grab_page() - elevate a page's refcount by a flag-dependent amount - * @page: pointer to page to be grabbed - * @flags: gup flags: these are the FOLL_* flag values. + * try_grab_folio() - add a folio's refcount by a flag-dependent amount + * @folio: pointer to folio to be grabbed + * @refs: the value to (effectively) add to the folio's refcount + * @flags: gup flags: these are the FOLL_* flag values * * This might not do anything at all, depending on the flags argument. * * "grab" names in this file mean, "look at flags to decide whether to use - * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. + * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. * * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same - * time. Cases: please see the try_grab_folio() documentation, with - * "refs=1". + * time. * * Return: 0 for success, or if no action was required (if neither FOLL_PIN * nor FOLL_GET was set, nothing is done). A negative error code for failure: * - * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not + * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not * be grabbed. + * + * It is called when we have a stable reference for the folio, typically in + * GUP slow path. 
*/ -int __must_check try_grab_page(struct page *page, unsigned int flags) +int __must_check try_grab_folio(struct folio *folio, int refs, + unsigned int flags) { - struct folio *folio = page_folio(page); - if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) return -ENOMEM; - if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) + if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page))) return -EREMOTEIO; if (flags & FOLL_GET) - folio_ref_inc(folio); + folio_ref_add(folio, refs); else if (flags & FOLL_PIN) { /* * Don't take a pin on the zero page - it's not going anywhere * and it is used in a *lot* of places. */ - if (is_zero_page(page)) + if (is_zero_folio(folio)) return 0; /* - * Similar to try_grab_folio(): be sure to *also* - * increment the normal page refcount field at least once, + * Increment the normal page refcount field at least once, * so that the page really is pinned. */ if (folio_test_large(folio)) { - folio_ref_add(folio, 1); - atomic_add(1, &folio->_pincount); + folio_ref_add(folio, refs); + atomic_add(refs, &folio->_pincount); } else { - folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); + folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS); } - node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); } return 0; @@ -515,6 +427,102 @@ static int record_subpages(struct page *page, unsigned long sz, return nr; } + +/** + * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. + * @page: pointer to page to be grabbed + * @refs: the value to (effectively) add to the folio's refcount + * @flags: gup flags: these are the FOLL_* flag values. + * + * "grab" names in this file mean, "look at flags to decide whether to use + * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. + * + * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the + * same time. 
(That's true throughout the get_user_pages*() and + * pin_user_pages*() APIs.) Cases: + * + * FOLL_GET: folio's refcount will be incremented by @refs. + * + * FOLL_PIN on large folios: folio's refcount will be incremented by + * @refs, and its pincount will be incremented by @refs. + * + * FOLL_PIN on single-page folios: folio's refcount will be incremented by + * @refs * GUP_PIN_COUNTING_BIAS. + * + * Return: The folio containing @page (with refcount appropriately + * incremented) for success, or NULL upon failure. If neither FOLL_GET + * nor FOLL_PIN was set, that's considered failure, and furthermore, + * a likely bug in the caller, so a warning is also emitted. + * + * It uses add ref unless zero to elevate the folio refcount and must be called + * in fast path only. + */ +static struct folio *try_grab_folio_fast(struct page *page, int refs, + unsigned int flags) +{ + struct folio *folio; + + /* Raise warn if it is not called in fast GUP */ + VM_WARN_ON_ONCE(!irqs_disabled()); + + if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) + return NULL; + + if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) + return NULL; + + if (flags & FOLL_GET) + return try_get_folio(page, refs); + + /* FOLL_PIN is set */ + + /* + * Don't take a pin on the zero page - it's not going anywhere + * and it is used in a *lot* of places. + */ + if (is_zero_page(page)) + return page_folio(page); + + folio = try_get_folio(page, refs); + if (!folio) + return NULL; + + /* + * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a + * right zone, so fail and let the caller fall back to the slow + * path. + */ + if (unlikely((flags & FOLL_LONGTERM) && + !folio_is_longterm_pinnable(folio))) { + if (!put_devmap_managed_folio_refs(folio, refs)) + folio_put_refs(folio, refs); + return NULL; + } + + /* + * When pinning a large folio, use an exact count to track it. 
+ * + * However, be sure to *also* increment the normal folio + * refcount field at least once, so that the folio really + * is pinned. That's why the refcount from the earlier + * try_get_folio() is left intact. + */ + if (folio_test_large(folio)) + atomic_add(refs, &folio->_pincount); + else + folio_ref_add(folio, + refs * (GUP_PIN_COUNTING_BIAS - 1)); + /* + * Adjust the pincount before re-checking the PTE for changes. + * This is essentially a smp_mb() and is paired with a memory + * barrier in folio_try_share_anon_rmap_*(). + */ + smp_mb__after_atomic(); + + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); + + return folio; +} #endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */ #ifdef CONFIG_ARCH_HAS_HUGEPD @@ -535,7 +543,7 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, */ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, unsigned int flags, - struct page **pages, int *nr) + struct page **pages, int *nr, bool fast) { unsigned long pte_end; struct page *page; @@ -558,9 +566,15 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz page = pte_page(pte); refs = record_subpages(page, sz, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); - if (!folio) - return 0; + if (fast) { + folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + } else { + folio = page_folio(page); + if (try_grab_folio(folio, refs, flags)) + return 0; + } if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { gup_put_folio(folio, refs, flags); @@ -588,7 +602,7 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, - struct page **pages, int *nr) + struct page **pages, int *nr, bool fast) { pte_t *ptep; unsigned long sz = 1UL << 
hugepd_shift(hugepd); @@ -598,7 +612,8 @@ static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, ptep = hugepte_offset(hugepd, addr, pdshift); do { next = hugepte_addr_end(addr, end, sz); - ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr); + ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr, + fast); if (ret != 1) return ret; } while (ptep++, addr = next, addr != end); @@ -625,7 +640,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, ptep = hugepte_offset(hugepd, addr, pdshift); ptl = huge_pte_lock(h, vma->vm_mm, ptep); ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE, - flags, &page, &nr); + flags, &page, &nr, false); spin_unlock(ptl); if (ret == 1) { @@ -642,7 +657,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, - struct page **pages, int *nr) + struct page **pages, int *nr, bool fast) { return 0; } @@ -729,7 +744,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma, gup_must_unshare(vma, flags, page)) return ERR_PTR(-EMLINK); - ret = try_grab_page(page, flags); + ret = try_grab_folio(page_folio(page), 1, flags); if (ret) page = ERR_PTR(ret); else @@ -806,7 +821,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma, VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && !PageAnonExclusive(page), page); - ret = try_grab_page(page, flags); + ret = try_grab_folio(page_folio(page), 1, flags); if (ret) return ERR_PTR(ret); @@ -968,8 +983,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && !PageAnonExclusive(page), page); - /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */ - ret = try_grab_page(page, flags); + /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. 
*/ + ret = try_grab_folio(page_folio(page), 1, flags); if (unlikely(ret)) { page = ERR_PTR(ret); goto out; @@ -1233,7 +1248,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, goto unmap; *page = pte_page(entry); } - ret = try_grab_page(*page, gup_flags); + ret = try_grab_folio(page_folio(*page), 1, gup_flags); if (unlikely(ret)) goto unmap; out: @@ -1636,20 +1651,19 @@ next_page: * pages. */ if (page_increm > 1) { - struct folio *folio; + struct folio *folio = page_folio(page); /* * Since we already hold refcount on the * large folio, this should never fail. */ - folio = try_grab_folio(page, page_increm - 1, - foll_flags); - if (WARN_ON_ONCE(!folio)) { + if (try_grab_folio(folio, page_increm - 1, + foll_flags)) { /* * Release the 1st page ref if the * folio is problematic, fail hard. */ - gup_put_folio(page_folio(page), 1, + gup_put_folio(folio, 1, foll_flags); ret = -EFAULT; goto out; @@ -2797,7 +2811,6 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * This code is based heavily on the PowerPC implementation by Nick Piggin. */ #ifdef CONFIG_HAVE_GUP_FAST - /* * Used in the GUP-fast path to determine whether GUP is permitted to work on * a specific folio. 
@@ -2962,7 +2975,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); - folio = try_grab_folio(page, 1, flags); + folio = try_grab_folio_fast(page, 1, flags); if (!folio) goto pte_unmap; @@ -3049,7 +3062,7 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr, break; } - folio = try_grab_folio(page, 1, flags); + folio = try_grab_folio_fast(page, 1, flags); if (!folio) { gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); break; @@ -3138,7 +3151,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, page = pmd_page(orig); refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); + folio = try_grab_folio_fast(page, refs, flags); if (!folio) return 0; @@ -3182,7 +3195,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, page = pud_page(orig); refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); + folio = try_grab_folio_fast(page, refs, flags); if (!folio) return 0; @@ -3222,7 +3235,7 @@ static int gup_fast_pgd_leaf(pgd_t orig, pgd_t *pgdp, unsigned long addr, page = pgd_page(orig); refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); + folio = try_grab_folio_fast(page, refs, flags); if (!folio) return 0; @@ -3276,7 +3289,8 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, * pmd format and THP pmd format */ if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr, - PMD_SHIFT, next, flags, pages, nr) != 1) + PMD_SHIFT, next, flags, pages, nr, + true) != 1) return 0; } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags, pages, nr)) @@ -3306,7 +3320,8 @@ static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, return 0; } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { if (gup_hugepd(NULL, 
__hugepd(pud_val(pud)), addr, - PUD_SHIFT, next, flags, pages, nr) != 1) + PUD_SHIFT, next, flags, pages, nr, + true) != 1) return 0; } else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags, pages, nr)) @@ -3333,7 +3348,8 @@ static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, BUILD_BUG_ON(p4d_leaf(p4d)); if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr, - P4D_SHIFT, next, flags, pages, nr) != 1) + P4D_SHIFT, next, flags, pages, nr, + true) != 1) return 0; } else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags, pages, nr)) @@ -3362,7 +3378,8 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end, return; } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr, - PGDIR_SHIFT, next, flags, pages, nr) != 1) + PGDIR_SHIFT, next, flags, pages, nr, + true) != 1) return; } else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags, pages, nr)) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 89932fd0f62e..2120f7478e55 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1331,7 +1331,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, if (!*pgmap) return ERR_PTR(-EFAULT); page = pfn_to_page(pfn); - ret = try_grab_page(page, flags); + ret = try_grab_folio(page_folio(page), 1, flags); if (ret) page = ERR_PTR(ret); @@ -3009,30 +3009,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, if (new_order >= folio_order(folio)) return -EINVAL; - /* Cannot split anonymous THP to order-1 */ - if (new_order == 1 && folio_test_anon(folio)) { - VM_WARN_ONCE(1, "Cannot split to order-1 folio"); - return -EINVAL; - } - - if (new_order) { - /* Only swapping a whole PMD-mapped folio is supported */ - if (folio_test_swapcache(folio)) + if (folio_test_anon(folio)) { + /* order-1 is not supported for anonymous THP. 
*/ + if (new_order == 1) { + VM_WARN_ONCE(1, "Cannot split to order-1 folio"); return -EINVAL; + } + } else if (new_order) { /* Split shmem folio to non-zero order not supported */ if (shmem_mapping(folio->mapping)) { VM_WARN_ONCE(1, "Cannot split shmem folio to non-0 order"); return -EINVAL; } - /* No split if the file system does not support large folio */ - if (!mapping_large_folio_support(folio->mapping)) { + /* + * No split if the file system does not support large folio. + * Note that we might still have THPs in such mappings due to + * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping + * does not actually support large folios properly. + */ + if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && + !mapping_large_folio_support(folio->mapping)) { VM_WARN_ONCE(1, "Cannot split file folio to non-0 order"); return -EINVAL; } } + /* Only swapping a whole PMD-mapped folio is supported */ + if (folio_test_swapcache(folio) && new_order) + return -EINVAL; is_hzp = is_huge_zero_folio(folio); if (is_hzp) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f35abff8be60..43e1af868cfd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1625,13 +1625,10 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio, * folio appears as just a compound page. Otherwise, wait until after * allocating vmemmap to clear the flag. * - * A reference is held on the folio, except in the case of demote. - * * Must be called with hugetlb lock held. 
*/ -static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, - bool adjust_surplus, - bool demote) +static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, + bool adjust_surplus) { int nid = folio_nid(folio); @@ -1645,6 +1642,7 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, list_del(&folio->lru); if (folio_test_hugetlb_freed(folio)) { + folio_clear_hugetlb_freed(folio); h->free_huge_pages--; h->free_huge_pages_node[nid]--; } @@ -1661,33 +1659,13 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, if (!folio_test_hugetlb_vmemmap_optimized(folio)) __folio_clear_hugetlb(folio); - /* - * In the case of demote we do not ref count the page as it will soon - * be turned into a page of smaller size. - */ - if (!demote) - folio_ref_unfreeze(folio, 1); - h->nr_huge_pages--; h->nr_huge_pages_node[nid]--; } -static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, - bool adjust_surplus) -{ - __remove_hugetlb_folio(h, folio, adjust_surplus, false); -} - -static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio, - bool adjust_surplus) -{ - __remove_hugetlb_folio(h, folio, adjust_surplus, true); -} - static void add_hugetlb_folio(struct hstate *h, struct folio *folio, bool adjust_surplus) { - int zeroed; int nid = folio_nid(folio); VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio); @@ -1711,21 +1689,6 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, */ folio_set_hugetlb_vmemmap_optimized(folio); - /* - * This folio is about to be managed by the hugetlb allocator and - * should have no users. Drop our reference, and check for others - * just in case. - */ - zeroed = folio_put_testzero(folio); - if (unlikely(!zeroed)) - /* - * It is VERY unlikely soneone else has taken a ref - * on the folio. In this case, we simply return as - * free_huge_folio() will be called when this other ref - * is dropped. 
- */ - return; - arch_clear_hugetlb_flags(folio); enqueue_hugetlb_folio(h, folio); } @@ -1763,13 +1726,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, } /* - * Move PageHWPoison flag from head page to the raw error pages, - * which makes any healthy subpages reusable. - */ - if (unlikely(folio_test_hwpoison(folio))) - folio_clear_hugetlb_hwpoison(folio); - - /* * If vmemmap pages were allocated above, then we need to clear the * hugetlb flag under the hugetlb lock. */ @@ -1780,6 +1736,15 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, } /* + * Move PageHWPoison flag from head page to the raw error pages, + * which makes any healthy subpages reusable. + */ + if (unlikely(folio_test_hwpoison(folio))) + folio_clear_hugetlb_hwpoison(folio); + + folio_ref_unfreeze(folio, 1); + + /* * Non-gigantic pages demoted from CMA allocated gigantic pages * need to be given back to CMA in free_gigantic_folio. */ @@ -2197,6 +2162,9 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h, nid = numa_mem_id(); retry: folio = __folio_alloc(gfp_mask, order, nid, nmask); + /* Ensure hugetlb folio won't have large_rmappable flag set. 
*/ + if (folio) + folio_clear_large_rmappable(folio); if (folio && !folio_ref_freeze(folio, 1)) { folio_put(folio); @@ -3079,11 +3047,8 @@ retry: free_new: spin_unlock_irq(&hugetlb_lock); - if (new_folio) { - /* Folio has a zero ref count, but needs a ref to be freed */ - folio_ref_unfreeze(new_folio, 1); + if (new_folio) update_and_free_hugetlb_folio(h, new_folio, false); - } return ret; } @@ -3938,7 +3903,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio) target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); - remove_hugetlb_folio_for_demote(h, folio, false); + remove_hugetlb_folio(h, folio, false); spin_unlock_irq(&hugetlb_lock); /* @@ -3952,7 +3917,6 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio) if (rc) { /* Allocation of vmemmmap failed, we can not demote folio */ spin_lock_irq(&hugetlb_lock); - folio_ref_unfreeze(folio, 1); add_hugetlb_folio(h, folio, false); return rc; } diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index b9a55322e52c..8193906515c6 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -446,6 +446,8 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h, unsigned long vmemmap_reuse; VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); + if (!folio_test_hugetlb_vmemmap_optimized(folio)) return 0; @@ -481,6 +483,9 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h, */ int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio) { + /* avoid writes from page_ref_add_unless() while unfolding vmemmap */ + synchronize_rcu(); + return __hugetlb_vmemmap_restore_folio(h, folio, 0); } @@ -505,6 +510,9 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h, long restored = 0; long ret = 0; + /* avoid writes from page_ref_add_unless() while unfolding vmemmap */ + synchronize_rcu(); + list_for_each_entry_safe(folio, t_folio, folio_list, lru) { if 
(folio_test_hugetlb_vmemmap_optimized(folio)) { ret = __hugetlb_vmemmap_restore_folio(h, folio, @@ -550,6 +558,8 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h, unsigned long vmemmap_reuse; VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); + if (!vmemmap_should_optimize_folio(h, folio)) return ret; @@ -601,6 +611,9 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio) { LIST_HEAD(vmemmap_pages); + /* avoid writes from page_ref_add_unless() while folding vmemmap */ + synchronize_rcu(); + __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0); free_vmemmap_page_list(&vmemmap_pages); } @@ -644,6 +657,9 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l flush_tlb_all(); + /* avoid writes from page_ref_add_unless() while folding vmemmap */ + synchronize_rcu(); + list_for_each_entry(folio, folio_list, lru) { int ret; diff --git a/mm/internal.h b/mm/internal.h index b2c75b12014e..cc2c5e07fad3 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -588,7 +588,6 @@ extern void __putback_isolated_page(struct page *page, unsigned int order, extern void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order); extern void __free_pages_core(struct page *page, unsigned int order); -extern void kernel_init_pages(struct page *page, int numpages); /* * This will have no effect, other than possibly generating a warning, if the @@ -1183,8 +1182,8 @@ int migrate_device_coherent_page(struct page *page); /* * mm/gup.c */ -struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags); -int __must_check try_grab_page(struct page *page, unsigned int flags); +int __must_check try_grab_folio(struct folio *folio, int refs, + unsigned int flags); /* * mm/huge_memory.c @@ -1436,11 +1435,6 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); #ifdef CONFIG_64BIT -/* VM 
is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) -#endif - -#ifdef CONFIG_64BIT static inline int can_do_mseal(unsigned long flags) { if (flags) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index e7c9a4dc89f8..85e7c6b4575c 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -532,7 +532,7 @@ void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip) return; /* Unpoison the object and save alloc info for non-kmalloc() allocations. */ - unpoison_slab_object(slab->slab_cache, ptr, size, flags); + unpoison_slab_object(slab->slab_cache, ptr, flags, false); /* Poison the redzone and save alloc info for kmalloc() allocations. */ if (is_kmalloc_cache(slab->slab_cache)) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 774a97e6e2da..aab471791bd9 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2000,9 +2000,9 @@ out_unlock: if (!is_shmem) { filemap_nr_thps_inc(mapping); /* - * Paired with smp_mb() in do_dentry_open() to ensure - * i_writecount is up to date and the update to nr_thps is - * visible. Ensures the page cache will be truncated if the + * Paired with the fence in do_dentry_open() -> get_write_access() + * to ensure i_writecount is up to date and the update to nr_thps + * is visible. Ensures the page cache will be truncated if the * file is opened writable. */ smp_mb(); @@ -2190,8 +2190,8 @@ rollback: if (!is_shmem && result == SCAN_COPY_MC) { filemap_nr_thps_dec(mapping); /* - * Paired with smp_mb() in do_dentry_open() to - * ensure the update to nr_thps is visible. + * Paired with the fence in do_dentry_open() -> get_write_access() + * to ensure the update to nr_thps is visible. 
*/ smp_mb(); } diff --git a/mm/memblock.c b/mm/memblock.c index 08e9806b1cf9..e81fb68f7f88 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -754,7 +754,7 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt /* calculate lose page */ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - if (nid == NUMA_NO_NODE) + if (!numa_valid_node(nid)) nr_pages += end_pfn - start_pfn; } @@ -1061,7 +1061,7 @@ static bool should_skip_region(struct memblock_type *type, return false; /* only memory regions are associated with nodes, check it */ - if (nid != NUMA_NO_NODE && nid != m_nid) + if (numa_valid_node(nid) && nid != m_nid) return true; /* skip hotpluggable memory regions if needed */ @@ -1118,10 +1118,6 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; - if (WARN_ONCE(nid == MAX_NUMNODES, - "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - for (; idx_a < type_a->cnt; idx_a++) { struct memblock_region *m = &type_a->regions[idx_a]; @@ -1215,9 +1211,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - if (*idx == (u64)ULLONG_MAX) { idx_a = type_a->cnt - 1; if (type_b != NULL) @@ -1303,7 +1296,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) continue; - if (nid == MAX_NUMNODES || nid == r_nid) + if (!numa_valid_node(nid) || nid == r_nid) break; } if (*idx >= type->cnt) { @@ -1339,10 +1332,6 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, int start_rgn, end_rgn; int i, ret; - if (WARN_ONCE(nid == MAX_NUMNODES, - "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); if (ret) return ret; @@ -1452,9 +1441,6 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, enum memblock_flags flags = choose_memblock_flags(); phys_addr_t found; - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - if (!align) { /* Can't use WARNs this early in boot on powerpc */ dump_stack(); @@ -1467,7 +1453,7 @@ again: if (found && !memblock_reserve(found, size)) goto done; - if (nid != NUMA_NO_NODE && !exact_nid) { + if (numa_valid_node(nid) && !exact_nid) { found = memblock_find_in_range_node(size, align, start, end, NUMA_NO_NODE, flags); @@ -1987,7 +1973,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type) end = base + size - 1; flags = rgn->flags; #ifdef CONFIG_NUMA - if (memblock_get_region_node(rgn) != MAX_NUMNODES) + if (numa_valid_node(memblock_get_region_node(rgn))) snprintf(nid_buf, sizeof(nid_buf), " on node %d", memblock_get_region_node(rgn)); #endif @@ -2181,7 +2167,7 @@ static void __init memmap_init_reserved_pages(void) start = region->base; end = start + region->size; - if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES) + if (!numa_valid_node(nid)) nid = early_pfn_to_nid(PFN_DOWN(start)); reserve_bootmem_region(start, end, nid); @@ -2272,7 +2258,7 @@ static int memblock_debug_show(struct seq_file *m, void *private) seq_printf(m, "%4d: ", i); seq_printf(m, "%pa..%pa ", ®->base, &end); - if (nid != MAX_NUMNODES) + if (numa_valid_node(nid)) seq_printf(m, "%4d ", nid); else seq_printf(m, "%4c ", 'x'); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 36793e509f47..8f2f1bb18c9c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7745,8 +7745,7 @@ void __mem_cgroup_uncharge_folios(struct folio_batch *folios) * @new: Replacement folio. * * Charge @new as a replacement folio for @old. 
@old will - * be uncharged upon free. This is only used by the page cache - * (in replace_page_cache_folio()). + * be uncharged upon free. * * Both folios must be locked, @new->mapping must be set up. */ @@ -7824,17 +7823,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new) /* Transfer the charge and the css ref */ commit_charge(new, memcg); - /* - * If the old folio is a large folio and is in the split queue, it needs - * to be removed from the split queue now, in case getting an incorrect - * split queue in destroy_large_folio() after the memcg of the old folio - * is cleared. - * - * In addition, the old folio is about to be freed after migration, so - * removing from the split queue a bit earlier seems reasonable. - */ - if (folio_test_large(old) && folio_test_large_rmappable(old)) - folio_undo_large_rmappable(old); old->memcg_data = 0; } diff --git a/mm/memory.c b/mm/memory.c index 0f47a533014e..d10e616d7389 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1507,12 +1507,6 @@ static __always_inline void zap_present_folio_ptes(struct mmu_gather *tlb, if (unlikely(folio_mapcount(folio) < 0)) print_bad_pte(vma, addr, ptent, page); } - - if (want_init_mlocked_on_free() && folio_test_mlocked(folio) && - !delay_rmap && folio_test_anon(folio)) { - kernel_init_pages(page, folio_nr_pages(folio)); - } - if (unlikely(__tlb_remove_folio_pages(tlb, page, nr, delay_rmap))) { *force_flush = true; *force_break = true; @@ -4614,8 +4608,9 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER)) return ret; - if (page != &folio->page || folio_order(folio) != HPAGE_PMD_ORDER) + if (folio_order(folio) != HPAGE_PMD_ORDER) return ret; + page = &folio->page; /* * Just backoff if any subpage of a THP is corrupted otherwise @@ -5106,10 +5101,16 @@ static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_stru bool ignore_writable, bool pte_write_upgrade) { int nr = pte_pfn(fault_pte) - 
folio_pfn(folio); - unsigned long start = max(vmf->address - nr * PAGE_SIZE, vma->vm_start); - unsigned long end = min(vmf->address + (folio_nr_pages(folio) - nr) * PAGE_SIZE, vma->vm_end); - pte_t *start_ptep = vmf->pte - (vmf->address - start) / PAGE_SIZE; - unsigned long addr; + unsigned long start, end, addr = vmf->address; + unsigned long addr_start = addr - (nr << PAGE_SHIFT); + unsigned long pt_start = ALIGN_DOWN(addr, PMD_SIZE); + pte_t *start_ptep; + + /* Stay within the VMA and within the page table. */ + start = max3(addr_start, pt_start, vma->vm_start); + end = min3(addr_start + folio_size(folio), pt_start + PMD_SIZE, + vma->vm_end); + start_ptep = vmf->pte - ((addr - start) >> PAGE_SHIFT); /* Restore all PTEs' mapping of the large folio */ for (addr = start; addr != end; start_ptep++, addr += PAGE_SIZE) { diff --git a/mm/migrate.c b/mm/migrate.c index dd04f578c19c..a8c6f466e33a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -415,6 +415,15 @@ int folio_migrate_mapping(struct address_space *mapping, if (folio_ref_count(folio) != expected_count) return -EAGAIN; + /* Take off deferred split queue while frozen and memcg set */ + if (folio_test_large(folio) && + folio_test_large_rmappable(folio)) { + if (!folio_ref_freeze(folio, expected_count)) + return -EAGAIN; + folio_undo_large_rmappable(folio); + folio_ref_unfreeze(folio, expected_count); + } + /* No turning back from here */ newfolio->index = folio->index; newfolio->mapping = folio->mapping; @@ -433,6 +442,10 @@ int folio_migrate_mapping(struct address_space *mapping, return -EAGAIN; } + /* Take off deferred split queue while frozen and memcg set */ + if (folio_test_large(folio) && folio_test_large_rmappable(folio)) + folio_undo_large_rmappable(folio); + /* * Now we know that no one else is looking at the folio: * no turning back from here. @@ -1654,7 +1667,16 @@ static int migrate_pages_batch(struct list_head *from, /* * The rare folio on the deferred split list should - * be split now. 
It should not count as a failure. + * be split now. It should not count as a failure: + * but increment nr_failed because, without doing so, + * migrate_pages() may report success with (split but + * unmigrated) pages still on its fromlist; whereas it + * always reports success when its fromlist is empty. + * stats->nr_thp_failed should be increased too, + * otherwise stats inconsistency will happen when + * migrate_pages_batch is called via migrate_pages() + * with MIGRATE_SYNC and MIGRATE_ASYNC. + * * Only check it without removing it from the list. * Since the folio can be on deferred_split_scan() * local list and removing it can cause the local list @@ -1669,6 +1691,8 @@ static int migrate_pages_batch(struct list_head *from, if (nr_pages > 2 && !list_empty(&folio->_deferred_list)) { if (try_split_folio(folio, split_folios) == 0) { + nr_failed++; + stats->nr_thp_failed += is_thp; stats->nr_thp_split += is_thp; stats->nr_split++; continue; diff --git a/mm/mm_init.c b/mm/mm_init.c index f72b852bd5b8..3ec04933f7fd 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2523,9 +2523,6 @@ EXPORT_SYMBOL(init_on_alloc); DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); EXPORT_SYMBOL(init_on_free); -DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free); -EXPORT_SYMBOL(init_mlocked_on_free); - static bool _init_on_alloc_enabled_early __read_mostly = IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON); static int __init early_init_on_alloc(char *buf) @@ -2543,14 +2540,6 @@ static int __init early_init_on_free(char *buf) } early_param("init_on_free", early_init_on_free); -static bool _init_mlocked_on_free_enabled_early __read_mostly - = IS_ENABLED(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON); -static int __init early_init_mlocked_on_free(char *buf) -{ - return kstrtobool(buf, &_init_mlocked_on_free_enabled_early); -} -early_param("init_mlocked_on_free", early_init_mlocked_on_free); - DEFINE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, 
check_pages_enabled); /* @@ -2578,21 +2567,12 @@ static void __init mem_debugging_and_hardening_init(void) } #endif - if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early || - _init_mlocked_on_free_enabled_early) && + if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) && page_poisoning_requested) { pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_alloc, init_on_free " - "and init_mlocked_on_free\n"); + "will take precedence over init_on_alloc and init_on_free\n"); _init_on_alloc_enabled_early = false; _init_on_free_enabled_early = false; - _init_mlocked_on_free_enabled_early = false; - } - - if (_init_mlocked_on_free_enabled_early && _init_on_free_enabled_early) { - pr_info("mem auto-init: init_on_free is on, " - "will take precedence over init_mlocked_on_free\n"); - _init_mlocked_on_free_enabled_early = false; } if (_init_on_alloc_enabled_early) { @@ -2609,17 +2589,9 @@ static void __init mem_debugging_and_hardening_init(void) static_branch_disable(&init_on_free); } - if (_init_mlocked_on_free_enabled_early) { - want_check_pages = true; - static_branch_enable(&init_mlocked_on_free); - } else { - static_branch_disable(&init_mlocked_on_free); - } - - if (IS_ENABLED(CONFIG_KMSAN) && (_init_on_alloc_enabled_early || - _init_on_free_enabled_early || _init_mlocked_on_free_enabled_early)) - pr_info("mem auto-init: please make sure init_on_alloc, init_on_free and " - "init_mlocked_on_free are disabled when running KMSAN\n"); + if (IS_ENABLED(CONFIG_KMSAN) && + (_init_on_alloc_enabled_early || _init_on_free_enabled_early)) + pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n"); #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled()) { @@ -2658,10 +2630,9 @@ static void __init report_meminit(void) else stack = "off"; - pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s, mlocked free:%s\n", + pr_info("mem auto-init: stack:%s, heap 
alloc:%s, heap free:%s\n", stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off", - want_init_on_free() ? "on" : "off", - want_init_mlocked_on_free() ? "on" : "off"); + want_init_on_free() ? "on" : "off"); if (want_init_on_free()) pr_info("mem auto-init: clearing system memory may take some time...\n"); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 12c9297ed4a7..8a1c92090129 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -415,13 +415,20 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) else bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; - if (bg_thresh >= thresh) - bg_thresh = thresh / 2; tsk = current; if (rt_task(tsk)) { bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; } + /* + * Dirty throttling logic assumes the limits in page units fit into + * 32-bits. This gives 16TB dirty limits max which is hopefully enough. + */ + if (thresh > UINT_MAX) + thresh = UINT_MAX; + /* This makes sure bg_thresh is within 32-bits as well */ + if (bg_thresh >= thresh) + bg_thresh = thresh / 2; dtc->thresh = thresh; dtc->bg_thresh = bg_thresh; @@ -471,7 +478,11 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat) if (rt_task(tsk)) dirty += dirty / 4; - return dirty; + /* + * Dirty throttling logic assumes the limits in page units fit into + * 32-bits. This gives 16TB dirty limits max which is hopefully enough. 
+ */ + return min_t(unsigned long, dirty, UINT_MAX); } /** @@ -508,10 +519,17 @@ static int dirty_background_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; + unsigned long old_bytes = dirty_background_bytes; ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); - if (ret == 0 && write) + if (ret == 0 && write) { + if (DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE) > + UINT_MAX) { + dirty_background_bytes = old_bytes; + return -ERANGE; + } dirty_background_ratio = 0; + } return ret; } @@ -537,6 +555,10 @@ static int dirty_bytes_handler(struct ctl_table *table, int write, ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_bytes != old_bytes) { + if (DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) > UINT_MAX) { + vm_dirty_bytes = old_bytes; + return -ERANGE; + } writeback_set_ratelimit(); vm_dirty_ratio = 0; } @@ -1660,7 +1682,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc, dtc->thresh); dtc->wb_bg_thresh = dtc->thresh ? 
- div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 222299b5c0e6..9ecf99190ea2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -504,10 +504,15 @@ out: static inline unsigned int order_to_pindex(int migratetype, int order) { + bool __maybe_unused movable; + #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (order > PAGE_ALLOC_COSTLY_ORDER) { VM_BUG_ON(order != HPAGE_PMD_ORDER); - return NR_LOWORDER_PCP_LISTS; + + movable = migratetype == MIGRATE_MOVABLE; + + return NR_LOWORDER_PCP_LISTS + movable; } #else VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); @@ -521,7 +526,7 @@ static inline int pindex_to_order(unsigned int pindex) int order = pindex / MIGRATE_PCPTYPES; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pindex == NR_LOWORDER_PCP_LISTS) + if (pindex >= NR_LOWORDER_PCP_LISTS) order = HPAGE_PMD_ORDER; #else VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); @@ -1016,7 +1021,7 @@ static inline bool should_skip_kasan_poison(struct page *page) return page_kasan_tag(page) == KASAN_TAG_KERNEL; } -void kernel_init_pages(struct page *page, int numpages) +static void kernel_init_pages(struct page *page, int numpages) { int i; diff --git a/mm/page_table_check.c b/mm/page_table_check.c index 4169576bed72..509c6ef8de40 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -73,6 +73,9 @@ static void page_table_check_clear(unsigned long pfn, unsigned long pgcnt) page = pfn_to_page(pfn); page_ext = page_ext_get(page); + if (!page_ext) + return; + BUG_ON(PageSlab(page)); anon = PageAnon(page); @@ -110,6 +113,9 @@ static void page_table_check_set(unsigned long pfn, unsigned long pgcnt, page = pfn_to_page(pfn); page_ext = page_ext_get(page); + if (!page_ext) + return; + BUG_ON(PageSlab(page)); anon = PageAnon(page); @@ -140,7 +146,10 @@ void __page_table_check_zero(struct page *page, unsigned int order) 
BUG_ON(PageSlab(page)); page_ext = page_ext_get(page); - BUG_ON(!page_ext); + + if (!page_ext) + return; + for (i = 0; i < (1ul << order); i++) { struct page_table_check *ptc = get_page_table_check(page_ext); diff --git a/mm/readahead.c b/mm/readahead.c index c1b23989d9ca..817b2a352d78 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -503,11 +503,11 @@ void page_cache_ra_order(struct readahead_control *ractl, limit = min(limit, index + ra->size - 1); - if (new_order < MAX_PAGECACHE_ORDER) { + if (new_order < MAX_PAGECACHE_ORDER) new_order += 2; - new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order); - new_order = min_t(unsigned int, new_order, ilog2(ra->size)); - } + + new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order); + new_order = min_t(unsigned int, new_order, ilog2(ra->size)); /* See comment in page_cache_ra_unbounded() */ nofs = memalloc_nofs_save(); diff --git a/mm/shmem.c b/mm/shmem.c index f5d60436b604..831b52dfd56e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -541,8 +541,9 @@ static bool shmem_confirm_swap(struct address_space *mapping, static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; -bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags) +static bool __shmem_is_huge(struct inode *inode, pgoff_t index, + bool shmem_huge_force, struct mm_struct *mm, + unsigned long vm_flags) { loff_t i_size; @@ -573,6 +574,16 @@ bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, } } +bool shmem_is_huge(struct inode *inode, pgoff_t index, + bool shmem_huge_force, struct mm_struct *mm, + unsigned long vm_flags) +{ + if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER) + return false; + + return __shmem_is_huge(inode, index, shmem_huge_force, mm, vm_flags); +} + #if defined(CONFIG_SYSFS) static int shmem_parse_huge(const char *str) { @@ -1786,7 +1797,7 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp, xa_lock_irq(&swap_mapping->i_pages); 
error = shmem_replace_entry(swap_mapping, swap_index, old, new); if (!error) { - mem_cgroup_migrate(old, new); + mem_cgroup_replace_folio(old, new); __lruvec_stat_mod_folio(new, NR_FILE_PAGES, 1); __lruvec_stat_mod_folio(new, NR_SHMEM, 1); __lruvec_stat_mod_folio(old, NR_FILE_PAGES, -1); @@ -3166,10 +3177,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, struct folio *folio; /* - * Good, the fallocate(2) manpage permits EINTR: we may have - * been interrupted because we are using up too much memory. + * Check for fatal signal so that we abort early in OOM + * situations. We don't want to abort in case of non-fatal + * signals as large fallocate can take noticeable time and + * e.g. periodic timers may result in fallocate constantly + * restarting. */ - if (signal_pending(current)) + if (fatal_signal_pending(current)) error = -EINTR; else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced) error = -ENOMEM; @@ -3903,14 +3917,14 @@ static const struct constant_table shmem_param_enums_huge[] = { }; const struct fs_parameter_spec shmem_fs_parameters[] = { - fsparam_u32 ("gid", Opt_gid), + fsparam_gid ("gid", Opt_gid), fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge), fsparam_u32oct("mode", Opt_mode), fsparam_string("mpol", Opt_mpol), fsparam_string("nr_blocks", Opt_nr_blocks), fsparam_string("nr_inodes", Opt_nr_inodes), fsparam_string("size", Opt_size), - fsparam_u32 ("uid", Opt_uid), + fsparam_uid ("uid", Opt_uid), fsparam_flag ("inode32", Opt_inode32), fsparam_flag ("inode64", Opt_inode64), fsparam_flag ("noswap", Opt_noswap), @@ -3970,9 +3984,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) ctx->mode = result.uint_32 & 07777; break; case Opt_uid: - kuid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(kuid)) - goto bad_value; + kuid = result.uid; /* * The requested uid must be representable in the @@ -3984,9 +3996,7 @@ static int shmem_parse_one(struct fs_context *fc, struct 
fs_parameter *param) ctx->uid = kuid; break; case Opt_gid: - kgid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(kgid)) - goto bad_value; + kgid = result.gid; /* * The requested gid must be representable in the diff --git a/mm/slub.c b/mm/slub.c index 1373ac365a46..4927edec6a8c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3902,7 +3902,6 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, unsigned int orig_size) { unsigned int zero_size = s->object_size; - struct slabobj_ext *obj_exts; bool kasan_init = init; size_t i; gfp_t init_flags = flags & gfp_allowed_mask; @@ -3945,9 +3944,11 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, kmemleak_alloc_recursive(p[i], s->object_size, 1, s->flags, init_flags); kmsan_slab_alloc(s, p[i], init_flags); +#ifdef CONFIG_MEM_ALLOC_PROFILING if (need_slab_obj_ext()) { + struct slabobj_ext *obj_exts; + obj_exts = prepare_slab_obj_exts_hook(s, flags, p[i]); -#ifdef CONFIG_MEM_ALLOC_PROFILING /* * Currently obj_exts is used only for allocation profiling. * If other users appear then mem_alloc_profiling_enabled() @@ -3955,8 +3956,8 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, */ if (likely(obj_exts)) alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size); -#endif } +#endif } return memcg_slab_post_alloc_hook(s, lru, flags, size, p); diff --git a/mm/util.c b/mm/util.c index 6c3e6710e4de..fe723241b66f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -139,14 +139,14 @@ EXPORT_SYMBOL(kmemdup_noprof); * kmemdup_array - duplicate a given array. * * @src: array to duplicate. - * @element_size: size of each element of array. * @count: number of elements to duplicate from array. + * @element_size: size of each element of array. * @gfp: GFP mask to use. * * Return: duplicated array of @src or %NULL in case of error, * result is physically contiguous. Use kfree() to free. 
*/ -void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp) +void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp) { return kmemdup(src, size_mul(element_size, count), gfp); } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 45e1506d58c3..e34ea860153f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2498,6 +2498,7 @@ struct vmap_block { struct list_head free_list; struct rcu_head rcu_head; struct list_head purge; + unsigned int cpu; }; /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ @@ -2542,7 +2543,15 @@ static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); static struct xarray * addr_to_vb_xa(unsigned long addr) { - int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus(); + int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids; + + /* + * Please note, nr_cpu_ids points on a highest set + * possible bit, i.e. we never invoke cpumask_next() + * if an index points on it which is nr_cpu_ids - 1. + */ + if (!cpu_possible(index)) + index = cpumask_next(index, cpu_possible_mask); return &per_cpu(vmap_block_queue, index).vmap_blocks; } @@ -2625,8 +2634,15 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) free_vmap_area(va); return ERR_PTR(err); } - - vbq = raw_cpu_ptr(&vmap_block_queue); + /* + * list_add_tail_rcu could happened in another core + * rather than vb->cpu due to task migration, which + * is safe as list_add_tail_rcu will ensure the list's + * integrity together with list_for_each_rcu from read + * side. 
+ */ + vb->cpu = raw_smp_processor_id(); + vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu); spin_lock(&vbq->lock); list_add_tail_rcu(&vb->free_list, &vbq->free); spin_unlock(&vbq->lock); @@ -2654,9 +2670,10 @@ static void free_vmap_block(struct vmap_block *vb) } static bool purge_fragmented_block(struct vmap_block *vb, - struct vmap_block_queue *vbq, struct list_head *purge_list, - bool force_purge) + struct list_head *purge_list, bool force_purge) { + struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, vb->cpu); + if (vb->free + vb->dirty != VMAP_BBMAP_BITS || vb->dirty == VMAP_BBMAP_BITS) return false; @@ -2704,7 +2721,7 @@ static void purge_fragmented_blocks(int cpu) continue; spin_lock(&vb->lock); - purge_fragmented_block(vb, vbq, &purge, true); + purge_fragmented_block(vb, &purge, true); spin_unlock(&vb->lock); } rcu_read_unlock(); @@ -2841,7 +2858,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) * not purgeable, check whether there is dirty * space to be flushed. */ - if (!purge_fragmented_block(vb, vbq, &purge_list, false) && + if (!purge_fragmented_block(vb, &purge_list, false) && vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) { unsigned long va_start = vb->va->va_start; unsigned long s, e; diff --git a/mm/workingset.c b/mm/workingset.c index c22adb93622a..a2b28e356e68 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg) * @file: whether the corresponding folio is from the file lru. * @workingset: where the workingset value unpacked from shadow should * be stored. + * @flush: whether to flush cgroup rstat. * * Return: true if the shadow is for a recently evicted folio; false otherwise. 
*/ -bool workingset_test_recent(void *shadow, bool file, bool *workingset) +bool workingset_test_recent(void *shadow, bool file, bool *workingset, + bool flush) { struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; @@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset) /* * Flush stats (and potentially sleep) outside the RCU read section. + * + * Note that workingset_test_recent() itself might be called in RCU read + * section (for e.g, in cachestat) - these callers need to skip flushing + * stats (via the flush argument). + * * XXX: With per-memcg flushing and thresholding, is ratelimiting * still needed here? */ - mem_cgroup_flush_stats_ratelimited(eviction_memcg); + if (flush) + mem_cgroup_flush_stats_ratelimited(eviction_memcg); eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); refault = atomic_long_read(&eviction_lruvec->nonresident_age); @@ -558,7 +566,7 @@ void workingset_refault(struct folio *folio, void *shadow) mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); - if (!workingset_test_recent(shadow, file, &workingset)) + if (!workingset_test_recent(shadow, file, &workingset, true)) return; folio_set_active(folio); diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ac74f6ead62d..8f6dd2c6ee41 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -12,6 +12,7 @@ #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> +#include <linux/if_vlan.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/list.h> @@ -132,6 +133,29 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, } /** + * batadv_vlan_id_valid() - check if vlan id is in valid batman-adv encoding + * @vid: the VLAN identifier + * + * Return: true when either no vlan is set or if VLAN is in correct range, + * false otherwise + */ +static bool batadv_vlan_id_valid(unsigned short vid) +{ + unsigned short non_vlan = vid & 
~(BATADV_VLAN_HAS_TAG | VLAN_VID_MASK); + + if (vid == 0) + return true; + + if (!(vid & BATADV_VLAN_HAS_TAG)) + return false; + + if (non_vlan) + return false; + + return true; +} + +/** * batadv_orig_node_vlan_new() - search and possibly create an orig_node_vlan * object * @orig_node: the originator serving the VLAN @@ -149,6 +173,9 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, { struct batadv_orig_node_vlan *vlan; + if (!batadv_vlan_id_valid(vid)) + return NULL; + spin_lock_bh(&orig_node->vlan_list_lock); /* first look if an object for this vid already exists */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b21ff3c36b07..2243cec18ecc 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -209,6 +209,20 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, } /** + * batadv_tt_local_entry_free_rcu() - free the tt_local_entry + * @rcu: rcu pointer of the tt_local_entry + */ +static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_local_entry *tt_local_entry; + + tt_local_entry = container_of(rcu, struct batadv_tt_local_entry, + common.rcu); + + kmem_cache_free(batadv_tl_cache, tt_local_entry); +} + +/** * batadv_tt_local_entry_release() - release tt_local_entry from lists and queue * for free after rcu grace period * @ref: kref pointer of the nc_node @@ -222,7 +236,7 @@ static void batadv_tt_local_entry_release(struct kref *ref) batadv_softif_vlan_put(tt_local_entry->vlan); - kfree_rcu(tt_local_entry, common.rcu); + call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu); } /** @@ -241,6 +255,20 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry) } /** + * batadv_tt_global_entry_free_rcu() - free the tt_global_entry + * @rcu: rcu pointer of the tt_global_entry + */ +static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_global_entry 
*tt_global_entry; + + tt_global_entry = container_of(rcu, struct batadv_tt_global_entry, + common.rcu); + + kmem_cache_free(batadv_tg_cache, tt_global_entry); +} + +/** * batadv_tt_global_entry_release() - release tt_global_entry from lists and * queue for free after rcu grace period * @ref: kref pointer of the nc_node @@ -254,7 +282,7 @@ void batadv_tt_global_entry_release(struct kref *ref) batadv_tt_global_del_orig_list(tt_global_entry); - kfree_rcu(tt_global_entry, common.rcu); + call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu); } /** @@ -380,6 +408,19 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, } /** + * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry + * @rcu: rcu pointer of the orig_entry + */ +static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_orig_list_entry *orig_entry; + + orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); + + kmem_cache_free(batadv_tt_orig_cache, orig_entry); +} + +/** * batadv_tt_orig_list_entry_release() - release tt orig entry from lists and * queue for free after rcu grace period * @ref: kref pointer of the tt orig entry @@ -392,7 +433,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref) refcount); batadv_orig_node_put(orig_entry->orig_node); - kfree_rcu(orig_entry, rcu); + call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } /** diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 0c76dcde5361..080053a85b4d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -899,8 +899,8 @@ static int hci_conn_hash_alloc_unset(struct hci_dev *hdev) U16_MAX, GFP_ATOMIC); } -struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, - u8 role, u16 handle) +static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, + u8 role, u16 handle) { struct hci_conn *conn; @@ -1041,7 +1041,16 @@ struct hci_conn 
*hci_conn_add_unset(struct hci_dev *hdev, int type, if (unlikely(handle < 0)) return ERR_PTR(-ECONNREFUSED); - return hci_conn_add(hdev, type, dst, role, handle); + return __hci_conn_add(hdev, type, dst, role, handle); +} + +struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, + u8 role, u16 handle) +{ + if (handle > HCI_CONN_HANDLE_MAX) + return ERR_PTR(-EINVAL); + + return __hci_conn_add(hdev, type, dst, role, handle); } static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index dd3b0f501018..c644b30977bd 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -63,50 +63,6 @@ DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); -static int hci_scan_req(struct hci_request *req, unsigned long opt) -{ - __u8 scan = opt; - - BT_DBG("%s %x", req->hdev->name, scan); - - /* Inquiry and Page scans */ - hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); - return 0; -} - -static int hci_auth_req(struct hci_request *req, unsigned long opt) -{ - __u8 auth = opt; - - BT_DBG("%s %x", req->hdev->name, auth); - - /* Authentication */ - hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); - return 0; -} - -static int hci_encrypt_req(struct hci_request *req, unsigned long opt) -{ - __u8 encrypt = opt; - - BT_DBG("%s %x", req->hdev->name, encrypt); - - /* Encryption */ - hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); - return 0; -} - -static int hci_linkpol_req(struct hci_request *req, unsigned long opt) -{ - __le16 policy = cpu_to_le16(opt); - - BT_DBG("%s %x", req->hdev->name, policy); - - /* Default link policy */ - hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); - return 0; -} - /* Get HCI device by index. * Device is held on return. 
*/ struct hci_dev *hci_dev_get(int index) @@ -735,6 +691,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) { struct hci_dev *hdev; struct hci_dev_req dr; + __le16 policy; int err = 0; if (copy_from_user(&dr, arg, sizeof(dr))) @@ -761,8 +718,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: - err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT, NULL); + err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE, + 1, &dr.dev_opt, HCI_CMD_TIMEOUT); break; case HCISETENCRYPT: @@ -773,19 +730,23 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ - err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT, NULL); + err = __hci_cmd_sync_status(hdev, + HCI_OP_WRITE_AUTH_ENABLE, + 1, &dr.dev_opt, + HCI_CMD_TIMEOUT); if (err) break; } - err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt, - HCI_INIT_TIMEOUT, NULL); + err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_ENCRYPT_MODE, + 1, &dr.dev_opt, + HCI_CMD_TIMEOUT); break; case HCISETSCAN: - err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt, - HCI_INIT_TIMEOUT, NULL); + err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE, + 1, &dr.dev_opt, + HCI_CMD_TIMEOUT); /* Ensure that the connectable and discoverable states * get correctly modified as this was a non-mgmt change. 
@@ -795,8 +756,11 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) break; case HCISETLINKPOL: - err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt, - HCI_INIT_TIMEOUT, NULL); + policy = cpu_to_le16(dr.dev_opt); + + err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, + 2, &policy, + HCI_CMD_TIMEOUT); break; case HCISETLINKMODE: @@ -2751,7 +2715,11 @@ void hci_unregister_dev(struct hci_dev *hdev) list_del(&hdev->list); write_unlock(&hci_dev_list_lock); + cancel_work_sync(&hdev->rx_work); + cancel_work_sync(&hdev->cmd_work); + cancel_work_sync(&hdev->tx_work); cancel_work_sync(&hdev->power_on); + cancel_work_sync(&hdev->error_reset); hci_cmd_sync_clear(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a487f9df8145..93f7ac905cec 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6311,6 +6311,13 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK; legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type); + + if (test_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, + &hdev->quirks)) { + info->primary_phy &= 0x1f; + info->secondary_phy &= 0x1f; + } + if (legacy_evt_type != LE_ADV_INVALID) { process_adv_report(hdev, legacy_evt_type, &info->bdaddr, info->bdaddr_type, NULL, 0, @@ -6660,6 +6667,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, struct bt_iso_qos *qos; bool pending = false; u16 handle = __le16_to_cpu(ev->handle); + u32 c_sdu_interval, p_sdu_interval; bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); @@ -6684,12 +6692,25 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, pending = test_and_clear_bit(HCI_CONN_CREATE_CIS, &conn->flags); - /* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */ - qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250; - qos->ucast.out.interval = qos->ucast.in.interval; + /* BLUETOOTH CORE SPECIFICATION Version 5.4 | 
Vol 6, Part G + * page 3075: + * Transport_Latency_C_To_P = CIG_Sync_Delay + (FT_C_To_P) × + * ISO_Interval + SDU_Interval_C_To_P + * ... + * SDU_Interval = (CIG_Sync_Delay + (FT) x ISO_Interval) - + * Transport_Latency + */ + c_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) + + (ev->c_ft * le16_to_cpu(ev->interval) * 1250)) - + get_unaligned_le24(ev->c_latency); + p_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) + + (ev->p_ft * le16_to_cpu(ev->interval) * 1250)) - + get_unaligned_le24(ev->p_latency); switch (conn->role) { case HCI_ROLE_SLAVE: + qos->ucast.in.interval = c_sdu_interval; + qos->ucast.out.interval = p_sdu_interval; /* Convert Transport Latency (us) to Latency (msec) */ qos->ucast.in.latency = DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency), @@ -6703,6 +6724,8 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, qos->ucast.out.phy = ev->p_phy; break; case HCI_ROLE_MASTER: + qos->ucast.in.interval = p_sdu_interval; + qos->ucast.out.interval = c_sdu_interval; /* Convert Transport Latency (us) to Latency (msec) */ qos->ucast.out.latency = DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency), @@ -6893,6 +6916,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bis = hci_conn_hash_lookup_handle(hdev, handle); if (!bis) { + if (handle > HCI_CONN_HANDLE_MAX) { + bt_dev_dbg(hdev, "ignore too large handle %u", handle); + continue; + } bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, HCI_ROLE_SLAVE, handle); if (IS_ERR(bis)) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a8a7d2b36870..eea34e6a236f 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -280,6 +280,19 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, } EXPORT_SYMBOL(__hci_cmd_sync_status); +int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout) +{ + int err; + + hci_req_sync_lock(hdev); + err = 
__hci_cmd_sync_status(hdev, opcode, plen, param, timeout); + hci_req_sync_unlock(hdev); + + return err; +} +EXPORT_SYMBOL(hci_cmd_sync_status); + static void hci_cmd_sync_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index cc055b952ce6..398fb81f7a13 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1356,8 +1356,7 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); switch (sk->sk_state) { case BT_CONNECT2: - if (pi->conn->hcon && - test_bit(HCI_CONN_PA_SYNC, &pi->conn->hcon->flags)) { + if (test_bit(BT_SK_PA_SYNC, &pi->flags)) { iso_conn_big_sync(sk); sk->sk_state = BT_LISTEN; } else { diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index aed025734d04..c3c26bbb5dda 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6761,6 +6761,8 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, BT_DBG("chan %p, len %d", chan, skb->len); + l2cap_chan_lock(chan); + if (chan->state != BT_BOUND && chan->state != BT_CONNECTED) goto drop; @@ -6777,6 +6779,7 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, } drop: + l2cap_chan_unlock(chan); l2cap_chan_put(chan); free_skb: kfree_skb(skb); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 6db60946c627..ba437c6f6ee5 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1239,6 +1239,10 @@ static void l2cap_sock_kill(struct sock *sk) BT_DBG("sk %p state %s", sk, state_to_string(sk->sk_state)); + /* Sock is dead, so set chan data to NULL, avoid other task use invalid + * sock pointer. 
+ */ + l2cap_pi(sk)->chan->data = NULL; /* Kill poor orphan */ l2cap_chan_put(l2cap_pi(sk)->chan); @@ -1481,12 +1485,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { - struct sock *sk = chan->data; - struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sock *sk; + struct l2cap_pinfo *pi; int err; - lock_sock(sk); + sk = chan->data; + if (!sk) + return -ENXIO; + pi = l2cap_pi(sk); + lock_sock(sk); if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) { err = -ENOMEM; goto done; diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index a6fb89fa6278..7e8a20f2fc42 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -30,10 +30,6 @@ MODULE_ALIAS("can-proto-" __stringify(CAN_J1939)); /* CAN_HDR: #bytes before can_frame data part */ #define J1939_CAN_HDR (offsetof(struct can_frame, data)) -/* CAN_FTR: #bytes beyond data part */ -#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \ - sizeof(((struct can_frame *)0)->data)) - /* lowest layer */ static void j1939_can_recv(struct sk_buff *iskb, void *data) { @@ -342,7 +338,7 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb) memset(cf, 0, J1939_CAN_HDR); /* make it a full can frame again */ - skb_put(skb, J1939_CAN_FTR + (8 - dlc)); + skb_put_zero(skb, 8 - dlc); canid = CAN_EFF_FLAG | (skcb->priority << 26) | diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fe3df23a2595..4be73de5033c 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1593,8 +1593,8 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb); struct j1939_session *session; const u8 *dat; + int len, ret; pgn_t pgn; - int len; netdev_dbg(priv->ndev, "%s\n", __func__); @@ -1653,7 +1653,22 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, session->tskey = 
priv->rx_tskey++; j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS); - WARN_ON_ONCE(j1939_session_activate(session)); + ret = j1939_session_activate(session); + if (ret) { + /* Entering this scope indicates an issue with the J1939 bus. + * Possible scenarios include: + * - A time lapse occurred, and a new session was initiated + * due to another packet being sent correctly. This could + * have been caused by too long interrupt, debugger, or being + * out-scheduled by another task. + * - The bus is receiving numerous erroneous packets, either + * from a malfunctioning device or during a test scenario. + */ + netdev_alert(priv->ndev, "%s: 0x%p: concurrent session with same addr (%02x %02x) is already active.\n", + __func__, session, skcb.addr.sa, skcb.addr.da); + j1939_session_put(session); + return NULL; + } return session; } @@ -1681,6 +1696,8 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session, j1939_session_timers_cancel(session); j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); + if (session->transmission) + j1939_session_deactivate_activate_next(session); return -EBUSY; } diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 1daf95e17d67..3a5bd1cd1e99 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -429,7 +429,10 @@ static int is_out(const struct crush_map *map, /** * crush_choose_firstn - choose numrep distinct items of given type * @map: the crush_map + * @work: working space initialized by crush_init_workspace() * @bucket: the bucket we are choose an item from + * @weight: weight vector (for map leaves) + * @weight_max: size of weight vector * @x: crush input value * @numrep: the number of items to choose * @type: the type of item to choose @@ -445,6 +448,7 @@ static int is_out(const struct crush_map *map, * @vary_r: pass r to recursive calls * @out2: second output vector for leaf items (if @recurse_to_leaf) * @parent_r: r value passed from the parent + * @choose_args: weights and ids for each 
known bucket */ static int crush_choose_firstn(const struct crush_map *map, struct crush_work *work, @@ -636,9 +640,8 @@ reject: } -/** +/* * crush_choose_indep: alternative breadth-first positionally stable mapping - * */ static void crush_choose_indep(const struct crush_map *map, struct crush_work *work, diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index f263f7e91a21..ab66b599ac47 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1085,13 +1085,19 @@ static void delayed_work(struct work_struct *work) struct ceph_mon_client *monc = container_of(work, struct ceph_mon_client, delayed_work.work); - dout("monc delayed_work\n"); mutex_lock(&monc->mutex); + dout("%s mon%d\n", __func__, monc->cur_mon); + if (monc->cur_mon < 0) { + goto out; + } + if (monc->hunting) { dout("%s continuing hunt\n", __func__); reopen_session(monc); } else { int is_auth = ceph_auth_is_authenticated(monc->auth); + + dout("%s is_authed %d\n", __func__, is_auth); if (ceph_con_keepalive_expired(&monc->con, CEPH_MONC_PING_TIMEOUT)) { dout("monc keepalive timeout\n"); @@ -1116,6 +1122,8 @@ static void delayed_work(struct work_struct *work) } } __schedule_delayed(monc); + +out: mutex_unlock(&monc->mutex); } @@ -1232,13 +1240,15 @@ EXPORT_SYMBOL(ceph_monc_init); void ceph_monc_stop(struct ceph_mon_client *monc) { dout("stop\n"); - cancel_delayed_work_sync(&monc->delayed_work); mutex_lock(&monc->mutex); __close_session(monc); + monc->hunting = false; monc->cur_mon = -1; mutex_unlock(&monc->mutex); + cancel_delayed_work_sync(&monc->delayed_work); + /* * flush msgr queue before we destroy ourselves to ensure that: * - any work that references our embedded con is finished. 
diff --git a/net/core/datagram.c b/net/core/datagram.c index e614cfd8e14a..e72dd78471a6 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -416,15 +416,23 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { - struct page *page = skb_frag_page(frag); - u8 *vaddr = kmap(page); + u32 p_off, p_len, copied; + struct page *p; + u8 *vaddr; if (copy > len) copy = len; - n = INDIRECT_CALL_1(cb, simple_copy_to_iter, - vaddr + skb_frag_off(frag) + offset - start, - copy, data, to); - kunmap(page); + + n = 0; + skb_frag_foreach_page(frag, + skb_frag_off(frag) + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_local_page(p); + n += INDIRECT_CALL_1(cb, simple_copy_to_iter, + vaddr + p_off, p_len, data, to); + kunmap_local(vaddr); + } + offset += n; if (n != copy) goto short_copy; diff --git a/net/core/dev.c b/net/core/dev.c index 4d4de9008f6f..2b4819b610b8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1226,9 +1226,9 @@ int dev_change_name(struct net_device *dev, const char *newname) memcpy(oldname, dev->name, IFNAMSIZ); - write_seqlock(&netdev_rename_lock); + write_seqlock_bh(&netdev_rename_lock); err = dev_get_valid_name(net, dev, newname); - write_sequnlock(&netdev_rename_lock); + write_sequnlock_bh(&netdev_rename_lock); if (err < 0) { up_write(&devnet_rename_sem); @@ -1269,9 +1269,9 @@ rollback: if (err >= 0) { err = ret; down_write(&devnet_rename_sem); - write_seqlock(&netdev_rename_lock); + write_seqlock_bh(&netdev_rename_lock); memcpy(dev->name, oldname, IFNAMSIZ); - write_sequnlock(&netdev_rename_lock); + write_sequnlock_bh(&netdev_rename_lock); memcpy(oldname, newname, IFNAMSIZ); WRITE_ONCE(dev->name_assign_type, old_assign_type); old_assign_type = NET_NAME_RENAMED; @@ -11419,9 +11419,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, if (new_name[0]) { /* Rename the netdev to prepared name */ - 
write_seqlock(&netdev_rename_lock); + write_seqlock_bh(&netdev_rename_lock); strscpy(dev->name, new_name, IFNAMSIZ); - write_sequnlock(&netdev_rename_lock); + write_sequnlock_bh(&netdev_rename_lock); } /* Fixup kobjects */ diff --git a/net/core/filter.c b/net/core/filter.c index 2510464692af..9933851c685e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1665,6 +1665,11 @@ static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); static inline int __bpf_try_make_writable(struct sk_buff *skb, unsigned int write_len) { +#ifdef CONFIG_DEBUG_NET + /* Avoid a splat in pskb_may_pull_reason() */ + if (write_len > INT_MAX) + return -EINVAL; +#endif return skb_ensure_writable(skb, write_len); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 4f7a61688d18..6a823ba906c6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -693,11 +693,16 @@ EXPORT_SYMBOL_GPL(__put_net); * get_net_ns - increment the refcount of the network namespace * @ns: common namespace (net) * - * Returns the net's common namespace. + * Returns the net's common namespace or ERR_PTR() if ref is zero. 
*/ struct ns_common *get_net_ns(struct ns_common *ns) { - return &get_net(container_of(ns, struct net, ns))->ns; + struct net *net; + + net = maybe_get_net(container_of(ns, struct net, ns)); + if (net) + return &net->ns; + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(get_net_ns); diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 1f6ae6379e0f..05f9515d2c05 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -59,22 +59,22 @@ XDP_METADATA_KFUNC_xxx nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, xdp_rx_meta, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, - xsk_features, NETDEV_A_DEV_PAD)) { - genlmsg_cancel(rsp, hdr); - return -EINVAL; - } + xsk_features, NETDEV_A_DEV_PAD)) + goto err_cancel_msg; if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) { if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, - netdev->xdp_zc_max_segs)) { - genlmsg_cancel(rsp, hdr); - return -EINVAL; - } + netdev->xdp_zc_max_segs)) + goto err_cancel_msg; } genlmsg_end(rsp, hdr); return 0; + +err_cancel_msg: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; } static void diff --git a/net/core/skmsg.c b/net/core/skmsg.c index fd20aae30be2..bbf40b999713 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -434,7 +434,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, page = sg_page(sge); if (copied + copy > len) copy = len - copied; - copy = copy_page_to_iter(page, sge->offset, copy, iter); + if (copy) + copy = copy_page_to_iter(page, sge->offset, copy, iter); if (!copy) { copied = copied ? 
copied : -EFAULT; goto out; diff --git a/net/core/sock.c b/net/core/sock.c index 8629f9aecf91..100e975073ca 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3742,6 +3742,9 @@ void sk_common_release(struct sock *sk) sk->sk_prot->unhash(sk); + if (sk->sk_socket) + sk->sk_socket->sk = NULL; + /* * In this point socket cannot receive new packets, but it is possible * that some packets are in flight because some CPU runs receiver and diff --git a/net/core/xdp.c b/net/core/xdp.c index 41693154e426..022c12059cf2 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -295,10 +295,8 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem, mutex_lock(&mem_id_lock); ret = __mem_id_init_hash_table(); mutex_unlock(&mem_id_lock); - if (ret < 0) { - WARN_ON(1); + if (ret < 0) return ERR_PTR(ret); - } } xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ff41bd6f99c3..5926159a6f20 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -657,8 +657,11 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_v4_send_response(sk, req)) goto drop_and_free; - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - reqsk_put(req); + if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) + reqsk_free(req); + else + reqsk_put(req); + return 0; drop_and_free: diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 85f4b8fdbe5e..da5dba120bc9 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -400,8 +400,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_v6_send_response(sk, req)) goto drop_and_free; - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - reqsk_put(req); + if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) + reqsk_free(req); + else + reqsk_put(req); + return 0; drop_and_free: diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index e645d751a5e8..223dcd25d88a 100644 --- a/net/ethtool/ioctl.c +++ 
b/net/ethtool/ioctl.c @@ -1306,7 +1306,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR && rxfh.input_xfrm != RXH_XFRM_NO_CHANGE) return -EINVAL; - if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && + if (rxfh.input_xfrm != RXH_XFRM_NO_CHANGE && + (rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && !ops->cap_rss_sym_xor_supported) return -EOPNOTSUPP; diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index b2de2108b356..34d76e87847d 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -37,6 +37,8 @@ static int linkstate_get_sqi(struct net_device *dev) mutex_lock(&phydev->lock); if (!phydev->drv || !phydev->drv->get_sqi) ret = -EOPNOTSUPP; + else if (!phydev->link) + ret = -ENETDOWN; else ret = phydev->drv->get_sqi(phydev); mutex_unlock(&phydev->lock); @@ -55,6 +57,8 @@ static int linkstate_get_sqi_max(struct net_device *dev) mutex_lock(&phydev->lock); if (!phydev->drv || !phydev->drv->get_sqi_max) ret = -EOPNOTSUPP; + else if (!phydev->link) + ret = -ENETDOWN; else ret = phydev->drv->get_sqi_max(phydev); mutex_unlock(&phydev->lock); @@ -62,6 +66,17 @@ static int linkstate_get_sqi_max(struct net_device *dev) return ret; }; +static bool linkstate_sqi_critical_error(int sqi) +{ + return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN; +} + +static bool linkstate_sqi_valid(struct linkstate_reply_data *data) +{ + return data->sqi >= 0 && data->sqi_max >= 0 && + data->sqi <= data->sqi_max; +} + static int linkstate_get_link_ext_state(struct net_device *dev, struct linkstate_reply_data *data) { @@ -93,12 +108,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, data->link = __ethtool_get_link(dev); ret = linkstate_get_sqi(dev); - if (ret < 0 && ret != -EOPNOTSUPP) + if (linkstate_sqi_critical_error(ret)) goto out; data->sqi = ret; ret = linkstate_get_sqi_max(dev); - if (ret < 0 && ret != -EOPNOTSUPP) + if (linkstate_sqi_critical_error(ret)) goto out; 
data->sqi_max = ret; @@ -136,11 +151,10 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base, len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + 0; - if (data->sqi != -EOPNOTSUPP) - len += nla_total_size(sizeof(u32)); - - if (data->sqi_max != -EOPNOTSUPP) - len += nla_total_size(sizeof(u32)); + if (linkstate_sqi_valid(data)) { + len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */ + len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */ + } if (data->link_ext_state_provided) len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */ @@ -164,13 +178,14 @@ static int linkstate_fill_reply(struct sk_buff *skb, nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link)) return -EMSGSIZE; - if (data->sqi != -EOPNOTSUPP && - nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) - return -EMSGSIZE; + if (linkstate_sqi_valid(data)) { + if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) + return -EMSGSIZE; - if (data->sqi_max != -EOPNOTSUPP && - nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max)) - return -EMSGSIZE; + if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, + data->sqi_max)) + return -EMSGSIZE; + } if (data->link_ext_state_provided) { if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE, diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index dd6d46015058..e9cb27061c12 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1810,6 +1810,29 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, return CIPSO_V4_HDR_LEN + ret_val; } +static int cipso_v4_get_actual_opt_len(const unsigned char *data, int len) +{ + int iter = 0, optlen = 0; + + /* determining the new total option length is tricky because of + * the padding necessary, the only thing i can think to do at + * this point is walk the options one-by-one, skipping the + * padding at the end to determine the actual option size and + * from there we can determine the new total option length + */ + while (iter < len) { + if (data[iter] == 
IPOPT_END) { + break; + } else if (data[iter] == IPOPT_NOP) { + iter++; + } else { + iter += data[iter + 1]; + optlen = iter; + } + } + return optlen; +} + /** * cipso_v4_sock_setattr - Add a CIPSO option to a socket * @sk: the socket @@ -1986,7 +2009,6 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) u8 cipso_len; u8 cipso_off; unsigned char *cipso_ptr; - int iter; int optlen_new; cipso_off = opt->opt.cipso - sizeof(struct iphdr); @@ -2006,19 +2028,8 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) memmove(cipso_ptr, cipso_ptr + cipso_len, opt->opt.optlen - cipso_off - cipso_len); - /* determining the new total option length is tricky because of - * the padding necessary, the only thing i can think to do at - * this point is walk the options one-by-one, skipping the - * padding at the end to determine the actual option size and - * from there we can determine the new total option length */ - iter = 0; - optlen_new = 0; - while (iter < opt->opt.optlen) - if (opt->opt.__data[iter] != IPOPT_NOP) { - iter += opt->opt.__data[iter + 1]; - optlen_new = iter; - } else - iter++; + optlen_new = cipso_v4_get_actual_opt_len(opt->opt.__data, + opt->opt.optlen); hdr_delta = opt->opt.optlen; opt->opt.optlen = (optlen_new + 3) & ~3; hdr_delta -= opt->opt.optlen; @@ -2238,7 +2249,8 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb, */ int cipso_v4_skbuff_delattr(struct sk_buff *skb) { - int ret_val; + int ret_val, cipso_len, hdr_len_actual, new_hdr_len_actual, new_hdr_len, + hdr_len_delta; struct iphdr *iph; struct ip_options *opt = &IPCB(skb)->opt; unsigned char *cipso_ptr; @@ -2251,16 +2263,37 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb) if (ret_val < 0) return ret_val; - /* the easiest thing to do is just replace the cipso option with noop - * options since we don't change the size of the packet, although we - * still need to recalculate the checksum */ - iph = ip_hdr(skb); cipso_ptr = (unsigned char *)iph + opt->cipso; - 
memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]); + cipso_len = cipso_ptr[1]; + + hdr_len_actual = sizeof(struct iphdr) + + cipso_v4_get_actual_opt_len((unsigned char *)(iph + 1), + opt->optlen); + new_hdr_len_actual = hdr_len_actual - cipso_len; + new_hdr_len = (new_hdr_len_actual + 3) & ~3; + hdr_len_delta = (iph->ihl << 2) - new_hdr_len; + + /* 1. shift any options after CIPSO to the left */ + memmove(cipso_ptr, cipso_ptr + cipso_len, + new_hdr_len_actual - opt->cipso); + /* 2. move the whole IP header to its new place */ + memmove((unsigned char *)iph + hdr_len_delta, iph, new_hdr_len_actual); + /* 3. adjust the skb layout */ + skb_pull(skb, hdr_len_delta); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + /* 4. re-fill new padding with IPOPT_END (may now be longer) */ + memset((unsigned char *)iph + new_hdr_len_actual, IPOPT_END, + new_hdr_len - new_hdr_len_actual); + + opt->optlen -= hdr_len_delta; opt->cipso = 0; opt->is_changed = 1; - + if (hdr_len_delta != 0) { + iph->ihl = new_hdr_len >> 2; + iph_set_totlen(iph, skb->len); + } ip_send_check(iph); return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d81f74ce0f02..d4f0eff8b20f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1122,25 +1122,34 @@ drop: inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); } -static void reqsk_queue_hash_req(struct request_sock *req, +static bool reqsk_queue_hash_req(struct request_sock *req, unsigned long timeout) { + bool found_dup_sk = false; + + if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk)) + return false; + + /* The timer needs to be setup after a successful insertion. */ timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); - inet_ehash_insert(req_to_sk(req), NULL, NULL); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. 
*/ smp_wmb(); refcount_set(&req->rsk_refcnt, 2 + 1); + return true; } -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout) { - reqsk_queue_hash_req(req, timeout); + if (!reqsk_queue_hash_req(req, timeout)) + return false; + inet_csk_reqsk_queue_added(sk); + return true; } EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7adace541fe2..9712cdb8087c 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1383,6 +1383,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); req.idiag_ext = rc->idiag_ext; + req.pad = 0; req.idiag_states = rc->idiag_states; req.id = rc->id; @@ -1398,6 +1399,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.sdiag_family = rc->idiag_family; req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); req.idiag_ext = rc->idiag_ext; + req.pad = 0; req.idiag_states = rc->idiag_states; req.id = rc->id; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 37c42b63ff99..09c0fa6756b7 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1968,8 +1968,10 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, first = true; } - if (cmd.ao_required && tcp_ao_required_verify(sk)) - return -EKEYREJECTED; + if (cmd.ao_required && tcp_ao_required_verify(sk)) { + err = -EKEYREJECTED; + goto out; + } /* For sockets in TCP_CLOSED it's possible set keys that aren't * matching the future peer (address/port/VRF/etc), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c04a9c8be9d..38da23f991d6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2129,8 +2129,16 @@ void tcp_clear_retrans(struct tcp_sock *tp) static inline void tcp_init_undo(struct tcp_sock *tp) { tp->undo_marker = 
tp->snd_una; + /* Retransmission still in flight may cause DSACKs later. */ - tp->undo_retrans = tp->retrans_out ? : -1; + /* First, account for regular retransmits in flight: */ + tp->undo_retrans = tp->retrans_out; + /* Next, account for TLP retransmits in flight: */ + if (tp->tlp_high_seq && tp->tlp_retrans) + tp->undo_retrans++; + /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */ + if (!tp->undo_retrans) + tp->undo_retrans = -1; } static bool tcp_is_rack(const struct sock *sk) @@ -2209,6 +2217,7 @@ void tcp_enter_loss(struct sock *sk) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; + tp->tlp_high_seq = 0; tcp_ecn_queue_cwr(tp); /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous @@ -2782,13 +2791,37 @@ static void tcp_mtup_probe_success(struct sock *sk) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); } +/* Sometimes we deduce that packets have been dropped due to reasons other than + * congestion, like path MTU reductions or failed client TFO attempts. In these + * cases we call this function to retransmit as many packets as cwnd allows, + * without reducing cwnd. Given that retransmits will set retrans_stamp to a + * non-zero value (and may do so in a later calling context due to TSQ), we + * also enter CA_Loss so that we track when all retransmitted packets are ACKed + * and clear retrans_stamp when that happens (to ensure later recurring RTOs + * are using the correct retrans_stamp and don't declare ETIMEDOUT + * prematurely). 
+ */ +static void tcp_non_congestion_loss_retransmit(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + + if (icsk->icsk_ca_state != TCP_CA_Loss) { + tp->high_seq = tp->snd_nxt; + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->prior_ssthresh = 0; + tp->undo_marker = 0; + tcp_set_ca_state(sk, TCP_CA_Loss); + } + tcp_xmit_retransmit_queue(sk); +} + /* Do a simple retransmit without using the backoff mechanisms in * tcp_timer. This is used for path mtu discovery. * The socket is already locked here. */ void tcp_simple_retransmit(struct sock *sk) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int mss; @@ -2828,14 +2861,7 @@ void tcp_simple_retransmit(struct sock *sk) * in network, but units changed and effective * cwnd/ssthresh really reduced now. */ - if (icsk->icsk_ca_state != TCP_CA_Loss) { - tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->prior_ssthresh = 0; - tp->undo_marker = 0; - tcp_set_ca_state(sk, TCP_CA_Loss); - } - tcp_xmit_retransmit_queue(sk); + tcp_non_congestion_loss_retransmit(sk); } EXPORT_SYMBOL(tcp_simple_retransmit); @@ -3060,7 +3086,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, return; if (tcp_try_undo_dsack(sk)) - tcp_try_keep_open(sk); + tcp_try_to_open(sk, flag); tcp_identify_packet_loss(sk, ack_flag); if (icsk->icsk_ca_state != TCP_CA_Recovery) { @@ -4207,6 +4233,13 @@ void tcp_parse_options(const struct net *net, */ break; #endif +#ifdef CONFIG_TCP_AO + case TCPOPT_AO: + /* TCP AO has already been checked + * (see tcp_inbound_ao_hash()). 
+ */ + break; +#endif case TCPOPT_FASTOPEN: tcp_parse_fastopen_option( opsize - TCPOLEN_FASTOPEN_BASE, @@ -6295,7 +6328,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, tp->fastopen_client_fail = TFO_DATA_NOT_ACKED; skb_rbtree_walk_from(data) tcp_mark_skb_lost(sk, data); - tcp_xmit_retransmit_queue(sk); + tcp_non_congestion_loss_retransmit(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); return true; @@ -7256,7 +7289,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; if (!want_cookie) { req->timeout = tcp_timeout_init((struct sock *)req); - inet_csk_reqsk_queue_hash_add(sk, req, req->timeout); + if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, + req->timeout))) { + reqsk_free(req); + return 0; + } + } af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? TCP_SYNACK_NORMAL : diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index e93df98de3f4..b01eb6d94413 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -619,6 +619,7 @@ static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr), }, + [TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, }, /* Following attributes are not received for GET/DEL, * we keep them for reference */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5bfd76a31af6..892c86657fbc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -483,15 +483,26 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, const struct sk_buff *skb, u32 rtx_delta) { + const struct inet_connection_sock *icsk = inet_csk(sk); + u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); const struct tcp_sock *tp = tcp_sk(sk); - const int timeout = TCP_RTO_MAX * 2; + int timeout = TCP_RTO_MAX * 2; s32 rcv_delta; + if (user_timeout) { + /* If user 
application specified a TCP_USER_TIMEOUT, + * it does not want win 0 packets to 'reset the timer' + * while retransmits are not making progress. + */ + if (rtx_delta > user_timeout) + return true; + timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout)); + } /* Note: timer interrupt might have been delayed by at least one jiffy, * and tp->rcv_tstamp might very well have been written recently. * rcv_delta can thus be negative. */ - rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; + rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; @@ -536,8 +547,6 @@ void tcp_retransmit_timer(struct sock *sk) if (WARN_ON_ONCE(!skb)) return; - tp->tlp_high_seq = 0; - if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { /* Receiver dastardly shrinks window. Our retransmits diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 189c9113fe9a..578668878a85 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -326,6 +326,8 @@ found: goto fail_unlock; } + sock_set_flag(sk, SOCK_RCU_FREE); + sk_add_node_rcu(sk, &hslot->head); hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -342,7 +344,7 @@ found: hslot2->count++; spin_unlock(&hslot2->lock); } - sock_set_flag(sk, SOCK_RCU_FREE); + error = 0; fail_unlock: spin_unlock_bh(&hslot->lock); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6e57c03e3255..83e4f9855ae1 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2514,7 +2514,8 @@ int __init fib6_init(void) goto out_kmem_cache_create; ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL, - inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED); + inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED | + RTNL_FLAG_DUMP_SPLIT_NLM_DONE); if (ret) goto out_unregister_subsys; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 952c2bf11709..8d72ca0b086d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -638,6 +638,8 @@ static void rt6_probe(struct fib6_nh 
*fib6_nh) rcu_read_lock(); last_probe = READ_ONCE(fib6_nh->last_probe); idev = __in6_dev_get(dev); + if (!idev) + goto out; neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { if (READ_ONCE(neigh->nud_state) & NUD_VALID) @@ -3603,7 +3605,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, if (!dev) goto out; - if (idev->cnf.disable_ipv6) { + if (!idev || idev->cnf.disable_ipv6) { NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); err = -EACCES; goto out; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 24e2b4b494cb..c434940131b1 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -941,8 +941,8 @@ static int input_action_end_dx6(struct sk_buff *skb, if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, NULL, - skb_dst(skb)->dev, input_action_end_dx6_finish); + dev_net(skb->dev), NULL, skb, skb->dev, + NULL, input_action_end_dx6_finish); return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); drop: @@ -991,8 +991,8 @@ static int input_action_end_dx4(struct sk_buff *skb, if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, NULL, - skb_dst(skb)->dev, input_action_end_dx4_finish); + dev_net(skb->dev), NULL, skb, skb->dev, + NULL, input_action_end_dx4_finish); return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); drop: diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index cc885d3aa9e5..2f1ea5f999a2 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -56,12 +56,18 @@ static int xfrm6_get_saddr(struct net *net, int oif, { struct dst_entry *dst; struct net_device *dev; + struct inet6_dev *idev; dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark); if (IS_ERR(dst)) return -EHOSTUNREACH; - dev = ip6_dst_idev(dst)->dev; + idev = ip6_dst_idev(dst); + if (!idev) { + dst_release(dst); + return 
-EHOSTUNREACH; + } + dev = idev->dev; ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6); dst_release(dst); return 0; diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index dce37ba8ebe3..254d745832cb 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -311,6 +311,18 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); + /* + * We should perhaps push emulate chanctx down and only + * make it call ->config() when the chanctx is actually + * assigned here (and unassigned below), but that's yet + * another change to all drivers to add assign/unassign + * emulation callbacks. Maybe later. + */ + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + local->emulate_chanctx && + !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + return 0; + if (!check_sdata_in_driver(sdata)) return -EIO; @@ -338,6 +350,11 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + local->emulate_chanctx && + !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + return; + if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index dc42902e2693..b935bb5d8ed1 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -686,6 +686,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do ieee80211_del_virtual_monitor(local); ieee80211_recalc_idle(local); + ieee80211_recalc_offload(local); if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) break; @@ -1121,9 +1122,6 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; int ret; - if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) - return 0; - ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); @@ -1145,11 +1143,13 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) 
ieee80211_set_default_queues(sdata); - ret = drv_add_interface(local, sdata); - if (WARN_ON(ret)) { - /* ok .. stupid driver, it asked for this! */ - kfree(sdata); - return ret; + if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { + ret = drv_add_interface(local, sdata); + if (WARN_ON(ret)) { + /* ok .. stupid driver, it asked for this! */ + kfree(sdata); + return ret; + } } set_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -1187,9 +1187,6 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) - return; - ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); @@ -1209,7 +1206,8 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) ieee80211_link_release_channel(&sdata->deflink); - drv_remove_interface(local, sdata); + if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + drv_remove_interface(local, sdata); kfree(sdata); } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1132dea0e290..0965ad11ec74 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -423,6 +423,7 @@ u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_ERP_SLOT; } +/* context: requires softirqs disabled */ void ieee80211_handle_queued_frames(struct ieee80211_local *local) { struct sk_buff *skb; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 8ecc4b710b0e..b5f2df61c7f6 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -358,7 +358,8 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) struct cfg80211_scan_request *req; struct cfg80211_chan_def chandef; u8 bands_used = 0; - int i, ielen, n_chans; + int i, ielen; + u32 *n_chans; u32 flags = 0; req = rcu_dereference_protected(local->scan_req, @@ -368,34 +369,34 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) return false; if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) { + 
local->hw_scan_req->req.n_channels = req->n_channels; + for (i = 0; i < req->n_channels; i++) { local->hw_scan_req->req.channels[i] = req->channels[i]; bands_used |= BIT(req->channels[i]->band); } - - n_chans = req->n_channels; } else { do { if (local->hw_scan_band == NUM_NL80211_BANDS) return false; - n_chans = 0; + n_chans = &local->hw_scan_req->req.n_channels; + *n_chans = 0; for (i = 0; i < req->n_channels; i++) { if (req->channels[i]->band != local->hw_scan_band) continue; - local->hw_scan_req->req.channels[n_chans] = + local->hw_scan_req->req.channels[(*n_chans)++] = req->channels[i]; - n_chans++; + bands_used |= BIT(req->channels[i]->band); } local->hw_scan_band++; - } while (!n_chans); + } while (!*n_chans); } - local->hw_scan_req->req.n_channels = n_chans; ieee80211_prepare_scan_chandef(&chandef); if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 283bfc99417e..771c05640aa3 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1567,7 +1567,9 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, void ieee80211_stop_device(struct ieee80211_local *local) { + local_bh_disable(); ieee80211_handle_queued_frames(local); + local_bh_enable(); ieee80211_led_radio(local, false); ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO); @@ -1843,7 +1845,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* add interfaces */ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { /* in HW restart it exists already */ WARN_ON(local->resuming); res = drv_add_interface(local, sdata); diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 9ab7396668d2..21b7c3b280b4 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -161,8 +161,10 @@ void ieee802154_configure_durations(struct wpan_phy *phy, } phy->symbol_duration = duration; - phy->lifs_period = 
(IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC; - phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC; + phy->lifs_period = + (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC; + phy->sifs_period = + (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC; } EXPORT_SYMBOL(ieee802154_configure_durations); @@ -184,10 +186,10 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy) * Should be done when all drivers sets this value. */ - wpan_phy->lifs_period = - (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000; - wpan_phy->sifs_period = - (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000; + wpan_phy->lifs_period = (IEEE802154_LIFS_PERIOD * + wpan_phy->symbol_duration) / NSEC_PER_USEC; + wpan_phy->sifs_period = (IEEE802154_SIFS_PERIOD * + wpan_phy->symbol_duration) / NSEC_PER_USEC; } int ieee802154_register_hw(struct ieee802154_hw *hw) diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 2a6f1ed763c9..6fbed5bb5c3e 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -34,8 +34,8 @@ void ieee802154_xmit_sync_worker(struct work_struct *work) if (res) goto err_tx; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + DEV_STATS_INC(dev, tx_packets); + DEV_STATS_ADD(dev, tx_bytes, skb->len); ieee802154_xmit_complete(&local->hw, skb, false); @@ -90,8 +90,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) if (ret) goto err_wake_netif_queue; - dev->stats.tx_packets++; - dev->stats.tx_bytes += len; + DEV_STATS_INC(dev, tx_packets); + DEV_STATS_ADD(dev, tx_bytes, len); } else { local->tx_skb = skb; queue_work(local->workqueue, &local->sync_tx_work); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 3126911f5042..b00fc285b334 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -815,12 +815,21 @@ int __init netfilter_init(void) if (ret < 0) goto err; +#ifdef CONFIG_LWTUNNEL + ret = netfilter_lwtunnel_init(); + 
if (ret < 0) + goto err_lwtunnel_pernet; +#endif ret = netfilter_log_init(); if (ret < 0) - goto err_pernet; + goto err_log_pernet; return 0; -err_pernet: +err_log_pernet: +#ifdef CONFIG_LWTUNNEL + netfilter_lwtunnel_fini(); +err_lwtunnel_pernet: +#endif unregister_pernet_subsys(&netfilter_net_ops); err: return ret; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index c7ae4d9bf3d2..61431690cbd5 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -53,12 +53,13 @@ MODULE_DESCRIPTION("core IP set support"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex or ip_set_ref_lock is held: */ -#define ip_set_dereference(p) \ - rcu_dereference_protected(p, \ +#define ip_set_dereference(inst) \ + rcu_dereference_protected((inst)->ip_set_list, \ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ - lockdep_is_held(&ip_set_ref_lock)) + lockdep_is_held(&ip_set_ref_lock) || \ + (inst)->is_deleted) #define ip_set(inst, id) \ - ip_set_dereference((inst)->ip_set_list)[id] + ip_set_dereference(inst)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] #define ip_set_dereference_nfnl(p) \ @@ -1133,7 +1134,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ - tmp = ip_set_dereference(inst->ip_set_list); + tmp = ip_set_dereference(inst); memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 74112e9c5dab..6c40bdf8b05a 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -22,9 +22,6 @@ #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_zones.h> #include 
<net/netfilter/nf_conntrack_timestamp.h> -#ifdef CONFIG_LWTUNNEL -#include <net/netfilter/nf_hooks_lwtunnel.h> -#endif #include <linux/rculist_nulls.h> static bool enable_hooks __read_mostly; @@ -612,9 +609,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, #endif -#ifdef CONFIG_LWTUNNEL - NF_SYSCTL_CT_LWTUNNEL, -#endif NF_SYSCTL_CT_LAST_SYSCTL, }; @@ -946,15 +940,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, #endif -#ifdef CONFIG_LWTUNNEL - [NF_SYSCTL_CT_LWTUNNEL] = { - .procname = "nf_hooks_lwtunnel", - .data = NULL, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = nf_hooks_lwtunnel_sysctl_handler, - }, -#endif }; static struct ctl_table nf_ct_netfilter_table[] = { diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c index 00e89ffd78f6..d8ebebc9775d 100644 --- a/net/netfilter/nf_hooks_lwtunnel.c +++ b/net/netfilter/nf_hooks_lwtunnel.c @@ -3,6 +3,9 @@ #include <linux/sysctl.h> #include <net/lwtunnel.h> #include <net/netfilter/nf_hooks_lwtunnel.h> +#include <linux/netfilter.h> + +#include "nf_internals.h" static inline int nf_hooks_lwtunnel_get(void) { @@ -50,4 +53,71 @@ int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, return ret; } EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler); + +static struct ctl_table nf_lwtunnel_sysctl_table[] = { + { + .procname = "nf_hooks_lwtunnel", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = nf_hooks_lwtunnel_sysctl_handler, + }, +}; + +static int __net_init nf_lwtunnel_net_init(struct net *net) +{ + struct ctl_table_header *hdr; + struct ctl_table *table; + + table = nf_lwtunnel_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(nf_lwtunnel_sysctl_table, + sizeof(nf_lwtunnel_sysctl_table), + GFP_KERNEL); + if (!table) + goto err_alloc; + } + + hdr = register_net_sysctl_sz(net, "net/netfilter", table, + 
ARRAY_SIZE(nf_lwtunnel_sysctl_table)); + if (!hdr) + goto err_reg; + + net->nf.nf_lwtnl_dir_header = hdr; + + return 0; +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit nf_lwtunnel_net_exit(struct net *net) +{ + const struct ctl_table *table; + + table = net->nf.nf_lwtnl_dir_header->ctl_table_arg; + unregister_net_sysctl_table(net->nf.nf_lwtnl_dir_header); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations nf_lwtunnel_net_ops = { + .init = nf_lwtunnel_net_init, + .exit = nf_lwtunnel_net_exit, +}; + +int __init netfilter_lwtunnel_init(void) +{ + return register_pernet_subsys(&nf_lwtunnel_net_ops); +} + +void netfilter_lwtunnel_fini(void) +{ + unregister_pernet_subsys(&nf_lwtunnel_net_ops); +} +#else +int __init netfilter_lwtunnel_init(void) { return 0; } +void netfilter_lwtunnel_fini(void) {} #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 832ae64179f0..25403023060b 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -29,6 +29,12 @@ void nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ int __init netfilter_log_init(void); +#ifdef CONFIG_LWTUNNEL +/* nf_hooks_lwtunnel.c */ +int __init netfilter_lwtunnel_init(void); +void netfilter_lwtunnel_fini(void); +#endif + /* core.c */ void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp, const struct nf_hook_ops *reg); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index be3b4c90d2ed..91cc3a81ba8f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3823,6 +3823,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r nf_tables_rule_destroy(ctx, rule); } +/** nft_chain_validate - loop detection and hook validation + * + * @ctx: context containing call depth and base chain + * @chain: chain to validate + * + * Walk through the rules of the 
given chain and chase all jumps/gotos + * and set lookups until either the jump limit is hit or all reachable + * chains have been validated. + */ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) { struct nft_expr *expr, *last; @@ -3844,6 +3853,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (!expr->ops->validate) continue; + /* This may call nft_chain_validate() recursively, + * callers that do so must increment ctx->level. + */ err = expr->ops->validate(ctx, expr, &data); if (err < 0) return err; @@ -5740,8 +5752,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext), - set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE, - set->dlen) < 0) + nft_set_datatype(set), set->dlen) < 0) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) && @@ -10810,150 +10821,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain, } EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); -/* - * Loop detection - walk through the ruleset beginning at the destination chain - * of a new jump until either the source chain is reached (loop) or all - * reachable chains have been traversed. - * - * The loop check is performed whenever a new jump verdict is added to an - * expression or verdict map or a verdict map is bound to a new chain. 
- */ - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain); - -static int nft_check_loops(const struct nft_ctx *ctx, - const struct nft_set_ext *ext) -{ - const struct nft_data *data; - int ret; - - data = nft_set_ext_data(ext); - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - ret = nf_tables_check_loops(ctx, data->verdict.chain); - break; - default: - ret = 0; - break; - } - - return ret; -} - -static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, - struct nft_set *set, - const struct nft_set_iter *iter, - struct nft_elem_priv *elem_priv) -{ - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); - - if (!nft_set_elem_active(ext, iter->genmask)) - return 0; - - if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && - *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) - return 0; - - return nft_check_loops(ctx, ext); -} - -static int nft_set_catchall_loops(const struct nft_ctx *ctx, - struct nft_set *set) -{ - u8 genmask = nft_genmask_next(ctx->net); - struct nft_set_elem_catchall *catchall; - struct nft_set_ext *ext; - int ret = 0; - - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { - ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_set_elem_active(ext, genmask)) - continue; - - ret = nft_check_loops(ctx, ext); - if (ret < 0) - return ret; - } - - return ret; -} - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain) -{ - const struct nft_rule *rule; - const struct nft_expr *expr, *last; - struct nft_set *set; - struct nft_set_binding *binding; - struct nft_set_iter iter; - - if (ctx->chain == chain) - return -ELOOP; - - if (fatal_signal_pending(current)) - return -EINTR; - - list_for_each_entry(rule, &chain->rules, list) { - nft_rule_for_each_expr(expr, last, rule) { - struct nft_immediate_expr *priv; - const struct nft_data *data; - int err; - - if (strcmp(expr->ops->type->name, "immediate")) - continue; - - priv = 
nft_expr_priv(expr); - if (priv->dreg != NFT_REG_VERDICT) - continue; - - data = &priv->data; - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - err = nf_tables_check_loops(ctx, - data->verdict.chain); - if (err < 0) - return err; - break; - default: - break; - } - } - } - - list_for_each_entry(set, &ctx->table->sets, list) { - if (!nft_is_active_next(ctx->net, set)) - continue; - if (!(set->flags & NFT_SET_MAP) || - set->dtype != NFT_DATA_VERDICT) - continue; - - list_for_each_entry(binding, &set->bindings, list) { - if (!(binding->flags & NFT_SET_MAP) || - binding->chain != chain) - continue; - - iter.genmask = nft_genmask_next(ctx->net); - iter.type = NFT_ITER_UPDATE; - iter.skip = 0; - iter.count = 0; - iter.err = 0; - iter.fn = nf_tables_loop_check_setelem; - - set->ops->walk(ctx, set, &iter); - if (!iter.err) - iter.err = nft_set_catchall_loops(ctx, set); - - if (iter.err < 0) - return iter.err; - } - } - - return 0; -} - /** * nft_parse_u32_check - fetch u32 attribute and check for maximum value * @@ -11066,13 +10933,16 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, if (data != NULL && (data->verdict.code == NFT_GOTO || data->verdict.code == NFT_JUMP)) { - err = nf_tables_check_loops(ctx, data->verdict.chain); + err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; } return 0; default: + if (type != NFT_DATA_VALUE) + return -EINVAL; + if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; if (len == 0) @@ -11081,8 +10951,6 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, sizeof_field(struct nft_regs, data)) return -ERANGE; - if (data != NULL && type != NFT_DATA_VALUE) - return -EINVAL; return 0; } } @@ -11483,8 +11351,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nf_tables_destroy_list)) - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(); again: 
list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f1c31757e496..55e28e1da66e 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -325,7 +325,7 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) hooks = nf_hook_entries_head(net, pf, entry->state.hook); i = entry->hook_index; - if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { + if (!hooks || i >= hooks->num_hook_entries) { kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP); nf_queue_entry_free(entry); return; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index b314ca728a29..f3080fa1b226 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -132,7 +132,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return -EINVAL; err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG], - &priv->dreg, NULL, set->dtype, + &priv->dreg, NULL, + nft_set_datatype(set), set->dlen); if (err < 0) return err; diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 4e7c968cde2d..5e3ca068f04e 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -121,7 +121,8 @@ static void nr_heartbeat_expiry(struct timer_list *t) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { - sock_hold(sk); + if (sk->sk_state == TCP_LISTEN) + sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); goto out; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 2928c142a2dd..3b980bf2770b 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -168,8 +168,13 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct) static void ovs_ct_get_labels(const struct nf_conn *ct, struct ovs_key_ct_labels *labels) { - struct nf_conn_labels *cl = ct ? 
nf_ct_labels_find(ct) : NULL; + struct nf_conn_labels *cl = NULL; + if (ct) { + if (ct->master && !nf_ct_is_confirmed(ct)) + ct = ct->master; + cl = nf_ct_labels_find(ct); + } if (cl) memcpy(labels, cl->bits, OVS_CT_LABELS_LEN); else diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9ee622fb1160..2520708b06a1 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -830,7 +830,6 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, u32 max; if (*index) { -again: rcu_read_lock(); p = idr_find(&idrinfo->action_idr, *index); @@ -839,7 +838,7 @@ again: * index but did not assign the pointer yet. */ rcu_read_unlock(); - goto again; + return -EAGAIN; } if (!p) { diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index baac083fd8f1..6fa3cca87d34 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -41,21 +41,26 @@ static struct workqueue_struct *act_ct_wq; static struct rhashtable zones_ht; static DEFINE_MUTEX(zones_mutex); +struct zones_ht_key { + struct net *net; + u16 zone; +}; + struct tcf_ct_flow_table { struct rhash_head node; /* In zones tables */ struct rcu_work rwork; struct nf_flowtable nf_ft; refcount_t ref; - u16 zone; + struct zones_ht_key key; bool dying; }; static const struct rhashtable_params zones_params = { .head_offset = offsetof(struct tcf_ct_flow_table, node), - .key_offset = offsetof(struct tcf_ct_flow_table, zone), - .key_len = sizeof_field(struct tcf_ct_flow_table, zone), + .key_offset = offsetof(struct tcf_ct_flow_table, key), + .key_len = sizeof_field(struct tcf_ct_flow_table, key), .automatic_shrinking = true, }; @@ -316,11 +321,12 @@ static struct nf_flowtable_type flowtable_ct = { static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) { + struct zones_ht_key key = { .net = net, .zone = params->zone }; struct tcf_ct_flow_table *ct_ft; int err = -ENOMEM; mutex_lock(&zones_mutex); - ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); + ct_ft = 
rhashtable_lookup_fast(&zones_ht, &key, zones_params); if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) goto out_unlock; @@ -329,7 +335,7 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) goto err_alloc; refcount_set(&ct_ft->ref, 1); - ct_ft->zone = params->zone; + ct_ft->key = key; err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params); if (err) goto err_insert; @@ -1071,6 +1077,14 @@ do_nat: */ if (nf_conntrack_confirm(skb) != NF_ACCEPT) goto drop; + + /* The ct may be dropped if a clash has been resolved, + * so it's necessary to retrieve it from skb again to + * prevent UAF. + */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + skip_add = true; } if (!skip_add) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index c2ef9dcf91d2..cc6051d4f2ef 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -91,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, true, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, true); @@ -121,7 +121,7 @@ static void ingress_destroy(struct Qdisc *sch) tcf_block_put_ext(q->block, sch, &q->block_info); if (entry) { - tcx_miniq_set_active(entry, false); + tcx_miniq_dec(entry); if (!tcx_entry_is_active(entry)) { tcx_entry_update(dev, NULL, true); tcx_entry_free(entry); @@ -257,7 +257,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, true, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, true); @@ -276,7 +276,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, false, &created); if 
(!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, false); @@ -302,7 +302,7 @@ static void clsact_destroy(struct Qdisc *sch) tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); if (ingress_entry) { - tcx_miniq_set_active(ingress_entry, false); + tcx_miniq_dec(ingress_entry); if (!tcx_entry_is_active(ingress_entry)) { tcx_entry_update(dev, NULL, true); tcx_entry_free(ingress_entry); @@ -310,7 +310,7 @@ static void clsact_destroy(struct Qdisc *sch) } if (egress_entry) { - tcx_miniq_set_active(egress_entry, false); + tcx_miniq_dec(egress_entry); if (!tcx_entry_is_active(egress_entry)) { tcx_entry_update(dev, NULL, false); tcx_entry_free(egress_entry); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2b4b1276d4e8..d9cda1e53a01 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1557,9 +1557,11 @@ out_drop: */ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) { + struct rpc_timeout timeout = { + .to_increment = 0, + }; struct rpc_task *task; int proc_error; - struct rpc_timeout timeout; /* Build the svc_rqst used by the common processing routine */ rqstp->rq_xid = req->rq_xid; @@ -1612,6 +1614,7 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) timeout.to_initval = req->rq_xprt->timeout->to_initval; timeout.to_retries = req->rq_xprt->timeout->to_retries; } + timeout.to_maxval = timeout.to_initval; memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); task = rpc_run_bc_task(req, &timeout); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index dfc353eea8ed..0e1691316f42 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2441,6 +2441,13 @@ static void xs_tcp_setup_socket(struct work_struct *work) transport->srcport = 0; status = -EAGAIN; break; + case -EPERM: + /* Happens, for instance, if a BPF program is preventing + * the 
connect. Remap the error so upper layers can better + * deal with it. + */ + status = -ECONNREFUSED; + fallthrough; case -EINVAL: /* Happens, for instance, if the user specified a link * local IPv6 address without a scope-id. diff --git a/net/tipc/node.c b/net/tipc/node.c index c1e890a82434..500320e5ca47 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2105,6 +2105,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) } else { n = tipc_node_find_by_id(net, ehdr->id); } + skb_dst_force(skb); tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b); if (!skb) return; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5e695a9a609c..142f56770b77 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2613,10 +2613,24 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, { struct unix_sock *u = unix_sk(sk); - if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { - skb_unlink(skb, &sk->sk_receive_queue); - consume_skb(skb); - skb = NULL; + if (!unix_skb_len(skb)) { + struct sk_buff *unlinked_skb = NULL; + + spin_lock(&sk->sk_receive_queue.lock); + + if (copied && (!u->oob_skb || skb == u->oob_skb)) { + skb = NULL; + } else if (flags & MSG_PEEK) { + skb = skb_peek_next(skb, &sk->sk_receive_queue); + } else { + unlinked_skb = skb; + skb = skb_peek_next(skb, &sk->sk_receive_queue); + __skb_unlink(unlinked_skb, &sk->sk_receive_queue); + } + + spin_unlock(&sk->sk_receive_queue.lock); + + consume_skb(unlinked_skb); } else { struct sk_buff *unlinked_skb = NULL; @@ -3093,12 +3107,23 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) #if IS_ENABLED(CONFIG_AF_UNIX_OOB) case SIOCATMARK: { + struct unix_sock *u = unix_sk(sk); struct sk_buff *skb; int answ = 0; + mutex_lock(&u->iolock); + skb = skb_peek(&sk->sk_receive_queue); - if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) - answ = 1; + if (skb) { + struct sk_buff *oob_skb = READ_ONCE(u->oob_skb); + + if (skb == oob_skb || + (!oob_skb 
&& !unix_skb_len(skb))) + answ = 1; + } + + mutex_unlock(&u->iolock); + err = put_user(answ, (int __user *)arg); } break; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dfe94a90ece4..23efb78fe9ef 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -476,6 +476,7 @@ prev_vertex: } if (vertex->index == vertex->scc_index) { + struct unix_vertex *v; struct list_head scc; bool scc_dead = true; @@ -486,15 +487,15 @@ prev_vertex: */ __list_cut_position(&scc, &vertex_stack, &vertex->scc_entry); - list_for_each_entry_reverse(vertex, &scc, scc_entry) { + list_for_each_entry_reverse(v, &scc, scc_entry) { /* Don't restart DFS from this vertex in unix_walk_scc(). */ - list_move_tail(&vertex->entry, &unix_visited_vertices); + list_move_tail(&v->entry, &unix_visited_vertices); /* Mark vertex as off-stack. */ - vertex->index = unix_vertex_grouped_index; + v->index = unix_vertex_grouped_index; if (scc_dead) - scc_dead = unix_vertex_dead(vertex); + scc_dead = unix_vertex_dead(v); } if (scc_dead) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3c0bca4238d3..72c7bf558581 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -468,6 +468,10 @@ static const struct netlink_range_validation nl80211_punct_bitmap_range = { .max = 0xffff, }; +static const struct netlink_range_validation q_range = { + .max = INT_MAX, +}; + static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -754,7 +758,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 }, [NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 }, - [NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 }, + [NL80211_ATTR_TXQ_QUANTUM] = NLA_POLICY_FULL_RANGE(NLA_U32, &q_range), [NL80211_ATTR_HE_CAPABILITY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_he_capa, NL80211_HE_MAX_CAPABILITY_LEN), diff --git 
a/net/wireless/scan.c b/net/wireless/scan.c index 2f2a3163968a..0222ede0feb6 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -3416,10 +3416,14 @@ int cfg80211_wext_siwscan(struct net_device *dev, wiphy = &rdev->wiphy; /* Determine number of channels, needed to allocate creq */ - if (wreq && wreq->num_channels) + if (wreq && wreq->num_channels) { + /* Passed from userspace so should be checked */ + if (unlikely(wreq->num_channels > IW_MAX_FREQUENCIES)) + return -EINVAL; n_channels = wreq->num_channels; - else + } else { n_channels = ieee80211_get_num_supported_channels(wiphy); + } creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) + n_channels * sizeof(void *), @@ -3493,8 +3497,10 @@ int cfg80211_wext_siwscan(struct net_device *dev, memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len); creq->ssids[0].ssid_len = wreq->essid_len; } - if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) + if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) { + creq->ssids = NULL; creq->n_ssids = 0; + } } for (i = 0; i < NUM_NL80211_BANDS; i++) diff --git a/rust/kernel/alloc/vec_ext.rs b/rust/kernel/alloc/vec_ext.rs index e9a81052728a..1297a4be32e8 100644 --- a/rust/kernel/alloc/vec_ext.rs +++ b/rust/kernel/alloc/vec_ext.rs @@ -4,7 +4,6 @@ use super::{AllocError, Flags}; use alloc::vec::Vec; -use core::ptr; /// Extensions to [`Vec`]. pub trait VecExt<T>: Sized { @@ -141,7 +140,11 @@ impl<T> VecExt<T> for Vec<T> { // `krealloc_aligned`. A `Vec<T>`'s `ptr` value is not guaranteed to be NULL and might be // dangling after being created with `Vec::new`. Instead, we can rely on `Vec<T>`'s capacity // to be zero if no memory has been allocated yet. - let ptr = if cap == 0 { ptr::null_mut() } else { old_ptr }; + let ptr = if cap == 0 { + core::ptr::null_mut() + } else { + old_ptr + }; // SAFETY: `ptr` is valid because it's either NULL or comes from a previous call to // `krealloc_aligned`. We also verified that the type is not a ZST. 
diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst index 67956f6496a5..9d920419a62c 100644 --- a/scripts/Makefile.dtbinst +++ b/scripts/Makefile.dtbinst @@ -17,7 +17,7 @@ include $(srctree)/scripts/Kbuild.include dst := $(INSTALL_DTBS_PATH) quiet_cmd_dtb_install = INSTALL $@ - cmd_dtb_install = install -D $< $@ + cmd_dtb_install = install -D -m 0644 $< $@ $(dst)/%: $(obj)/% $(call cmd,dtb_install) diff --git a/scripts/Makefile.host b/scripts/Makefile.host index d35f55e0d141..e85be7721a48 100644 --- a/scripts/Makefile.host +++ b/scripts/Makefile.host @@ -146,7 +146,7 @@ $(call multi_depend, $(host-cxxmulti), , -objs -cxxobjs) # Create .o file from a single .cc (C++) file quiet_cmd_host-cxxobjs = HOSTCXX $@ cmd_host-cxxobjs = $(HOSTCXX) $(hostcxx_flags) -c -o $@ $< -$(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE +$(host-cxxobjs): $(obj)/%.o: $(obj)/%.cc FORCE $(call if_changed_dep,host-cxxobjs) # Create executable from a single Rust crate (which may consist of diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 38653f3e8108..bf016af8bf8a 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -103,7 +103,7 @@ debian-orig: private version = $(shell dpkg-parsechangelog -S Version | sed 's/- debian-orig: private orig-name = $(source)_$(version).orig.tar$(debian-orig-suffix) debian-orig: mkdebian-opts = --need-source debian-orig: linux.tar$(debian-orig-suffix) debian - $(Q)if [ "$(df --output=target .. 2>/dev/null)" = "$(df --output=target $< 2>/dev/null)" ]; then \ + $(Q)if [ "$$(df --output=target .. 
2>/dev/null)" = "$$(df --output=target $< 2>/dev/null)" ]; then \ ln -f $< ../$(orig-name); \ else \ cp $< ../$(orig-name); \ diff --git a/scripts/gdb/linux/Makefile b/scripts/gdb/linux/Makefile index fd1402c0a1a1..fcd32fcf3ae0 100644 --- a/scripts/gdb/linux/Makefile +++ b/scripts/gdb/linux/Makefile @@ -5,7 +5,7 @@ ifdef building_out_of_srctree symlinks := $(patsubst $(src)/%,%,$(wildcard $(src)/*.py)) quiet_cmd_symlink = SYMLINK $@ - cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(src)/%,$@) $@ + cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(abspath $(src))/%,$@) $@ always-y += $(symlinks) $(addprefix $(obj)/, $(symlinks)): FORCE diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh index a78b804b680c..b9513d224476 100755 --- a/scripts/ld-version.sh +++ b/scripts/ld-version.sh @@ -57,9 +57,11 @@ else fi fi -# Some distributions append a package release number, as in 2.34-4.fc32 -# Trim the hyphen and any characters that follow. -version=${version%-*} +# There may be something after the version, such as a distribution's package +# release number (like Fedora's "2.34-4.fc32") or punctuation (like LLD briefly +# added before the "compatible with GNU linkers" string), so remove everything +# after just numbers and periods. +version=${version%%[!0-9.]*} cversion=$(get_canonical_version $version) min_cversion=$(get_canonical_version $min_version) diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index e095eb1e290e..c52d517b9364 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -57,7 +57,8 @@ patch -p1 < %{SOURCE2} %install mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE} cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz -%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install +# DEPMOD=true makes depmod no-op. We do not package depmod-generated files. 
+%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} DEPMOD=true modules_install %{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE} cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config @@ -70,10 +71,7 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA %endif { - for x in System.map config kernel modules.builtin \ - modules.builtin.modinfo modules.order vmlinuz; do - echo "/lib/modules/%{KERNELRELEASE}/${x}" - done + echo "/lib/modules/%{KERNELRELEASE}" for x in alias alias.bin builtin.alias.bin builtin.bin dep dep.bin \ devname softdep symbols symbols.bin; do @@ -85,7 +83,6 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA done if [ -d "%{buildroot}/lib/modules/%{KERNELRELEASE}/dtb" ];then - echo "/lib/modules/%{KERNELRELEASE}/dtb" find "%{buildroot}/lib/modules/%{KERNELRELEASE}/dtb" -printf "%%%ghost /boot/dtb-%{KERNELRELEASE}/%%P\n" fi diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index effbf5982be1..2cff851ebfd7 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -255,21 +255,6 @@ config INIT_ON_FREE_DEFAULT_ON touching "cold" memory areas. Most cases see 3-5% impact. Some synthetic workloads have measured as high as 8%. -config INIT_MLOCKED_ON_FREE_DEFAULT_ON - bool "Enable mlocked memory zeroing on free" - depends on !KMSAN - help - This config has the effect of setting "init_mlocked_on_free=1" - on the kernel command line. If it is enabled, all mlocked process - memory is zeroed when freed. This restriction to mlocked memory - improves performance over "init_on_free" but can still be used to - protect confidential data like key material from content exposures - to other processes, as well as live forensics and cold boot attacks. - Any non-mlocked memory is not cleared before it is reassigned. 
This - configuration can be overwritten by setting "init_mlocked_on_free=0" - on the command line. The "init_on_free" boot option takes - precedence over "init_mlocked_on_free". - config CC_HAS_ZERO_CALL_USED_REGS def_bool $(cc-option,-fzero-call-used-regs=used-gpr) # https://github.com/ClangBuiltLinux/linux/issues/1766 diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 45beb1c5f747..6b5181c668b5 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -217,7 +217,7 @@ void aa_audit_rule_free(void *vrule) } } -int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) +int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp) { struct aa_audit_rule *rule; @@ -230,14 +230,14 @@ int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) return -EINVAL; } - rule = kzalloc(sizeof(struct aa_audit_rule), GFP_KERNEL); + rule = kzalloc(sizeof(struct aa_audit_rule), gfp); if (!rule) return -ENOMEM; /* Currently rules are treated as coming from the root ns */ rule->label = aa_label_parse(&root_ns->unconfined->label, rulestr, - GFP_KERNEL, true, false); + gfp, true, false); if (IS_ERR(rule->label)) { int err = PTR_ERR(rule->label); aa_audit_rule_free(rule); diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index acbb03b9bd25..0c8cc86b417b 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -200,7 +200,7 @@ static inline int complain_error(int error) } void aa_audit_rule_free(void *vrule); -int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule); +int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp); int aa_audit_rule_known(struct audit_krule *rule); int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule); diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 3e568126cd48..c51e24d24d1e 100644 --- a/security/integrity/ima/ima.h +++ 
b/security/integrity/ima/ima.h @@ -546,7 +546,7 @@ static inline void ima_free_modsig(struct modsig *modsig) #else static inline int ima_filter_rule_init(u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) { return -EINVAL; } diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index abdd22007ed8..e4a79a9b2d58 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -427,8 +427,6 @@ static void __init remove_securityfs_measurement_lists(struct dentry **lists) kfree(lists); } - - securityfs_measurement_list_count = 0; } static int __init create_securityfs_measurement_lists(void) @@ -625,6 +623,7 @@ out: securityfs_remove(binary_runtime_measurements); remove_securityfs_measurement_lists(ascii_securityfs_measurement_lists); remove_securityfs_measurement_lists(binary_securityfs_measurement_lists); + securityfs_measurement_list_count = 0; securityfs_remove(ima_symlink); securityfs_remove(ima_dir); diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index c0556907c2e6..09da8e639239 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -401,7 +401,8 @@ static void ima_free_rule(struct ima_rule_entry *entry) kfree(entry); } -static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) +static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry, + gfp_t gfp) { struct ima_rule_entry *nentry; int i; @@ -410,7 +411,7 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) * Immutable elements are copied over as pointers and data; only * lsm rules can change */ - nentry = kmemdup(entry, sizeof(*nentry), GFP_KERNEL); + nentry = kmemdup(entry, sizeof(*nentry), gfp); if (!nentry) return NULL; @@ -425,7 +426,8 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) ima_filter_rule_init(nentry->lsm[i].type, Audit_equal, nentry->lsm[i].args_p, 
- &nentry->lsm[i].rule); + &nentry->lsm[i].rule, + gfp); if (!nentry->lsm[i].rule) pr_warn("rule for LSM \'%s\' is undefined\n", nentry->lsm[i].args_p); @@ -438,7 +440,7 @@ static int ima_lsm_update_rule(struct ima_rule_entry *entry) int i; struct ima_rule_entry *nentry; - nentry = ima_lsm_copy_rule(entry); + nentry = ima_lsm_copy_rule(entry, GFP_KERNEL); if (!nentry) return -ENOMEM; @@ -664,7 +666,7 @@ retry: } if (rc == -ESTALE && !rule_reinitialized) { - lsm_rule = ima_lsm_copy_rule(rule); + lsm_rule = ima_lsm_copy_rule(rule, GFP_ATOMIC); if (lsm_rule) { rule_reinitialized = true; goto retry; @@ -1140,7 +1142,8 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry, entry->lsm[lsm_rule].type = audit_type; result = ima_filter_rule_init(entry->lsm[lsm_rule].type, Audit_equal, entry->lsm[lsm_rule].args_p, - &entry->lsm[lsm_rule].rule); + &entry->lsm[lsm_rule].rule, + GFP_KERNEL); if (!entry->lsm[lsm_rule].rule) { pr_warn("rule for LSM \'%s\' is undefined\n", entry->lsm[lsm_rule].args_p); diff --git a/security/security.c b/security/security.c index e5da848c50b9..e5ca08789f74 100644 --- a/security/security.c +++ b/security/security.c @@ -5332,15 +5332,17 @@ void security_key_post_create_or_update(struct key *keyring, struct key *key, * @op: rule operator * @rulestr: rule context * @lsmrule: receive buffer for audit rule struct + * @gfp: GFP flag used for kmalloc * * Allocate and initialize an LSM audit rule structure. * * Return: Return 0 if @lsmrule has been successfully set, -EINVAL in case of * an invalid rule. 
*/ -int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule) +int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, + gfp_t gfp) { - return call_int_hook(audit_rule_init, field, op, rulestr, lsmrule); + return call_int_hook(audit_rule_init, field, op, rulestr, lsmrule, gfp); } /** diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h index 52aca71210b4..29c7d4c86f6d 100644 --- a/security/selinux/include/audit.h +++ b/security/selinux/include/audit.h @@ -21,12 +21,14 @@ * @op: the operator the rule uses * @rulestr: the text "target" of the rule * @rule: pointer to the new rule structure returned via this + * @gfp: GFP flag used for kmalloc * * Returns 0 if successful, -errno if not. On success, the rule structure * will be allocated internally. The caller must free this structure with * selinux_audit_rule_free() after use. */ -int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **rule); +int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **rule, + gfp_t gfp); /** * selinux_audit_rule_free - free an selinux audit rule structure. 
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index f20e1968b7f7..e33e55384b75 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -3507,7 +3507,8 @@ void selinux_audit_rule_free(void *vrule) } } -int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) +int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, + gfp_t gfp) { struct selinux_state *state = &selinux_state; struct selinux_policy *policy; @@ -3548,7 +3549,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) return -EINVAL; } - tmprule = kzalloc(sizeof(struct selinux_audit_rule), GFP_KERNEL); + tmprule = kzalloc(sizeof(struct selinux_audit_rule), gfp); if (!tmprule) return -ENOMEM; context_init(&tmprule->au_ctxt); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 70ba2841e181..f5cbec1e6a92 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4693,11 +4693,13 @@ static int smack_post_notification(const struct cred *w_cred, * @op: required testing operator (=, !=, >, <, ...) * @rulestr: smack label to be audited * @vrule: pointer to save our own audit rule representation + * @gfp: type of the memory for the allocation * * Prepare to audit cases where (@field @op @rulestr) is true. * The label to be audited is created if necessay. 
*/ -static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) +static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, + gfp_t gfp) { struct smack_known *skp; char **rule = (char **)vrule; diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index b6684a074a59..39944a859ff6 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -111,6 +111,7 @@ static void report_access(const char *access, struct task_struct *target, /** * yama_relation_cleanup - remove invalid entries from the relation list + * @work: unused * */ static void yama_relation_cleanup(struct work_struct *work) diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c index 12aa1cef11a1..cc5db93b9132 100644 --- a/sound/core/pcm_dmaengine.c +++ b/sound/core/pcm_dmaengine.c @@ -349,6 +349,16 @@ int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream, } EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_open_request_chan); +int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream) +{ + struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream); + + dmaengine_synchronize(prtd->dma_chan); + + return 0; +} +EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_sync_stop); + /** * snd_dmaengine_pcm_close - Close a dmaengine based PCM substream * @substream: PCM substream @@ -358,6 +368,12 @@ EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_open_request_chan); int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream) { struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream); + struct dma_tx_state state; + enum dma_status status; + + status = dmaengine_tx_status(prtd->dma_chan, prtd->cookie, &state); + if (status == DMA_PAUSED) + dmaengine_terminate_async(prtd->dma_chan); dmaengine_synchronize(prtd->dma_chan); kfree(prtd); @@ -378,6 +394,12 @@ EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_close); int snd_dmaengine_pcm_close_release_chan(struct snd_pcm_substream *substream) { struct dmaengine_pcm_runtime_data *prtd = 
substream_to_prtd(substream); + struct dma_tx_state state; + enum dma_status status; + + status = dmaengine_tx_status(prtd->dma_chan, prtd->cookie, &state); + if (status == DMA_PAUSED) + dmaengine_terminate_async(prtd->dma_chan); dmaengine_synchronize(prtd->dma_chan); dma_release_channel(prtd->dma_chan); diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 521ba56392a0..c152ccf32214 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1775,6 +1775,8 @@ static int snd_pcm_pre_resume(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; + if (runtime->state != SNDRV_PCM_STATE_SUSPENDED) + return -EBADFD; if (!(runtime->info & SNDRV_PCM_INFO_RESUME)) return -ENOSYS; runtime->trigger_master = substream; diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index 171fb75267af..e90b27a135e6 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -791,7 +791,8 @@ static int paf_ev_to_ump_midi2(const struct snd_seq_event *event, /* set up the MIDI2 RPN/NRPN packet data from the parsed info */ static void fill_rpn(struct snd_seq_ump_midi2_bank *cc, - union snd_ump_midi2_msg *data) + union snd_ump_midi2_msg *data, + unsigned char channel) { if (cc->rpn_set) { data->rpn.status = UMP_MSG_STATUS_RPN; @@ -808,6 +809,7 @@ static void fill_rpn(struct snd_seq_ump_midi2_bank *cc, } data->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) | cc->cc_data_lsb); + data->rpn.channel = channel; cc->cc_data_msb = cc->cc_data_lsb = 0; } @@ -855,7 +857,7 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_data_lsb = val; if (!(cc->rpn_set || cc->nrpn_set)) return 0; // skip - fill_rpn(cc, data); + fill_rpn(cc, data, channel); return 1; } @@ -957,7 +959,7 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_data_lsb = lsb; if (!(cc->rpn_set || cc->nrpn_set)) return 0; // skip - fill_rpn(cc, 
data); + fill_rpn(cc, data, channel); return 1; } @@ -1018,7 +1020,7 @@ static int system_2p_ev_to_ump_midi2(const struct snd_seq_event *event, union snd_ump_midi2_msg *data, unsigned char status) { - return system_1p_ev_to_ump_midi1(event, dest_port, + return system_2p_ev_to_ump_midi1(event, dest_port, (union snd_ump_midi1_msg *)data, status); } @@ -1075,6 +1077,8 @@ static const struct seq_ev_to_ump seq_ev_ump_encoders[] = { system_ev_to_ump_midi1, system_ev_to_ump_midi2 }, { SNDRV_SEQ_EVENT_SENSING, UMP_SYSTEM_STATUS_ACTIVE_SENSING, system_ev_to_ump_midi1, system_ev_to_ump_midi2 }, + { SNDRV_SEQ_EVENT_RESET, UMP_SYSTEM_STATUS_RESET, + system_ev_to_ump_midi1, system_ev_to_ump_midi2 }, }; static const struct seq_ev_to_ump *find_ump_encoder(int type) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 537863447358..478d2b50c571 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -18,7 +18,7 @@ static int dsp_driver; module_param(dsp_driver, int, 0444); -MODULE_PARM_DESC(dsp_driver, "Force the DSP driver for Intel DSP (0=auto, 1=legacy, 2=SST, 3=SOF)"); +MODULE_PARM_DESC(dsp_driver, "Force the DSP driver for Intel DSP (0=auto, 1=legacy, 2=SST, 3=SOF, 4=AVS)"); #define FLAG_SST BIT(0) #define FLAG_SOF BIT(1) diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 164335d3c200..4b1baf4dd50e 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -204,6 +204,7 @@ module_param(numWriteBufs, int, 0); static unsigned int writeBufSize = DEFAULT_BUFF_SIZE ; /* in bytes */ module_param(writeBufSize, int, 0); +MODULE_DESCRIPTION("Atari/Amiga/Q40 core DMA sound driver"); MODULE_LICENSE("GPL"); static int sq_unit = -1; diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index 0da625533afc..a3cf0725fc43 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -162,6 +162,7 @@ config SND_HDA_SCODEC_CS35L56_I2C depends on ACPI || 
COMPILE_TEST depends on SND_SOC select FW_CS_DSP + imply SERIAL_MULTI_INSTANTIATE select SND_HDA_GENERIC select SND_SOC_CS35L56_SHARED select SND_HDA_SCODEC_CS35L56 @@ -178,6 +179,7 @@ config SND_HDA_SCODEC_CS35L56_SPI depends on ACPI || COMPILE_TEST depends on SND_SOC select FW_CS_DSP + imply SERIAL_MULTI_INSTANTIATE select SND_HDA_GENERIC select SND_SOC_CS35L56_SHARED select SND_HDA_SCODEC_CS35L56 diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index 6c49e5c6cd20..031703f010be 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -1495,7 +1495,7 @@ static void cs35l41_hda_unbind(struct device *dev, struct device *master, void * if (comps[cs35l41->index].dev == dev) { memset(&comps[cs35l41->index], 0, sizeof(*comps)); sleep_flags = lock_system_sleep(); - device_link_remove(&comps->codec->core.dev, cs35l41->dev); + device_link_remove(&cs35l41->codec->core.dev, cs35l41->dev); unlock_system_sleep(sleep_flags); } } @@ -2019,6 +2019,8 @@ void cs35l41_hda_remove(struct device *dev) { struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); + component_del(cs35l41->dev, &cs35l41_hda_comp_ops); + pm_runtime_get_sync(cs35l41->dev); pm_runtime_dont_use_autosuspend(cs35l41->dev); pm_runtime_disable(cs35l41->dev); @@ -2026,8 +2028,6 @@ void cs35l41_hda_remove(struct device *dev) if (cs35l41->halo_initialized) cs35l41_remove_dsp(cs35l41); - component_del(cs35l41->dev, &cs35l41_hda_comp_ops); - acpi_dev_put(cs35l41->dacpi); pm_runtime_put_noidle(cs35l41->dev); diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 6a7a6d486916..80c816922f78 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -128,6 +128,10 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "17AA38B5", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B6", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B7", 
2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, + { "17AA38C7", 4, INTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, CS35L41_RIGHT, CS35L41_LEFT }, 0, 2, -1, 1000, 4500, 24 }, + { "17AA38C8", 4, INTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, CS35L41_RIGHT, CS35L41_LEFT }, 0, 2, -1, 1000, 4500, 24 }, + { "17AA38F9", 2, EXTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, 0, 2, -1, 0, 0, 0 }, + { "17AA38FA", 2, EXTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, 0, 2, -1, 0, 0, 0 }, {} }; @@ -529,6 +533,10 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "17AA38B5", generic_dsd_config }, { "CSC3551", "17AA38B6", generic_dsd_config }, { "CSC3551", "17AA38B7", generic_dsd_config }, + { "CSC3551", "17AA38C7", generic_dsd_config }, + { "CSC3551", "17AA38C8", generic_dsd_config }, + { "CSC3551", "17AA38F9", generic_dsd_config }, + { "CSC3551", "17AA38FA", generic_dsd_config }, {} }; diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 11b0570ff56d..e134ede6c5aa 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -735,6 +735,8 @@ static void cs35l56_hda_unbind(struct device *dev, struct device *master, void * if (comps[cs35l56->index].dev == dev) memset(&comps[cs35l56->index], 0, sizeof(*comps)); + cs35l56->codec = NULL; + dev_dbg(cs35l56->base.dev, "Unbound\n"); } @@ -840,6 +842,9 @@ static int cs35l56_hda_system_resume(struct device *dev) cs35l56->suspended = false; + if (!cs35l56->codec) + return 0; + ret = cs35l56_is_fw_reload_needed(&cs35l56->base); dev_dbg(cs35l56->base.dev, "fw_reload_needed: %d\n", ret); if (ret > 0) { @@ -1072,12 +1077,12 @@ void cs35l56_hda_remove(struct device *dev) { struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); + component_del(cs35l56->base.dev, &cs35l56_hda_comp_ops); + pm_runtime_dont_use_autosuspend(cs35l56->base.dev); pm_runtime_get_sync(cs35l56->base.dev); pm_runtime_disable(cs35l56->base.dev); - component_del(cs35l56->base.dev, 
&cs35l56_hda_comp_ops); - cs_dsp_remove(&cs35l56->cs_dsp); kfree(cs35l56->system_name); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index aa76d1c88589..766f0b1d3e9d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -583,10 +583,14 @@ static void alc_shutup_pins(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0236: case 0x10ec0256: + case 0x10ec0257: case 0x19e58326: case 0x10ec0283: + case 0x10ec0285: case 0x10ec0286: + case 0x10ec0287: case 0x10ec0288: + case 0x10ec0295: case 0x10ec0298: alc_headset_mic_no_shutup(codec); break; @@ -7520,6 +7524,8 @@ enum { ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1, ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318, ALC256_FIXUP_CHROME_BOOK, + ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7, + ALC287_FIXUP_LENOVO_SSID_17AA3820, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -7559,6 +7565,21 @@ static void alc287_fixup_lenovo_14irp8_duetitl(struct hda_codec *codec, __snd_hda_apply_fixup(codec, id, action, 0); } +/* Similar to above the Lenovo Yoga Pro 7 14ARP8 PCI SSID matches the codec SSID of the + Legion Y9000X 2022 IAH7.*/ +static void alc287_fixup_lenovo_14arp8_legion_iah7(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + int id; + + if (codec->core.subsystem_id == 0x17aa386e) + id = ALC287_FIXUP_CS35L41_I2C_2; /* Legion Y9000X 2022 IAH7 */ + else + id = ALC285_FIXUP_SPEAKER2_TO_DAC1; /* Yoga Pro 7 14ARP8 */ + __snd_hda_apply_fixup(codec, id, action, 0); +} + /* Another hilarious PCI SSID conflict with Lenovo Legion Pro 7 16ARX8H (with * TAS2781 codec) and Legion 7i 16IAX7 (with CS35L41 codec); * we apply a corresponding fixup depending on the codec SSID instead @@ -7576,6 +7597,20 @@ static void alc287_fixup_lenovo_legion_7(struct hda_codec *codec, __snd_hda_apply_fixup(codec, id, action, 0); } +/* Yet more conflicting PCI SSID (17aa:3820) on two Lenovo models */ +static void alc287_fixup_lenovo_ssid_17aa3820(struct hda_codec 
*codec, + const struct hda_fixup *fix, + int action) +{ + int id; + + if (codec->core.subsystem_id == 0x17aa3820) + id = ALC269_FIXUP_ASPIRE_HEADSET_MIC; /* IdeaPad 330-17IKB 81DM */ + else /* 0x17aa3802 */ + id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* "Yoga Duet 7 13ITL6 */ + __snd_hda_apply_fixup(codec, id, action, 0); +} + static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_GPIO2] = { .type = HDA_FIXUP_FUNC, @@ -9658,6 +9693,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK, }, + [ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_lenovo_14arp8_legion_iah7, + }, [ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_yoga9_14iap7_bass_spk_pin, @@ -9808,6 +9847,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC225_FIXUP_HEADSET_JACK }, + [ALC287_FIXUP_LENOVO_SSID_17AA3820] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_lenovo_ssid_17aa3820, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -10010,6 +10053,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x841c, "HP Pavilion 15-CK0xx", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x84a6, "HP 250 G7 Notebook PC", ALC269_FIXUP_HP_LINE1_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x84ae, "HP 15-db0403ng", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), @@ -10045,6 +10089,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", 
ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x87d3, "HP Laptop 15-gw0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f1, "HP ProBook 630 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), @@ -10194,6 +10239,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c7b, "HP ProBook 445 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c7c, "HP ProBook 445 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c7d, "HP ProBook 465 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c7e, "HP ProBook 465 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c7f, "HP EliteBook 645 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c80, "HP EliteBook 645 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c81, "HP EliteBook 665 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c89, "HP ProBook 460 G11", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c8a, "HP EliteBook 630", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c8c, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), @@ -10332,6 +10384,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE), SND_PCI_QUIRK(0x10ec, 0x118c, 
"Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE), + SND_PCI_QUIRK(0x10ec, 0x11bc, "VAIO VJFE-IL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x124c, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), @@ -10429,6 +10482,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL50NU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa763, "Clevo V54x_6x_TU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb022, "Clevo NH77D[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -10502,7 +10556,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS), - SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x3820, "IdeaPad 330 / Yoga Duet 7", ALC287_FIXUP_LENOVO_SSID_17AA3820), SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), @@ -10516,7 +10570,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { 
SND_PCI_QUIRK(0x17aa, 0x3865, "Lenovo 13X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3866, "Lenovo 13X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), - SND_PCI_QUIRK(0x17aa, 0x386e, "Legion Y9000X 2022 IAH7", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x386e, "Legion Y9000X 2022 IAH7 / Yoga Pro 7 14ARP8", ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7), SND_PCI_QUIRK(0x17aa, 0x386f, "Legion Pro 7/7i", ALC287_FIXUP_LENOVO_LEGION_7), SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C), SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), @@ -10527,6 +10581,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3882, "Lenovo Yoga Pro 7 14APH8", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3884, "Y780 YG DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x17aa, 0x3891, "Lenovo Yoga Pro 7 14AHP9", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), @@ -10540,10 +10595,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38be, "Yoga S980-14.5 proX YC Dual", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38bf, "Yoga S980-14.5 proX LX Dual", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38c3, "Y980 DUAL", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x17aa, 0x38c7, "Thinkbook 13x Gen 4", ALC287_FIXUP_CS35L41_I2C_4), + SND_PCI_QUIRK(0x17aa, 0x38c8, "Thinkbook 13x Gen 4", ALC287_FIXUP_CS35L41_I2C_4), SND_PCI_QUIRK(0x17aa, 0x38cb, "Y790 YG DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38cd, "Y790 VECO 
DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38d2, "Lenovo Yoga 9 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x38d7, "Lenovo Yoga 9 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN), + SND_PCI_QUIRK(0x17aa, 0x38f9, "Thinkbook 16P Gen5", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x38fa, "Thinkbook 16P Gen5", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), @@ -10581,6 +10640,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), + SND_PCI_QUIRK(0x1c6c, 0x122a, "Positivo N14AP7", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1c6c, 0x1251, "Positivo N14KP6-TG", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS), SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP), @@ -10598,6 +10658,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), @@ -10605,7 +10666,6 @@ static const struct snd_pci_quirk 
alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0xf111, 0x0005, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 75f7674c66ee..fdee6592c502 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -777,11 +777,11 @@ static void tas2781_hda_remove(struct device *dev) { struct tas2781_hda *tas_hda = dev_get_drvdata(dev); + component_del(tas_hda->dev, &tas2781_hda_comp_ops); + pm_runtime_get_sync(tas_hda->dev); pm_runtime_disable(tas_hda->dev); - component_del(tas_hda->dev, &tas2781_hda_comp_ops); - pm_runtime_put_noidle(tas_hda->dev); tasdevice_remove(tas_hda->priv); diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index 60cbc881be6e..ef12f97ddc69 100644 --- a/sound/soc/amd/acp/acp-i2s.c +++ b/sound/soc/amd/acp/acp-i2s.c @@ -588,20 +588,12 @@ static int acp_i2s_probe(struct snd_soc_dai *dai) { struct device *dev = dai->component->dev; struct acp_dev_data *adata = dev_get_drvdata(dev); - struct acp_resource *rsrc = adata->rsrc; - unsigned int val; if (!adata->acp_base) { dev_err(dev, "I2S base is NULL\n"); return -EINVAL; } - val = readl(adata->acp_base + rsrc->i2s_pin_cfg_offset); - if (val != rsrc->i2s_mode) { - dev_err(dev, "I2S Mode not supported val %x\n", val); - return -EINVAL; - } - return 0; } diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c index ad320b29e87d..777b5a78d8a9 100644 --- a/sound/soc/amd/acp/acp-pci.c +++ b/sound/soc/amd/acp/acp-pci.c @@ -100,6 +100,7 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id ret = 
-EINVAL; goto release_regions; } + chip->flag = flag; dmic_dev = platform_device_register_data(dev, "dmic-codec", PLATFORM_DEVID_NONE, NULL, 0); if (IS_ERR(dmic_dev)) { dev_err(dev, "failed to create DMIC device\n"); @@ -139,7 +140,6 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id } } - chip->flag = flag; memset(&pdevinfo, 0, sizeof(pdevinfo)); pdevinfo.name = chip->name; @@ -199,10 +199,12 @@ static int __maybe_unused snd_acp_resume(struct device *dev) ret = acp_init(chip); if (ret) dev_err(dev, "ACP init failed\n"); - child = chip->chip_pdev->dev; - adata = dev_get_drvdata(&child); - if (adata) - acp_enable_interrupts(adata); + if (chip->chip_pdev) { + child = chip->chip_pdev->dev; + adata = dev_get_drvdata(&child); + if (adata) + acp_enable_interrupts(adata); + } return ret; } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 1760b5d42460..4e3a8ce690a4 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -283,6 +283,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "M5402RA"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "M5602RA"), + } + }, { .driver_data = &acp6x_card, .matches = { diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c index 6aed1ee443b4..ba314b279919 100644 --- a/sound/soc/atmel/atmel-classd.c +++ b/sound/soc/atmel/atmel-classd.c @@ -473,19 +473,22 @@ static int atmel_classd_asoc_card_init(struct device *dev, if (!dai_link) return -ENOMEM; - comp = devm_kzalloc(dev, sizeof(*comp), GFP_KERNEL); + comp = devm_kzalloc(dev, 2 * sizeof(*comp), GFP_KERNEL); if (!comp) return -ENOMEM; - dai_link->cpus = comp; + dai_link->cpus = &comp[0]; dai_link->codecs = &snd_soc_dummy_dlc; + dai_link->platforms = &comp[1]; dai_link->num_cpus = 1; dai_link->num_codecs = 1; + dai_link->num_platforms = 1; 
dai_link->name = "CLASSD"; dai_link->stream_name = "CLASSD PCM"; dai_link->cpus->dai_name = dev_name(dev); + dai_link->platforms->name = dev_name(dev); card->dai_link = dai_link; card->num_links = 1; diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 8af89a263594..30497152e02a 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -215,6 +215,10 @@ static const struct reg_sequence cs35l56_asp1_defaults[] = { REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL5, 0x00020100), REG_SEQ0(CS35L56_ASP1_DATA_CONTROL1, 0x00000018), REG_SEQ0(CS35L56_ASP1_DATA_CONTROL5, 0x00000018), + REG_SEQ0(CS35L56_ASP1TX1_INPUT, 0x00000000), + REG_SEQ0(CS35L56_ASP1TX2_INPUT, 0x00000000), + REG_SEQ0(CS35L56_ASP1TX3_INPUT, 0x00000000), + REG_SEQ0(CS35L56_ASP1TX4_INPUT, 0x00000000), }; /* diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index 901b9dbcf585..d9ab003e166b 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -121,7 +121,7 @@ int cs42l43_set_jack(struct snd_soc_component *component, priv->buttons[3] = 735; } - ret = cs42l43_find_index(priv, "cirrus,detect-us", 1000, &priv->detect_us, + ret = cs42l43_find_index(priv, "cirrus,detect-us", 50000, &priv->detect_us, cs42l43_accdet_us, ARRAY_SIZE(cs42l43_accdet_us)); if (ret < 0) goto error; @@ -433,7 +433,7 @@ irqreturn_t cs42l43_button_press(int irq, void *data) // Wait for 2 full cycles of comb filter to ensure good reading queue_delayed_work(system_wq, &priv->button_press_work, - msecs_to_jiffies(10)); + msecs_to_jiffies(20)); return IRQ_HANDLED; } diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 03b539ba540f..6a4e42e5e35b 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -857,12 +857,16 @@ static void es8326_jack_detect_handler(struct work_struct *work) * set auto-check mode, then restart jack_detect_work after 400ms. * Don't report jack status. 
*/ - regmap_write(es8326->regmap, ES8326_INT_SOURCE, - (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); + regmap_write(es8326->regmap, ES8326_INT_SOURCE, 0x00); regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01); + regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x10, 0x00); es8326_enable_micbias(es8326->component); usleep_range(50000, 70000); regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); + regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x10, 0x10); + usleep_range(50000, 70000); + regmap_write(es8326->regmap, ES8326_INT_SOURCE, + (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f); regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08); queue_delayed_work(system_wq, &es8326->jack_detect_work, diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index cdb7ff7020e9..51187b1e0ed2 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -81,7 +81,7 @@ static const struct reg_sequence init_list[] = { static const struct reg_sequence rt5650_init_list[] = { {0xf6, 0x0100}, {RT5645_PWR_ANLG1, 0x02}, - {RT5645_IL_CMD3, 0x0018}, + {RT5645_IL_CMD3, 0x6728}, }; static const struct reg_default rt5645_reg[] = { @@ -3130,20 +3130,32 @@ static void rt5645_enable_push_button_irq(struct snd_soc_component *component, bool enable) { struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component); + int ret; if (enable) { snd_soc_dapm_force_enable_pin(dapm, "ADC L power"); snd_soc_dapm_force_enable_pin(dapm, "ADC R power"); snd_soc_dapm_sync(dapm); + snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD2, + RT5645_EN_4BTN_IL_MASK | RT5645_RST_4BTN_IL_MASK, + RT5645_EN_4BTN_IL_EN | RT5645_RST_4BTN_IL_RST); + usleep_range(10000, 15000); + snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD2, + RT5645_EN_4BTN_IL_MASK | RT5645_RST_4BTN_IL_MASK, + RT5645_EN_4BTN_IL_EN | RT5645_RST_4BTN_IL_NORM); + msleep(50); + ret = 
snd_soc_component_read(component, RT5645_INT_IRQ_ST); + pr_debug("%s read %x = %x\n", __func__, RT5645_INT_IRQ_ST, + snd_soc_component_read(component, RT5645_INT_IRQ_ST)); + snd_soc_component_write(component, RT5645_INT_IRQ_ST, ret); + ret = snd_soc_component_read(component, RT5650_4BTN_IL_CMD1); + pr_debug("%s read %x = %x\n", __func__, RT5650_4BTN_IL_CMD1, + snd_soc_component_read(component, RT5650_4BTN_IL_CMD1)); + snd_soc_component_write(component, RT5650_4BTN_IL_CMD1, ret); snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD1, 0x3, 0x3); snd_soc_component_update_bits(component, RT5645_INT_IRQ_ST, 0x8, 0x8); - snd_soc_component_update_bits(component, - RT5650_4BTN_IL_CMD2, 0x8000, 0x8000); - snd_soc_component_read(component, RT5650_4BTN_IL_CMD1); - pr_debug("%s read %x = %x\n", __func__, RT5650_4BTN_IL_CMD1, - snd_soc_component_read(component, RT5650_4BTN_IL_CMD1)); } else { snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD2, 0x8000, 0x0); snd_soc_component_update_bits(component, RT5645_INT_IRQ_ST, 0x8, 0x0); diff --git a/sound/soc/codecs/rt5645.h b/sound/soc/codecs/rt5645.h index 90816b2c5489..bef74b29fd54 100644 --- a/sound/soc/codecs/rt5645.h +++ b/sound/soc/codecs/rt5645.h @@ -2011,6 +2011,12 @@ #define RT5645_ZCD_HP_DIS (0x0 << 15) #define RT5645_ZCD_HP_EN (0x1 << 15) +/* Buttons Inline Command Function 2 (0xe0) */ +#define RT5645_EN_4BTN_IL_MASK (0x1 << 15) +#define RT5645_EN_4BTN_IL_EN (0x1 << 15) +#define RT5645_RST_4BTN_IL_MASK (0x1 << 14) +#define RT5645_RST_4BTN_IL_RST (0x0 << 14) +#define RT5645_RST_4BTN_IL_NORM (0x1 << 14) /* Codec Private Register definition */ /* DAC ADC Digital Volume (0x00) */ diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index 8ca8bcd177ab..dfda6bb5c6f8 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -38,7 +38,9 @@ static bool rt711_readable_register(struct device *dev, unsigned int reg) case 0x8300 ... 0x83ff: case 0x9c00 ... 0x9cff: case 0xb900 ... 
0xb9ff: + case 0x752008: case 0x752009: + case 0x75200b: case 0x752011: case 0x75201a: case 0x752045: diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c index b33da2215ade..87354bb1564e 100644 --- a/sound/soc/codecs/rt722-sdca-sdw.c +++ b/sound/soc/codecs/rt722-sdca-sdw.c @@ -68,6 +68,7 @@ static bool rt722_sdca_mbq_readable_register(struct device *dev, unsigned int re case 0x200007f: case 0x2000082 ... 0x200008e: case 0x2000090 ... 0x2000094: + case 0x3110000: case 0x5300000 ... 0x5300002: case 0x5400002: case 0x5600000 ... 0x5600007: @@ -125,6 +126,7 @@ static bool rt722_sdca_mbq_volatile_register(struct device *dev, unsigned int re case 0x2000067: case 0x2000084: case 0x2000086: + case 0x3110000: return true; default: return false; @@ -350,7 +352,7 @@ static int rt722_sdca_interrupt_callback(struct sdw_slave *slave, if (status->sdca_cascade && !rt722->disable_irq) mod_delayed_work(system_power_efficient_wq, - &rt722->jack_detect_work, msecs_to_jiffies(30)); + &rt722->jack_detect_work, msecs_to_jiffies(280)); mutex_unlock(&rt722->disable_irq_lock); diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 5ddc0c2fe53f..eb67689dcd6e 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -559,6 +559,8 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; + priv->pdev = pdev; + cpu_np = of_parse_phandle(np, "audio-cpu", 0); /* Give a chance to old DT binding */ if (!cpu_np) @@ -787,7 +789,6 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) } /* Initialize sound card */ - priv->pdev = pdev; priv->card.dev = &pdev->dev; priv->card.owner = THIS_MODULE; ret = snd_soc_of_parse_card_name(&priv->card, "model"); diff --git a/sound/soc/fsl/imx-pcm-dma.c b/sound/soc/fsl/imx-pcm-dma.c index 14e94270911c..4fa208d6a032 100644 --- a/sound/soc/fsl/imx-pcm-dma.c +++ b/sound/soc/fsl/imx-pcm-dma.c @@ -50,4 +50,5 @@ int imx_pcm_dma_init(struct 
platform_device *pdev) } EXPORT_SYMBOL_GPL(imx_pcm_dma_init); +MODULE_DESCRIPTION("Freescale i.MX PCM DMA interface"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c index 02bae207f6ec..b6c5d94a1554 100644 --- a/sound/soc/intel/avs/topology.c +++ b/sound/soc/intel/avs/topology.c @@ -1545,8 +1545,8 @@ static int avs_route_load(struct snd_soc_component *comp, int index, { struct snd_soc_acpi_mach *mach = dev_get_platdata(comp->card->dev); size_t len = SNDRV_CTL_ELEM_ID_NAME_MAXLEN; - char buf[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; int ssp_port, tdm_slot; + char *buf; /* See parse_link_formatted_string() for dynamic naming when(s). */ if (!avs_mach_singular_ssp(mach)) @@ -1557,13 +1557,24 @@ static int avs_route_load(struct snd_soc_component *comp, int index, return 0; tdm_slot = avs_mach_ssp_tdm(mach, ssp_port); + buf = devm_kzalloc(comp->card->dev, len, GFP_KERNEL); + if (!buf) + return -ENOMEM; avs_ssp_sprint(buf, len, route->source, ssp_port, tdm_slot); - strscpy((char *)route->source, buf, len); + route->source = buf; + + buf = devm_kzalloc(comp->card->dev, len, GFP_KERNEL); + if (!buf) + return -ENOMEM; avs_ssp_sprint(buf, len, route->sink, ssp_port, tdm_slot); - strscpy((char *)route->sink, buf, len); + route->sink = buf; + if (route->control) { + buf = devm_kzalloc(comp->card->dev, len, GFP_KERNEL); + if (!buf) + return -ENOMEM; avs_ssp_sprint(buf, len, route->control, ssp_port, tdm_slot); - strscpy((char *)route->control, buf, len); + route->control = buf; } return 0; diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index b41a1147f1c3..a64d1989e28a 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -613,6 +613,17 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { { .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ARCHOS 101 CESIUM"), + }, + .driver_data = (void 
*)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_JD_NOT_INV | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, + { + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ARCHOS 140 CESIUM"), }, .driver_data = (void *)(BYT_RT5640_IN1_MAP | diff --git a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c index 48252fa9e39e..8e0ae3635a35 100644 --- a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c @@ -293,7 +293,7 @@ static const struct snd_soc_acpi_adr_device rt1318_1_single_adr[] = { .adr = 0x000130025D131801, .num_endpoints = 1, .endpoints = &single_endpoint, - .name_prefix = "rt1318" + .name_prefix = "rt1318-1" } }; diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c index acaf81fd6c9b..f848e14b091a 100644 --- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c @@ -31,7 +31,7 @@ struct mt8183_da7219_max98357_priv { static struct snd_soc_jack_pin mt8183_da7219_max98357_jack_pins[] = { { - .pin = "Headphone", + .pin = "Headphones", .mask = SND_JACK_HEADPHONE, }, { @@ -626,7 +626,7 @@ static struct snd_soc_codec_conf mt6358_codec_conf[] = { }; static const struct snd_kcontrol_new mt8183_da7219_max98357_snd_controls[] = { - SOC_DAPM_PIN_SWITCH("Headphone"), + SOC_DAPM_PIN_SWITCH("Headphones"), SOC_DAPM_PIN_SWITCH("Headset Mic"), SOC_DAPM_PIN_SWITCH("Speakers"), SOC_DAPM_PIN_SWITCH("Line Out"), @@ -634,7 +634,7 @@ static const struct snd_kcontrol_new mt8183_da7219_max98357_snd_controls[] = { static const struct snd_soc_dapm_widget mt8183_da7219_max98357_dapm_widgets[] = { - SND_SOC_DAPM_HP("Headphone", NULL), + SND_SOC_DAPM_HP("Headphones", NULL), SND_SOC_DAPM_MIC("Headset Mic", NULL), SND_SOC_DAPM_SPK("Speakers", NULL), SND_SOC_DAPM_SPK("Line Out", NULL), @@ -680,7 +680,7 @@ static 
struct snd_soc_codec_conf mt8183_da7219_rt1015_codec_conf[] = { }; static const struct snd_kcontrol_new mt8183_da7219_rt1015_snd_controls[] = { - SOC_DAPM_PIN_SWITCH("Headphone"), + SOC_DAPM_PIN_SWITCH("Headphones"), SOC_DAPM_PIN_SWITCH("Headset Mic"), SOC_DAPM_PIN_SWITCH("Left Spk"), SOC_DAPM_PIN_SWITCH("Right Spk"), @@ -689,7 +689,7 @@ static const struct snd_kcontrol_new mt8183_da7219_rt1015_snd_controls[] = { static const struct snd_soc_dapm_widget mt8183_da7219_rt1015_dapm_widgets[] = { - SND_SOC_DAPM_HP("Headphone", NULL), + SND_SOC_DAPM_HP("Headphones", NULL), SND_SOC_DAPM_MIC("Headset Mic", NULL), SND_SOC_DAPM_SPK("Left Spk", NULL), SND_SOC_DAPM_SPK("Right Spk", NULL), diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359.c b/sound/soc/mediatek/mt8195/mt8195-mt6359.c index ca8751190520..2832ef78eaed 100644 --- a/sound/soc/mediatek/mt8195/mt8195-mt6359.c +++ b/sound/soc/mediatek/mt8195/mt8195-mt6359.c @@ -827,6 +827,7 @@ SND_SOC_DAILINK_DEFS(ETDM2_IN_BE, SND_SOC_DAILINK_DEFS(ETDM1_OUT_BE, DAILINK_COMP_ARRAY(COMP_CPU("ETDM1_OUT")), + DAILINK_COMP_ARRAY(COMP_EMPTY()), DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(ETDM2_OUT_BE, diff --git a/sound/soc/mxs/mxs-pcm.c b/sound/soc/mxs/mxs-pcm.c index df2e4be992d2..9bb08cadeb18 100644 --- a/sound/soc/mxs/mxs-pcm.c +++ b/sound/soc/mxs/mxs-pcm.c @@ -43,4 +43,5 @@ int mxs_pcm_platform_register(struct device *dev) } EXPORT_SYMBOL_GPL(mxs_pcm_platform_register); +MODULE_DESCRIPTION("MXS ASoC PCM driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c index 68a38f63a2db..66b911b49e3f 100644 --- a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c +++ b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c @@ -141,14 +141,17 @@ static void q6apm_lpass_dai_shutdown(struct snd_pcm_substream *substream, struct struct q6apm_lpass_dai_data *dai_data = dev_get_drvdata(dai->dev); int rc; - if (!dai_data->is_port_started[dai->id]) - return; - rc = 
q6apm_graph_stop(dai_data->graph[dai->id]); - if (rc < 0) - dev_err(dai->dev, "fail to close APM port (%d)\n", rc); + if (dai_data->is_port_started[dai->id]) { + rc = q6apm_graph_stop(dai_data->graph[dai->id]); + dai_data->is_port_started[dai->id] = false; + if (rc < 0) + dev_err(dai->dev, "fail to close APM port (%d)\n", rc); + } - q6apm_graph_close(dai_data->graph[dai->id]); - dai_data->is_port_started[dai->id] = false; + if (dai_data->graph[dai->id]) { + q6apm_graph_close(dai_data->graph[dai->id]); + dai_data->graph[dai->id] = NULL; + } } static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) @@ -163,8 +166,10 @@ static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct s q6apm_graph_stop(dai_data->graph[dai->id]); dai_data->is_port_started[dai->id] = false; - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { q6apm_graph_close(dai_data->graph[dai->id]); + dai_data->graph[dai->id] = NULL; + } } /** @@ -183,26 +188,29 @@ static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct s cfg->direction = substream->stream; rc = q6apm_graph_media_format_pcm(dai_data->graph[dai->id], cfg); - if (rc) { dev_err(dai->dev, "Failed to set media format %d\n", rc); - return rc; + goto err; } rc = q6apm_graph_prepare(dai_data->graph[dai->id]); if (rc) { dev_err(dai->dev, "Failed to prepare Graph %d\n", rc); - return rc; + goto err; } rc = q6apm_graph_start(dai_data->graph[dai->id]); if (rc < 0) { dev_err(dai->dev, "fail to start APM port %x\n", dai->id); - return rc; + goto err; } dai_data->is_port_started[dai->id] = true; return 0; +err: + q6apm_graph_close(dai_data->graph[dai->id]); + dai_data->graph[dai->id] = NULL; + return rc; } static int q6apm_lpass_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) diff --git a/sound/soc/qcom/sdw.c b/sound/soc/qcom/sdw.c index eaa8bb016e50..f2eda2ff46c0 100644 --- 
a/sound/soc/qcom/sdw.c +++ b/sound/soc/qcom/sdw.c @@ -160,4 +160,5 @@ int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, return 0; } EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free); +MODULE_DESCRIPTION("Qualcomm ASoC SoundWire helper functions"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index 9fa020ef7eab..ee517d7b5b7b 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -655,8 +655,17 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream, int err; if (i2s_tdm->is_master_mode) { - struct clk *mclk = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? - i2s_tdm->mclk_tx : i2s_tdm->mclk_rx; + struct clk *mclk; + + if (i2s_tdm->clk_trcm == TRCM_TX) { + mclk = i2s_tdm->mclk_tx; + } else if (i2s_tdm->clk_trcm == TRCM_RX) { + mclk = i2s_tdm->mclk_rx; + } else if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + mclk = i2s_tdm->mclk_tx; + } else { + mclk = i2s_tdm->mclk_rx; + } err = clk_set_rate(mclk, DEFAULT_MCLK_FS * params_rate(params)); if (err) diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index ea3bc9318412..a63e942fdc0b 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -318,6 +318,12 @@ static int dmaengine_copy(struct snd_soc_component *component, return 0; } +static int dmaengine_pcm_sync_stop(struct snd_soc_component *component, + struct snd_pcm_substream *substream) +{ + return snd_dmaengine_pcm_sync_stop(substream); +} + static const struct snd_soc_component_driver dmaengine_pcm_component = { .name = SND_DMAENGINE_PCM_DRV_NAME, .probe_order = SND_SOC_COMP_ORDER_LATE, @@ -327,6 +333,7 @@ static const struct snd_soc_component_driver dmaengine_pcm_component = { .trigger = dmaengine_pcm_trigger, .pointer = dmaengine_pcm_pointer, .pcm_construct = dmaengine_pcm_new, + .sync_stop = dmaengine_pcm_sync_stop, }; static const struct 
snd_soc_component_driver dmaengine_pcm_component_process = { @@ -339,6 +346,7 @@ static const struct snd_soc_component_driver dmaengine_pcm_component_process = { .pointer = dmaengine_pcm_pointer, .copy = dmaengine_copy, .pcm_construct = dmaengine_pcm_new, + .sync_stop = dmaengine_pcm_sync_stop, }; static const char * const dmaengine_pcm_dma_channel_names[] = { diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 90ca37e008b3..6951ff7bc61e 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1021,6 +1021,7 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg, struct snd_soc_tplg_hdr *hdr) { struct snd_soc_dapm_context *dapm = &tplg->comp->dapm; + const size_t maxlen = SNDRV_CTL_ELEM_ID_NAME_MAXLEN; struct snd_soc_tplg_dapm_graph_elem *elem; struct snd_soc_dapm_route *route; int count, i; @@ -1044,31 +1045,27 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg, tplg->pos += sizeof(struct snd_soc_tplg_dapm_graph_elem); /* validate routes */ - if (strnlen(elem->source, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == - SNDRV_CTL_ELEM_ID_NAME_MAXLEN) { + if ((strnlen(elem->source, maxlen) == maxlen) || + (strnlen(elem->sink, maxlen) == maxlen) || + (strnlen(elem->control, maxlen) == maxlen)) { ret = -EINVAL; break; } - if (strnlen(elem->sink, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == - SNDRV_CTL_ELEM_ID_NAME_MAXLEN) { - ret = -EINVAL; - break; - } - if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == - SNDRV_CTL_ELEM_ID_NAME_MAXLEN) { - ret = -EINVAL; + + route->source = devm_kstrdup(tplg->dev, elem->source, GFP_KERNEL); + route->sink = devm_kstrdup(tplg->dev, elem->sink, GFP_KERNEL); + if (!route->source || !route->sink) { + ret = -ENOMEM; break; } - route->source = elem->source; - route->sink = elem->sink; - - /* set to NULL atm for tplg users */ - route->connected = NULL; - if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == 0) - route->control = NULL; - else - route->control = elem->control; + if 
(strnlen(elem->control, maxlen) != 0) { + route->control = devm_kstrdup(tplg->dev, elem->control, GFP_KERNEL); + if (!route->control) { + ret = -ENOMEM; + break; + } + } /* add route dobj to dobj_list */ route->dobj.type = SND_SOC_DOBJ_GRAPH; diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index ce675c22a5ab..1c823f9eea57 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -379,7 +379,7 @@ static int non_hda_dai_hw_params_data(struct snd_pcm_substream *substream, sdev = widget_to_sdev(w); if (sdev->dspless_mode_selected) - goto skip_tlv; + return 0; /* get stream_id */ hext_stream = ops->get_hext_stream(sdev, cpu_dai, substream); @@ -423,7 +423,6 @@ static int non_hda_dai_hw_params_data(struct snd_pcm_substream *substream, dma_config->dma_stream_channel_map.device_count = 1; dma_config->dma_priv_config_size = 0; -skip_tlv: return 0; } @@ -525,6 +524,9 @@ int sdw_hda_dai_hw_params(struct snd_pcm_substream *substream, return ret; } + if (sdev->dspless_mode_selected) + return 0; + ipc4_copier = widget_to_copier(w); dma_config_tlv = &ipc4_copier->dma_config_tlv[cpu_dai_id]; dma_config = &dma_config_tlv->dma_config; @@ -615,12 +617,6 @@ static int hda_dai_suspend(struct hdac_bus *bus) sdai = swidget->private; ops = sdai->platform_private; - ret = hda_link_dma_cleanup(hext_stream->link_substream, - hext_stream, - cpu_dai); - if (ret < 0) - return ret; - /* for consistency with TRIGGER_SUSPEND */ if (ops->post_trigger) { ret = ops->post_trigger(sdev, cpu_dai, @@ -629,6 +625,12 @@ static int hda_dai_suspend(struct hdac_bus *bus) if (ret < 0) return ret; } + + ret = hda_link_dma_cleanup(hext_stream->link_substream, + hext_stream, + cpu_dai); + if (ret < 0) + return ret; } } diff --git a/sound/soc/sof/intel/hda-pcm.c b/sound/soc/sof/intel/hda-pcm.c index 9fb8521b896b..f6e24edd7adb 100644 --- a/sound/soc/sof/intel/hda-pcm.c +++ b/sound/soc/sof/intel/hda-pcm.c @@ -258,6 +258,12 @@ int hda_dsp_pcm_open(struct snd_sof_dev 
*sdev, snd_pcm_hw_constraint_integer(substream->runtime, SNDRV_PCM_HW_PARAM_PERIODS); + /* Limit the maximum number of periods to not exceed the BDL entries count */ + if (runtime->hw.periods_max > HDA_DSP_MAX_BDL_ENTRIES) + snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIODS, + runtime->hw.periods_min, + HDA_DSP_MAX_BDL_ENTRIES); + /* Only S16 and S32 supported by HDA hardware when used without DSP */ if (sdev->dspless_mode_selected) snd_pcm_hw_constraint_mask64(substream->runtime, SNDRV_PCM_HW_PARAM_FORMAT, diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c index b3ac040811e7..ef9318947d74 100644 --- a/sound/soc/sof/sof-audio.c +++ b/sound/soc/sof/sof-audio.c @@ -485,7 +485,7 @@ sink_prepare: if (ret < 0) { /* unprepare the source widget */ if (widget_ops[widget->id].ipc_unprepare && - swidget && swidget->prepared) { + swidget && swidget->prepared && swidget->use_count == 0) { widget_ops[widget->id].ipc_unprepare(swidget); swidget->prepared = false; } diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index 1e760c315521..2b1ed91a736c 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -1472,10 +1472,11 @@ static int davinci_mcasp_hw_rule_min_periodsize( { struct snd_interval *period_size = hw_param_interval(params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE); + u8 numevt = *((u8 *)rule->private); struct snd_interval frames; snd_interval_any(&frames); - frames.min = 64; + frames.min = numevt; frames.integer = 1; return snd_interval_refine(period_size, &frames); @@ -1490,6 +1491,7 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, u32 max_channels = 0; int i, dir, ret; int tdm_slots = mcasp->tdm_slots; + u8 *numevt; /* Do not allow more then one stream per direction */ if (mcasp->substreams[substream->stream]) @@ -1589,9 +1591,12 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, return ret; } + numevt = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? 
+ &mcasp->txnumevt : + &mcasp->rxnumevt; snd_pcm_hw_rule_add(substream->runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, - davinci_mcasp_hw_rule_min_periodsize, NULL, + davinci_mcasp_hw_rule_min_periodsize, numevt, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1); return 0; diff --git a/sound/soc/ti/omap-hdmi.c b/sound/soc/ti/omap-hdmi.c index 639bc83f4263..cf43ac19c4a6 100644 --- a/sound/soc/ti/omap-hdmi.c +++ b/sound/soc/ti/omap-hdmi.c @@ -354,11 +354,7 @@ static int omap_hdmi_audio_probe(struct platform_device *pdev) if (!card) return -ENOMEM; - card->name = devm_kasprintf(dev, GFP_KERNEL, - "HDMI %s", dev_name(ad->dssdev)); - if (!card->name) - return -ENOMEM; - + card->name = "HDMI"; card->owner = THIS_MODULE; card->dai_link = devm_kzalloc(dev, sizeof(*(card->dai_link)), GFP_KERNEL); diff --git a/tools/hv/Makefile b/tools/hv/Makefile index bb52871da341..2e60e2c212cd 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -17,6 +17,7 @@ endif MAKEFLAGS += -r override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include +override CFLAGS += -Wno-address-of-packed-member ALL_TARGETS := hv_kvp_daemon hv_vss_daemon ifneq ($(ARCH), aarch64) diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 233f2b6edf52..49b79cf0c5cc 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -86,14 +86,6 @@ static struct comm_str *comm_str__new(const char *str) return result; } -static int comm_str__cmp(const void *_lhs, const void *_rhs) -{ - const struct comm_str *lhs = *(const struct comm_str * const *)_lhs; - const struct comm_str *rhs = *(const struct comm_str * const *)_rhs; - - return strcmp(comm_str__str(lhs), comm_str__str(rhs)); -} - static int comm_str__search(const void *_key, const void *_member) { const char *key = _key; @@ -169,9 +161,24 @@ static struct comm_str *comm_strs__findnew(const char *str) } result = comm_str__new(str); if (result) { - comm_strs->strs[comm_strs->num_strs++] = result; - qsort(comm_strs->strs, comm_strs->num_strs, sizeof(struct 
comm_str *), - comm_str__cmp); + int low = 0, high = comm_strs->num_strs - 1; + int insert = comm_strs->num_strs; /* Default to inserting at the end. */ + + while (low <= high) { + int mid = low + (high - low) / 2; + int cmp = strcmp(comm_str__str(comm_strs->strs[mid]), str); + + if (cmp < 0) { + low = mid + 1; + } else { + high = mid - 1; + insert = mid; + } + } + memmove(&comm_strs->strs[insert + 1], &comm_strs->strs[insert], + (comm_strs->num_strs - insert) * sizeof(struct comm_str *)); + comm_strs->num_strs++; + comm_strs->strs[insert] = result; } } up_write(&comm_strs->lock); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index ab3d0c01dd63..a69a9c661200 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -203,11 +203,27 @@ int __dsos__add(struct dsos *dsos, struct dso *dso) dsos->dsos = temp; dsos->allocated = to_allocate; } - dsos->dsos[dsos->cnt++] = dso__get(dso); - if (dsos->cnt >= 2 && dsos->sorted) { - dsos->sorted = dsos__cmp_long_name_id_short_name(&dsos->dsos[dsos->cnt - 2], - &dsos->dsos[dsos->cnt - 1]) - <= 0; + if (!dsos->sorted) { + dsos->dsos[dsos->cnt++] = dso__get(dso); + } else { + int low = 0, high = dsos->cnt - 1; + int insert = dsos->cnt; /* Default to inserting at the end. 
*/ + + while (low <= high) { + int mid = low + (high - low) / 2; + int cmp = dsos__cmp_long_name_id_short_name(&dsos->dsos[mid], &dso); + + if (cmp < 0) { + low = mid + 1; + } else { + high = mid - 1; + insert = mid; + } + } + memmove(&dsos->dsos[insert + 1], &dsos->dsos[insert], + (dsos->cnt - insert) * sizeof(struct dso *)); + dsos->cnt++; + dsos->dsos[insert] = dso__get(dso); } dso__set_dsos(dso, dsos); return 0; diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 2d6dce2c8f77..b1e6817f1e54 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -14,6 +14,7 @@ turbostat : turbostat.c override CFLAGS += -O2 -Wall -Wextra -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' +override CFLAGS += -DBUILD_BUG_HEADER='"../../../../include/linux/build_bug.h"' override CFLAGS += -D_FILE_OFFSET_BITS=64 override CFLAGS += -D_FORTIFY_SOURCE=2 @@ -44,10 +45,13 @@ snapshot: turbostat @echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h @echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h + @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h + @echo PWD=. 
> $(SNAPSHOT)/Makefile @echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile @echo "CFLAGS += -DINTEL_FAMILY_HEADER='\"intel-family.h\"'" >> $(SNAPSHOT)/Makefile - @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile + @echo "CFLAGS += -DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile + @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' -e's/.*BUILD_BUG_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile @rm -f $(SNAPSHOT).tar.gz tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8cdf41906e98..9f5d053d4bc6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -10,6 +10,7 @@ #define _GNU_SOURCE #include MSRHEADER #include INTEL_FAMILY_HEADER +#include BUILD_BUG_HEADER #include <stdarg.h> #include <stdio.h> #include <err.h> @@ -38,7 +39,6 @@ #include <stdbool.h> #include <assert.h> #include <linux/kernel.h> -#include <linux/build_bug.h> #define UNUSED(x) (void)(x) @@ -5695,9 +5695,6 @@ static void probe_intel_uncore_frequency_cluster(void) if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) return; - if (quiet) - return; - for (uncore_max_id = 0;; ++uncore_max_id) { sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id); @@ -5727,6 +5724,14 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/fabric_cluster_id", path_base); cluster_id = read_sysfs_int(path); + sprintf(path, "%s/current_freq_khz", path_base); + sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); + + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); + + if (quiet) + continue; + sprintf(path, "%s/min_freq_khz", path_base); k = read_sysfs_int(path); sprintf(path, "%s/max_freq_khz", path_base); @@ -5743,11 +5748,6 @@ static void 
probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/current_freq_khz", path_base); k = read_sysfs_int(path); fprintf(outf, " %d MHz\n", k / 1000); - - sprintf(path, "%s/current_freq_khz", path_base); - sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); - - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); } } @@ -8424,7 +8424,7 @@ void cmdline(int argc, char **argv) * Parse some options early, because they may make other options invalid, * like adding the MSR counter with --add and at the same time using --no-msr. */ - while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) { + while ((opt = getopt_long_only(argc, argv, "MPn:", long_options, &option_index)) != -1) { switch (opt) { case 'M': no_msr = 1; diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 3482248aa344..90d5afd52dd0 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -630,11 +630,15 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) { struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL); + struct device *dev = &port->dev; if (!cxlhdm) return ERR_PTR(-ENOMEM); cxlhdm->port = port; + cxlhdm->interleave_mask = ~0U; + cxlhdm->iw_cap_mask = ~0UL; + dev_set_drvdata(dev, cxlhdm); return cxlhdm; } diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e0b3887b3d2d..dd49c1d23a60 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -457,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ - test_ringbuf_n.c test_ringbuf_map_key.c + test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c # Generate both light skeleton and 
libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index eeabd798bc3a..98b6b6a886ce 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -58,9 +58,12 @@ CONFIG_MPLS=y CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y +CONFIG_NET_ACT_SKBMOD=y +CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y CONFIG_NET_CLS_BPF=y CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NET_IPGRE=y diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 4c6f42dae409..da430df45aa4 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -12,9 +12,11 @@ #include <sys/sysinfo.h> #include <linux/perf_event.h> #include <linux/ring_buffer.h> + #include "test_ringbuf.lskel.h" #include "test_ringbuf_n.lskel.h" #include "test_ringbuf_map_key.lskel.h" +#include "test_ringbuf_write.lskel.h" #define EDONE 7777 @@ -84,6 +86,58 @@ static void *poll_thread(void *input) return (void *)(long)ring_buffer__poll(ringbuf, timeout); } +static void ringbuf_write_subtest(void) +{ + struct test_ringbuf_write_lskel *skel; + int page_size = getpagesize(); + size_t *mmap_ptr; + int err, rb_fd; + + skel = test_ringbuf_write_lskel__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->maps.ringbuf.max_entries = 0x4000; + + err = test_ringbuf_write_lskel__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + rb_fd = skel->maps.ringbuf.map_fd; + + mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); + if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) + goto cleanup; + *mmap_ptr = 0x3000; + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); + + skel->bss->pid = getpid(); + + ringbuf = ring_buffer__new(rb_fd, process_sample, 
NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new")) + goto cleanup; + + err = test_ringbuf_write_lskel__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup_ringbuf; + + skel->bss->discarded = 0; + skel->bss->passed = 0; + + /* trigger exactly two samples */ + syscall(__NR_getpgid); + syscall(__NR_getpgid); + + ASSERT_EQ(skel->bss->discarded, 2, "discarded"); + ASSERT_EQ(skel->bss->passed, 0, "passed"); + + test_ringbuf_write_lskel__detach(skel); +cleanup_ringbuf: + ring_buffer__free(ringbuf); +cleanup: + test_ringbuf_write_lskel__destroy(skel); +} + static void ringbuf_subtest(void) { const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); @@ -451,4 +505,6 @@ void test_ringbuf(void) ringbuf_n_subtest(); if (test__start_subtest("ringbuf_map_key")) ringbuf_map_key_subtest(); + if (test__start_subtest("ringbuf_write")) + ringbuf_write_subtest(); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index bc9841144685..1af9ec1149aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -9,6 +9,8 @@ #define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null" #include "test_tc_link.skel.h" + +#include "netlink_helpers.h" #include "tc_helpers.h" void serial_test_tc_links_basic(void) @@ -1787,6 +1789,65 @@ void serial_test_tc_links_ingress(void) test_tc_links_ingress(BPF_TCX_INGRESS, false, false); } +struct qdisc_req { + struct nlmsghdr n; + struct tcmsg t; + char buf[1024]; +}; + +static int qdisc_replace(int ifindex, const char *kind, bool block) +{ + struct rtnl_handle rth = { .fd = -1 }; + struct qdisc_req req; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST; + req.n.nlmsg_type = RTM_NEWQDISC; + 
req.t.tcm_family = AF_UNSPEC; + req.t.tcm_ifindex = ifindex; + req.t.tcm_parent = 0xfffffff1; + + addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1); + if (block) + addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + return err; +} + +void serial_test_tc_links_dev_chain0(void) +{ + int err, ifindex; + + ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth"); + ifindex = if_nametoindex("foo"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + err = qdisc_replace(ifindex, "ingress", true); + if (!ASSERT_OK(err, "attaching ingress")) + goto cleanup; + ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block"); + err = qdisc_replace(ifindex, "clsact", false); + if (!ASSERT_OK(err, "attaching clsact")) + goto cleanup; + /* Heuristic: kern_sync_rcu() alone does not work; a wait-time of ~5s + * triggered the issue without the fix reliably 100% of the time. 
+ */ + sleep(5); + ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter"); +cleanup: + ASSERT_OK(system("ip link del dev foo"), "del veth"); + ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed"); + ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed"); +} + static void test_tc_links_dev_mixed(int target) { LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c new file mode 100644 index 000000000000..871d16cb95cf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <sched.h> +#include <test_progs.h> +#include <pthread.h> +#include <network_helpers.h> + +#include "timer_lockup.skel.h" + +static long cpu; +static int *timer1_err; +static int *timer2_err; +static bool skip; + +volatile int k = 0; + +static void *timer_lockup_thread(void *arg) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1000, + ); + int i, prog_fd = *(int *)arg; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), + &cpuset), + "cpu affinity"); + + for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) { + bpf_prog_test_run_opts(prog_fd, &opts); + /* Skip the test if we can't reproduce the race in a reasonable + * amount of time. 
+ */ + if (i > 50) { + WRITE_ONCE(skip, true); + break; + } + } + + return NULL; +} + +void test_timer_lockup(void) +{ + int timer1_prog, timer2_prog; + struct timer_lockup *skel; + pthread_t thrds[2]; + void *ret; + + skel = timer_lockup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) + return; + + timer1_prog = bpf_program__fd(skel->progs.timer1_prog); + timer2_prog = bpf_program__fd(skel->progs.timer2_prog); + + timer1_err = &skel->bss->timer1_err; + timer2_err = &skel->bss->timer2_err; + + if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread, + &timer1_prog), + "pthread_create thread1")) + goto out; + if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread, + &timer2_prog), + "pthread_create thread2")) { + pthread_exit(&thrds[0]); + goto out; + } + + pthread_join(thrds[1], &ret); + pthread_join(thrds[0], &ret); + + if (skip) { + test__skip(); + goto out; + } + + if (*timer1_err != -EDEADLK && *timer1_err != 0) + ASSERT_FAIL("timer1_err bad value"); + if (*timer2_err != -EDEADLK && *timer2_err != 0) + ASSERT_FAIL("timer2_err bad value"); +out: + timer_lockup__destroy(skel); + return; +} diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 1c9c4ec1be11..98ef39efa77e 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -53,6 +53,7 @@ #include "verifier_movsx.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" +#include "verifier_or_jmp32_k.skel.h" #include "verifier_precision.skel.h" #include "verifier_prevent_map_lookup.skel.h" #include "verifier_raw_stack.skel.h" @@ -170,6 +171,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_movsx(void) { RUN(verifier_movsx); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } void test_verifier_netfilter_retcode(void) { 
RUN(verifier_netfilter_retcode); } +void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); } void test_verifier_precision(void) { RUN(verifier_precision); } void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c new file mode 100644 index 000000000000..350513c0e4c9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +/* inputs */ +int pid = 0; + +/* outputs */ +long passed = 0; +long discarded = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_ringbuf_write(void *ctx) +{ + int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32; + void *sample1, *sample2; + + if (cur_pid != pid) + return 0; + + sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + if (!sample1) + return 0; + /* first one can pass */ + sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + if (!sample2) { + bpf_ringbuf_discard(sample1, 0); + __sync_fetch_and_add(&discarded, 1); + return 0; + } + /* second one must not */ + __sync_fetch_and_add(&passed, 1); + foo = sample2 + 4084; + *foo = 256; + bpf_ringbuf_discard(sample1, 0); + bpf_ringbuf_discard(sample2, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c new file mode 100644 index 000000000000..3e520133281e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_lockup.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include 
"bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer1_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer2_map SEC(".maps"); + +int timer1_err; +int timer2_err; + +static int timer_cb1(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) + timer2_err = bpf_timer_cancel(timer); + + return 0; +} + +static int timer_cb2(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) + timer1_err = bpf_timer_cancel(timer); + + return 0; +} + +SEC("tc") +int timer1_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) { + bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb1); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} + +SEC("tc") +int timer2_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) { + bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb2); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index bd676d7e615f..80c737b6d340 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -274,6 +274,58 @@ static __naked void iter_limit_bug_cb(void) ); } +int tmp_var; +SEC("socket") +__failure __msg("infinite loop 
detected at insn 2") +__naked void jgt_imm64_and_may_goto(void) +{ + asm volatile (" \ + r0 = %[tmp_var] ll; \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -3; /* off -3 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" :: __imm_addr(tmp_var) + : __clobber_all); +} + +SEC("socket") +__failure __msg("infinite loop detected at insn 1") +__naked void may_goto_self(void) +{ + asm volatile (" \ + r0 = *(u32 *)(r10 - 4); \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -1; /* off -1 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__success __retval(0) +__naked void may_goto_neg_off(void) +{ + asm volatile (" \ + r0 = *(u32 *)(r10 - 4); \ + goto l0_%=; \ + goto l1_%=; \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -2; /* off -2 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ +l1_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + SEC("tc") __failure __flag(BPF_F_TEST_STATE_FREQ) @@ -307,6 +359,100 @@ int iter_limit_bug(struct __sk_buff *skb) return 0; } +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} + +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto2(void) +{ + asm volatile (" \ +l0_%=: r0 = 0; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} + +SEC("socket") +__success __retval(0) +__naked void jlt_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: call %[bpf_jiffies64]; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + if r0 < 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" :: 
__imm(bpf_jiffies64) + : __clobber_all); +} + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ + __clang_major__ >= 18 +SEC("socket") +__success __retval(0) +__naked void gotol_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: r0 = 0; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + gotol l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} +#endif + +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto_subprog(void) +{ + asm volatile (" \ + call subprog_with_may_goto; \ + exit; \ +" ::: __clobber_all); +} + +static __naked __noinline __used +void subprog_with_may_goto(void) +{ + asm volatile (" \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + #define ARR_SZ 1000000 int zero; char arr[ARR_SZ]; diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index cbb9d6714f53..028ec855587b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -224,6 +224,69 @@ l0_%=: \ : __clobber_all); } +SEC("socket") +__description("MOV32SX, S8, var_off u32_max") +__failure __msg("infinite loop detected") +__failure_unpriv __msg_unpriv("back-edge from insn 2 to 0") +__naked void mov64sx_s32_varoff_1(void) +{ + asm volatile (" \ +l0_%=: \ + r3 = *(u8 *)(r10 -387); \ + w7 = (s8)w3; \ + if w7 >= 0x2533823b goto l0_%=; \ + w0 = 0; \ + exit; \ +" : + : + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension") +__success __retval(0) +__failure_unpriv __msg_unpriv("frame pointer is read only") +__naked void 
mov64sx_s32_varoff_2(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r3 &= 0xf; \ + w7 = (s8)w3; \ + if w7 s>= 16 goto l0_%=; \ + w0 = 0; \ + exit; \ +l0_%=: \ + r10 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension") +__success __retval(0) +__failure_unpriv __msg_unpriv("frame pointer is read only") +__naked void mov64sx_s32_varoff_3(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r3 &= 0xf; \ + r3 |= 0x80; \ + w7 = (s8)w3; \ + if w7 s>= -5 goto l0_%=; \ + w0 = 0; \ + exit; \ +l0_%=: \ + r10 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + #else SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c new file mode 100644 index 000000000000..f37713a265ac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("socket") +__description("or_jmp32_k: bit ops + branch on unknown value") +__failure +__msg("R0 invalid mem access 'scalar'") +__naked void or_jmp32_k(void) +{ + asm volatile (" \ + r0 = 0xffffffff; \ + r0 /= 1; \ + r1 = 0; \ + w1 = -1; \ + w1 >>= 1; \ + w0 &= w1; \ + w0 |= 2; \ + if w0 != 0x7ffffffd goto l1; \ + r0 = 1; \ + exit; \ +l3: \ + r0 = 5; \ + *(u64*)(r0 - 8) = r0; \ + exit; \ +l2: \ + w0 -= 0xe; \ + if w0 == 1 goto l3; \ + r0 = 4; \ + exit; \ +l1: \ + w0 -= 0x7ffffff0; \ + if w0 s>= 0xe goto l2; \ + r0 = 3; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config index f35de0542b60..bcf7555eaffe 100644 --- a/tools/testing/selftests/drivers/net/virtio_net/config 
+++ b/tools/testing/selftests/drivers/net/virtio_net/config @@ -1,2 +1,8 @@ -CONFIG_VIRTIO_NET=y +CONFIG_BPF_SYSCALL=y +CONFIG_CGROUP_BPF=y +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=m CONFIG_VIRTIO_DEBUG=y +CONFIG_VIRTIO_NET=y diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile index 71ec34bf1501..4373cea79b79 100644 --- a/tools/testing/selftests/fchmodat2/Makefile +++ b/tools/testing/selftests/fchmodat2/Makefile @@ -1,6 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES) + +# gcc requires -static-libasan in order to ensure that Address Sanitizer's +# library is the first one loaded. However, clang already statically links the +# Address Sanitizer if -fsanitize is specified. Therefore, simply omit +# -static-libasan for clang builds. 
+ifeq ($(LLVM),) + CFLAGS += -static-libasan +endif + TEST_GEN_PROGS := fchmodat2_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile index 07a0d5b545ca..3af3136e35a4 100644 --- a/tools/testing/selftests/filesystems/statmount/Makefile +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := statmount_test +TEST_GEN_PROGS := statmount_test statmount_test_ns include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h new file mode 100644 index 000000000000..f4294bab9d73 --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/statmount.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __STATMOUNT_H +#define __STATMOUNT_H + +#include <stdint.h> +#include <linux/mount.h> +#include <asm/unistd.h> + +static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask, + struct statmount *buf, size_t bufsize, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = mask, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_statmount, &req, buf, bufsize, flags); +} + +static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id, + uint64_t last_mnt_id, uint64_t list[], size_t num, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = last_mnt_id, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_listmount, &req, list, num, flags); +} + +#endif /* __STATMOUNT_H */ diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c 
b/tools/testing/selftests/filesystems/statmount/statmount_test.c index e6d7c4f1c85b..c773334bbcc9 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -4,17 +4,15 @@ #include <assert.h> #include <stddef.h> -#include <stdint.h> #include <sched.h> #include <fcntl.h> #include <sys/param.h> #include <sys/mount.h> #include <sys/stat.h> #include <sys/statfs.h> -#include <linux/mount.h> #include <linux/stat.h> -#include <asm/unistd.h> +#include "statmount.h" #include "../../kselftest.h" static const char *const known_fs[] = { @@ -36,18 +34,6 @@ static const char *const known_fs[] = { "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL }; -static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf, - size_t bufsize, unsigned int flags) -{ - struct mnt_id_req req = { - .size = MNT_ID_REQ_SIZE_VER0, - .mnt_id = mnt_id, - .param = mask, - }; - - return syscall(__NR_statmount, &req, buf, bufsize, flags); -} - static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) { size_t bufsize = 1 << 15; @@ -56,7 +42,7 @@ static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigne int ret; for (;;) { - ret = statmount(mnt_id, mask, tmp, bufsize, flags); + ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags); if (ret != -1) break; if (tofree) @@ -121,12 +107,20 @@ static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX"; static int orig_root; static uint64_t root_id, parent_id; static uint32_t old_root_id, old_parent_id; - +static FILE *f_mountinfo; static void cleanup_namespace(void) { - fchdir(orig_root); - chroot("."); + int ret; + + ret = fchdir(orig_root); + if (ret == -1) + ksft_perror("fchdir to original root"); + + ret = chroot("."); + if (ret == -1) + ksft_perror("chroot to original root"); + umount2(root_mntpoint, MNT_DETACH); rmdir(root_mntpoint); } @@ -138,7 +132,7 @@ static 
void setup_namespace(void) uid_t uid = getuid(); gid_t gid = getgid(); - ret = unshare(CLONE_NEWNS|CLONE_NEWUSER); + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); if (ret == -1) ksft_exit_fail_msg("unsharing mountns and userns: %s\n", strerror(errno)); @@ -149,6 +143,11 @@ static void setup_namespace(void) sprintf(buf, "0 %d 1", gid); write_file("/proc/self/gid_map", buf); + f_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!f_mountinfo) + ksft_exit_fail_msg("failed to open mountinfo: %s\n", + strerror(errno)); + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); if (ret == -1) ksft_exit_fail_msg("making mount tree private: %s\n", @@ -208,25 +207,13 @@ static int setup_mount_tree(int log2_num) return 0; } -static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id, - uint64_t list[], size_t num, unsigned int flags) -{ - struct mnt_id_req req = { - .size = MNT_ID_REQ_SIZE_VER0, - .mnt_id = mnt_id, - .param = last_mnt_id, - }; - - return syscall(__NR_listmount, &req, list, num, flags); -} - static void test_listmount_empty_root(void) { ssize_t res; const unsigned int size = 32; uint64_t list[size]; - res = listmount(LSMT_ROOT, 0, list, size, 0); + res = listmount(LSMT_ROOT, 0, 0, list, size, 0); if (res == -1) { ksft_test_result_fail("listmount: %s\n", strerror(errno)); return; @@ -251,7 +238,7 @@ static void test_statmount_zero_mask(void) struct statmount sm; int ret; - ret = statmount(root_id, 0, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount zero mask: %s\n", strerror(errno)); @@ -277,7 +264,7 @@ static void test_statmount_mnt_basic(void) int ret; uint64_t mask = STATMOUNT_MNT_BASIC; - ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount mnt basic: %s\n", strerror(errno)); @@ -337,7 +324,7 @@ static void test_statmount_sb_basic(void) struct statx sx; struct 
statfs sf; - ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount sb basic: %s\n", strerror(errno)); @@ -462,6 +449,88 @@ static void test_statmount_fs_type(void) free(sm); } +static void test_statmount_mnt_opts(void) +{ + struct statmount *sm; + const char *statmount_opts; + char *line = NULL; + size_t len = 0; + + sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS, + 0); + if (!sm) { + ksft_test_result_fail("statmount mnt opts: %s\n", + strerror(errno)); + return; + } + + while (getline(&line, &len, f_mountinfo) != -1) { + int i; + char *p, *p2; + unsigned int old_mnt_id; + + old_mnt_id = atoi(line); + if (old_mnt_id != sm->mnt_id_old) + continue; + + for (p = line, i = 0; p && i < 5; i++) + p = strchr(p + 1, ' '); + if (!p) + continue; + + p2 = strchr(p + 1, ' '); + if (!p2) + continue; + *p2 = '\0'; + p = strchr(p2 + 1, '-'); + if (!p) + continue; + for (p++, i = 0; p && i < 2; i++) + p = strchr(p + 1, ' '); + if (!p) + continue; + p++; + + /* skip generic superblock options */ + if (strncmp(p, "ro", 2) == 0) + p += 2; + else if (strncmp(p, "rw", 2) == 0) + p += 2; + if (*p == ',') + p++; + if (strncmp(p, "sync", 4) == 0) + p += 4; + if (*p == ',') + p++; + if (strncmp(p, "dirsync", 7) == 0) + p += 7; + if (*p == ',') + p++; + if (strncmp(p, "lazytime", 8) == 0) + p += 8; + if (*p == ',') + p++; + p2 = strrchr(p, '\n'); + if (p2) + *p2 = '\0'; + + statmount_opts = sm->str + sm->mnt_opts; + if (strcmp(statmount_opts, p) != 0) + ksft_test_result_fail( + "unexpected mount options: '%s' != '%s'\n", + statmount_opts, p); + else + ksft_test_result_pass("statmount mount options\n"); + free(sm); + free(line); + return; + } + + ksft_test_result_fail("didnt't find mount entry\n"); + free(sm); + free(line); +} + static void test_statmount_string(uint64_t mask, size_t off, const char *name) { struct statmount *sm; @@ -498,14 +567,14 @@ static void 
test_statmount_string(uint64_t mask, size_t off, const char *name) exactsize = sm->size; shortsize = sizeof(*sm) + i; - ret = statmount(root_id, mask, sm, exactsize, 0); + ret = statmount(root_id, 0, mask, sm, exactsize, 0); if (ret == -1) { ksft_test_result_fail("statmount exact size: %s\n", strerror(errno)); goto out; } errno = 0; - ret = statmount(root_id, mask, sm, shortsize, 0); + ret = statmount(root_id, 0, mask, sm, shortsize, 0); if (ret != -1 || errno != EOVERFLOW) { ksft_test_result_fail("should have failed with EOVERFLOW: %s\n", strerror(errno)); @@ -533,7 +602,7 @@ static void test_listmount_tree(void) if (res == -1) return; - num = res = listmount(LSMT_ROOT, 0, list, size, 0); + num = res = listmount(LSMT_ROOT, 0, 0, list, size, 0); if (res == -1) { ksft_test_result_fail("listmount: %s\n", strerror(errno)); return; @@ -545,7 +614,7 @@ static void test_listmount_tree(void) } for (i = 0; i < size - step;) { - res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0); + res = listmount(LSMT_ROOT, 0, i ? 
list2[i - 1] : 0, list2 + i, step, 0); if (res == -1) ksft_test_result_fail("short listmount: %s\n", strerror(errno)); @@ -577,18 +646,18 @@ int main(void) int ret; uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT | - STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE; + STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE | STATMOUNT_MNT_NS_ID; ksft_print_header(); - ret = statmount(0, 0, NULL, 0, 0); + ret = statmount(0, 0, 0, NULL, 0, 0); assert(ret == -1); if (errno == ENOSYS) ksft_exit_skip("statmount() syscall not supported\n"); setup_namespace(); - ksft_set_plan(14); + ksft_set_plan(15); test_listmount_empty_root(); test_statmount_zero_mask(); test_statmount_mnt_basic(); @@ -596,6 +665,7 @@ int main(void) test_statmount_mnt_root(); test_statmount_mnt_point(); test_statmount_fs_type(); + test_statmount_mnt_opts(); test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root"); test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point"); test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type"); diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c new file mode 100644 index 000000000000..e044f5fc57fd --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE + +#include <assert.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <linux/nsfs.h> +#include <linux/stat.h> + +#include "statmount.h" +#include "../../kselftest.h" + +#define NSID_PASS 0 +#define NSID_FAIL 1 +#define NSID_SKIP 2 +#define NSID_ERROR 3 + +static void handle_result(int ret, const char *testname) +{ + if (ret == NSID_PASS) + ksft_test_result_pass("%s\n", testname); + else if (ret == NSID_FAIL) + 
ksft_test_result_fail("%s\n", testname); + else if (ret == NSID_ERROR) + ksft_exit_fail_msg("%s\n", testname); + else + ksft_test_result_skip("%s\n", testname); +} + +static inline int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + ksft_print_msg("waitpid returned -1, errno=%d\n", errno); + return -1; + } + + if (!WIFEXITED(status)) { + ksft_print_msg( + "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n", + WIFSIGNALED(status), WTERMSIG(status)); + return -1; + } + + ret = WEXITSTATUS(status); + return ret; +} + +static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id) +{ + int fd = open(mnt_ns, O_RDONLY); + + if (fd < 0) { + ksft_print_msg("failed to open for ns %s: %s\n", + mnt_ns, strerror(errno)); + sleep(60); + return NSID_ERROR; + } + + if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) { + ksft_print_msg("failed to get the nsid for ns %s: %s\n", + mnt_ns, strerror(errno)); + return NSID_ERROR; + } + close(fd); + return NSID_PASS; +} + +static int get_mnt_id(const char *path, uint64_t *mnt_id) +{ + struct statx sx; + int ret; + + ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); + if (ret == -1) { + ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, + strerror(errno)); + return NSID_ERROR; + } + + if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { + ksft_print_msg("no unique mount ID available for %s\n", path); + return NSID_ERROR; + } + + *mnt_id = sx.stx_mnt_id; + return NSID_PASS; +} + +static int write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + int ret; + + if (fd == -1) { + ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); + return NSID_ERROR; + } + + ret = write(fd, val, len); + if (ret == -1) { + ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); + return NSID_ERROR; + } + if (ret != len) { + ksft_print_msg("short write to %s\n", path); + 
return NSID_ERROR; + } + + ret = close(fd); + if (ret == -1) { + ksft_print_msg("closing %s\n", path); + return NSID_ERROR; + } + + return NSID_PASS; +} + +static int setup_namespace(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); + if (ret == -1) + ksft_exit_fail_msg("unsharing mountns and userns: %s\n", + strerror(errno)); + + sprintf(buf, "0 %d 1", uid); + ret = write_file("/proc/self/uid_map", buf); + if (ret != NSID_PASS) + return ret; + ret = write_file("/proc/self/setgroups", "deny"); + if (ret != NSID_PASS) + return ret; + sprintf(buf, "0 %d 1", gid); + ret = write_file("/proc/self/gid_map", buf); + if (ret != NSID_PASS) + return ret; + + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (ret == -1) { + ksft_print_msg("making mount tree private: %s\n", + strerror(errno)); + return NSID_ERROR; + } + + return NSID_PASS; +} + +static int _test_statmount_mnt_ns_id(void) +{ + struct statmount sm; + uint64_t mnt_ns_id; + uint64_t root_id; + int ret; + + ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id); + if (ret != NSID_PASS) + return ret; + + ret = get_mnt_id("/", &root_id); + if (ret != NSID_PASS) + return ret; + + ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (sm.size != sizeof(sm)) { + ksft_print_msg("unexpected size: %u != %u\n", sm.size, + (uint32_t)sizeof(sm)); + return NSID_FAIL; + } + if (sm.mask != STATMOUNT_MNT_NS_ID) { + ksft_print_msg("statmount mnt ns id unavailable\n"); + return NSID_SKIP; + } + + if (sm.mnt_ns_id != mnt_ns_id) { + ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n", + (unsigned long long)sm.mnt_ns_id, + (unsigned long long)mnt_ns_id); + return NSID_FAIL; + } + + return NSID_PASS; +} + +static void test_statmount_mnt_ns_id(void) +{ + pid_t pid; + int ret; + + pid = fork(); + if (pid < 
0) + ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno)); + + /* We're the original pid, wait for the result. */ + if (pid != 0) { + ret = wait_for_pid(pid); + handle_result(ret, "test statmount ns id"); + return; + } + + ret = setup_namespace(); + if (ret != NSID_PASS) + exit(ret); + ret = _test_statmount_mnt_ns_id(); + exit(ret); +} + +static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts) +{ + uint64_t list[256]; + uint64_t mnt_ns_id; + uint64_t nr_mounts; + char buf[256]; + int ret; + + /* Get the mount ns id for our child. */ + snprintf(buf, sizeof(buf), "/proc/%lu/ns/mnt", (unsigned long)pid); + ret = get_mnt_ns_id(buf, &mnt_ns_id); + + nr_mounts = listmount(LSMT_ROOT, mnt_ns_id, 0, list, 256, 0); + if (nr_mounts == (uint64_t)-1) { + ksft_print_msg("listmount: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (nr_mounts != child_nr_mounts) { + ksft_print_msg("listmount results is %zi != %zi\n", nr_mounts, + child_nr_mounts); + return NSID_FAIL; + } + + /* Validate that all of our entries match our mnt_ns_id. 
*/ + for (int i = 0; i < nr_mounts; i++) { + struct statmount sm; + + ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm, + sizeof(sm), 0); + if (ret < 0) { + ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (sm.mask != STATMOUNT_MNT_NS_ID) { + ksft_print_msg("statmount mnt ns id unavailable\n"); + return NSID_SKIP; + } + + if (sm.mnt_ns_id != mnt_ns_id) { + ksft_print_msg("listmount gave us the wrong ns id: 0x%llx != 0x%llx\n", + (unsigned long long)sm.mnt_ns_id, + (unsigned long long)mnt_ns_id); + return NSID_FAIL; + } + } + + return NSID_PASS; +} + +static void test_listmount_ns(void) +{ + uint64_t nr_mounts; + char pval; + int child_ready_pipe[2]; + int parent_ready_pipe[2]; + pid_t pid; + int ret, child_ret; + + if (pipe(child_ready_pipe) < 0) + ksft_exit_fail_msg("failed to create the child pipe: %s\n", + strerror(errno)); + if (pipe(parent_ready_pipe) < 0) + ksft_exit_fail_msg("failed to create the parent pipe: %s\n", + strerror(errno)); + + pid = fork(); + if (pid < 0) + ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno)); + + if (pid == 0) { + char cval; + uint64_t list[256]; + + close(child_ready_pipe[0]); + close(parent_ready_pipe[1]); + + ret = setup_namespace(); + if (ret != NSID_PASS) + exit(ret); + + nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 256, 0); + if (nr_mounts == (uint64_t)-1) { + ksft_print_msg("listmount: %s\n", strerror(errno)); + exit(NSID_FAIL); + } + + /* + * Tell our parent how many mounts we have, and then wait for it + * to tell us we're done. + */ + write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts)); + read(parent_ready_pipe[0], &cval, sizeof(cval)); + exit(NSID_PASS); + } + + close(child_ready_pipe[1]); + close(parent_ready_pipe[0]); + + /* Wait until the child has created everything. 
*/ + if (read(child_ready_pipe[0], &nr_mounts, sizeof(nr_mounts)) != + sizeof(nr_mounts)) + ret = NSID_ERROR; + + ret = validate_external_listmount(pid, nr_mounts); + + if (write(parent_ready_pipe[1], &pval, sizeof(pval)) != sizeof(pval)) + ret = NSID_ERROR; + + child_ret = wait_for_pid(pid); + if (child_ret != NSID_PASS) + ret = child_ret; + handle_result(ret, "test listmount ns id"); +} + +int main(void) +{ + int ret; + + ksft_print_header(); + ret = statmount(0, 0, 0, NULL, 0, 0); + assert(ret == -1); + if (errno == ENOSYS) + ksft_exit_skip("statmount() syscall not supported\n"); + + ksft_set_plan(2); + test_statmount_mnt_ns_id(); + test_listmount_ns(); + + if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index b634969cbb6f..40723a6a083f 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -66,8 +66,6 @@ #include <sys/wait.h> #include <unistd.h> #include <setjmp.h> -#include <syscall.h> -#include <linux/sched.h> #include "kselftest.h" @@ -82,17 +80,6 @@ # define TH_LOG_ENABLED 1 #endif -/* Wait for the child process to end but without sharing memory mapping. */ -static inline pid_t clone3_vfork(void) -{ - struct clone_args args = { - .flags = CLONE_VFORK, - .exit_signal = SIGCHLD, - }; - - return syscall(__NR_clone3, &args, sizeof(args)); -} - /** * TH_LOG() * @@ -437,7 +424,7 @@ static inline pid_t clone3_vfork(void) } \ if (setjmp(_metadata->env) == 0) { \ /* _metadata and potentially self are shared with all forks. */ \ - child = clone3_vfork(); \ + child = fork(); \ if (child == 0) { \ fixture_name##_setup(_metadata, self, variant->data); \ /* Let setup failure terminate early. 
*/ \ @@ -1016,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t) .sa_flags = SA_SIGINFO, }; struct sigaction saved_action; - int status; + /* + * Sets status so that WIFEXITED(status) returns true and + * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value + * should never be evaluated because of the waitpid(2) check and + * SIGALRM handling. + */ + int status = KSFT_FAIL << 8; + int child; if (sigaction(SIGALRM, &action, &saved_action)) { t->exit_code = KSFT_FAIL; @@ -1028,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t) __active_test = t; t->timed_out = false; alarm(t->timeout); - waitpid(t->pid, &status, 0); + child = waitpid(t->pid, &status, 0); + if (child == -1 && errno != EINTR) { + t->exit_code = KSFT_FAIL; + fprintf(TH_LOG_STREAM, + "# %s: Failed to wait for PID %d (errno: %d)\n", + t->name, t->pid, errno); + return; + } + alarm(0); if (sigaction(SIGALRM, &saved_action, NULL)) { t->exit_code = KSFT_FAIL; @@ -1083,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t) WTERMSIG(status)); } } else { + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test ended in some other way [%u]\n", t->name, @@ -1218,6 +1221,7 @@ void __run_test(struct __fixture_metadata *f, struct __test_xfail *xfail; char test_name[1024]; const char *diagnostic; + int child; /* reset test struct */ t->exit_code = KSFT_PASS; @@ -1236,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f, fflush(stdout); fflush(stderr); - t->pid = clone3_vfork(); - if (t->pid < 0) { + child = fork(); + if (child < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->exit_code = KSFT_FAIL; - } else if (t->pid == 0) { + } else if (child == 0) { setpgrp(); t->fn(t, variant); _exit(t->exit_code); } else { + t->pid = child; __wait_for_test(t); } ksft_print_msg(" %4s %s\n", diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 8eb57de0b587..c0c7c1fe93f9 100644 --- 
a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -277,6 +277,7 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) #define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) #define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) +#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) #define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 14ee17151a59..b5035c63d516 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -9,6 +9,7 @@ #include "kvm_util.h" #include "processor.h" +#include "sbi.h" void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index c664e446136b..594b061aef52 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1247,9 +1247,20 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint8_t maxphyaddr; + uint8_t maxphyaddr, guest_maxphyaddr; - max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; + /* + * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR + * enumerates the max _mappable_ GPA, which can be less than the raw + * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU + * doesn't support 5-level TDP. 
+ */ + guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR); + guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits; + TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits, + "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR"); + + max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1; /* Avoid reserved HyperTransport region on AMD processors. */ if (!host_cpu_is_amd) diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index 823c132069b4..0e0712854953 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -6,6 +6,7 @@ * */ #include "kvm_util.h" +#include "ucall_common.h" #define LABEL_ADDRESS(v) ((uint64_t)&(v)) diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index 69bb94e6b227..f299cbfd23ca 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -15,6 +15,7 @@ #include "processor.h" #include "sbi.h" #include "arch_timer.h" +#include "ucall_common.h" /* Maximum counters(firmware + hardware) */ #define RISCV_MAX_PMU_COUNTERS 64 diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c index 7a4a61be119b..3fb967f40c6a 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c @@ -105,11 +105,11 @@ void test_features(uint32_t vm_type, uint64_t supported_features) int i; for (i = 0; i < 64; i++) { - if (!(supported_features & (1u << i))) + if (!(supported_features & BIT_ULL(i))) test_init2_invalid(vm_type, &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }, "unknown feature"); - else if (KNOWN_FEATURES & (1u << i)) + else if (KNOWN_FEATURES & BIT_ULL(i)) test_init2(vm_type, &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }); } diff --git 
a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 37de82da9be7..b61803e36d1c 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -656,12 +656,33 @@ unmap: munmap(map, size); } +static void init_global_file_handles(void) +{ + mem_fd = open("/proc/self/mem", O_RDWR); + if (mem_fd < 0) + ksft_exit_fail_msg("opening /proc/self/mem failed\n"); + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); + ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); + if (ksm_full_scans_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); + proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); + proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", + O_RDONLY); + ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); +} + int main(int argc, char **argv) { unsigned int tests = 8; int err; if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { + init_global_file_handles(); exit(test_child_ksm()); } @@ -674,22 +695,7 @@ int main(int argc, char **argv) pagesize = getpagesize(); - mem_fd = open("/proc/self/mem", O_RDWR); - if (mem_fd < 0) - ksft_exit_fail_msg("opening /proc/self/mem failed\n"); - ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); - if (ksm_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); - ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); - if (ksm_full_scans_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); - pagemap_fd = open("/proc/self/pagemap", O_RDONLY); - if (pagemap_fd < 0) - ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); - proc_self_ksm_stat_fd = 
open("/proc/self/ksm_stat", O_RDONLY); - proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", - O_RDONLY); - ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); + init_global_file_handles(); test_unmerge(); test_unmerge_zero_pages(); diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c index b74813fdc951..d53de2486080 100644 --- a/tools/testing/selftests/mm/map_fixed_noreplace.c +++ b/tools/testing/selftests/mm/map_fixed_noreplace.c @@ -67,7 +67,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: munmap failed!?\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); addr = base_addr + page_size; size = 3 * page_size; @@ -76,7 +77,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 3*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Exact same mapping again: @@ -93,7 +95,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); /* * Second mapping contained within first: @@ -111,7 +114,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + 
ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Overlap end of existing mapping: @@ -128,7 +132,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+(3*PAGE_SIZE)\n"); /* * Overlap start of existing mapping: @@ -145,7 +150,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE bytes at base\n"); /* * Adjacent to start of existing mapping: @@ -162,7 +168,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base\n"); /* * Adjacent to end of existing mapping: @@ -179,7 +186,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base+(4*PAGE_SIZE)\n"); addr = base_addr; size = 5 * page_size; diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 49a56eb5d036..666ab7d9390b 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -43,7 +43,6 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap -test_unix_oob 
timestamping tls toeplitz diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bd01e4a0be2c..d9393569d03a 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -43,6 +43,8 @@ TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_flavors_test.sh +TEST_PROGS += srv6_end_dx4_netfilter_test.sh +TEST_PROGS += srv6_end_dx6_netfilter_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 3b83c797650d..50584479540b 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd scm_rights +TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config new file mode 100644 index 000000000000..37368567768c --- /dev/null +++ b/tools/testing/selftests/net/af_unix/config @@ -0,0 +1,3 @@ +CONFIG_UNIX=y +CONFIG_AF_UNIX_OOB=y +CONFIG_UNIX_DIAG=m diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c new file mode 100644 index 000000000000..16d0c172eaeb --- /dev/null +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. 
*/ + +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +#include <netinet/in.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/signalfd.h> +#include <sys/socket.h> + +#include "../../kselftest_harness.h" + +#define BUF_SZ 32 + +FIXTURE(msg_oob) +{ + int fd[4]; /* 0: AF_UNIX sender + * 1: AF_UNIX receiver + * 2: TCP sender + * 3: TCP receiver + */ + int signal_fd; + int epoll_fd[2]; /* 0: AF_UNIX receiver + * 1: TCP receiver + */ + bool tcp_compliant; +}; + +FIXTURE_VARIANT(msg_oob) +{ + bool peek; +}; + +FIXTURE_VARIANT_ADD(msg_oob, no_peek) +{ + .peek = false, +}; + +FIXTURE_VARIANT_ADD(msg_oob, peek) +{ + .peek = true +}; + +static void create_unix_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int ret; + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(ret, 0); +} + +static void create_tcp_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct sockaddr_in addr; + socklen_t addrlen; + int listen_fd; + int ret; + + listen_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(listen_fd, 0); + + ret = listen(listen_fd, -1); + ASSERT_EQ(ret, 0); + + addrlen = sizeof(addr); + ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_EQ(ret, 0); + + self->fd[2] = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(self->fd[2], 0); + + ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(ret, 0); + + self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_GE(self->fd[3], 0); + + ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK); + ASSERT_EQ(ret, 0); +} + +static void setup_sigurg(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct signalfd_siginfo siginfo; + int pid = getpid(); + sigset_t mask; + int i, ret; + + for (i = 0; i < 2; i++) { + ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid); + ASSERT_EQ(ret, 0); + } + + ret = sigemptyset(&mask); + ASSERT_EQ(ret, 0); + + 
ret = sigaddset(&mask, SIGURG); + ASSERT_EQ(ret, 0); + + ret = sigprocmask(SIG_BLOCK, &mask, NULL); + ASSERT_EQ(ret, 0); + + self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK); + ASSERT_GE(self->signal_fd, 0); + + ret = read(self->signal_fd, &siginfo, sizeof(siginfo)); + ASSERT_EQ(ret, -1); +} + +static void setup_epollpri(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct epoll_event event = { + .events = EPOLLPRI, + }; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + self->epoll_fd[i] = epoll_create1(0); + ASSERT_GE(self->epoll_fd[i], 0); + + ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event); + ASSERT_EQ(ret, 0); + } +} + +static void close_sockets(FIXTURE_DATA(msg_oob) *self) +{ + int i; + + for (i = 0; i < 4; i++) + close(self->fd[i]); +} + +FIXTURE_SETUP(msg_oob) +{ + create_unix_socketpair(_metadata, self); + create_tcp_socketpair(_metadata, self); + + setup_sigurg(_metadata, self); + setup_epollpri(_metadata, self); + + self->tcp_compliant = true; +} + +FIXTURE_TEARDOWN(msg_oob) +{ + close_sockets(self); +} + +static void __epollpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_remaining) +{ + struct epoll_event event[2] = {}; + int i, ret[2]; + + for (i = 0; i < 2; i++) + ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0); + + ASSERT_EQ(ret[0], oob_remaining); + + if (self->tcp_compliant) + ASSERT_EQ(ret[0], ret[1]); + + if (oob_remaining) { + ASSERT_EQ(event[0].events, EPOLLPRI); + + if (self->tcp_compliant) + ASSERT_EQ(event[0].events, event[1].events); + } +} + +static void __sendpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *buf, size_t len, int flags) +{ + int i, ret[2]; + + for (i = 0; i < 2; i++) { + struct signalfd_siginfo siginfo = {}; + int bytes; + + ret[i] = send(self->fd[i * 2], buf, len, flags); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + + if (flags & MSG_OOB) { + ASSERT_EQ(bytes, 
sizeof(siginfo)); + ASSERT_EQ(siginfo.ssi_signo, SIGURG); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + } + + ASSERT_EQ(bytes, -1); + } + + ASSERT_EQ(ret[0], len); + ASSERT_EQ(ret[0], ret[1]); +} + +static void __recvpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *expected_buf, int expected_len, + int buf_len, int flags) +{ + int i, ret[2], recv_errno[2], expected_errno = 0; + char recv_buf[2][BUF_SZ] = {}; + bool printed = false; + + ASSERT_GE(BUF_SZ, buf_len); + + errno = 0; + + for (i = 0; i < 2; i++) { + ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + recv_errno[i] = errno; + } + + if (expected_len < 0) { + expected_errno = -expected_len; + expected_len = -1; + } + + if (ret[0] != expected_len || recv_errno[0] != expected_errno) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(ret[0], expected_len); + ASSERT_EQ(recv_errno[0], expected_errno); + } + + if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + + printed = true; + + if (self->tcp_compliant) { + ASSERT_EQ(ret[0], ret[1]); + ASSERT_EQ(recv_errno[0], recv_errno[1]); + } + } + + if (expected_len >= 0) { + int cmp; + + cmp = strncmp(expected_buf, recv_buf[0], expected_len); + if (cmp) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(cmp, 0); + } + + cmp = strncmp(recv_buf[0], recv_buf[1], expected_len); + if (cmp) { + if (!printed) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? 
strerror(recv_errno[1]) : recv_buf[1]); + } + + if (self->tcp_compliant) + ASSERT_EQ(cmp, 0); + } + } +} + +static void __setinlinepair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int i, oob_inline = 1; + + for (i = 0; i < 2; i++) { + int ret; + + ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE, + &oob_inline, sizeof(oob_inline)); + ASSERT_EQ(ret, 0); + } +} + +static void __siocatmarkpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_head) +{ + int answ[2] = {}; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]); + ASSERT_EQ(ret, 0); + } + + ASSERT_EQ(answ[0], oob_head); + + if (self->tcp_compliant) + ASSERT_EQ(answ[0], answ[1]); +} + +#define sendpair(buf, len, flags) \ + __sendpair(_metadata, self, buf, len, flags) + +#define recvpair(expected_buf, expected_len, buf_len, flags) \ + do { \ + if (variant->peek) \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, \ + buf_len, (flags) | MSG_PEEK); \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, buf_len, flags); \ + } while (0) + +#define epollpair(oob_remaining) \ + __epollpair(_metadata, self, oob_remaining) + +#define siocatmarkpair(oob_head) \ + __siocatmarkpair(_metadata, self, oob_head) + +#define setinlinepair() \ + __setinlinepair(_metadata, self) + +#define tcp_incompliant \ + for (self->tcp_compliant = false; \ + self->tcp_compliant == false; \ + self->tcp_compliant = true) + +TEST_F(msg_oob, non_oob) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", 
-EAGAIN, 1, 0); /* Drop OOB. */ + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_ahead) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */ + epollpair(false); + siocatmarkpair(true); + + recvpair("world", 5, 5, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_break_drop) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. 
*/ + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("ld", 2, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + recvpair("ld", 2, 2, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */ + epollpair(false); + siocatmarkpair(true); + } +} + +TEST_F(msg_oob, ex_oob_drop_2) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + tcp_incompliant { + siocatmarkpair(false); + } + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. 
*/ + epollpair(false); + siocatmarkpair(true); + } +} + +TEST_F(msg_oob, ex_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("ld", 2, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + tcp_incompliant { + recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */ + } + + epollpair(true); + siocatmarkpair(true); + + recvpair("d", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob) +{ + setinlinepair(); + + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob_break) +{ + setinlinepair(); + + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. 
*/ + epollpair(true); + siocatmarkpair(true); + + recvpair("o", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */ + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */ + } + + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("ld", 2, 0); + epollpair(true); + siocatmarkpair(false); + + setinlinepair(); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("rld", 3, 3, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_no_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + setinlinepair(); + + sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */ + epollpair(true); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + setinlinepair(); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP recv()s "y". */ + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. 
*/ + epollpair(false); + siocatmarkpair(false); + } +} + +TEST_F(msg_oob, inline_ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index 2bfed46e0b19..d66336256580 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -14,12 +14,12 @@ FIXTURE(scm_rights) { - int fd[16]; + int fd[32]; }; FIXTURE_VARIANT(scm_rights) { - char name[16]; + char name[32]; int type; int flags; bool test_listener; @@ -172,6 +172,8 @@ static void __create_sockets(struct __test_metadata *_metadata, const FIXTURE_VARIANT(scm_rights) *variant, int n) { + ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0])); + if (variant->test_listener) create_listeners(_metadata, self, n); else @@ -283,4 +285,23 @@ TEST_F(scm_rights, cross_edge) close_sockets(8); } +TEST_F(scm_rights, backtrack_from_scc) +{ + create_sockets(10); + + send_fd(0, 1); + send_fd(0, 4); + send_fd(1, 2); + send_fd(2, 3); + send_fd(3, 1); + + send_fd(5, 6); + send_fd(5, 9); + send_fd(6, 7); + send_fd(7, 8); + send_fd(8, 6); + + close_sockets(10); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c deleted file mode 100644 index a7c51889acd5..000000000000 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ /dev/null @@ -1,436 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -#include <stdio.h> -#include <stdlib.h> -#include <sys/socket.h> -#include <arpa/inet.h> -#include <unistd.h> -#include 
<string.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <errno.h> -#include <netinet/tcp.h> -#include <sys/un.h> -#include <sys/signal.h> -#include <sys/poll.h> - -static int pipefd[2]; -static int signal_recvd; -static pid_t producer_id; -static char sock_name[32]; - -static void sig_hand(int sn, siginfo_t *si, void *p) -{ - signal_recvd = sn; -} - -static int set_sig_handler(int signal) -{ - struct sigaction sa; - - sa.sa_sigaction = sig_hand; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_SIGINFO | SA_RESTART; - - return sigaction(signal, &sa, NULL); -} - -static void set_filemode(int fd, int set) -{ - int flags = fcntl(fd, F_GETFL, 0); - - if (set) - flags &= ~O_NONBLOCK; - else - flags |= O_NONBLOCK; - fcntl(fd, F_SETFL, flags); -} - -static void signal_producer(int fd) -{ - char cmd; - - cmd = 'S'; - write(fd, &cmd, sizeof(cmd)); -} - -static void wait_for_signal(int fd) -{ - char buf[5]; - - read(fd, buf, 5); -} - -static void die(int status) -{ - fflush(NULL); - unlink(sock_name); - kill(producer_id, SIGTERM); - exit(status); -} - -int is_sioctatmark(int fd) -{ - int ans = -1; - - if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) { -#ifdef DEBUG - perror("SIOCATMARK Failed"); -#endif - } - return ans; -} - -void read_oob(int fd, char *c) -{ - - *c = ' '; - if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) { -#ifdef DEBUG - perror("Reading MSG_OOB Failed"); -#endif - } -} - -int read_data(int pfd, char *buf, int size) -{ - int len = 0; - - memset(buf, size, '0'); - len = read(pfd, buf, size); -#ifdef DEBUG - if (len < 0) - perror("read failed"); -#endif - return len; -} - -static void wait_for_data(int pfd, int event) -{ - struct pollfd pfds[1]; - - pfds[0].fd = pfd; - pfds[0].events = event; - poll(pfds, 1, -1); -} - -void producer(struct sockaddr_un *consumer_addr) -{ - int cfd; - char buf[64]; - int i; - - memset(buf, 'x', sizeof(buf)); - cfd = socket(AF_UNIX, SOCK_STREAM, 0); - - wait_for_signal(pipefd[0]); - if (connect(cfd, (struct sockaddr 
*)consumer_addr, - sizeof(*consumer_addr)) != 0) { - perror("Connect failed"); - kill(0, SIGTERM); - exit(1); - } - - for (i = 0; i < 2; i++) { - /* Test 1: Test for SIGURG and OOB */ - wait_for_signal(pipefd[0]); - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 2: Test for OOB being overwitten */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '#'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 3: Test for SIOCATMARK */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - send(cfd, buf, sizeof(buf), 0); - - wait_for_signal(pipefd[0]); - - /* Test 4: Test for 1byte OOB msg */ - memset(buf, 'x', sizeof(buf)); - buf[0] = '@'; - send(cfd, buf, 1, MSG_OOB); - } -} - -int -main(int argc, char **argv) -{ - int lfd, pfd; - struct sockaddr_un consumer_addr, paddr; - socklen_t len = sizeof(consumer_addr); - char buf[1024]; - int on = 0; - char oob; - int atmark; - - lfd = socket(AF_UNIX, SOCK_STREAM, 0); - memset(&consumer_addr, 0, sizeof(consumer_addr)); - consumer_addr.sun_family = AF_UNIX; - sprintf(sock_name, "unix_oob_%d", getpid()); - unlink(sock_name); - strcpy(consumer_addr.sun_path, sock_name); - - if ((bind(lfd, (struct sockaddr *)&consumer_addr, - sizeof(consumer_addr))) != 0) { - perror("socket bind failed"); - exit(1); - } - - pipe(pipefd); - - listen(lfd, 1); - - producer_id = fork(); - if (producer_id == 0) { - producer(&consumer_addr); - exit(0); - } - - set_sig_handler(SIGURG); - signal_producer(pipefd[1]); - - pfd = accept(lfd, (struct sockaddr *) &paddr, &len); - fcntl(pfd, F_SETOWN, getpid()); - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1: - * veriyf that SIGURG is 
- * delivered, 63 bytes are - * read, oob is '@', and POLLPRI works. - */ - wait_for_data(pfd, POLLPRI); - read_oob(pfd, &oob); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 63 || oob != '@') { - fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2: - * Verify that the first OOB is over written by - * the 2nd one and the first OOB is returned as - * part of the read, and sigurg is received. - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = 0; - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - read_oob(pfd, &oob); - if (!signal_recvd || len != 127 || oob != '#') { - fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and atmark - * is set. - * oob is '%' and second read returns - * 64 bytes. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 150) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - read_oob(pfd, &oob); - - if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { - fprintf(stderr, - "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n", - signal_recvd, len, oob, atmark); - die(1); - } - - signal_recvd = 0; - - len = read_data(pfd, buf, 1024); - if (len != 64) { - fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4: - * verify that a single byte - * oob message is delivered. - * set non blocking mode and - * check proper error is - * returned and sigurg is - * received and correct - * oob is read. 
- */ - - set_filemode(pfd, 0); - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if ((len == -1) && (errno == 11)) - len = 0; - - read_oob(pfd, &oob); - - if (!signal_recvd || len != 0 || oob != '@') { - fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - set_filemode(pfd, 1); - - /* Inline Testing */ - - on = 1; - if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) { - perror("SO_OOBINLINE"); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1 -- Inline: - * Check that SIGURG is - * delivered and 63 bytes are - * read and oob is '@' - */ - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - - if (!signal_recvd || len != 63) { - fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n", - signal_recvd, len); - die(1); - } - - len = read_data(pfd, buf, 1024); - - if (len != 1) { - fprintf(stderr, - "Test 1.1 Inline failed, sigurg %d len %d oob %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2 -- Inline: - * Verify that the first OOB is over written by - * the 2nd one and read breaks correctly on - * 2nd OOB boundary with the first OOB returned as - * part of the read, and sigurg is delivered and - * siocatmark returns true. - * next read returns one byte, the oob byte - * and siocatmark returns false. 
- */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 127 || atmark != 1 || !signal_recvd) { - fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n", - len, atmark); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 1 || buf[0] != '#' || atmark == 1) { - fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3 -- Inline: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and siocatmark - * is true after the read. - * subsequent read returns 65 bytes - * because of oob which should be '%'. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 126) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (!signal_recvd || len != 127 || !atmark) { - fprintf(stderr, - "Test 3 Inline failed, sigurg %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 65 || buf[0] != '%' || atmark != 0) { - fprintf(stderr, - "Test 3.1 Inline failed, len %d oob %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4 -- Inline: - * verify that a single - * byte oob message is delivered - * and read returns one byte, the oob - * byte and sigurg is received - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 1 || buf[0] != '@') { - fprintf(stderr, - "Test 4 Inline failed, signal %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - die(0); -} diff --git a/tools/testing/selftests/net/config 
b/tools/testing/selftests/net/config index 04de7a6ba6f3..d4891f7a2bfa 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -101,3 +101,5 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RPFILTER=m diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 9e2981f2d7f5..9cb05978269d 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -160,10 +160,12 @@ make_connection() local is_v6=$1 local app_port=$app4_port local connect_addr="10.0.1.1" + local client_addr="10.0.1.2" local listen_addr="0.0.0.0" if [ "$is_v6" = "v6" ] then connect_addr="dead:beef:1::1" + client_addr="dead:beef:1::2" listen_addr="::" app_port=$app6_port else @@ -206,6 +208,7 @@ make_connection() [ "$server_serverside" = 1 ] then test_pass + print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}" else test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" mptcp_lib_result_print_all_tap @@ -297,7 +300,7 @@ test_announce() ip netns exec "$ns2"\ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ ns2eth1 - print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port" + print_test "ADD_ADDR id:client 10.0.2.2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ "$client4_port" @@ -306,7 +309,7 @@ test_announce() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann\ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 - print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port" + print_test "ADD_ADDR6 id:client dead:beef:2::2 (ns2) => ns1, reuse port" sleep 0.5 
verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ "$client_addr_id" "$client6_port" "v6" @@ -316,7 +319,7 @@ test_announce() client_addr_id=$((client_addr_id+1)) ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id dev ns2eth1 port $new4_port - print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port" + print_test "ADD_ADDR id:client+1 10.0.2.2 (ns2) => ns1, new port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" @@ -327,7 +330,7 @@ test_announce() # ADD_ADDR from the server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR id:server 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$app4_port" @@ -336,7 +339,7 @@ test_announce() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR6 id:server dead:beef:2::1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ "$server_addr_id" "$app6_port" "v6" @@ -346,7 +349,7 @@ test_announce() server_addr_id=$((server_addr_id+1)) ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 port $new4_port - print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port" + print_test "ADD_ADDR id:server+1 10.0.2.1 (ns1) => ns2, new port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" @@ -380,7 +383,7 @@ test_remove() local 
invalid_token=$(( client4_token - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\ $client_addr_id > /dev/null 2>&1 - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token" + print_test "RM_ADDR id:client ns2 => ns1, invalid token" local type type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] @@ -394,7 +397,7 @@ test_remove() local invalid_id=$(( client_addr_id + 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $invalid_id > /dev/null 2>&1 - print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id" + print_test "RM_ADDR id:client+1 ns2 => ns1, invalid id" type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then @@ -407,7 +410,7 @@ test_remove() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR id:client ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -416,7 +419,7 @@ test_remove() client_addr_id=$(( client_addr_id - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR id:client-1 ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -424,7 +427,7 @@ test_remove() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ $client_addr_id - print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR6 id:client-1 ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" @@ -434,7 +437,7 @@ test_remove() # RM_ADDR from the server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id - print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR id:server ns1 => ns2" sleep 0.5 verify_remove_event 
"$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -443,7 +446,7 @@ test_remove() server_addr_id=$(( server_addr_id - 1 )) ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id - print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR id:server-1 ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -451,7 +454,7 @@ test_remove() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ $server_addr_id - print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR6 id:server-1 ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" } @@ -479,8 +482,14 @@ verify_subflow_events() local locid local remid local info + local e_dport_txt - info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})" + # only display the fixed ports + if [ "${e_dport}" -ge "${app4_port}" ] && [ "${e_dport}" -le "${app6_port}" ]; then + e_dport_txt=":${e_dport}" + fi + + info="${e_saddr} (${e_from}) => ${e_daddr}${e_dport_txt} (${e_to})" if [ "$e_type" = "$SUB_ESTABLISHED" ] then @@ -766,7 +775,7 @@ test_subflows_v4_v6_mix() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR4 id:server 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ "$server_addr_id" "$app6_port" @@ -861,7 +870,7 @@ test_listener() local listener_pid=$! 
sleep 0.5 - print_test "CREATE_LISTENER 10.0.2.2:$client4_port" + print_test "CREATE_LISTENER 10.0.2.2 (client port)" verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port # ADD_ADDR from client to server machine reusing the subflow port @@ -878,13 +887,14 @@ test_listener() mptcp_lib_kill_wait $listener_pid sleep 0.5 - print_test "CLOSE_LISTENER 10.0.2.2:$client4_port" + print_test "CLOSE_LISTENER 10.0.2.2 (client port)" verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port } print_title "Make connections" make_connection make_connection "v6" +print_title "Will be using address IDs ${client_addr_id} (client) and ${server_addr_id} (server)" test_announce test_remove diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index bdc03a2097e8..7ea5fb28c93d 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -85,6 +85,7 @@ static bool cfg_rx; static int cfg_runtime_ms = 4200; static int cfg_verbose; static int cfg_waittime_ms = 500; +static int cfg_notification_limit = 32; static bool cfg_zerocopy; static socklen_t cfg_alen; @@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET]; static long packets, bytes, completions, expected_completions; static int zerocopied = -1; static uint32_t next_completion; +static uint32_t sends_since_notify; static unsigned long gettimeofday_ms(void) { @@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain) error(1, errno, "send"); if (cfg_verbose && ret != len) fprintf(stderr, "send: ret=%u != %u\n", ret, len); + sends_since_notify++; if (len) { packets++; @@ -435,7 +438,7 @@ static bool do_recv_completion(int fd, int domain) /* Detect notification gaps. These should not happen often, if at all. * Gaps can occur due to drops, reordering and retransmissions. 
*/ - if (lo != next_completion) + if (cfg_verbose && lo != next_completion) fprintf(stderr, "gap: %u..%u does not append to %u\n", lo, hi, next_completion); next_completion = hi + 1; @@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain) static void do_recv_completions(int fd, int domain) { while (do_recv_completion(fd, domain)) {} + sends_since_notify = 0; } /* Wait for all remaining completions on the errqueue */ @@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol) else do_sendmsg(fd, &msg, cfg_zerocopy, domain); + if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit) + do_recv_completions(fd, domain); + while (!do_poll(fd, POLLOUT)) { if (cfg_zerocopy) do_recv_completions(fd, domain); @@ -708,7 +715,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) { + while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -736,6 +743,9 @@ static void parse_opts(int argc, char **argv) if (cfg_ifindex == 0) error(1, errno, "invalid iface: %s", optarg); break; + case 'l': + cfg_notification_limit = strtoul(optarg, NULL, 0); + break; case 'm': cfg_cork_mixed = true; break; diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 5cae53543849..15bca0708717 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # OVS kernel module self tests diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 1dd057afd3fb..9f8dec2f6539 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -531,7 +531,7 @@ class 
ovsactions(nla): for flat_act in parse_flat_map: if parse_starts_block(actstr, flat_act[0], False): actstr = actstr[len(flat_act[0]):] - self["attrs"].append([flat_act[1]]) + self["attrs"].append([flat_act[1], True]) actstr = actstr[strspn(actstr, ", ") :] parsed = True diff --git a/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh new file mode 100755 index 000000000000..e23210aa547f --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh @@ -0,0 +1,335 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu <wujianguo@chinatelecom.cn> +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv4 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv4 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv4 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. 
+# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . 
| +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | 10.0.0.11/24 | +----------+ | | +----------+ | 10.0.0.22/24 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX4 nh4 10.0.0.1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX4 nh4 10.0.0.2 dev veth-t100| +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward 
to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv4_HS_NETWORK=10.0.0 +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq 
net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.${rt}${hs}/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. 
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX4 nh4 ${IPv4_HS_NETWORK}.${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! 
-x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh new file mode 100755 index 000000000000..9e69a2ed5bc3 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh @@ -0,0 +1,340 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu <wujianguo@chinatelecom.cn> +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv6 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv6 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv6 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. 
For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::11/64 | +----------+ | | +----------+ | cafe::22/64 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX6 nh6 cafe::1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# 
+---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX6 nh6 cafe::2 dev veth-t100 | +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. 
+ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv6_HS_NETWORK=cafe +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} ip6tables -t 
raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::${rt}${hs}/64 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local rtveth=veth-t${tid} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. 
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX6 nh6 ${IPv6_HS_NETWORK}::${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -6 -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! 
-x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 254d676a2689..185dc76ebb5f 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -1,8 +1,18 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test +# gcc requires -static-libasan in order to ensure that Address Sanitizer's +# library is the first one loaded. However, clang already statically links the +# Address Sanitizer if -fsanitize is specified. Therefore, simply omit +# -static-libasan for clang builds. +ifeq ($(LLVM),) + CFLAGS += -static-libasan +endif + +LOCAL_HDRS += helpers.h + include ../lib.mk -$(TEST_GEN_PROGS): helpers.c helpers.h +$(TEST_GEN_PROGS): helpers.c diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk index b909bee3cb2a..abb9e58d95c4 100644 --- a/tools/testing/selftests/powerpc/flags.mk +++ b/tools/testing/selftests/powerpc/flags.mk @@ -5,8 +5,5 @@ GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown") export GIT_VERSION endif -ifeq ($(CFLAGS),) -CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS) +CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS) export CFLAGS -endif - diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index c7686fb6641a..55315ed695f4 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ 
b/tools/testing/selftests/resctrl/cat_test.c @@ -291,11 +291,30 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param return ret; } +static bool arch_supports_noncont_cat(const struct resctrl_test *test) +{ + unsigned int eax, ebx, ecx, edx; + + /* AMD always supports non-contiguous CBM. */ + if (get_vendor() == ARCH_AMD) + return true; + + /* Intel support for non-contiguous CBM needs to be discovered. */ + if (!strcmp(test->resource, "L3")) + __cpuid_count(0x10, 1, eax, ebx, ecx, edx); + else if (!strcmp(test->resource, "L2")) + __cpuid_count(0x10, 2, eax, ebx, ecx, edx); + else + return false; + + return ((ecx >> 3) & 1); +} + static int noncont_cat_run_test(const struct resctrl_test *test, const struct user_params *uparams) { unsigned long full_cache_mask, cont_mask, noncont_mask; - unsigned int eax, ebx, ecx, edx, sparse_masks; + unsigned int sparse_masks; int bit_center, ret; char schemata[64]; @@ -304,15 +323,8 @@ static int noncont_cat_run_test(const struct resctrl_test *test, if (ret) return ret; - if (!strcmp(test->resource, "L3")) - __cpuid_count(0x10, 1, eax, ebx, ecx, edx); - else if (!strcmp(test->resource, "L2")) - __cpuid_count(0x10, 2, eax, ebx, ecx, edx); - else - return -EINVAL; - - if (sparse_masks != ((ecx >> 3) & 1)) { - ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n"); + if (arch_supports_noncont_cat(test) != sparse_masks) { + ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n"); return 1; } diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c index 62397d5934f1..ed351a1cb917 100644 --- a/tools/testing/selftests/riscv/sigreturn/sigreturn.c +++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c @@ -51,7 +51,7 @@ static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *)) asm(".option push \n\ .option arch, +v \n\ - vsetivli x0, 1, e32, ta, ma \n\ + vsetivli x0, 1, e32, m1, 
ta, ma \n\ vmv.s.x v0, %1 \n\ # Generate SIGSEGV \n\ lw a0, 0(x0) \n\ diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index b83099160fbc..94886c82ae60 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -194,14 +194,14 @@ int main(int argc, char *argv[]) ksft_set_plan(7); ksft_print_msg("Running on:\n"); - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("uname -a"); ksft_print_msg("Current BPF sysctl settings:\n"); /* Avoid using "sysctl" which may not be installed. */ - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("grep -H . /proc/sys/net/core/bpf_jit_enable"); - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("grep -H . /proc/sys/net/core/bpf_jit_harden"); affinity(); diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c index e40dc5be2f66..d12ff955de0d 100644 --- a/tools/testing/selftests/timens/exec.c +++ b/tools/testing/selftests/timens/exec.c @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec) > 5) + if (labs(tst.tv_sec - now.tv_sec) > 5) return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); } return 0; @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec) > 5) + if (labs(tst.tv_sec - now.tv_sec) > 5) return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); } @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) /* Check that a child process is in the new timens. 
*/ for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5) + if (labs(tst.tv_sec - now.tv_sec - OFFSET) > 5) return pr_fail("%ld %ld\n", now.tv_sec + OFFSET, tst.tv_sec); } diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c index 5e7f0051bd7b..5b939f59dfa4 100644 --- a/tools/testing/selftests/timens/timer.c +++ b/tools/testing/selftests/timens/timer.c @@ -56,7 +56,7 @@ int run_test(int clockid, struct timespec now) return pr_perror("timerfd_gettime"); elapsed = new_value.it_value.tv_sec; - if (abs(elapsed - 3600) > 60) { + if (llabs(elapsed - 3600) > 60) { ksft_test_result_fail("clockid: %d elapsed: %lld\n", clockid, elapsed); return 1; diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c index 9edd43d6b2c1..a4196bbd6e33 100644 --- a/tools/testing/selftests/timens/timerfd.c +++ b/tools/testing/selftests/timens/timerfd.c @@ -61,7 +61,7 @@ int run_test(int clockid, struct timespec now) return pr_perror("timerfd_gettime(%d)", clockid); elapsed = new_value.it_value.tv_sec; - if (abs(elapsed - 3600) > 60) { + if (llabs(elapsed - 3600) > 60) { ksft_test_result_fail("clockid: %d elapsed: %lld\n", clockid, elapsed); return 1; diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c index beb7614941fb..5b8907bf451d 100644 --- a/tools/testing/selftests/timens/vfork_exec.c +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -32,7 +32,7 @@ static void *tcheck(void *_args) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now->tv_sec) > 5) { + if (labs(tst.tv_sec - now->tv_sec) > 5) { pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n", args->tst_name, tst.tv_sec, now->tv_sec); return (void *)1UL; @@ -64,7 +64,7 @@ static int check(char *tst_name, struct timespec *now) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - 
now->tv_sec) > 5) + if (labs(tst.tv_sec - now->tv_sec) > 5) return pr_fail("%s: unexpected value: %ld (%ld)\n", tst_name, tst.tv_sec, now->tv_sec); } diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index d53a4d8008f9..98d8ba2afa00 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,35 +1,30 @@ # SPDX-License-Identifier: GPL-2.0 -include ../lib.mk - uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres +TEST_GEN_PROGS := vdso_test_gettimeofday +TEST_GEN_PROGS += vdso_test_getcpu +TEST_GEN_PROGS += vdso_test_abi +TEST_GEN_PROGS += vdso_test_clock_getres ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) -TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 +TEST_GEN_PROGS += vdso_standalone_test_x86 endif -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness +TEST_GEN_PROGS += vdso_test_correctness CFLAGS := -std=gnu99 -CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector -LDFLAGS_vdso_test_correctness := -ldl + ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s endif -all: $(TEST_GEN_PROGS) +include ../lib.mk $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c $(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c + $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c - $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ - vdso_standalone_test_x86.c parse_vdso.c \ - -o $@ +$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector + $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c - $(CC) 
$(CFLAGS) \ - vdso_test_correctness.c \ - -o $@ \ - $(LDFLAGS_vdso_test_correctness) +$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 413f75620a35..4ae417372e9e 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -55,14 +55,20 @@ static struct vdso_info ELF(Verdef) *verdef; } vdso_info; -/* Straight from the ELF specification. */ -static unsigned long elf_hash(const unsigned char *name) +/* + * Straight from the ELF specification...and then tweaked slightly, in order to + * avoid a few clang warnings. + */ +static unsigned long elf_hash(const char *name) { unsigned long h = 0, g; - while (*name) + const unsigned char *uch_name = (const unsigned char *)name; + + while (*uch_name) { - h = (h << 4) + *name++; - if (g = h & 0xf0000000) + h = (h << 4) + *uch_name++; + g = h & 0xf0000000; + if (g) h ^= g >> 24; h &= ~g; } diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 8a44ff973ee1..27f6fdf11969 100644 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -18,7 +18,7 @@ #include "parse_vdso.h" -/* We need a libc functions... */ +/* We need some libc functions... */ int strcmp(const char *a, const char *b) { /* This implementation is buggy: it never returns -1. */ @@ -34,6 +34,20 @@ int strcmp(const char *a, const char *b) return 0; } +/* + * The clang build needs this, although gcc does not. + * Stolen from lib/string.c. + */ +void *memcpy(void *dest, const void *src, size_t count) +{ + char *tmp = dest; + const char *s = src; + + while (count--) + *tmp++ = *s++; + return dest; +} + /* ...and two syscalls. This is x86-specific. 
*/ static inline long x86_syscall3(long nr, long a0, long a1, long a2) { @@ -70,7 +84,7 @@ void to_base10(char *lastdig, time_t n) } } -__attribute__((externally_visible)) void c_main(void **stack) +void c_main(void **stack) { /* Parse the stack */ long argc = (long)*stack; diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index e95bd56b332f..35856b11c143 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -109,9 +109,9 @@ KERNEL_ARCH := x86_64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) -QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off else -QEMU_MACHINE := -cpu max -machine microvm -no-acpi +QEMU_MACHINE := -cpu max -machine microvm,acpi=off endif else ifeq ($(ARCH),i686) CHOST := i686-linux-musl @@ -120,9 +120,9 @@ KERNEL_ARCH := x86 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) -QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off else -QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi +QEMU_MACHINE := -cpu coreduo -machine microvm,acpi=off endif else ifeq ($(ARCH),mips64) CHOST := mips64-linux-musl diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c index 86d267db87bb..7bc74969a819 100644 --- a/virt/kvm/dirty_ring.c +++ b/virt/kvm/dirty_ring.c @@ -55,6 +55,9 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask) struct kvm_memory_slot *memslot; int as_id, id; + if (!mask) + return; + as_id = slot >> 16; id = (u16)slot; diff --git a/virt/kvm/guest_memfd.c 
b/virt/kvm/guest_memfd.c index 0f4e0cf4f158..747fe251e445 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -510,8 +510,10 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, } if (folio_test_hwpoison(folio)) { + folio_unlock(folio); + folio_put(folio); r = -EHWPOISON; - goto out_unlock; + goto out_fput; } page = folio_file_page(folio, index); @@ -522,7 +524,6 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, r = 0; -out_unlock: folio_unlock(folio); out_fput: fput(file); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 14841acb8b95..1192942aef91 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -651,7 +651,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm, range->on_lock(kvm); if (IS_KVM_NULL_FN(range->handler)) - break; + goto mmu_unlock; } r.ret |= range->handler(kvm, &gfn_range); } @@ -660,6 +660,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm, if (range->flush_on_ret && r.ret) kvm_flush_remote_tlbs(kvm); +mmu_unlock: if (r.found_memslot) KVM_MMU_UNLOCK(kvm); @@ -4025,12 +4026,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) { struct kvm *kvm = me->kvm; struct kvm_vcpu *vcpu; - int last_boosted_vcpu = me->kvm->last_boosted_vcpu; + int last_boosted_vcpu; unsigned long i; int yielded = 0; int try = 3; int pass; + last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu); kvm_vcpu_set_in_spin_loop(me, true); /* * We boost the priority of a VCPU that is runnable but not @@ -4068,7 +4070,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) yielded = kvm_vcpu_yield_to(vcpu); if (yielded > 0) { - kvm->last_boosted_vcpu = i; + WRITE_ONCE(kvm->last_boosted_vcpu, i); break; } else if (yielded < 0) { try--; @@ -4427,7 +4429,7 @@ static long kvm_vcpu_ioctl(struct file *filp, struct kvm_regs *kvm_regs; r = -ENOMEM; - kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT); + 
kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); if (!kvm_regs) goto out; r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); @@ -4454,8 +4456,7 @@ out_free1: break; } case KVM_GET_SREGS: { - kvm_sregs = kzalloc(sizeof(struct kvm_sregs), - GFP_KERNEL_ACCOUNT); + kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); r = -ENOMEM; if (!kvm_sregs) goto out; @@ -4547,7 +4548,7 @@ out_free1: break; } case KVM_GET_FPU: { - fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT); + fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); r = -ENOMEM; if (!fpu) goto out; @@ -6210,7 +6211,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) active = kvm_active_vms; mutex_unlock(&kvm_lock); - env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT); + env = kzalloc(sizeof(*env), GFP_KERNEL); if (!env) return; @@ -6226,7 +6227,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) add_uevent_var(env, "PID=%d", kvm->userspace_pid); if (!IS_ERR(kvm->debugfs_dentry)) { - char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); + char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); if (p) { tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); |