diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2021-06-23 12:43:45 +0300 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2021-06-23 14:30:41 +0300 |
commit | c3ab0e28a437c213e5e2c1d890f3891b6952b9ca (patch) | |
tree | 8246e6614acd941b07c0925dea03b97f3ae1f3a7 | |
parent | ba1f82456ba8438a8abc96274d57bfe76d34a4a8 (diff) | |
parent | 51696f39cbee5bb684e7959c0c98b5f54548aa34 (diff) | |
download | linux-c3ab0e28a437c213e5e2c1d890f3891b6952b9ca.tar.xz |
Merge branch 'topic/ppc-kvm' of https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux into HEAD
- Support for the H_RPT_INVALIDATE hypercall
- Conversion of Book3S entry/exit to C
- Bug fixes
331 files changed, 4492 insertions, 2892 deletions
@@ -160,6 +160,7 @@ Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com> Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com> Jens Axboe <axboe@suse.de> Jens Osterkamp <Jens.Osterkamp@de.ibm.com> +Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net> Jiri Slaby <jirislaby@kernel.org> <jirislaby@gmail.com> Jiri Slaby <jirislaby@kernel.org> <jslaby@novell.com> Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com> diff --git a/Documentation/ABI/obsolete/sysfs-class-dax b/Documentation/ABI/obsolete/sysfs-class-dax index 0faf1354cd05..5bcce27458e3 100644 --- a/Documentation/ABI/obsolete/sysfs-class-dax +++ b/Documentation/ABI/obsolete/sysfs-class-dax @@ -1,7 +1,7 @@ What: /sys/class/dax/ Date: May, 2016 KernelVersion: v4.7 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: Device DAX is the device-centric analogue of Filesystem DAX (CONFIG_FS_DAX). It allows memory ranges to be allocated and mapped without need of an intervening file diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered index 0360be39c98e..dae880b1a5d5 100644 --- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered @@ -1,4 +1,4 @@ -This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.¬ +This ABI is renamed and moved to a new location /sys/kernel/fadump/registered. What: /sys/kernel/fadump_registered Date: Feb 2012 diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem index 6ce0b129ab12..ca2396edb5f1 100644 --- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem @@ -1,4 +1,4 @@ -This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.¬ +This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem. What: /sys/kernel/fadump_release_mem Date: Feb 2012 diff --git a/Documentation/ABI/removed/sysfs-bus-nfit b/Documentation/ABI/removed/sysfs-bus-nfit index ae8c1ca53828..277437005def 100644 --- a/Documentation/ABI/removed/sysfs-bus-nfit +++ b/Documentation/ABI/removed/sysfs-bus-nfit @@ -1,7 +1,7 @@ What: /sys/bus/nd/devices/regionX/nfit/ecc_unit_size Date: Aug, 2017 KernelVersion: v4.14 (Removed v4.18) -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) Size of a write request to a DIMM that will not incur a read-modify-write cycle at the memory controller. diff --git a/Documentation/ABI/testing/sysfs-bus-nfit b/Documentation/ABI/testing/sysfs-bus-nfit index 63ef0b9ecce7..e7282d184a74 100644 --- a/Documentation/ABI/testing/sysfs-bus-nfit +++ b/Documentation/ABI/testing/sysfs-bus-nfit @@ -5,7 +5,7 @@ Interface Table (NFIT)' section in the ACPI specification What: /sys/bus/nd/devices/nmemX/nfit/serial Date: Jun, 2015 KernelVersion: v4.2 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) Serial number of the NVDIMM (non-volatile dual in-line memory module), assigned by the module vendor. @@ -14,7 +14,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/handle Date: Apr, 2015 KernelVersion: v4.2 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) The address (given by the _ADR object) of the device on its parent bus of the NVDIMM device containing the NVDIMM region. @@ -23,7 +23,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/device Date: Apr, 2015 KernelVersion: v4.1 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) Device id for the NVDIMM, assigned by the module vendor. @@ -31,7 +31,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/rev_id Date: Jun, 2015 KernelVersion: v4.2 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) Revision of the NVDIMM, assigned by the module vendor. @@ -39,7 +39,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/phys_id Date: Apr, 2015 KernelVersion: v4.2 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) Handle (i.e., instance number) for the SMBIOS (system management BIOS) Memory Device structure describing the NVDIMM @@ -49,7 +49,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/flags Date: Jun, 2015 KernelVersion: v4.2 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) The flags in the NFIT memory device sub-structure indicate the state of the data on the nvdimm relative to its energy @@ -68,7 +68,7 @@ What: /sys/bus/nd/devices/nmemX/nfit/format1 What: /sys/bus/nd/devices/nmemX/nfit/formats Date: Apr, 2016 KernelVersion: v4.7 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) The interface codes indicate support for persistent memory mapped directly into system physical address space and / or a @@ -84,7 +84,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/vendor Date: Apr, 2016 KernelVersion: v4.7 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) Vendor id of the NVDIMM. @@ -92,7 +92,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/dsm_mask Date: May, 2016 KernelVersion: v4.7 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) The bitmask indicates the supported device specific control functions relative to the NVDIMM command family supported by the @@ -102,7 +102,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/family Date: Apr, 2016 KernelVersion: v4.7 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) Displays the NVDIMM family command sets. Values 0, 1, 2 and 3 correspond to NVDIMM_FAMILY_INTEL, @@ -118,7 +118,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/id Date: Apr, 2016 KernelVersion: v4.7 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) ACPI specification 6.2 section 5.2.25.9, defines an identifier for an NVDIMM, which refelects the id attribute. @@ -127,7 +127,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/subsystem_vendor Date: Apr, 2016 KernelVersion: v4.7 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) Sub-system vendor id of the NVDIMM non-volatile memory subsystem controller. @@ -136,7 +136,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/subsystem_rev_id Date: Apr, 2016 KernelVersion: v4.7 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) Sub-system revision id of the NVDIMM non-volatile memory subsystem controller, assigned by the non-volatile memory subsystem @@ -146,7 +146,7 @@ Description: What: /sys/bus/nd/devices/nmemX/nfit/subsystem_device Date: Apr, 2016 KernelVersion: v4.7 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) Sub-system device id for the NVDIMM non-volatile memory subsystem controller, assigned by the non-volatile memory @@ -156,7 +156,7 @@ Description: What: /sys/bus/nd/devices/ndbusX/nfit/revision Date: Jun, 2015 KernelVersion: v4.2 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) ACPI NFIT table revision number. @@ -164,7 +164,7 @@ Description: What: /sys/bus/nd/devices/ndbusX/nfit/scrub Date: Sep, 2016 KernelVersion: v4.9 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RW) This shows the number of full Address Range Scrubs (ARS) that have been completed since driver load time. Userspace can @@ -177,7 +177,7 @@ Description: What: /sys/bus/nd/devices/ndbusX/nfit/hw_error_scrub Date: Sep, 2016 KernelVersion: v4.9 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RW) Provides a way to toggle the behavior between just adding the address (cache line) where the MCE happened to the poison @@ -196,7 +196,7 @@ Description: What: /sys/bus/nd/devices/ndbusX/nfit/dsm_mask Date: Jun, 2017 KernelVersion: v4.13 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) The bitmask indicates the supported bus specific control functions. See the section named 'NVDIMM Root Device _DSMs' in @@ -205,7 +205,7 @@ Description: What: /sys/bus/nd/devices/ndbusX/nfit/firmware_activate_noidle Date: Apr, 2020 KernelVersion: v5.8 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RW) The Intel platform implementation of firmware activate support exposes an option let the platform force idle devices in @@ -225,7 +225,7 @@ Description: What: /sys/bus/nd/devices/regionX/nfit/range_index Date: Jun, 2015 KernelVersion: v4.2 -Contact: linux-nvdimm@lists.01.org +Contact: nvdimm@lists.linux.dev Description: (RO) A unique number provided by the BIOS to identify an address range. Used by NVDIMM Region Mapping Structure to uniquely refer diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem index 8316c33862a0..92e2db0e2d3d 100644 --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -1,7 +1,7 @@ What: /sys/bus/nd/devices/nmemX/papr/flags Date: Apr, 2020 KernelVersion: v5.8 -Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org, +Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev, Description: (RO) Report flags indicating various states of a papr-pmem NVDIMM device. Each flag maps to a one or @@ -36,7 +36,7 @@ Description: What: /sys/bus/nd/devices/nmemX/papr/perf_stats Date: May, 2020 KernelVersion: v5.9 -Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org, +Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev, Description: (RO) Report various performance stats related to papr-scm NVDIMM device. Each stat is reported on a new line with each line diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module index a485434d2a0f..88bddf192ceb 100644 --- a/Documentation/ABI/testing/sysfs-module +++ b/Documentation/ABI/testing/sysfs-module @@ -37,13 +37,13 @@ Description: Maximum time allowed for periodic transfers per microframe (μs) What: /sys/module/*/{coresize,initsize} Date: Jan 2012 -KernelVersion:»·3.3 +KernelVersion: 3.3 Contact: Kay Sievers <kay.sievers@vrfy.org> Description: Module size in bytes. What: /sys/module/*/taint Date: Jan 2012 -KernelVersion:»·3.3 +KernelVersion: 3.3 Contact: Kay Sievers <kay.sievers@vrfy.org> Description: Module taint flags: == ===================== diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 1d56a6b73a4e..7ca8df5451d4 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -483,10 +483,11 @@ modprobe ======== The full path to the usermode helper for autoloading kernel modules, -by default "/sbin/modprobe". This binary is executed when the kernel -requests a module. For example, if userspace passes an unknown -filesystem type to mount(), then the kernel will automatically request -the corresponding filesystem module by executing this usermode helper. +by default ``CONFIG_MODPROBE_PATH``, which in turn defaults to +"/sbin/modprobe". This binary is executed when the kernel requests a +module. For example, if userspace passes an unknown filesystem type +to mount(), then the kernel will automatically request the +corresponding filesystem module by executing this usermode helper. This usermode helper should insert the needed module into the kernel. This sysctl only affects module autoloading. It has no effect on the diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst index 4f2452a95c43..07a97aa26668 100644 --- a/Documentation/block/data-integrity.rst +++ b/Documentation/block/data-integrity.rst @@ -1,4 +1,4 @@ -============== +============== Data Integrity ============== diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst index 70500b189cc8..5845960ca382 100644 --- a/Documentation/cdrom/cdrom-standard.rst +++ b/Documentation/cdrom/cdrom-standard.rst @@ -146,18 +146,18 @@ with the kernel as a block device by registering the following general *struct file_operations*:: struct file_operations cdrom_fops = { - NULL, /∗ lseek ∗/ - block _read , /∗ read—general block-dev read ∗/ - block _write, /∗ write—general block-dev write ∗/ - NULL, /∗ readdir ∗/ - NULL, /∗ select ∗/ - cdrom_ioctl, /∗ ioctl ∗/ - NULL, /∗ mmap ∗/ - cdrom_open, /∗ open ∗/ - cdrom_release, /∗ release ∗/ - NULL, /∗ fsync ∗/ - NULL, /∗ fasync ∗/ - NULL /∗ revalidate ∗/ + NULL, /* lseek */ + block _read , /* read--general block-dev read */ + block _write, /* write--general block-dev write */ + NULL, /* readdir */ + NULL, /* select */ + cdrom_ioctl, /* ioctl */ + NULL, /* mmap */ + cdrom_open, /* open */ + cdrom_release, /* release */ + NULL, /* fsync */ + NULL, /* fasync */ + NULL /* revalidate */ }; Every active CD-ROM device shares this *struct*. The routines @@ -250,12 +250,12 @@ The drive-specific, minor-like information that is registered with `cdrom.c`, currently contains the following fields:: struct cdrom_device_info { - const struct cdrom_device_ops * ops; /* device operations for this major */ + const struct cdrom_device_ops * ops; /* device operations for this major */ struct list_head list; /* linked list of all device_info */ struct gendisk * disk; /* matching block layer disk */ void * handle; /* driver-dependent data */ - int mask; /* mask of capability: disables them */ + int mask; /* mask of capability: disables them */ int speed; /* maximum speed for reading data */ int capacity; /* number of discs in a jukebox */ @@ -569,7 +569,7 @@ the *CDC_CLOSE_TRAY* bit in *mask*. In the file `cdrom.c` you will encounter many constructions of the type:: - if (cdo->capability & ∼cdi->mask & CDC _⟨capability⟩) ... + if (cdo->capability & ~cdi->mask & CDC _<capability>) ... There is no *ioctl* to set the mask... The reason is that I think it is better to control the **behavior** rather than the diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst index ef6d59e0978e..1d8302b89bd4 100644 --- a/Documentation/driver-api/nvdimm/nvdimm.rst +++ b/Documentation/driver-api/nvdimm/nvdimm.rst @@ -4,7 +4,7 @@ LIBNVDIMM: Non-Volatile Devices libnvdimm - kernel / libndctl - userspace helper library -linux-nvdimm@lists.01.org +nvdimm@lists.linux.dev Version 13 diff --git a/Documentation/driver-api/serial/index.rst b/Documentation/driver-api/serial/index.rst index 21351b8c95a4..8f7d7af3b90b 100644 --- a/Documentation/driver-api/serial/index.rst +++ b/Documentation/driver-api/serial/index.rst @@ -19,7 +19,6 @@ Serial drivers moxa-smartio n_gsm - rocket serial-iso7816 serial-rs485 diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst index 543e70434da2..820e867af45a 100644 --- a/Documentation/driver-api/usb/usb.rst +++ b/Documentation/driver-api/usb/usb.rst @@ -109,16 +109,19 @@ well as to make sure they aren't relying on some HCD-specific behavior. USB-Standard Types ================== -In ``drivers/usb/common/common.c`` and ``drivers/usb/common/debug.c`` you -will find the USB data types defined in chapter 9 of the USB specification. -These data types are used throughout USB, and in APIs including this host -side API, gadget APIs, usb character devices and debugfs interfaces. +In ``include/uapi/linux/usb/ch9.h`` you will find the USB data types defined +in chapter 9 of the USB specification. These data types are used throughout +USB, and in APIs including this host side API, gadget APIs, usb character +devices and debugfs interfaces. That file is itself included by +``include/linux/usb/ch9.h``, which also contains declarations of a few +utility routines for manipulating these data types; the implementations +are in ``drivers/usb/common/common.c``. .. kernel-doc:: drivers/usb/common/common.c :export: -.. kernel-doc:: drivers/usb/common/debug.c - :export: +In addition, some functions useful for creating debugging output are +defined in ``drivers/usb/common/debug.c``. Host-Side Data Types and Macros =============================== diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index bf145171c2bf..832839fcf4c3 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -50,8 +50,8 @@ Here is the main features of EROFS: - Support POSIX.1e ACLs by using xattrs; - - Support transparent file compression as an option: - LZ4 algorithm with 4 KB fixed-sized output compression for high performance. + - Support transparent data compression as an option: + LZ4 algorithm with the fixed-sized output compression for high performance. The following git tree provides the file system user-space tools under development (ex, formatting tool mkfs.erofs): @@ -113,31 +113,31 @@ may not. All metadatas can be now observed in two different spaces (views): :: - |-> aligned with 8B - |-> followed closely - + meta_blkaddr blocks |-> another slot - _____________________________________________________________________ - | ... | inode | xattrs | extents | data inline | ... | inode ... - |________|_______|(optional)|(optional)|__(optional)_|_____|__________ - |-> aligned with the inode slot size - . . - . . - . . - . . - . . - . . - .____________________________________________________|-> aligned with 4B - | xattr_ibody_header | shared xattrs | inline xattrs | - |____________________|_______________|_______________| - |-> 12 bytes <-|->x * 4 bytes<-| . - . . . - . . . - . . . - ._______________________________.______________________. - | id | id | id | id | ... | id | ent | ... | ent| ... | - |____|____|____|____|______|____|_____|_____|____|_____| - |-> aligned with 4B - |-> aligned with 4B + |-> aligned with 8B + |-> followed closely + + meta_blkaddr blocks |-> another slot + _____________________________________________________________________ + | ... | inode | xattrs | extents | data inline | ... | inode ... + |________|_______|(optional)|(optional)|__(optional)_|_____|__________ + |-> aligned with the inode slot size + . . + . . + . . + . . + . . + . . + .____________________________________________________|-> aligned with 4B + | xattr_ibody_header | shared xattrs | inline xattrs | + |____________________|_______________|_______________| + |-> 12 bytes <-|->x * 4 bytes<-| . + . . . + . . . + . . . + ._______________________________.______________________. + | id | id | id | id | ... | id | ent | ... | ent| ... | + |____|____|____|____|______|____|_____|_____|____|_____| + |-> aligned with 4B + |-> aligned with 4B Inode could be 32 or 64 bytes, which can be distinguished from a common field which all inode versions have -- i_format:: @@ -175,13 +175,13 @@ may not. All metadatas can be now observed in two different spaces (views): Each share xattr can also be directly found by the following formula: xattr offset = xattr_blkaddr * block_size + 4 * xattr_id - :: +:: - |-> aligned by 4 bytes - + xattr_blkaddr blocks |-> aligned with 4 bytes - _________________________________________________________________________ - | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ... - |________|_____________|_____________|_____|______________|_______________ + |-> aligned by 4 bytes + + xattr_blkaddr blocks |-> aligned with 4 bytes + _________________________________________________________________________ + | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ... + |________|_____________|_____________|_____|______________|_______________ Directories ----------- @@ -193,48 +193,77 @@ algorithm (could refer to the related source code). :: - ___________________________ - / | - / ______________|________________ - / / | nameoff1 | nameoffN-1 - ____________.______________._______________v________________v__________ - | dirent | dirent | ... | dirent | filename | filename | ... | filename | - |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____| - \ ^ - \ | * could have - \ | trailing '\0' - \________________________| nameoff0 - - Directory block + ___________________________ + / | + / ______________|________________ + / / | nameoff1 | nameoffN-1 + ____________.______________._______________v________________v__________ + | dirent | dirent | ... | dirent | filename | filename | ... | filename | + |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____| + \ ^ + \ | * could have + \ | trailing '\0' + \________________________| nameoff0 + Directory block Note that apart from the offset of the first filename, nameoff0 also indicates the total number of directory entries in this block since it is no need to introduce another on-disk field at all. -Compression ------------ -Currently, EROFS supports 4KB fixed-sized output transparent file compression, -as illustrated below:: - - |---- Variant-Length Extent ----|-------- VLE --------|----- VLE ----- - clusterofs clusterofs clusterofs - | | | logical data - _________v_______________________________v_____________________v_______________ - ... | . | | . | | . | ... - ____|____.________|_____________|________.____|_____________|__.__________|____ - |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-| - size size size size size - . . . . - . . . . - . . . . - _______._____________._____________._____________._____________________ - ... | | | | ... physical data - _______|_____________|_____________|_____________|_____________________ - |-> cluster <-|-> cluster <-|-> cluster <-| - size size size - -Currently each on-disk physical cluster can contain 4KB (un)compressed data -at most. For each logical cluster, there is a corresponding on-disk index to -describe its cluster type, physical cluster address, etc. - -See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details. +Data compression +---------------- +EROFS implements LZ4 fixed-sized output compression which generates fixed-sized +compressed data blocks from variable-sized input in contrast to other existing +fixed-sized input solutions. Relatively higher compression ratios can be gotten +by using fixed-sized output compression since nowadays popular data compression +algorithms are mostly LZ77-based and such fixed-sized output approach can be +benefited from the historical dictionary (aka. sliding window). + +In details, original (uncompressed) data is turned into several variable-sized +extents and in the meanwhile, compressed into physical clusters (pclusters). +In order to record each variable-sized extent, logical clusters (lclusters) are +introduced as the basic unit of compress indexes to indicate whether a new +extent is generated within the range (HEAD) or not (NONHEAD). Lclusters are now +fixed in block size, as illustrated below:: + + |<- variable-sized extent ->|<- VLE ->| + clusterofs clusterofs clusterofs + | | | + _________v_________________________________v_______________________v________ + ... | . | | . | | . ... + ____|____._________|______________|________.___ _|______________|__.________ + |-> lcluster <-|-> lcluster <-|-> lcluster <-|-> lcluster <-| + (HEAD) (NONHEAD) (HEAD) (NONHEAD) . + . CBLKCNT . . + . . . + . . . + _______._____________________________.______________._________________ + ... | | | | ... + _______|______________|______________|______________|_________________ + |-> big pcluster <-|-> pcluster <-| + +A physical cluster can be seen as a container of physical compressed blocks +which contains compressed data. Previously, only lcluster-sized (4KB) pclusters +were supported. After big pcluster feature is introduced (available since +Linux v5.13), pcluster can be a multiple of lcluster size. + +For each HEAD lcluster, clusterofs is recorded to indicate where a new extent +starts and blkaddr is used to seek the compressed data. For each NONHEAD +lcluster, delta0 and delta1 are available instead of blkaddr to indicate the +distance to its HEAD lcluster and the next HEAD lcluster. A PLAIN lcluster is +also a HEAD lcluster except that its data is uncompressed. See the comments +around "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details. + +If big pcluster is enabled, pcluster size in lclusters needs to be recorded as +well. Let the delta0 of the first NONHEAD lcluster store the compressed block +count with a special flag as a new called CBLKCNT NONHEAD lcluster. It's easy +to understand its delta0 is constantly 1, as illustrated below:: + + __________________________________________________________ + | HEAD | NONHEAD | NONHEAD | ... | NONHEAD | HEAD | HEAD | + |__:___|_(CBLKCNT)_|_________|_____|_________|__:___|____:_| + |<----- a big pcluster (with CBLKCNT) ------>|<-- -->| + a lcluster-sized pcluster (without CBLKCNT) ^ + +If another HEAD follows a HEAD lcluster, there is no room to record CBLKCNT, +but it's easy to know the size of such pcluster is 1 lcluster as well. diff --git a/Documentation/hwmon/tmp103.rst b/Documentation/hwmon/tmp103.rst index e195a7d14309..b3ef81475cf8 100644 --- a/Documentation/hwmon/tmp103.rst +++ b/Documentation/hwmon/tmp103.rst @@ -21,10 +21,10 @@ Description The TMP103 is a digital output temperature sensor in a four-ball wafer chip-scale package (WCSP). The TMP103 is capable of reading temperatures to a resolution of 1°C. The TMP103 is specified for -operation over a temperature range of –40°C to +125°C. +operation over a temperature range of -40°C to +125°C. Resolution: 8 Bits -Accuracy: ±1°C Typ (–10°C to +100°C) +Accuracy: ±1°C Typ (-10°C to +100°C) The driver provides the common sysfs-interface for temperatures (see Documentation/hwmon/sysfs-interface.rst under Temperatures). diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst index 8a9b18573688..2d3f6bd969a2 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst @@ -173,7 +173,7 @@ Director rule is added from ethtool (Sideband filter), ATR is turned off by the driver. To re-enable ATR, the sideband can be disabled with the ethtool -K option. For example:: - ethtool –K [adapter] ntuple [off|on] + ethtool -K [adapter] ntuple [off|on] If sideband is re-enabled after ATR is re-enabled, ATR remains enabled until a TCP-IP flow is added. When all TCP-IP sideband rules are deleted, ATR is @@ -688,7 +688,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates. Totals must be equal or less than port speed. For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network -monitoring tools such as ifstat or sar –n DEV [interval] [number of samples] +monitoring tools such as `ifstat` or `sar -n DEV [interval] [number of samples]` 2. Enable HW TC offload on interface:: diff --git a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst index 52e037b11c97..25330b7b5168 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst @@ -179,7 +179,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates. Totals must be equal or less than port speed. For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network -monitoring tools such as ifstat or sar –n DEV [interval] [number of samples] +monitoring tools such as ``ifstat`` or ``sar -n DEV [interval] [number of samples]`` NOTE: Setting up channels via ethtool (ethtool -L) is not supported when the diff --git a/Documentation/process/kernel-enforcement-statement.rst b/Documentation/process/kernel-enforcement-statement.rst index e5a1be476047..dc2d813b2e79 100644 --- a/Documentation/process/kernel-enforcement-statement.rst +++ b/Documentation/process/kernel-enforcement-statement.rst @@ -1,4 +1,4 @@ -.. _process_statement_kernel: +.. _process_statement_kernel: Linux Kernel Enforcement Statement ---------------------------------- diff --git a/Documentation/security/tpm/xen-tpmfront.rst b/Documentation/security/tpm/xen-tpmfront.rst index 00d5b1db227d..31c67522f2ad 100644 --- a/Documentation/security/tpm/xen-tpmfront.rst +++ b/Documentation/security/tpm/xen-tpmfront.rst @@ -1,4 +1,4 @@ -============================= +============================= Virtual TPM interface for Xen ============================= diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst index c4c70e1aada3..6cadad7c3aad 100644 --- a/Documentation/timers/no_hz.rst +++ b/Documentation/timers/no_hz.rst @@ -1,4 +1,4 @@ -====================================== +====================================== NO_HZ: Reducing Scheduling-Clock Ticks ====================================== diff --git a/Documentation/translations/zh_CN/SecurityBugs b/Documentation/translations/zh_CN/SecurityBugs deleted file mode 100644 index 2d0fffd122ce..000000000000 --- a/Documentation/translations/zh_CN/SecurityBugs +++ /dev/null @@ -1,50 +0,0 @@ -Chinese translated version of Documentation/admin-guide/security-bugs.rst - -If you have any comment or update to the content, please contact the -original document maintainer directly. However, if you have a problem -communicating in English you can also ask the Chinese maintainer for -help. Contact the Chinese maintainer if this translation is outdated -or if there is a problem with the translation. - -Chinese maintainer: Harry Wei <harryxiyou@gmail.com> ---------------------------------------------------------------------- -Documentation/admin-guide/security-bugs.rst 的中文翻译 - -如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 -交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 -译存在问题,请联系中文版维护者。 - -中文版维护者: 贾威威 Harry Wei <harryxiyou@gmail.com> -中文版翻译者: 贾威威 Harry Wei <harryxiyou@gmail.com> -中文版校译者: 贾威威 Harry Wei <harryxiyou@gmail.com> - - -以下为正文 ---------------------------------------------------------------------- -Linux内核开发者认为安全非常重要。因此,我们想要知道当一个有关于 -安全的漏洞被发现的时候,并且它可能会被尽快的修复或者公开。请把这个安全 -漏洞报告给Linux内核安全团队。 - -1) 联系 - -linux内核安全团队可以通过email<security@kernel.org>来联系。这是 -一组独立的安全工作人员,可以帮助改善漏洞报告并且公布和取消一个修复。安 -全团队有可能会从部分的维护者那里引进额外的帮助来了解并且修复安全漏洞。 -当遇到任何漏洞,所能提供的信息越多就越能诊断和修复。如果你不清楚什么 -是有帮助的信息,那就请重温一下admin-guide/reporting-bugs.rst文件中的概述过程。任 -何攻击性的代码都是非常有用的,未经报告者的同意不会被取消,除非它已经 -被公布于众。 - -2) 公开 - -Linux内核安全团队的宗旨就是和漏洞提交者一起处理漏洞的解决方案直 -到公开。我们喜欢尽快地完全公开漏洞。当一个漏洞或者修复还没有被完全地理 -解,解决方案没有通过测试或者供应商协调,可以合理地延迟公开。然而,我们 -期望这些延迟尽可能的短些,是可数的几天,而不是几个星期或者几个月。公开 -日期是通过安全团队和漏洞提供者以及供应商洽谈后的结果。公开时间表是从很 -短(特殊的,它已经被公众所知道)到几个星期。作为一个基本的默认政策,我 -们所期望通知公众的日期是7天的安排。 - -3) 保密协议 - -Linux内核安全团队不是一个正式的团体,因此不能加入任何的保密协议。 diff --git a/Documentation/usb/gadget_configfs.rst b/Documentation/usb/gadget_configfs.rst index 158e48dab586..e4566ffb223f 100644 --- a/Documentation/usb/gadget_configfs.rst +++ b/Documentation/usb/gadget_configfs.rst @@ -140,7 +140,7 @@ is an arbitrary string allowed in a filesystem, e.g.:: Each function provides its specific set of attributes, with either read-only or read-write access. Where applicable they need to be written to as appropriate. -Please refer to Documentation/ABI/*/configfs-usb-gadget* for more information. +Please refer to Documentation/ABI/testing/configfs-usb-gadget for more information. 4. Associating the functions with their configurations ------------------------------------------------------ diff --git a/Documentation/usb/mtouchusb.rst b/Documentation/usb/mtouchusb.rst index d1111b74bf75..5ae1f74fe74b 100644 --- a/Documentation/usb/mtouchusb.rst +++ b/Documentation/usb/mtouchusb.rst @@ -1,4 +1,4 @@ -================ +================ mtouchusb driver ================ diff --git a/Documentation/usb/usb-serial.rst b/Documentation/usb/usb-serial.rst index 8fa7dbd3da9a..69586aeb60bb 100644 --- a/Documentation/usb/usb-serial.rst +++ b/Documentation/usb/usb-serial.rst @@ -1,4 +1,4 @@ -========== +========== USB serial ========== diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst index 5ec8a1902e15..5c081c8c7164 100644 --- a/Documentation/virt/kvm/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/amd-memory-encryption.rst @@ -22,7 +22,7 @@ to SEV:: [ecx]: Bits[31:0] Number of encrypted guests supported simultaneously -If support for SEV is present, MSR 0xc001_0010 (MSR_K8_SYSCFG) and MSR 0xc001_0015 +If support for SEV is present, MSR 0xc001_0010 (MSR_AMD64_SYSCFG) and MSR 0xc001_0015 (MSR_K7_HWCR) can be used to determine if it can be enabled:: 0xc001_0010: diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e328caa35d6c..dd3fe231e435 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6410,6 +6410,24 @@ default. See Documentation/x86/sgx/2.Kernel-internals.rst for more details. +7.26 KVM_CAP_PPC_RPT_INVALIDATE +------------------------------- + +:Capability: KVM_CAP_PPC_RPT_INVALIDATE +:Architectures: ppc +:Type: vm + +This capability indicates that the kernel is capable of handling +H_RPT_INVALIDATE hcall. + +In order to enable the use of H_RPT_INVALIDATE in the guest, +user space might have to advertise it for the guest. For example, +IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is +present in the "ibm,hypertas-functions" device-tree property. + +This capability is enabled for hypervisors on platforms like POWER9 +that support radix MMU. + 8. Other capabilities. ====================== diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/x86/amd-memory-encryption.rst index c48d452d0718..a1940ebe7be5 100644 --- a/Documentation/x86/amd-memory-encryption.rst +++ b/Documentation/x86/amd-memory-encryption.rst @@ -53,7 +53,7 @@ CPUID function 0x8000001f reports information related to SME:: system physical addresses, not guest physical addresses) -If support for SME is present, MSR 0xc00100010 (MSR_K8_SYSCFG) can be used to +If support for SME is present, MSR 0xc00100010 (MSR_AMD64_SYSCFG) can be used to determine if SME is enabled and/or to enable memory encryption:: 0xc0010010: @@ -79,7 +79,7 @@ The state of SME in the Linux kernel can be documented as follows: The CPU supports SME (determined through CPUID instruction). - Enabled: - Supported and bit 23 of MSR_K8_SYSCFG is set. + Supported and bit 23 of MSR_AMD64_SYSCFG is set. - Active: Supported, Enabled and the Linux kernel is actively applying @@ -89,7 +89,7 @@ The state of SME in the Linux kernel can be documented as follows: SME can also be enabled and activated in the BIOS. If SME is enabled and activated in the BIOS, then all memory accesses will be encrypted and it will not be necessary to activate the Linux memory encryption support. If the BIOS -merely enables SME (sets bit 23 of the MSR_K8_SYSCFG), then Linux can activate +merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or by supplying mem_encrypt=on on the kernel command line. However, if BIOS does not enable SME, then Linux will not be able to activate memory encryption, even diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f..008fcad7ac00 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1578,7 +1578,7 @@ F: drivers/clk/sunxi/ ARM/Allwinner sunXi SoC support M: Maxime Ripard <mripard@kernel.org> M: Chen-Yu Tsai <wens@csie.org> -R: Jernej Skrabec <jernej.skrabec@siol.net> +R: Jernej Skrabec <jernej.skrabec@gmail.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git @@ -5089,7 +5089,7 @@ S: Maintained F: drivers/net/fddi/defza.* DEINTERLACE DRIVERS FOR ALLWINNER H3 -M: Jernej Skrabec <jernej.skrabec@siol.net> +M: Jernej Skrabec <jernej.skrabec@gmail.com> L: linux-media@vger.kernel.org S: Maintained T: git git://linuxtv.org/media_tree.git @@ -5237,7 +5237,7 @@ DEVICE DIRECT ACCESS (DAX) M: Dan Williams <dan.j.williams@intel.com> M: Vishal Verma <vishal.l.verma@intel.com> M: Dave Jiang <dave.jiang@intel.com> -L: linux-nvdimm@lists.01.org +L: nvdimm@lists.linux.dev S: Supported F: drivers/dax/ @@ -5632,14 +5632,14 @@ F: include/linux/power/smartreflex.h DRM DRIVER FOR ALLWINNER DE2 AND DE3 ENGINE M: Maxime Ripard <mripard@kernel.org> M: Chen-Yu Tsai <wens@csie.org> -R: Jernej Skrabec <jernej.skrabec@siol.net> +R: Jernej Skrabec <jernej.skrabec@gmail.com> L: dri-devel@lists.freedesktop.org S: Supported T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/sun4i/sun8i* DRM DRIVER FOR ARM PL111 CLCD -M: Eric Anholt <eric@anholt.net> +M: Emma Anholt <emma@anholt.net> S: Supported T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/pl111/ @@ -5719,7 +5719,7 @@ T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/tiny/gm12u320.c DRM DRIVER FOR HX8357D PANELS -M: Eric Anholt <eric@anholt.net> +M: Emma Anholt <emma@anholt.net> S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: Documentation/devicetree/bindings/display/himax,hx8357d.txt @@ -6023,7 +6023,7 @@ M: Neil Armstrong <narmstrong@baylibre.com> M: Robert Foss <robert.foss@linaro.org> R: Laurent Pinchart <Laurent.pinchart@ideasonboard.com> R: Jonas Karlman <jonas@kwiboo.se> -R: Jernej Skrabec <jernej.skrabec@siol.net> +R: Jernej Skrabec <jernej.skrabec@gmail.com> S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/bridge/ @@ -6177,7 +6177,7 @@ F: Documentation/devicetree/bindings/display/ti/ F: drivers/gpu/drm/omapdrm/ DRM DRIVERS FOR V3D -M: Eric Anholt <eric@anholt.net> +M: Emma Anholt <emma@anholt.net> S: Supported T: git git://anongit.freedesktop.org/drm/drm-misc F: Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml @@ -6185,7 +6185,7 @@ F: drivers/gpu/drm/v3d/ F: include/uapi/drm/v3d_drm.h DRM DRIVERS FOR VC4 -M: Eric Anholt <eric@anholt.net> +M: Emma Anholt <emma@anholt.net> M: Maxime Ripard <mripard@kernel.org> S: Supported T: git git://github.com/anholt/linux @@ -7006,7 +7006,7 @@ M: Dan Williams <dan.j.williams@intel.com> R: Matthew Wilcox <willy@infradead.org> R: Jan Kara <jack@suse.cz> L: linux-fsdevel@vger.kernel.org -L: linux-nvdimm@lists.01.org +L: nvdimm@lists.linux.dev S: Supported F: fs/dax.c F: include/linux/dax.h @@ -10378,7 +10378,7 @@ LIBNVDIMM BLK: MMIO-APERTURE DRIVER M: Dan Williams <dan.j.williams@intel.com> M: Vishal Verma <vishal.l.verma@intel.com> M: Dave Jiang <dave.jiang@intel.com> -L: linux-nvdimm@lists.01.org +L: nvdimm@lists.linux.dev S: Supported Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ P: Documentation/nvdimm/maintainer-entry-profile.rst @@ -10389,7 +10389,7 @@ LIBNVDIMM BTT: BLOCK TRANSLATION TABLE M: Vishal Verma <vishal.l.verma@intel.com> M: Dan Williams <dan.j.williams@intel.com> M: Dave Jiang <dave.jiang@intel.com> -L: linux-nvdimm@lists.01.org +L: nvdimm@lists.linux.dev S: Supported Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ P: Documentation/nvdimm/maintainer-entry-profile.rst @@ -10399,7 +10399,7 @@ LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER M: Dan Williams <dan.j.williams@intel.com> M: Vishal Verma <vishal.l.verma@intel.com> M: Dave Jiang <dave.jiang@intel.com> -L: linux-nvdimm@lists.01.org +L: nvdimm@lists.linux.dev S: Supported Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ P: Documentation/nvdimm/maintainer-entry-profile.rst @@ -10407,7 +10407,7 @@ F: drivers/nvdimm/pmem* LIBNVDIMM: DEVICETREE BINDINGS M: Oliver O'Halloran <oohall@gmail.com> -L: linux-nvdimm@lists.01.org +L: nvdimm@lists.linux.dev S: Supported Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ F: Documentation/devicetree/bindings/pmem/pmem-region.txt @@ -10418,7 +10418,7 @@ M: Dan Williams <dan.j.williams@intel.com> M: Vishal Verma <vishal.l.verma@intel.com> M: Dave Jiang <dave.jiang@intel.com> M: Ira Weiny <ira.weiny@intel.com> -L: linux-nvdimm@lists.01.org +L: nvdimm@lists.linux.dev S: Supported Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ P: Documentation/nvdimm/maintainer-entry-profile.rst @@ -15815,7 +15815,7 @@ F: include/uapi/linux/rose.h F: net/rose/ ROTATION DRIVER FOR ALLWINNER A83T -M: Jernej Skrabec <jernej.skrabec@siol.net> +M: Jernej Skrabec <jernej.skrabec@gmail.com> L: linux-media@vger.kernel.org S: Maintained T: git git://linuxtv.org/media_tree.git @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Frozen Wasteland # *DOCUMENTATION* diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 4392c9c189c4..e47adc97a89b 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -31,7 +31,7 @@ endif ifdef CONFIG_ARC_CURR_IN_REG -# For a global register defintion, make sure it gets passed to every file +# For a global register definition, make sure it gets passed to every file # We had a customer reported bug where some code built in kernel was NOT using # any kernel headers, and missing the r25 global register # Can't do unconditionally because of recursive include issues diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 9b87e162e539..dfeffa25499b 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -116,7 +116,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, * * Technically the lock is also needed for UP (boils down to irq save/restore) * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to - * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg() + * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg() * Other way around, xchg is one instruction anyways, so can't be interrupted * as such */ @@ -143,7 +143,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, /* * "atomic" variant of xchg() * REQ: It needs to follow the same serialization rules as other atomic_xxx() - * Since xchg() doesn't always do that, it would seem that following defintion + * Since xchg() doesn't always do that, it would seem that following definition * is incorrect. But here's the rationale: * SMP : Even xchg() takes the atomic_ops_lock, so OK. * LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index ad9b7fe4dba3..4a9d33372fe2 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -7,6 +7,18 @@ #include <uapi/asm/page.h> +#ifdef CONFIG_ARC_HAS_PAE40 + +#define MAX_POSSIBLE_PHYSMEM_BITS 40 +#define PAGE_MASK_PHYS (0xff00000000ull | PAGE_MASK) + +#else /* CONFIG_ARC_HAS_PAE40 */ + +#define MAX_POSSIBLE_PHYSMEM_BITS 32 +#define PAGE_MASK_PHYS PAGE_MASK + +#endif /* CONFIG_ARC_HAS_PAE40 */ + #ifndef __ASSEMBLY__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 163641726a2b..5878846f00cf 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -107,8 +107,8 @@ #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL) - +#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_SPECIAL) /* More Abbrevaited helpers */ #define PAGE_U_NONE __pgprot(___DEF) #define PAGE_U_R __pgprot(___DEF | _PAGE_READ) @@ -132,13 +132,7 @@ #define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) -#ifdef CONFIG_ARC_HAS_PAE40 -#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) -#define MAX_POSSIBLE_PHYSMEM_BITS 40 -#else -#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) -#define MAX_POSSIBLE_PHYSMEM_BITS 32 -#endif +#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index 2a97e2718a21..2a4ad619abfb 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -33,5 +33,4 @@ #define PAGE_MASK (~(PAGE_SIZE-1)) - #endif /* _UAPI__ASM_ARC_PAGE_H */ diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 1743506081da..2cb8dfe866b6 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -177,7 +177,7 @@ tracesys: ; Do the Sys Call as we normally would. ; Validate the Sys Call number - cmp r8, NR_syscalls + cmp r8, NR_syscalls - 1 mov.hi r0, -ENOSYS bhi tracesys_exit @@ -255,7 +255,7 @@ ENTRY(EV_Trap) ;============ Normal syscall case ; syscall num shd not exceed the total system calls avail - cmp r8, NR_syscalls + cmp r8, NR_syscalls - 1 mov.hi r0, -ENOSYS bhi .Lret_from_system_call diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index ecfbc42d3a40..345a0000554c 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -140,6 +140,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ptr = &remcomInBuffer[1]; if (kgdb_hex2long(&ptr, &addr)) regs->ret = addr; + fallthrough; case 'D': case 'k': diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index d838d0d57696..3793876f42d9 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -50,14 +50,14 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) int ret; /* - * This is only for old cores lacking LLOCK/SCOND, which by defintion + * This is only for old cores lacking LLOCK/SCOND, which by definition * can't possibly be SMP. Thus doesn't need to be SMP safe. * And this also helps reduce the overhead for serializing in * the UP case */ WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP)); - /* Z indicates to userspace if operation succeded */ + /* Z indicates to userspace if operation succeeded */ regs->status32 &= ~STATUS_Z_MASK; ret = access_ok(uaddr, sizeof(*uaddr)); @@ -107,7 +107,7 @@ fail: void arch_cpu_idle(void) { - /* Re-enable interrupts <= default irq priority before commiting SLEEP */ + /* Re-enable interrupts <= default irq priority before committing SLEEP */ const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO; __asm__ __volatile__( @@ -120,7 +120,7 @@ void arch_cpu_idle(void) void arch_cpu_idle(void) { - /* sleep, but enable both set E1/E2 (levels of interrutps) before committing */ + /* sleep, but enable both set E1/E2 (levels of interrupts) before committing */ __asm__ __volatile__("sleep 0x3 \n"); } diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index fdbe06c98895..b3ccb9e5ffe4 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -259,7 +259,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) regs->r2 = (unsigned long)&sf->uc; /* - * small optim to avoid unconditonally calling do_sigaltstack + * small optim to avoid unconditionally calling do_sigaltstack * in sigreturn path, now that we only have rt_sigreturn */ magic = MAGIC_SIGALTSTK; @@ -391,7 +391,7 @@ void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs) { /* - * ASM glue gaurantees that this is only called when returning to + * ASM glue guarantees that this is only called when returning to * user mode */ if (test_thread_flag(TIF_NOTIFY_RESUME)) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 33832e36bdb7..e2ed355438c9 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -157,7 +157,16 @@ void __init setup_arch_memory(void) min_high_pfn = PFN_DOWN(high_mem_start); max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); - max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn; + /* + * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE. + * For HIGHMEM without PAE max_high_pfn should be less than + * min_low_pfn to guarantee that these two regions don't overlap. + * For PAE case highmem is greater than lowmem, so it is natural + * to use max_high_pfn. + * + * In both cases, holes should be handled by pfn_valid(). + */ + max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; high_memory = (void *)(min_high_pfn << PAGE_SHIFT); diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index fac4adc90204..95c649fbc95a 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap); void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, unsigned long flags) { + unsigned int off; unsigned long vaddr; struct vm_struct *area; - phys_addr_t off, end; + phys_addr_t end; pgprot_t prot = __pgprot(flags); /* Don't allow wraparound, zero size */ @@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, /* Mappings have to be page-aligned */ off = paddr & ~PAGE_MASK; - paddr &= PAGE_MASK; + paddr &= PAGE_MASK_PHYS; size = PAGE_ALIGN(end + 1) - paddr; /* diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 9bb3c24f3677..9c7c68247289 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -576,7 +576,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, pte_t *ptep) { unsigned long vaddr = vaddr_unaligned & PAGE_MASK; - phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS; struct page *page = pfn_to_page(pte_pfn(*ptep)); create_tlb(vma, vaddr, ptep); diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index f8f07469d259..a7e54a087b80 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -135,24 +135,18 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) return; } -int xen_swiotlb_detect(void) -{ - if (!xen_domain()) - return 0; - if (xen_feature(XENFEAT_direct_mapped)) - return 1; - /* legacy case */ - if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain()) - return 1; - return 0; -} - static int __init xen_mm_init(void) { struct gnttab_cache_flush cflush; + int rc; + if (!xen_swiotlb_detect()) return 0; - xen_swiotlb_init(); + + rc = xen_swiotlb_init(); + /* we can work with the default swiotlb */ + if (rc < 0 && rc != -EEXIST) + return rc; cflush.op = 0; cflush.a.dev_bus_addr = 0; diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7ef44478560d..b52481f0605d 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -175,6 +175,9 @@ vdso_install: $(if $(CONFIG_COMPAT_VDSO), \ $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@) +archprepare: + $(Q)$(MAKE) $(build)=arch/arm64/tools kapi + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 07ac208edc89..26889dbfe904 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -5,3 +5,5 @@ generic-y += qrwlock.h generic-y += qspinlock.h generic-y += set_memory.h generic-y += user.h + +generated-y += cpucaps.h diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h deleted file mode 100644 index b0c5eda0498f..000000000000 --- a/arch/arm64/include/asm/cpucaps.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm64/include/asm/cpucaps.h - * - * Copyright (C) 2016 ARM Ltd. - */ -#ifndef __ASM_CPUCAPS_H -#define __ASM_CPUCAPS_H - -#define ARM64_WORKAROUND_CLEAN_CACHE 0 -#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 -#define ARM64_WORKAROUND_845719 2 -#define ARM64_HAS_SYSREG_GIC_CPUIF 3 -#define ARM64_HAS_PAN 4 -#define ARM64_HAS_LSE_ATOMICS 5 -#define ARM64_WORKAROUND_CAVIUM_23154 6 -#define ARM64_WORKAROUND_834220 7 -#define ARM64_HAS_NO_HW_PREFETCH 8 -#define ARM64_HAS_VIRT_HOST_EXTN 11 -#define ARM64_WORKAROUND_CAVIUM_27456 12 -#define ARM64_HAS_32BIT_EL0 13 -#define ARM64_SPECTRE_V3A 14 -#define ARM64_HAS_CNP 15 -#define ARM64_HAS_NO_FPSIMD 16 -#define ARM64_WORKAROUND_REPEAT_TLBI 17 -#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 -#define ARM64_WORKAROUND_858921 19 -#define ARM64_WORKAROUND_CAVIUM_30115 20 -#define ARM64_HAS_DCPOP 21 -#define ARM64_SVE 22 -#define ARM64_UNMAP_KERNEL_AT_EL0 23 -#define ARM64_SPECTRE_V2 24 -#define ARM64_HAS_RAS_EXTN 25 -#define ARM64_WORKAROUND_843419 26 -#define ARM64_HAS_CACHE_IDC 27 -#define ARM64_HAS_CACHE_DIC 28 -#define ARM64_HW_DBM 29 -#define ARM64_SPECTRE_V4 30 -#define ARM64_MISMATCHED_CACHE_TYPE 31 -#define ARM64_HAS_STAGE2_FWB 32 -#define ARM64_HAS_CRC32 33 -#define ARM64_SSBS 34 -#define ARM64_WORKAROUND_1418040 35 -#define ARM64_HAS_SB 36 -#define ARM64_WORKAROUND_SPECULATIVE_AT 37 -#define ARM64_HAS_ADDRESS_AUTH_ARCH 38 -#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 -#define ARM64_HAS_GENERIC_AUTH_ARCH 40 -#define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41 -#define ARM64_HAS_IRQ_PRIO_MASKING 42 -#define ARM64_HAS_DCPODP 43 -#define ARM64_WORKAROUND_1463225 44 -#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 -#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 -#define ARM64_WORKAROUND_1542419 47 -#define ARM64_HAS_E0PD 48 -#define ARM64_HAS_RNG 49 -#define ARM64_HAS_AMU_EXTN 50 -#define ARM64_HAS_ADDRESS_AUTH 51 -#define ARM64_HAS_GENERIC_AUTH 52 -#define ARM64_HAS_32BIT_EL1 53 -#define ARM64_BTI 54 -#define ARM64_HAS_ARMv8_4_TTL 55 -#define ARM64_HAS_TLB_RANGE 56 -#define ARM64_MTE 57 -#define ARM64_WORKAROUND_1508412 58 -#define ARM64_HAS_LDAPR 59 -#define ARM64_KVM_PROTECTED_MODE 60 -#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP 61 -#define ARM64_HAS_EPAN 62 - -#define ARM64_NCAPS 63 - -#endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index ac485163a4a7..6d44c028d1c9 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -55,8 +55,10 @@ void __sync_icache_dcache(pte_t pte) { struct page *page = pte_page(pte); - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + if (!test_bit(PG_dcache_clean, &page->flags)) { sync_icache_aliases(page_address(page), page_size(page)); + set_bit(PG_dcache_clean, &page->flags); + } } EXPORT_SYMBOL_GPL(__sync_icache_dcache); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 16a2b2b1c54d..e55409caaee3 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -43,6 +43,7 @@ #include <linux/sizes.h> #include <asm/tlb.h> #include <asm/alternative.h> +#include <asm/xen/swiotlb-xen.h> /* * We need to be able to catch inadvertent references to memstart_addr @@ -482,7 +483,7 @@ void __init mem_init(void) if (swiotlb_force == SWIOTLB_FORCE || max_pfn > PFN_DOWN(arm64_dma_phys_limit)) swiotlb_init(1); - else + else if (!xen_swiotlb_detect()) swiotlb_force = SWIOTLB_NO_FORCE; set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 0a48191534ff..97d7bcd8d4f2 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -447,6 +447,18 @@ SYM_FUNC_START(__cpu_setup) mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK) msr_s SYS_GCR_EL1, x10 + /* + * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then + * RGSR_EL1.SEED must be non-zero for IRG to produce + * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we + * must initialize it. + */ + mrs x10, CNTVCT_EL0 + ands x10, x10, #SYS_RGSR_EL1_SEED_MASK + csinc x10, x10, xzr, ne + lsl x10, x10, #SYS_RGSR_EL1_SEED_SHIFT + msr_s SYS_RGSR_EL1, x10 + /* clear any pending tag check faults in TFSR*_EL1 */ msr_s SYS_TFSR_EL1, xzr msr_s SYS_TFSRE0_EL1, xzr diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile new file mode 100644 index 000000000000..932b4fe5c768 --- /dev/null +++ b/arch/arm64/tools/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 + +gen := arch/$(ARCH)/include/generated +kapi := $(gen)/asm + +kapi-hdrs-y := $(kapi)/cpucaps.h + +targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y)) + +PHONY += kapi + +kapi: $(kapi-hdrs-y) $(gen-y) + +# Create output directory if not already present +_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') + +quiet_cmd_gen_cpucaps = GEN $@ + cmd_gen_cpucaps = mkdir -p $(dir $@) && \ + $(AWK) -f $(filter-out $(PHONY),$^) > $@ + +$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE + $(call if_changed,gen_cpucaps) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps new file mode 100644 index 000000000000..21fbdda7086e --- /dev/null +++ b/arch/arm64/tools/cpucaps @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Internal CPU capabilities constants, keep this list sorted + +BTI +HAS_32BIT_EL0 +HAS_32BIT_EL1 +HAS_ADDRESS_AUTH +HAS_ADDRESS_AUTH_ARCH +HAS_ADDRESS_AUTH_IMP_DEF +HAS_AMU_EXTN +HAS_ARMv8_4_TTL +HAS_CACHE_DIC +HAS_CACHE_IDC +HAS_CNP +HAS_CRC32 +HAS_DCPODP +HAS_DCPOP +HAS_E0PD +HAS_EPAN +HAS_GENERIC_AUTH +HAS_GENERIC_AUTH_ARCH +HAS_GENERIC_AUTH_IMP_DEF +HAS_IRQ_PRIO_MASKING +HAS_LDAPR +HAS_LSE_ATOMICS +HAS_NO_FPSIMD +HAS_NO_HW_PREFETCH +HAS_PAN +HAS_RAS_EXTN +HAS_RNG +HAS_SB +HAS_STAGE2_FWB +HAS_SYSREG_GIC_CPUIF +HAS_TLB_RANGE +HAS_VIRT_HOST_EXTN +HW_DBM +KVM_PROTECTED_MODE +MISMATCHED_CACHE_TYPE +MTE +SPECTRE_V2 +SPECTRE_V3A +SPECTRE_V4 +SSBS +SVE +UNMAP_KERNEL_AT_EL0 +WORKAROUND_834220 +WORKAROUND_843419 +WORKAROUND_845719 +WORKAROUND_858921 +WORKAROUND_1418040 +WORKAROUND_1463225 +WORKAROUND_1508412 +WORKAROUND_1542419 +WORKAROUND_CAVIUM_23154 +WORKAROUND_CAVIUM_27456 +WORKAROUND_CAVIUM_30115 +WORKAROUND_CAVIUM_TX2_219_PRFM +WORKAROUND_CAVIUM_TX2_219_TVM +WORKAROUND_CLEAN_CACHE +WORKAROUND_DEVICE_LOAD_ACQUIRE +WORKAROUND_NVIDIA_CARMEL_CNP +WORKAROUND_QCOM_FALKOR_E1003 +WORKAROUND_REPEAT_TLBI +WORKAROUND_SPECULATIVE_AT diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk new file mode 100755 index 000000000000..00c9e72a200a --- /dev/null +++ b/arch/arm64/tools/gen-cpucaps.awk @@ -0,0 +1,40 @@ +#!/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 +# gen-cpucaps.awk: arm64 cpucaps header generator +# +# Usage: awk -f gen-cpucaps.awk cpucaps.txt + +# Log an error and terminate +function fatal(msg) { + print "Error at line " NR ": " msg > "/dev/stderr" + exit 1 +} + +# skip blank lines and comment lines +/^$/ { next } +/^#/ { next } + +BEGIN { + print "#ifndef __ASM_CPUCAPS_H" + print "#define __ASM_CPUCAPS_H" + print "" + print "/* Generated file - do not edit */" + cap_num = 0 + print "" +} + +/^[vA-Z0-9_]+$/ { + printf("#define ARM64_%-30s\t%d\n", $0, cap_num++) + next +} + +END { + printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num) + print "" + print "#endif /* __ASM_CPUCAPS_H */" +} + +# Any lines not handled by previous rules are unexpected +{ + fatal("unhandled statement") +} diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 1c7b75834e04..02ee6f5ac9fe 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -120,6 +120,7 @@ extern s32 patch__call_flush_branch_caches3; extern s32 patch__flush_count_cache_return; extern s32 patch__flush_link_stack_return; extern s32 patch__call_kvm_flush_link_stack; +extern s32 patch__call_kvm_flush_link_stack_p9; extern s32 patch__memset_nocache, patch__memcpy_nocache; extern long flush_branch_caches; @@ -140,7 +141,7 @@ void kvmhv_load_host_pmu(void); void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use); void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu); -int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu); +void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu); long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr); long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr, diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index eace8c3f7b0a..c02f42d1031e 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -19,6 +19,7 @@ struct mmu_psize_def { int penc[MMU_PAGE_COUNT]; /* HPTE encoding */ unsigned int tlbiel; /* tlbiel supported for that page size */ unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */ + unsigned long h_rpt_pgsize; /* H_RPT_INVALIDATE page size encoding */ union { unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */ unsigned long ap; /* Ap encoding used by PowerISA 3.0 */ diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 8b33601cdb9d..a46fd37ad552 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -4,6 +4,10 @@ #include <asm/hvcall.h> +#define RIC_FLUSH_TLB 0 +#define RIC_FLUSH_PWC 1 +#define RIC_FLUSH_ALL 2 + struct vm_area_struct; struct mm_struct; struct mmu_gather; diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index 98c8bd155bf9..b167186aaee4 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -98,6 +98,36 @@ static inline int cpu_last_thread_sibling(int cpu) return cpu | (threads_per_core - 1); } +/* + * tlb_thread_siblings are siblings which share a TLB. This is not + * architected, is not something a hypervisor could emulate and a future + * CPU may change behaviour even in compat mode, so this should only be + * used on PowerNV, and only with care. + */ +static inline int cpu_first_tlb_thread_sibling(int cpu) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return cpu & ~0x6; /* Big Core */ + else + return cpu_first_thread_sibling(cpu); +} + +static inline int cpu_last_tlb_thread_sibling(int cpu) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return cpu | 0x6; /* Big Core */ + else + return cpu_last_thread_sibling(cpu); +} + +static inline int cpu_tlb_thread_sibling_step(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return 2; /* Big Core */ + else + return 1; +} + static inline u32 get_tensr(void) { #ifdef CONFIG_BOOKE diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index c1a8aac01cf9..bb6f78fcf981 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -35,6 +35,19 @@ /* PACA save area size in u64 units (exgen, exmc, etc) */ #define EX_SIZE 10 +/* PACA save area offsets */ +#define EX_R9 0 +#define EX_R10 8 +#define EX_R11 16 +#define EX_R12 24 +#define EX_R13 32 +#define EX_DAR 40 +#define EX_DSISR 48 +#define EX_CCR 52 +#define EX_CFAR 56 +#define EX_PPR 64 +#define EX_CTR 72 + /* * maximum recursive depth of MCE exceptions */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 443050906018..7e4b2cef40c2 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -413,9 +413,9 @@ #define H_RPTI_TYPE_NESTED 0x0001 /* Invalidate nested guest partition-scope */ #define H_RPTI_TYPE_TLB 0x0002 /* Invalidate TLB */ #define H_RPTI_TYPE_PWC 0x0004 /* Invalidate Page Walk Cache */ -/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */ +/* Invalidate caching of Process Table Entries if H_RPTI_TYPE_NESTED is clear */ #define H_RPTI_TYPE_PRT 0x0008 -/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */ +/* Invalidate caching of Partition Table Entries if H_RPTI_TYPE_NESTED is set */ #define H_RPTI_TYPE_PAT 0x0008 #define H_RPTI_TYPE_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \ H_RPTI_TYPE_PRT) @@ -448,6 +448,9 @@ */ long plpar_hcall_norets(unsigned long opcode, ...); +/* Variant which does not do hcall tracing */ +long plpar_hcall_norets_notrace(unsigned long opcode, ...); + /** * plpar_hcall: - Make a pseries hypervisor call * @opcode: The hypervisor call to make. diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 44cde2e129b8..59f704408d65 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -153,8 +153,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { - if (user_mode(regs)) - kuep_unlock(); } static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) @@ -222,6 +220,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte local_paca->irq_soft_mask = IRQS_ALL_DISABLED; local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) && + regs->nip < (unsigned long)__end_interrupts) { + // Kernel code running below __end_interrupts is + // implicitly soft-masked. + regs->softe = IRQS_ALL_DISABLED; + } + /* Don't do any per-CPU operations until interrupt state is fixed */ if (nmi_disables_ftrace(regs)) { diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index a3633560493b..fbbf3cec92e9 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -147,6 +147,7 @@ #define KVM_GUEST_MODE_SKIP 2 #define KVM_GUEST_MODE_GUEST_HV 3 #define KVM_GUEST_MODE_HOST_HV 4 +#define KVM_GUEST_MODE_HV_P9 5 /* ISA >= v3.0 path */ #define KVM_INST_FETCH_FAILED -1 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index e6b53c6e21e3..caaa0f592d8e 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -307,6 +307,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1); void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu); +long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid, + unsigned long type, unsigned long pg_sizes, + unsigned long start, unsigned long end); int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr); void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 9bb9bb370b53..eaf3a562bf1e 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -153,10 +153,18 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu) return radix; } +int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr); + #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #endif /* + * Invalid HDSISR value which is used to indicate when HW has not set the reg. + * Used to work around an errata. + */ +#define HDSISR_CANARY 0x7fff + +/* * We use a lock bit in HPTE dword 0 to synchronize updates and * accesses to each HPTE, and another bit to indicate non-present * HPTEs. diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index ae3d4af61b66..dd8bd4706259 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -298,7 +298,6 @@ struct kvm_arch { u8 fwnmi_enabled; u8 secure_guest; u8 svm_enabled; - bool threads_indep; bool nested_enable; bool dawr1_enabled; pgd_t *pgtable; @@ -684,7 +683,12 @@ struct kvm_vcpu_arch { ulong fault_dar; u32 fault_dsisr; unsigned long intr_msr; - ulong fault_gpa; /* guest real address of page fault (POWER9) */ + /* + * POWER9 and later: fault_gpa contains the guest real address of page + * fault for a radix guest, or segment descriptor (equivalent to result + * from slbmfev of SLB entry that translated the EA) for hash guests. + */ + ulong fault_gpa; #endif #ifdef CONFIG_BOOKE diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 5bf8ae9bb2cc..2d88944f9f34 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -129,6 +129,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags); +extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu); @@ -606,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm); extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall); extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); +extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req); extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, @@ -638,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) { return 0; } +static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req) + { return 0; } #endif #ifdef CONFIG_KVM_XIVE @@ -655,8 +659,6 @@ extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority); extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq); extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq); -extern void kvmppc_xive_init_module(void); -extern void kvmppc_xive_exit_module(void); extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu); @@ -671,6 +673,8 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval); extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); +extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu); +extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu); static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) { @@ -680,8 +684,6 @@ static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu); extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu); -extern void kvmppc_xive_native_init_module(void); -extern void kvmppc_xive_native_exit_module(void); extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val); extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, @@ -695,8 +697,6 @@ static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority) { return -1; } static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; } static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; } -static inline void kvmppc_xive_init_module(void) { } -static inline void kvmppc_xive_exit_module(void) { } static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; } @@ -711,14 +711,14 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { return -ENODEV; } static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } +static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { } +static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { } static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) { return 0; } static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; } static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { } -static inline void kvmppc_xive_native_init_module(void) { } -static inline void kvmppc_xive_native_exit_module(void) { } static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return 0; } @@ -754,7 +754,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, unsigned long tce_value, unsigned long npages); long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, unsigned int yield_count); -long kvmppc_h_random(struct kvm_vcpu *vcpu); +long kvmppc_rm_h_random(struct kvm_vcpu *vcpu); void kvmhv_commence_exit(int trap); void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); void kvmppc_subcore_enter_guest(void); diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 4bc45d3ed8b0..db186c539d37 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -122,12 +122,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) } #endif -#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU) -extern void radix_kvm_prefetch_workaround(struct mm_struct *mm); -#else -static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { } -#endif - extern void switch_cop(struct mm_struct *next); extern int use_cop(unsigned long acop, struct mm_struct *mm); extern void drop_cop(unsigned long acop, struct mm_struct *mm); @@ -222,6 +216,18 @@ static inline void mm_context_add_copro(struct mm_struct *mm) { } static inline void mm_context_remove_copro(struct mm_struct *mm) { } #endif +#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU) +void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, + unsigned long type, unsigned long pg_sizes, + unsigned long start, unsigned long end); +#else +static inline void do_h_rpt_invalidate_prt(unsigned long pid, + unsigned long lpid, + unsigned long type, + unsigned long pg_sizes, + unsigned long start, + unsigned long end) { } +#endif extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index 5d1726bb28e7..bcb7b5f917be 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -28,19 +28,35 @@ static inline u32 yield_count_of(int cpu) return be32_to_cpu(yield_count); } +/* + * Spinlock code confers and prods, so don't trace the hcalls because the + * tracing code takes spinlocks which can cause recursion deadlocks. + * + * These calls are made while the lock is not held: the lock slowpath yields if + * it can not acquire the lock, and unlock slow path might prod if a waiter has + * yielded). So this may not be a problem for simple spin locks because the + * tracing does not technically recurse on the lock, but we avoid it anyway. + * + * However the queued spin lock contended path is more strictly ordered: the + * H_CONFER hcall is made after the task has queued itself on the lock, so then + * recursing on that lock will cause the task to then queue up again behind the + * first instance (or worse: queued spinlocks use tricks that assume a context + * never waits on more than one spinlock, so such recursion may cause random + * corruption in the lock code). + */ static inline void yield_to_preempted(int cpu, u32 yield_count) { - plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); + plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); } static inline void prod_cpu(int cpu) { - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); + plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu)); } static inline void yield_to_any(void) { - plpar_hcall_norets(H_CONFER, -1, 0); + plpar_hcall_norets_notrace(H_CONFER, -1, 0); } #else static inline bool is_shared_processor(void) diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index ece84a430701..83e0f701ebc6 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -28,7 +28,11 @@ static inline void set_cede_latency_hint(u8 latency_hint) static inline long cede_processor(void) { - return plpar_hcall_norets(H_CEDE); + /* + * We cannot call tracepoints inside RCU idle regions which + * means we must not trace H_CEDE. + */ + return plpar_hcall_norets_notrace(H_CEDE); } static inline long extended_cede_processor(unsigned long latency_hint) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 8dd3cdb25338..8c2c3dd4ddba 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -97,6 +97,18 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low, extern void secondary_cpu_time_init(void); extern void __init time_init(void); +#ifdef CONFIG_PPC64 +static inline unsigned long test_irq_work_pending(void) +{ + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, irq_work_pending))); + return x; +} +#endif + DECLARE_PER_CPU(u64, decrementers_next_tb); /* Convert timebase ticks to nanoseconds */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index a09e4240c5b1..22c79ab40006 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -157,7 +157,7 @@ do { \ "2: lwz%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ EX_TABLE(2b, %l2) \ - : "=r" (x) \ + : "=&r" (x) \ : "m" (*addr) \ : \ : label) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 28af4efb4587..aa267d173ded 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -534,7 +534,6 @@ int main(void) OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr); OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr); OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar); - OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa); OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr); OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst); OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7c3654b0d0f4..f1ae710274bc 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -340,6 +340,12 @@ ret_from_mc_except: andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \ bne masked_interrupt_book3e_##n +/* + * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is + * called, because that does SAVE_NVGPRS which must see the original register + * values, otherwise the scratch values might be restored when exiting the + * interrupt. + */ #define PROLOG_ADDITION_2REGS_GEN(n) \ std r14,PACA_EXGEN+EX_R14(r13); \ std r15,PACA_EXGEN+EX_R15(r13) @@ -535,6 +541,10 @@ __end_interrupts: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR + std r14,_DAR(r1) + std r15,_DSISR(r1) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x300) b storage_fault_common @@ -544,6 +554,10 @@ __end_interrupts: PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 + std r14,_DAR(r1) + std r15,_DSISR(r1) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x400) b storage_fault_common @@ -557,6 +571,10 @@ __end_interrupts: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR + std r14,_DAR(r1) + std r15,_DSISR(r1) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x600) b alignment_more /* no room, go out of line */ @@ -565,10 +583,10 @@ __end_interrupts: NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM, PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR - EXCEPTION_COMMON(0x700) std r14,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) + EXCEPTION_COMMON(0x700) + addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception REST_NVGPRS(r1) b interrupt_return @@ -725,11 +743,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) * normal exception */ mfspr r14,SPRN_DBSR - EXCEPTION_COMMON_CRIT(0xd00) std r14,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) + EXCEPTION_COMMON_CRIT(0xd00) + addi r3,r1,STACK_FRAME_OVERHEAD bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -796,11 +814,11 @@ kernel_dbg_exc: * normal exception */ mfspr r14,SPRN_DBSR - EXCEPTION_COMMON_DBG(0xd08) std r14,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) + EXCEPTION_COMMON_DBG(0xd08) + addi r3,r1,STACK_FRAME_OVERHEAD bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -931,11 +949,7 @@ masked_interrupt_book3e_0x2c0: * original values stashed away in the PACA */ storage_fault_common: - std r14,_DAR(r1) - std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - ld r14,PACA_EXGEN+EX_R14(r13) - ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault b interrupt_return @@ -944,11 +958,7 @@ storage_fault_common: * continues here. */ alignment_more: - std r14,_DAR(r1) - std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - ld r14,PACA_EXGEN+EX_R14(r13) - ld r15,PACA_EXGEN+EX_R15(r13) bl alignment_exception REST_NVGPRS(r1) b interrupt_return diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fa8e52a0239e..f7fc6e078d4e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -21,22 +21,6 @@ #include <asm/feature-fixups.h> #include <asm/kup.h> -/* PACA save area offsets (exgen, exmc, etc) */ -#define EX_R9 0 -#define EX_R10 8 -#define EX_R11 16 -#define EX_R12 24 -#define EX_R13 32 -#define EX_DAR 40 -#define EX_DSISR 48 -#define EX_CCR 52 -#define EX_CFAR 56 -#define EX_PPR 64 -#define EX_CTR 72 -.if EX_SIZE != 10 - .error "EX_SIZE is wrong" -.endif - /* * Following are fixed section helper macros. * @@ -133,7 +117,6 @@ name: #define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */ #define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */ #define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */ -#define IKVM_SKIP .L_IKVM_SKIP_\name\() /* Generate KVM skip handler */ #define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */ #define __IKVM_REAL(name) .L_IKVM_REAL_ ## name #define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */ @@ -190,9 +173,6 @@ do_define_int n .ifndef IMASK IMASK=0 .endif - .ifndef IKVM_SKIP - IKVM_SKIP=0 - .endif .ifndef IKVM_REAL IKVM_REAL=0 .endif @@ -207,8 +187,6 @@ do_define_int n .endif .endm -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* * All interrupts which set HSRR registers, as well as SRESET and MCE and * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken, @@ -238,88 +216,28 @@ do_define_int n /* * If an interrupt is taken while a guest is running, it is immediately routed - * to KVM to handle. If both HV and PR KVM arepossible, KVM interrupts go first - * to kvmppc_interrupt_hv, which handles the PR guest case. + * to KVM to handle. */ -#define kvmppc_interrupt kvmppc_interrupt_hv -#else -#define kvmppc_interrupt kvmppc_interrupt_pr -#endif -.macro KVMTEST name +.macro KVMTEST name handler +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER lbz r10,HSTATE_IN_GUEST(r13) cmpwi r10,0 - bne \name\()_kvm -.endm - -.macro GEN_KVM name - .balign IFETCH_ALIGN_BYTES -\name\()_kvm: - - .if IKVM_SKIP - cmpwi r10,KVM_GUEST_MODE_SKIP - beq 89f - .else -BEGIN_FTR_SECTION - ld r10,IAREA+EX_CFAR(r13) - std r10,HSTATE_CFAR(r13) -END_FTR_SECTION_IFSET(CPU_FTR_CFAR) - .endif - - ld r10,IAREA+EX_CTR(r13) - mtctr r10 -BEGIN_FTR_SECTION - ld r10,IAREA+EX_PPR(r13) - std r10,HSTATE_PPR(r13) -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - ld r11,IAREA+EX_R11(r13) - ld r12,IAREA+EX_R12(r13) - std r12,HSTATE_SCRATCH0(r13) - sldi r12,r9,32 - ld r9,IAREA+EX_R9(r13) - ld r10,IAREA+EX_R10(r13) /* HSRR variants have the 0x2 bit added to their trap number */ .if IHSRR_IF_HVMODE BEGIN_FTR_SECTION - ori r12,r12,(IVEC + 0x2) - FTR_SECTION_ELSE - ori r12,r12,(IVEC) - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif IHSRR - ori r12,r12,(IVEC+ 0x2) - .else - ori r12,r12,(IVEC) - .endif - b kvmppc_interrupt - - .if IKVM_SKIP -89: mtocrf 0x80,r9 - ld r10,IAREA+EX_CTR(r13) - mtctr r10 - ld r9,IAREA+EX_R9(r13) - ld r10,IAREA+EX_R10(r13) - ld r11,IAREA+EX_R11(r13) - ld r12,IAREA+EX_R12(r13) - .if IHSRR_IF_HVMODE - BEGIN_FTR_SECTION - b kvmppc_skip_Hinterrupt + li r10,(IVEC + 0x2) FTR_SECTION_ELSE - b kvmppc_skip_interrupt + li r10,(IVEC) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) .elseif IHSRR - b kvmppc_skip_Hinterrupt + li r10,(IVEC + 0x2) .else - b kvmppc_skip_interrupt + li r10,(IVEC) .endif - .endif -.endm - -#else -.macro KVMTEST name -.endm -.macro GEN_KVM name -.endm + bne \handler #endif +.endm /* * This is the BOOK3S interrupt entry code macro. @@ -461,7 +379,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) DEFINE_FIXED_SYMBOL(\name\()_common_real) \name\()_common_real: .if IKVM_REAL - KVMTEST \name + KVMTEST \name kvm_interrupt .endif ld r10,PACAKMSR(r13) /* get MSR value for kernel */ @@ -484,7 +402,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) DEFINE_FIXED_SYMBOL(\name\()_common_virt) \name\()_common_virt: .if IKVM_VIRT - KVMTEST \name + KVMTEST \name kvm_interrupt 1: .endif .endif /* IVIRT */ @@ -498,7 +416,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt) DEFINE_FIXED_SYMBOL(\name\()_common_real) \name\()_common_real: .if IKVM_REAL - KVMTEST \name + KVMTEST \name kvm_interrupt .endif .endm @@ -1000,8 +918,6 @@ EXC_COMMON_BEGIN(system_reset_common) EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL - GEN_KVM system_reset - /** * Interrupt 0x200 - Machine Check Interrupt (MCE). @@ -1070,7 +986,6 @@ INT_DEFINE_BEGIN(machine_check) ISET_RI=0 IDAR=1 IDSISR=1 - IKVM_SKIP=1 IKVM_REAL=1 INT_DEFINE_END(machine_check) @@ -1166,7 +1081,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) /* * Check if we are coming from guest. If yes, then run the normal * exception handler which will take the - * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event + * machine_check_kvm->kvm_interrupt branch to deliver the MC event * to guest. */ lbz r11,HSTATE_IN_GUEST(r13) @@ -1236,8 +1151,6 @@ EXC_COMMON_BEGIN(machine_check_common) bl machine_check_exception b interrupt_return - GEN_KVM machine_check - #ifdef CONFIG_PPC_P7_NAP /* @@ -1342,7 +1255,6 @@ INT_DEFINE_BEGIN(data_access) IVEC=0x300 IDAR=1 IDSISR=1 - IKVM_SKIP=1 IKVM_REAL=1 INT_DEFINE_END(data_access) @@ -1373,8 +1285,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) REST_NVGPRS(r1) b interrupt_return - GEN_KVM data_access - /** * Interrupt 0x380 - Data Segment Interrupt (DSLB). @@ -1396,7 +1306,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) INT_DEFINE_BEGIN(data_access_slb) IVEC=0x380 IDAR=1 - IKVM_SKIP=1 IKVM_REAL=1 INT_DEFINE_END(data_access_slb) @@ -1425,8 +1334,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) bl do_bad_slb_fault b interrupt_return - GEN_KVM data_access_slb - /** * Interrupt 0x400 - Instruction Storage Interrupt (ISI). @@ -1463,8 +1370,6 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) b interrupt_return - GEN_KVM instruction_access - /** * Interrupt 0x480 - Instruction Segment Interrupt (ISLB). @@ -1509,8 +1414,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) bl do_bad_slb_fault b interrupt_return - GEN_KVM instruction_access_slb - /** * Interrupt 0x500 - External Interrupt. @@ -1555,8 +1458,6 @@ EXC_COMMON_BEGIN(hardware_interrupt_common) bl do_IRQ b interrupt_return - GEN_KVM hardware_interrupt - /** * Interrupt 0x600 - Alignment Interrupt @@ -1584,8 +1485,6 @@ EXC_COMMON_BEGIN(alignment_common) REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return - GEN_KVM alignment - /** * Interrupt 0x700 - Program Interrupt (program check). @@ -1693,8 +1592,6 @@ EXC_COMMON_BEGIN(program_check_common) REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return - GEN_KVM program_check - /* * Interrupt 0x800 - Floating-Point Unavailable Interrupt. @@ -1744,8 +1641,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) b interrupt_return #endif - GEN_KVM fp_unavailable - /** * Interrupt 0x900 - Decrementer Interrupt. @@ -1784,8 +1679,6 @@ EXC_COMMON_BEGIN(decrementer_common) bl timer_interrupt b interrupt_return - GEN_KVM decrementer - /** * Interrupt 0x980 - Hypervisor Decrementer Interrupt. @@ -1831,8 +1724,6 @@ EXC_COMMON_BEGIN(hdecrementer_common) ld r13,PACA_EXGEN+EX_R13(r13) HRFI_TO_KERNEL - GEN_KVM hdecrementer - /** * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt. @@ -1872,8 +1763,6 @@ EXC_COMMON_BEGIN(doorbell_super_common) #endif b interrupt_return - GEN_KVM doorbell_super - EXC_REAL_NONE(0xb00, 0x100) EXC_VIRT_NONE(0x4b00, 0x100) @@ -1923,7 +1812,7 @@ INT_DEFINE_END(system_call) GET_PACA(r13) std r10,PACA_EXGEN+EX_R10(r13) INTERRUPT_TO_KERNEL - KVMTEST system_call /* uses r10, branch to system_call_kvm */ + KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */ mfctr r9 #else mr r9,r13 @@ -1979,14 +1868,16 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100) EXC_VIRT_END(system_call, 0x4c00, 0x100) #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -TRAMP_REAL_BEGIN(system_call_kvm) - /* - * This is a hcall, so register convention is as above, with these - * differences: - * r13 = PACA - * ctr = orig r13 - * orig r10 saved in PACA - */ +TRAMP_REAL_BEGIN(kvm_hcall) + std r9,PACA_EXGEN+EX_R9(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfcr r9 + mfctr r10 + std r10,PACA_EXGEN+EX_R13(r13) + li r10,0 + std r10,PACA_EXGEN+EX_CFAR(r13) + std r10,PACA_EXGEN+EX_CTR(r13) /* * Save the PPR (on systems that support it) before changing to * HMT_MEDIUM. That allows the KVM code to save that value into the @@ -1994,31 +1885,24 @@ TRAMP_REAL_BEGIN(system_call_kvm) */ BEGIN_FTR_SECTION mfspr r10,SPRN_PPR - std r10,HSTATE_PPR(r13) + std r10,PACA_EXGEN+EX_PPR(r13) END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + HMT_MEDIUM - mfctr r10 - SET_SCRATCH0(r10) - mfcr r10 - std r12,HSTATE_SCRATCH0(r13) - sldi r12,r10,32 - ori r12,r12,0xc00 + #ifdef CONFIG_RELOCATABLE /* - * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives + * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives * outside the head section. */ - __LOAD_FAR_HANDLER(r10, kvmppc_interrupt) + __LOAD_FAR_HANDLER(r10, kvmppc_hcall) mtctr r10 - ld r10,PACA_EXGEN+EX_R10(r13) bctr #else - ld r10,PACA_EXGEN+EX_R10(r13) - b kvmppc_interrupt + b kvmppc_hcall #endif #endif - /** * Interrupt 0xd00 - Trace Interrupt. * This is a synchronous interrupt in response to instruction step or @@ -2043,8 +1927,6 @@ EXC_COMMON_BEGIN(single_step_common) bl single_step_exception b interrupt_return - GEN_KVM single_step - /** * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI). @@ -2063,7 +1945,6 @@ INT_DEFINE_BEGIN(h_data_storage) IHSRR=1 IDAR=1 IDSISR=1 - IKVM_SKIP=1 IKVM_REAL=1 IKVM_VIRT=1 INT_DEFINE_END(h_data_storage) @@ -2084,8 +1965,6 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) b interrupt_return - GEN_KVM h_data_storage - /** * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI). @@ -2111,8 +1990,6 @@ EXC_COMMON_BEGIN(h_instr_storage_common) bl unknown_exception b interrupt_return - GEN_KVM h_instr_storage - /** * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt. @@ -2137,8 +2014,6 @@ EXC_COMMON_BEGIN(emulation_assist_common) REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return - GEN_KVM emulation_assist - /** * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI). @@ -2210,16 +2085,12 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXCEPTION_RESTORE_REGS hsrr=1 GEN_INT_ENTRY hmi_exception, virt=0 - GEN_KVM hmi_exception_early - EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception b interrupt_return - GEN_KVM hmi_exception - /** * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt. @@ -2250,8 +2121,6 @@ EXC_COMMON_BEGIN(h_doorbell_common) #endif b interrupt_return - GEN_KVM h_doorbell - /** * Interrupt 0xea0 - Hypervisor Virtualization Interrupt. @@ -2278,8 +2147,6 @@ EXC_COMMON_BEGIN(h_virt_irq_common) bl do_IRQ b interrupt_return - GEN_KVM h_virt_irq - EXC_REAL_NONE(0xec0, 0x20) EXC_VIRT_NONE(0x4ec0, 0x20) @@ -2323,8 +2190,6 @@ EXC_COMMON_BEGIN(performance_monitor_common) bl performance_monitor_exception b interrupt_return - GEN_KVM performance_monitor - /** * Interrupt 0xf20 - Vector Unavailable Interrupt. @@ -2374,8 +2239,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bl altivec_unavailable_exception b interrupt_return - GEN_KVM altivec_unavailable - /** * Interrupt 0xf40 - VSX Unavailable Interrupt. @@ -2424,8 +2287,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl vsx_unavailable_exception b interrupt_return - GEN_KVM vsx_unavailable - /** * Interrupt 0xf60 - Facility Unavailable Interrupt. @@ -2454,8 +2315,6 @@ EXC_COMMON_BEGIN(facility_unavailable_common) REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return - GEN_KVM facility_unavailable - /** * Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt. @@ -2484,8 +2343,6 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common) REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */ b interrupt_return - GEN_KVM h_facility_unavailable - EXC_REAL_NONE(0xfa0, 0x20) EXC_VIRT_NONE(0x4fa0, 0x20) @@ -2515,8 +2372,6 @@ EXC_COMMON_BEGIN(cbe_system_error_common) bl cbe_system_error_exception b interrupt_return - GEN_KVM cbe_system_error - #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) @@ -2548,8 +2403,6 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common) bl instruction_breakpoint_exception b interrupt_return - GEN_KVM instruction_breakpoint - EXC_REAL_NONE(0x1400, 0x100) EXC_VIRT_NONE(0x5400, 0x100) @@ -2670,8 +2523,6 @@ EXC_COMMON_BEGIN(denorm_exception_common) bl unknown_exception b interrupt_return - GEN_KVM denorm_exception - #ifdef CONFIG_CBE_RAS INT_DEFINE_BEGIN(cbe_maintenance) @@ -2689,8 +2540,6 @@ EXC_COMMON_BEGIN(cbe_maintenance_common) bl cbe_maintenance_exception b interrupt_return - GEN_KVM cbe_maintenance - #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) @@ -2721,8 +2570,6 @@ EXC_COMMON_BEGIN(altivec_assist_common) #endif b interrupt_return - GEN_KVM altivec_assist - #ifdef CONFIG_CBE_RAS INT_DEFINE_BEGIN(cbe_thermal) @@ -2740,8 +2587,6 @@ EXC_COMMON_BEGIN(cbe_thermal_common) bl cbe_thermal_exception b interrupt_return - GEN_KVM cbe_thermal - #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) @@ -2994,6 +2839,15 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback) USE_TEXT_SECTION() +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +kvm_interrupt: + /* + * The conditional branch in KVMTEST can't reach all the way, + * make a stub. + */ + b kvmppc_interrupt +#endif + _GLOBAL(do_uaccess_flush) UACCESS_FLUSH_FIXUP_SECTION nop @@ -3009,32 +2863,6 @@ EXPORT_SYMBOL(do_uaccess_flush) MASKED_INTERRUPT MASKED_INTERRUPT hsrr=1 -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER -kvmppc_skip_interrupt: - /* - * Here all GPRs are unchanged from when the interrupt happened - * except for r13, which is saved in SPRG_SCRATCH0. - */ - mfspr r13, SPRN_SRR0 - addi r13, r13, 4 - mtspr SPRN_SRR0, r13 - GET_SCRATCH0(r13) - RFI_TO_KERNEL - b . - -kvmppc_skip_Hinterrupt: - /* - * Here all GPRs are unchanged from when the interrupt happened - * except for r13, which is saved in SPRG_SCRATCH0. - */ - mfspr r13, SPRN_HSRR0 - addi r13, r13, 4 - mtspr SPRN_HSRR0, r13 - GET_SCRATCH0(r13) - HRFI_TO_KERNEL - b . -#endif - /* * Relocation-on interrupts: A subset of the interrupts can be delivered * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index e4559f8914eb..e0938ba298f2 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -34,9 +34,6 @@ notrace long system_call_exception(long r3, long r4, long r5, syscall_fn f; kuep_lock(); -#ifdef CONFIG_PPC32 - kuap_save_and_lock(regs); -#endif regs->orig_gpr3 = r3; @@ -427,6 +424,7 @@ again: /* Restore user access locks last */ kuap_user_restore(regs); + kuep_unlock(); return ret; } diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 8b2c1a8553a0..cfc03e016ff2 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -356,13 +356,16 @@ static void __init setup_legacy_serial_console(int console) static int __init ioremap_legacy_serial_console(void) { - struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console]; - struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console]; + struct plat_serial8250_port *port; + struct legacy_serial_info *info; void __iomem *vaddr; if (legacy_serial_console < 0) return 0; + info = &legacy_serial_infos[legacy_serial_console]; + port = &legacy_serial_ports[legacy_serial_console]; + if (!info->early_addr) return 0; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 0fdfcdd9d880..c17d1c9362b5 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -432,16 +432,19 @@ device_initcall(stf_barrier_debugfs_init); static void update_branch_cache_flush(void) { - u32 *site; + u32 *site, __maybe_unused *site2; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE site = &patch__call_kvm_flush_link_stack; + site2 = &patch__call_kvm_flush_link_stack_p9; // This controls the branch from guest_exit_cont to kvm_flush_link_stack if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site2, ppc_inst(PPC_INST_NOP)); } else { // Could use HW flush, but that could also flush count cache patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); + patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); } #endif diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index f4aafa337c2e..1f07317964e4 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -166,9 +166,9 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #else -#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) +#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0) -#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0) +#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0) static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index b67d93a609a2..da995c5fb97d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -508,16 +508,6 @@ EXPORT_SYMBOL(profile_pc); * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... */ #ifdef CONFIG_PPC64 -static inline unsigned long test_irq_work_pending(void) -{ - unsigned long x; - - asm volatile("lbz %0,%1(13)" - : "=r" (x) - : "i" (offsetof(struct paca_struct, irq_work_pending))); - return x; -} - static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 2bfeaa13befb..ab241317481c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -57,6 +57,7 @@ kvm-pr-y := \ book3s_32_mmu.o kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ + book3s_64_entry.o \ tm.o ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -86,6 +87,7 @@ kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_hmi.o \ + book3s_hv_p9_entry.o \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 2b691f4d1f26..5e1e1cff0ee3 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -171,6 +171,12 @@ void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags) } EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check); +void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu) +{ + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0); +} +EXPORT_SYMBOL(kvmppc_core_queue_syscall); + void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) { /* might as well deliver this straight away */ @@ -1044,13 +1050,10 @@ static int kvmppc_book3s_init(void) #ifdef CONFIG_KVM_XICS #ifdef CONFIG_KVM_XIVE if (xics_on_xive()) { - kvmppc_xive_init_module(); kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); - if (kvmppc_xive_native_supported()) { - kvmppc_xive_native_init_module(); + if (kvmppc_xive_native_supported()) kvm_register_device_ops(&kvm_xive_native_ops, KVM_DEV_TYPE_XIVE); - } } else #endif kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); @@ -1060,12 +1063,6 @@ static int kvmppc_book3s_init(void) static void kvmppc_book3s_exit(void) { -#ifdef CONFIG_KVM_XICS - if (xics_on_xive()) { - kvmppc_xive_exit_module(); - kvmppc_xive_native_exit_module(); - } -#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER kvmppc_book3s_exit_pr(); #endif diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S new file mode 100644 index 000000000000..983b8c18bc31 --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_entry.S @@ -0,0 +1,416 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <asm/asm-offsets.h> +#include <asm/cache.h> +#include <asm/code-patching-asm.h> +#include <asm/exception-64s.h> +#include <asm/export.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_book3s_asm.h> +#include <asm/mmu.h> +#include <asm/ppc_asm.h> +#include <asm/ptrace.h> +#include <asm/reg.h> +#include <asm/ultravisor-api.h> + +/* + * These are branched to from interrupt handlers in exception-64s.S which set + * IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero. + */ + +/* + * This is a hcall, so register convention is as + * Documentation/powerpc/papr_hcalls.rst. + * + * This may also be a syscall from PR-KVM userspace that is to be + * reflected to the PR guest kernel, so registers may be set up for + * a system call rather than hcall. We don't currently clobber + * anything here, but the 0xc00 handler has already clobbered CTR + * and CR0, so PR-KVM can not support a guest kernel that preserves + * those registers across its system calls. + * + * The state of registers is as kvmppc_interrupt, except CFAR is not + * saved, R13 is not in SCRATCH0, and R10 does not contain the trap. + */ +.global kvmppc_hcall +.balign IFETCH_ALIGN_BYTES +kvmppc_hcall: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + lbz r10,HSTATE_IN_GUEST(r13) + cmpwi r10,KVM_GUEST_MODE_HV_P9 + beq kvmppc_p9_exit_hcall +#endif + ld r10,PACA_EXGEN+EX_R13(r13) + SET_SCRATCH0(r10) + li r10,0xc00 + /* Now we look like kvmppc_interrupt */ + li r11,PACA_EXGEN + b .Lgot_save_area + +/* + * KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that + * call convention: + * + * guest R9-R13, CTR, CFAR, PPR saved in PACA EX_xxx save area + * guest (H)DAR, (H)DSISR are also in the save area for relevant interrupts + * guest R13 also saved in SCRATCH0 + * R13 = PACA + * R11 = (H)SRR0 + * R12 = (H)SRR1 + * R9 = guest CR + * PPR is set to medium + * + * With the addition for KVM: + * R10 = trap vector + */ +.global kvmppc_interrupt +.balign IFETCH_ALIGN_BYTES +kvmppc_interrupt: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + std r10,HSTATE_SCRATCH0(r13) + lbz r10,HSTATE_IN_GUEST(r13) + cmpwi r10,KVM_GUEST_MODE_HV_P9 + beq kvmppc_p9_exit_interrupt + ld r10,HSTATE_SCRATCH0(r13) +#endif + li r11,PACA_EXGEN + cmpdi r10,0x200 + bgt+ .Lgot_save_area + li r11,PACA_EXMC + beq .Lgot_save_area + li r11,PACA_EXNMI +.Lgot_save_area: + add r11,r11,r13 +BEGIN_FTR_SECTION + ld r12,EX_CFAR(r11) + std r12,HSTATE_CFAR(r13) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + ld r12,EX_CTR(r11) + mtctr r12 +BEGIN_FTR_SECTION + ld r12,EX_PPR(r11) + std r12,HSTATE_PPR(r13) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + ld r12,EX_R12(r11) + std r12,HSTATE_SCRATCH0(r13) + sldi r12,r9,32 + or r12,r12,r10 + ld r9,EX_R9(r11) + ld r10,EX_R10(r11) + ld r11,EX_R11(r11) + + /* + * Hcalls and other interrupts come here after normalising register + * contents and save locations: + * + * R12 = (guest CR << 32) | interrupt vector + * R13 = PACA + * guest R12 saved in shadow HSTATE_SCRATCH0 + * guest R13 saved in SPRN_SCRATCH0 + */ + std r9,HSTATE_SCRATCH2(r13) + lbz r9,HSTATE_IN_GUEST(r13) + cmpwi r9,KVM_GUEST_MODE_SKIP + beq- .Lmaybe_skip +.Lno_skip: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + cmpwi r9,KVM_GUEST_MODE_GUEST + beq kvmppc_interrupt_pr +#endif + b kvmppc_interrupt_hv +#else + b kvmppc_interrupt_pr +#endif + +/* + * "Skip" interrupts are part of a trick KVM uses a with hash guests to load + * the faulting instruction in guest memory from the the hypervisor without + * walking page tables. + * + * When the guest takes a fault that requires the hypervisor to load the + * instruction (e.g., MMIO emulation), KVM is running in real-mode with HV=1 + * and the guest MMU context loaded. It sets KVM_GUEST_MODE_SKIP, and sets + * MSR[DR]=1 while leaving MSR[IR]=0, so it continues to fetch HV instructions + * but loads and stores will access the guest context. This is used to load + * the faulting instruction using the faulting guest effective address. + * + * However the guest context may not be able to translate, or it may cause a + * machine check or other issue, which results in a fault in the host + * (even with KVM-HV). + * + * These faults come here because KVM_GUEST_MODE_SKIP was set, so if they + * are (or are likely) caused by that load, the instruction is skipped by + * just returning with the PC advanced +4, where it is noticed the load did + * not execute and it goes to the slow path which walks the page tables to + * read guest memory. + */ +.Lmaybe_skip: + cmpwi r12,BOOK3S_INTERRUPT_MACHINE_CHECK + beq 1f + cmpwi r12,BOOK3S_INTERRUPT_DATA_STORAGE + beq 1f + cmpwi r12,BOOK3S_INTERRUPT_DATA_SEGMENT + beq 1f +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* HSRR interrupts get 2 added to interrupt number */ + cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE | 0x2 + beq 2f +#endif + b .Lno_skip +1: mfspr r9,SPRN_SRR0 + addi r9,r9,4 + mtspr SPRN_SRR0,r9 + ld r12,HSTATE_SCRATCH0(r13) + ld r9,HSTATE_SCRATCH2(r13) + GET_SCRATCH0(r13) + RFI_TO_KERNEL +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +2: mfspr r9,SPRN_HSRR0 + addi r9,r9,4 + mtspr SPRN_HSRR0,r9 + ld r12,HSTATE_SCRATCH0(r13) + ld r9,HSTATE_SCRATCH2(r13) + GET_SCRATCH0(r13) + HRFI_TO_KERNEL +#endif + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + +/* Stack frame offsets for kvmppc_p9_enter_guest */ +#define SFS (144 + STACK_FRAME_MIN_SIZE) +#define STACK_SLOT_NVGPRS (SFS - 144) /* 18 gprs */ + +/* + * void kvmppc_p9_enter_guest(struct vcpu *vcpu); + * + * Enter the guest on a ISAv3.0 or later system. + */ +.balign IFETCH_ALIGN_BYTES +_GLOBAL(kvmppc_p9_enter_guest) +EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest) + mflr r0 + std r0,PPC_LR_STKOFF(r1) + stdu r1,-SFS(r1) + + std r1,HSTATE_HOST_R1(r13) + + mfcr r4 + stw r4,SFS+8(r1) + + reg = 14 + .rept 18 + std reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1) + reg = reg + 1 + .endr + + ld r4,VCPU_LR(r3) + mtlr r4 + ld r4,VCPU_CTR(r3) + mtctr r4 + ld r4,VCPU_XER(r3) + mtspr SPRN_XER,r4 + + ld r1,VCPU_CR(r3) + +BEGIN_FTR_SECTION + ld r4,VCPU_CFAR(r3) + mtspr SPRN_CFAR,r4 +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) +BEGIN_FTR_SECTION + ld r4,VCPU_PPR(r3) + mtspr SPRN_PPR,r4 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + reg = 4 + .rept 28 + ld reg,__VCPU_GPR(reg)(r3) + reg = reg + 1 + .endr + + ld r4,VCPU_KVM(r3) + lbz r4,KVM_SECURE_GUEST(r4) + cmpdi r4,0 + ld r4,VCPU_GPR(R4)(r3) + bne .Lret_to_ultra + + mtcr r1 + + ld r0,VCPU_GPR(R0)(r3) + ld r1,VCPU_GPR(R1)(r3) + ld r2,VCPU_GPR(R2)(r3) + ld r3,VCPU_GPR(R3)(r3) + + HRFI_TO_GUEST + b . + + /* + * Use UV_RETURN ultracall to return control back to the Ultravisor + * after processing an hypercall or interrupt that was forwarded + * (a.k.a. reflected) to the Hypervisor. + * + * All registers have already been reloaded except the ucall requires: + * R0 = hcall result + * R2 = SRR1, so UV can detect a synthesized interrupt (if any) + * R3 = UV_RETURN + */ +.Lret_to_ultra: + mtcr r1 + ld r1,VCPU_GPR(R1)(r3) + + ld r0,VCPU_GPR(R3)(r3) + mfspr r2,SPRN_SRR1 + LOAD_REG_IMMEDIATE(r3, UV_RETURN) + sc 2 + +/* + * kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from + * above if the interrupt was taken for a guest that was entered via + * kvmppc_p9_enter_guest(). + * + * The exit code recovers the host stack and vcpu pointer, saves all guest GPRs + * and CR, LR, XER as well as guest MSR and NIA into the VCPU, then re- + * establishes the host stack and registers to return from the + * kvmppc_p9_enter_guest() function, which saves CTR and other guest registers + * (SPRs and FP, VEC, etc). + */ +.balign IFETCH_ALIGN_BYTES +kvmppc_p9_exit_hcall: + mfspr r11,SPRN_SRR0 + mfspr r12,SPRN_SRR1 + li r10,0xc00 + std r10,HSTATE_SCRATCH0(r13) + +.balign IFETCH_ALIGN_BYTES +kvmppc_p9_exit_interrupt: + /* + * If set to KVM_GUEST_MODE_HV_P9 but we're still in the + * hypervisor, that means we can't return from the entry stack. + */ + rldicl. r10,r12,64-MSR_HV_LG,63 + bne- kvmppc_p9_bad_interrupt + + std r1,HSTATE_SCRATCH1(r13) + std r3,HSTATE_SCRATCH2(r13) + ld r1,HSTATE_HOST_R1(r13) + ld r3,HSTATE_KVM_VCPU(r13) + + std r9,VCPU_CR(r3) + +1: + std r11,VCPU_PC(r3) + std r12,VCPU_MSR(r3) + + reg = 14 + .rept 18 + std reg,__VCPU_GPR(reg)(r3) + reg = reg + 1 + .endr + + /* r1, r3, r9-r13 are saved to vcpu by C code */ + std r0,VCPU_GPR(R0)(r3) + std r2,VCPU_GPR(R2)(r3) + reg = 4 + .rept 5 + std reg,__VCPU_GPR(reg)(r3) + reg = reg + 1 + .endr + + ld r2,PACATOC(r13) + + mflr r4 + std r4,VCPU_LR(r3) + mfspr r4,SPRN_XER + std r4,VCPU_XER(r3) + + reg = 14 + .rept 18 + ld reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1) + reg = reg + 1 + .endr + + lwz r4,SFS+8(r1) + mtcr r4 + + /* + * Flush the link stack here, before executing the first blr on the + * way out of the guest. + * + * The link stack won't match coming out of the guest anyway so the + * only cost is the flush itself. The call clobbers r0. + */ +1: nop + patch_site 1b patch__call_kvm_flush_link_stack_p9 + + addi r1,r1,SFS + ld r0,PPC_LR_STKOFF(r1) + mtlr r0 + blr + +/* + * Took an interrupt somewhere right before HRFID to guest, so registers are + * in a bad way. Return things hopefully enough to run host virtual code and + * run the Linux interrupt handler (SRESET or MCE) to print something useful. + * + * We could be really clever and save all host registers in known locations + * before setting HSTATE_IN_GUEST, then restoring them all here, and setting + * return address to a fixup that sets them up again. But that's a lot of + * effort for a small bit of code. Lots of other things to do first. + */ +kvmppc_p9_bad_interrupt: +BEGIN_MMU_FTR_SECTION + /* + * Hash host doesn't try to recover MMU (requires host SLB reload) + */ + b . +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) + /* + * Clean up guest registers to give host a chance to run. + */ + li r10,0 + mtspr SPRN_AMR,r10 + mtspr SPRN_IAMR,r10 + mtspr SPRN_CIABR,r10 + mtspr SPRN_DAWRX0,r10 +BEGIN_FTR_SECTION + mtspr SPRN_DAWRX1,r10 +END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) + mtspr SPRN_PID,r10 + + /* + * Switch to host MMU mode + */ + ld r10, HSTATE_KVM_VCPU(r13) + ld r10, VCPU_KVM(r10) + lwz r10, KVM_HOST_LPID(r10) + mtspr SPRN_LPID,r10 + + ld r10, HSTATE_KVM_VCPU(r13) + ld r10, VCPU_KVM(r10) + ld r10, KVM_HOST_LPCR(r10) + mtspr SPRN_LPCR,r10 + + /* + * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear + * MSR_RI in r12 ([H]SRR1) so the handler won't try to return. + */ + li r10,KVM_GUEST_MODE_NONE + stb r10,HSTATE_IN_GUEST(r13) + li r10,MSR_RI + andc r12,r12,r10 + + /* + * Go back to interrupt handler. MCE and SRESET have their specific + * PACA save area so they should be used directly. They set up their + * own stack. The other handlers all use EXGEN. They will use the + * guest r1 if it looks like a kernel stack, so just load the + * emergency stack and go to program check for all other interrupts. + */ + ld r10,HSTATE_SCRATCH0(r13) + cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK + beq machine_check_common + + cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET + beq system_reset_common + + b . +#endif diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2d9193cd73be..c63e263312a4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -840,7 +840,7 @@ bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range) kvm_unmap_radix(kvm, range->slot, gfn); } else { for (gfn = range->start; gfn < range->end; gfn++) - kvm_unmap_rmapp(kvm, range->slot, range->start); + kvm_unmap_rmapp(kvm, range->slot, gfn); } return false; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index d909c069363e..b5905ae4377c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -21,6 +21,7 @@ #include <asm/pte-walk.h> #include <asm/ultravisor.h> #include <asm/kvm_book3s_uvmem.h> +#include <asm/plpar_wrappers.h> /* * Supported radix tree geometry. @@ -318,9 +319,19 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, } psi = shift_to_mmu_psize(pshift); - rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58)); - rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1), - lpid, rb); + + if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) { + rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58)); + rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1), + lpid, rb); + } else { + rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU, + H_RPTI_TYPE_NESTED | + H_RPTI_TYPE_TLB, + psize_to_rpti_pgsize(psi), + addr, addr + psize); + } + if (rc) pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc); } @@ -334,8 +345,14 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid) return; } - rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1), - lpid, TLBIEL_INVAL_SET_LPID); + if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) + rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1), + lpid, TLBIEL_INVAL_SET_LPID); + else + rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU, + H_RPTI_TYPE_NESTED | + H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL, + 0, -1UL); if (rc) pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc); } diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 083a4e037718..dc6591548f0c 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -391,10 +391,6 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ /* liobn, ioba, tce); */ - /* For radix, we might be in virtual mode, so punt */ - if (kvm_is_radix(vcpu->kvm)) - return H_TOO_HARD; - stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -489,10 +485,6 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, bool prereg = false; struct kvmppc_spapr_tce_iommu_table *stit; - /* For radix, we might be in virtual mode, so punt */ - if (kvm_is_radix(vcpu->kvm)) - return H_TOO_HARD; - /* * used to check for invalidations in progress */ @@ -602,10 +594,6 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, long i, ret; struct kvmppc_spapr_tce_iommu_table *stit; - /* For radix, we might be in virtual mode, so punt */ - if (kvm_is_radix(vcpu->kvm)) - return H_TOO_HARD; - stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7360350e66ff..7e73e5bfe4ba 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -76,6 +76,7 @@ #include <asm/kvm_book3s_uvmem.h> #include <asm/ultravisor.h> #include <asm/dtl.h> +#include <asm/plpar_wrappers.h> #include "book3s.h" @@ -103,13 +104,9 @@ static int target_smt_mode; module_param(target_smt_mode, int, 0644); MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); -static bool indep_threads_mode = true; -module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)"); - static bool one_vm_per_core; module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)"); +MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires POWER8 or older)"); #ifdef CONFIG_KVM_XICS static const struct kernel_param_ops module_param_ops = { @@ -134,9 +131,6 @@ static inline bool nesting_enabled(struct kvm *kvm) return kvm->arch.nested_enable && kvm_is_radix(kvm); } -/* If set, the threads on each CPU core have to be in the same MMU mode */ -static bool no_mixing_hpt_and_radix __read_mostly; - static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); /* @@ -807,7 +801,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved. * Keep this in synch with kvmppc_filter_guest_lpcr_hv. */ - if (mflags != 0 && mflags != 3) + if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) && + kvmhv_vcpu_is_radix(vcpu) && mflags == 3) return H_UNSUPPORTED_FLAG_START; return H_TOO_HARD; default: @@ -899,6 +894,10 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) * H_SUCCESS if the source vcore wasn't idle (e.g. if it may * have useful work to do and should not confer) so we don't * recheck that here. + * + * In the case of the P9 single vcpu per vcore case, the real + * mode handler is not called but no other threads are in the + * source vcore. */ spin_lock(&vcore->lock); @@ -924,8 +923,71 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu) return yield_count; } +/* + * H_RPT_INVALIDATE hcall handler for nested guests. + * + * Handles only nested process-scoped invalidation requests in L0. + */ +static int kvmppc_nested_h_rpt_invalidate(struct kvm_vcpu *vcpu) +{ + unsigned long type = kvmppc_get_gpr(vcpu, 6); + unsigned long pid, pg_sizes, start, end; + + /* + * The partition-scoped invalidations aren't handled here in L0. + */ + if (type & H_RPTI_TYPE_NESTED) + return RESUME_HOST; + + pid = kvmppc_get_gpr(vcpu, 4); + pg_sizes = kvmppc_get_gpr(vcpu, 7); + start = kvmppc_get_gpr(vcpu, 8); + end = kvmppc_get_gpr(vcpu, 9); + + do_h_rpt_invalidate_prt(pid, vcpu->arch.nested->shadow_lpid, + type, pg_sizes, start, end); + + kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + return RESUME_GUEST; +} + +static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu, + unsigned long id, unsigned long target, + unsigned long type, unsigned long pg_sizes, + unsigned long start, unsigned long end) +{ + if (!kvm_is_radix(vcpu->kvm)) + return H_UNSUPPORTED; + + if (end < start) + return H_P5; + + /* + * Partition-scoped invalidation for nested guests. + */ + if (type & H_RPTI_TYPE_NESTED) { + if (!nesting_enabled(vcpu->kvm)) + return H_FUNCTION; + + /* Support only cores as target */ + if (target != H_RPTI_TARGET_CMMU) + return H_P2; + + return do_h_rpt_invalidate_pat(vcpu, id, type, pg_sizes, + start, end); + } + + /* + * Process-scoped invalidation for L1 guests. + */ + do_h_rpt_invalidate_prt(id, vcpu->kvm->arch.lpid, + type, pg_sizes, start, end); + return H_SUCCESS; +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; int yield_count; @@ -937,11 +999,57 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_HOST; switch (req) { + case H_REMOVE: + ret = kvmppc_h_remove(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; + case H_ENTER: + ret = kvmppc_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; + case H_READ: + ret = kvmppc_h_read(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; + case H_CLEAR_MOD: + ret = kvmppc_h_clear_mod(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; + case H_CLEAR_REF: + ret = kvmppc_h_clear_ref(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; + case H_PROTECT: + ret = kvmppc_h_protect(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; + case H_BULK_REMOVE: + ret = kvmppc_h_bulk_remove(vcpu); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; + case H_CEDE: break; case H_PROD: target = kvmppc_get_gpr(vcpu, 4); - tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); + tvcpu = kvmppc_find_vcpu(kvm, target); if (!tvcpu) { ret = H_PARAMETER; break; @@ -955,7 +1063,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) target = kvmppc_get_gpr(vcpu, 4); if (target == -1) break; - tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); + tvcpu = kvmppc_find_vcpu(kvm, target); if (!tvcpu) { ret = H_PARAMETER; break; @@ -971,12 +1079,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) kvmppc_get_gpr(vcpu, 6)); break; case H_RTAS: - if (list_empty(&vcpu->kvm->arch.rtas_tokens)) + if (list_empty(&kvm->arch.rtas_tokens)) return RESUME_HOST; - idx = srcu_read_lock(&vcpu->kvm->srcu); + idx = srcu_read_lock(&kvm->srcu); rc = kvmppc_rtas_hcall(vcpu); - srcu_read_unlock(&vcpu->kvm->srcu, idx); + srcu_read_unlock(&kvm->srcu, idx); if (rc == -ENOENT) return RESUME_HOST; @@ -1060,15 +1168,23 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) ret = H_HARDWARE; break; + case H_RPT_INVALIDATE: + ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7), + kvmppc_get_gpr(vcpu, 8), + kvmppc_get_gpr(vcpu, 9)); + break; case H_SET_PARTITION_TABLE: ret = H_FUNCTION; - if (nesting_enabled(vcpu->kvm)) + if (nesting_enabled(kvm)) ret = kvmhv_set_partition_table(vcpu); break; case H_ENTER_NESTED: ret = H_FUNCTION; - if (!nesting_enabled(vcpu->kvm)) + if (!nesting_enabled(kvm)) break; ret = kvmhv_enter_nested_guest(vcpu); if (ret == H_INTERRUPT) { @@ -1083,12 +1199,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) break; case H_TLB_INVALIDATE: ret = H_FUNCTION; - if (nesting_enabled(vcpu->kvm)) + if (nesting_enabled(kvm)) ret = kvmhv_do_nested_tlbie(vcpu); break; case H_COPY_TOFROM_GUEST: ret = H_FUNCTION; - if (nesting_enabled(vcpu->kvm)) + if (nesting_enabled(kvm)) ret = kvmhv_copy_tofrom_guest_nested(vcpu); break; case H_PAGE_INIT: @@ -1099,7 +1215,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_SVM_PAGE_IN: ret = H_UNSUPPORTED; if (kvmppc_get_srr1(vcpu) & MSR_S) - ret = kvmppc_h_svm_page_in(vcpu->kvm, + ret = kvmppc_h_svm_page_in(kvm, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6)); @@ -1107,7 +1223,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_SVM_PAGE_OUT: ret = H_UNSUPPORTED; if (kvmppc_get_srr1(vcpu) & MSR_S) - ret = kvmppc_h_svm_page_out(vcpu->kvm, + ret = kvmppc_h_svm_page_out(kvm, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6)); @@ -1115,12 +1231,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_SVM_INIT_START: ret = H_UNSUPPORTED; if (kvmppc_get_srr1(vcpu) & MSR_S) - ret = kvmppc_h_svm_init_start(vcpu->kvm); + ret = kvmppc_h_svm_init_start(kvm); break; case H_SVM_INIT_DONE: ret = H_UNSUPPORTED; if (kvmppc_get_srr1(vcpu) & MSR_S) - ret = kvmppc_h_svm_init_done(vcpu->kvm); + ret = kvmppc_h_svm_init_done(kvm); break; case H_SVM_INIT_ABORT: /* @@ -1130,24 +1246,26 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) * Instead the kvm->arch.secure_guest flag is checked inside * kvmppc_h_svm_init_abort(). */ - ret = kvmppc_h_svm_init_abort(vcpu->kvm); + ret = kvmppc_h_svm_init_abort(kvm); break; default: return RESUME_HOST; } + WARN_ON_ONCE(ret == H_TOO_HARD); kvmppc_set_gpr(vcpu, 3, ret); vcpu->arch.hcall_needed = 0; return RESUME_GUEST; } /* - * Handle H_CEDE in the nested virtualization case where we haven't - * called the real-mode hcall handlers in book3s_hv_rmhandlers.S. + * Handle H_CEDE in the P9 path where we don't call the real-mode hcall + * handlers in book3s_hv_rmhandlers.S. + * * This has to be done early, not in kvmppc_pseries_do_hcall(), so * that the cede logic in kvmppc_run_single_vcpu() works properly. */ -static void kvmppc_nested_cede(struct kvm_vcpu *vcpu) +static void kvmppc_cede(struct kvm_vcpu *vcpu) { vcpu->arch.shregs.msr |= MSR_EE; vcpu->arch.ceded = 1; @@ -1178,6 +1296,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) case H_XIRR_X: #endif case H_PAGE_INIT: + case H_RPT_INVALIDATE: return 1; } @@ -1400,13 +1519,39 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, } case BOOK3S_INTERRUPT_SYSCALL: { - /* hcall - punt to userspace */ int i; - /* hypercall with MSR_PR has already been handled in rmode, - * and never reaches here. - */ + if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) { + /* + * Guest userspace executed sc 1. This can only be + * reached by the P9 path because the old path + * handles this case in realmode hcall handlers. + */ + if (!kvmhv_vcpu_is_radix(vcpu)) { + /* + * A guest could be running PR KVM, so this + * may be a PR KVM hcall. It must be reflected + * to the guest kernel as a sc interrupt. + */ + kvmppc_core_queue_syscall(vcpu); + } else { + /* + * Radix guests can not run PR KVM or nested HV + * hash guests which might run PR KVM, so this + * is always a privilege fault. Send a program + * check to guest kernel. + */ + kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV); + } + r = RESUME_GUEST; + break; + } + /* + * hcall - gather args and set exit_reason. This will next be + * handled by kvmppc_pseries_do_hcall which may be able to deal + * with it and resume guest, or may punt to userspace. + */ run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3); for (i = 0; i < 9; ++i) run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i); @@ -1419,20 +1564,102 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * We get these next two if the guest accesses a page which it thinks * it has mapped but which is not actually present, either because * it is for an emulated I/O device or because the corresonding - * host page has been paged out. Any other HDSI/HISI interrupts - * have been handled already. + * host page has been paged out. + * + * Any other HDSI/HISI interrupts have been handled already for P7/8 + * guests. For POWER9 hash guests not using rmhandlers, basic hash + * fault handling is done here. */ - case BOOK3S_INTERRUPT_H_DATA_STORAGE: - r = RESUME_PAGE_FAULT; + case BOOK3S_INTERRUPT_H_DATA_STORAGE: { + unsigned long vsid; + long err; + + if (vcpu->arch.fault_dsisr == HDSISR_CANARY) { + r = RESUME_GUEST; /* Just retry if it's the canary */ + break; + } + + if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) { + /* + * Radix doesn't require anything, and pre-ISAv3.0 hash + * already attempted to handle this in rmhandlers. The + * hash fault handling below is v3 only (it uses ASDR + * via fault_gpa). + */ + r = RESUME_PAGE_FAULT; + break; + } + + if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) { + kvmppc_core_queue_data_storage(vcpu, + vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); + r = RESUME_GUEST; + break; + } + + if (!(vcpu->arch.shregs.msr & MSR_DR)) + vsid = vcpu->kvm->arch.vrma_slb_v; + else + vsid = vcpu->arch.fault_gpa; + + err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar, + vsid, vcpu->arch.fault_dsisr, true); + if (err == 0) { + r = RESUME_GUEST; + } else if (err == -1 || err == -2) { + r = RESUME_PAGE_FAULT; + } else { + kvmppc_core_queue_data_storage(vcpu, + vcpu->arch.fault_dar, err); + r = RESUME_GUEST; + } break; - case BOOK3S_INTERRUPT_H_INST_STORAGE: + } + case BOOK3S_INTERRUPT_H_INST_STORAGE: { + unsigned long vsid; + long err; + vcpu->arch.fault_dar = kvmppc_get_pc(vcpu); vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr & DSISR_SRR1_MATCH_64S; - if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE) - vcpu->arch.fault_dsisr |= DSISR_ISSTORE; - r = RESUME_PAGE_FAULT; + if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) { + /* + * Radix doesn't require anything, and pre-ISAv3.0 hash + * already attempted to handle this in rmhandlers. The + * hash fault handling below is v3 only (it uses ASDR + * via fault_gpa). + */ + if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE) + vcpu->arch.fault_dsisr |= DSISR_ISSTORE; + r = RESUME_PAGE_FAULT; + break; + } + + if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) { + kvmppc_core_queue_inst_storage(vcpu, + vcpu->arch.fault_dsisr); + r = RESUME_GUEST; + break; + } + + if (!(vcpu->arch.shregs.msr & MSR_IR)) + vsid = vcpu->kvm->arch.vrma_slb_v; + else + vsid = vcpu->arch.fault_gpa; + + err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar, + vsid, vcpu->arch.fault_dsisr, false); + if (err == 0) { + r = RESUME_GUEST; + } else if (err == -1) { + r = RESUME_PAGE_FAULT; + } else { + kvmppc_core_queue_inst_storage(vcpu, err); + r = RESUME_GUEST; + } break; + } + /* * This occurs if the guest executes an illegal instruction. * If the guest debug is disabled, generate a program interrupt @@ -1593,6 +1820,23 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) if (!xics_on_xive()) kvmppc_xics_rm_complete(vcpu, 0); break; + case BOOK3S_INTERRUPT_SYSCALL: + { + unsigned long req = kvmppc_get_gpr(vcpu, 3); + + /* + * The H_RPT_INVALIDATE hcalls issued by nested + * guests for process-scoped invalidations when + * GTSE=0, are handled here in L0. + */ + if (req == H_RPT_INVALIDATE) { + r = kvmppc_nested_h_rpt_invalidate(vcpu); + break; + } + + r = RESUME_HOST; + break; + } default: r = RESUME_HOST; break; @@ -1654,6 +1898,14 @@ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr) lpcr &= ~LPCR_AIL; if ((lpcr & LPCR_AIL) != LPCR_AIL_3) lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */ + /* + * On some POWER9s we force AIL off for radix guests to prevent + * executing in MSR[HV]=1 mode with the MMU enabled and PIDR set to + * guest, which can result in Q0 translations with LPID=0 PID=PIDR to + * be cached, which the host TLB management does not expect. + */ + if (kvm_is_radix(kvm) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) + lpcr &= ~LPCR_AIL; /* * On POWER9, allow userspace to enable large decrementer for the @@ -2233,7 +2485,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, */ static int threads_per_vcore(struct kvm *kvm) { - if (kvm->arch.threads_indep) + if (cpu_has_feature(CPU_FTR_ARCH_300)) return 1; return threads_per_subcore; } @@ -2657,7 +2909,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) cpumask_t *cpu_in_guest; int i; - cpu = cpu_first_thread_sibling(cpu); + cpu = cpu_first_tlb_thread_sibling(cpu); if (nested) { cpumask_set_cpu(cpu, &nested->need_tlb_flush); cpu_in_guest = &nested->cpu_in_guest; @@ -2671,9 +2923,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) * the other side is the first smp_mb() in kvmppc_run_core(). */ smp_mb(); - for (i = 0; i < threads_per_core; ++i) - if (cpumask_test_cpu(cpu + i, cpu_in_guest)) - smp_call_function_single(cpu + i, do_nothing, NULL, 1); + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); + i += cpu_tlb_thread_sibling_step()) + if (cpumask_test_cpu(i, cpu_in_guest)) + smp_call_function_single(i, do_nothing, NULL, 1); } static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) @@ -2704,8 +2957,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) */ if (prev_cpu != pcpu) { if (prev_cpu >= 0 && - cpu_first_thread_sibling(prev_cpu) != - cpu_first_thread_sibling(pcpu)) + cpu_first_tlb_thread_sibling(prev_cpu) != + cpu_first_tlb_thread_sibling(pcpu)) radix_flush_cpu(kvm, prev_cpu, vcpu); if (nested) nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu; @@ -2967,9 +3220,6 @@ static void prepare_threads(struct kvmppc_vcore *vc) for_each_runnable_thread(i, vcpu, vc) { if (signal_pending(vcpu->arch.run_task)) vcpu->arch.ret = -EINTR; - else if (no_mixing_hpt_and_radix && - kvm_is_radix(vc->kvm) != radix_enabled()) - vcpu->arch.ret = -EINVAL; else if (vcpu->arch.vpa.update_pending || vcpu->arch.slb_shadow.update_pending || vcpu->arch.dtl.update_pending) @@ -3176,6 +3426,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) int trap; bool is_power8; + if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300))) + return; + /* * Remove from the list any threads that have a signal pending * or need a VPA update done @@ -3203,9 +3456,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * Make sure we are running on primary threads, and that secondary * threads are offline. Also check if the number of threads in this * guest are greater than the current system threads per guest. - * On POWER9, we need to be not in independent-threads mode if - * this is a HPT guest on a radix host machine where the - * CPU threads may not be in different MMU modes. */ if ((controlled_threads > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { @@ -3230,18 +3480,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) collect_piggybacks(&core_info, target_threads); /* - * On radix, arrange for TLB flushing if necessary. - * This has to be done before disabling interrupts since - * it uses smp_call_function(). - */ - pcpu = smp_processor_id(); - if (kvm_is_radix(vc->kvm)) { - for (sub = 0; sub < core_info.n_subcores; ++sub) - for_each_runnable_thread(i, vcpu, core_info.vc[sub]) - kvmppc_prepare_radix_vcpu(vcpu, pcpu); - } - - /* * Hard-disable interrupts, and check resched flag and signals. * If we need to reschedule or deliver a signal, clean up * and return without going into the guest(s). @@ -3273,8 +3511,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) cmd_bit = stat_bit = 0; split = core_info.n_subcores; sip = NULL; - is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S) - && !cpu_has_feature(CPU_FTR_ARCH_300); + is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S); if (split > 1) { sip = &split_info; @@ -3478,184 +3715,113 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) trace_kvmppc_run_core(vc, 1); } -/* - * Load up hypervisor-mode registers on P9. - */ -static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, - unsigned long lpcr) +static void load_spr_state(struct kvm_vcpu *vcpu) { - struct kvmppc_vcore *vc = vcpu->arch.vcore; - s64 hdec; - u64 tb, purr, spurr; - int trap; - unsigned long host_hfscr = mfspr(SPRN_HFSCR); - unsigned long host_ciabr = mfspr(SPRN_CIABR); - unsigned long host_dawr0 = mfspr(SPRN_DAWR0); - unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0); - unsigned long host_psscr = mfspr(SPRN_PSSCR); - unsigned long host_pidr = mfspr(SPRN_PID); - unsigned long host_dawr1 = 0; - unsigned long host_dawrx1 = 0; - - if (cpu_has_feature(CPU_FTR_DAWR1)) { - host_dawr1 = mfspr(SPRN_DAWR1); - host_dawrx1 = mfspr(SPRN_DAWRX1); - } + mtspr(SPRN_DSCR, vcpu->arch.dscr); + mtspr(SPRN_IAMR, vcpu->arch.iamr); + mtspr(SPRN_PSPB, vcpu->arch.pspb); + mtspr(SPRN_FSCR, vcpu->arch.fscr); + mtspr(SPRN_TAR, vcpu->arch.tar); + mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); + mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); + mtspr(SPRN_BESCR, vcpu->arch.bescr); + mtspr(SPRN_WORT, vcpu->arch.wort); + mtspr(SPRN_TIDR, vcpu->arch.tid); + mtspr(SPRN_AMR, vcpu->arch.amr); + mtspr(SPRN_UAMOR, vcpu->arch.uamor); /* - * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0, - * so set HDICE before writing HDEC. + * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI] + * clear (or hstate set appropriately to catch those registers + * being clobbered if we take a MCE or SRESET), so those are done + * later. */ - mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr | LPCR_HDICE); - isync(); - - hdec = time_limit - mftb(); - if (hdec < 0) { - mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr); - isync(); - return BOOK3S_INTERRUPT_HV_DECREMENTER; - } - mtspr(SPRN_HDEC, hdec); - - if (vc->tb_offset) { - u64 new_tb = mftb() + vc->tb_offset; - mtspr(SPRN_TBU40, new_tb); - tb = mftb(); - if ((tb & 0xffffff) < (new_tb & 0xffffff)) - mtspr(SPRN_TBU40, new_tb + 0x1000000); - vc->tb_offset_applied = vc->tb_offset; - } - - if (vc->pcr) - mtspr(SPRN_PCR, vc->pcr | PCR_MASK); - mtspr(SPRN_DPDES, vc->dpdes); - mtspr(SPRN_VTB, vc->vtb); - - local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR); - local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR); - mtspr(SPRN_PURR, vcpu->arch.purr); - mtspr(SPRN_SPURR, vcpu->arch.spurr); - - if (dawr_enabled()) { - mtspr(SPRN_DAWR0, vcpu->arch.dawr0); - mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); - if (cpu_has_feature(CPU_FTR_DAWR1)) { - mtspr(SPRN_DAWR1, vcpu->arch.dawr1); - mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); - } - } - mtspr(SPRN_CIABR, vcpu->arch.ciabr); - mtspr(SPRN_IC, vcpu->arch.ic); - mtspr(SPRN_PID, vcpu->arch.pid); - mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC | - (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); - - mtspr(SPRN_HFSCR, vcpu->arch.hfscr); - - mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0); - mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1); - mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2); - mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3); - - mtspr(SPRN_AMOR, ~0UL); - - mtspr(SPRN_LPCR, lpcr); - isync(); - - kvmppc_xive_push_vcpu(vcpu); - - mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); - mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1); - - trap = __kvmhv_vcpu_entry_p9(vcpu); - - /* Advance host PURR/SPURR by the amount used by guest */ - purr = mfspr(SPRN_PURR); - spurr = mfspr(SPRN_SPURR); - mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr + - purr - vcpu->arch.purr); - mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr + - spurr - vcpu->arch.spurr); - vcpu->arch.purr = purr; - vcpu->arch.spurr = spurr; + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1); +} - vcpu->arch.ic = mfspr(SPRN_IC); - vcpu->arch.pid = mfspr(SPRN_PID); - vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS; +static void store_spr_state(struct kvm_vcpu *vcpu) +{ + vcpu->arch.ctrl = mfspr(SPRN_CTRLF); - vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0); - vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1); - vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); - vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); + vcpu->arch.iamr = mfspr(SPRN_IAMR); + vcpu->arch.pspb = mfspr(SPRN_PSPB); + vcpu->arch.fscr = mfspr(SPRN_FSCR); + vcpu->arch.tar = mfspr(SPRN_TAR); + vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); + vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); + vcpu->arch.bescr = mfspr(SPRN_BESCR); + vcpu->arch.wort = mfspr(SPRN_WORT); + vcpu->arch.tid = mfspr(SPRN_TIDR); + vcpu->arch.amr = mfspr(SPRN_AMR); + vcpu->arch.uamor = mfspr(SPRN_UAMOR); + vcpu->arch.dscr = mfspr(SPRN_DSCR); +} - /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ - mtspr(SPRN_PSSCR, host_psscr | - (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); - mtspr(SPRN_HFSCR, host_hfscr); - mtspr(SPRN_CIABR, host_ciabr); - mtspr(SPRN_DAWR0, host_dawr0); - mtspr(SPRN_DAWRX0, host_dawrx0); - if (cpu_has_feature(CPU_FTR_DAWR1)) { - mtspr(SPRN_DAWR1, host_dawr1); - mtspr(SPRN_DAWRX1, host_dawrx1); - } - mtspr(SPRN_PID, host_pidr); +/* + * Privileged (non-hypervisor) host registers to save. + */ +struct p9_host_os_sprs { + unsigned long dscr; + unsigned long tidr; + unsigned long iamr; + unsigned long amr; + unsigned long fscr; +}; - /* - * Since this is radix, do a eieio; tlbsync; ptesync sequence in - * case we interrupted the guest between a tlbie and a ptesync. - */ - asm volatile("eieio; tlbsync; ptesync"); +static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs) +{ + host_os_sprs->dscr = mfspr(SPRN_DSCR); + host_os_sprs->tidr = mfspr(SPRN_TIDR); + host_os_sprs->iamr = mfspr(SPRN_IAMR); + host_os_sprs->amr = mfspr(SPRN_AMR); + host_os_sprs->fscr = mfspr(SPRN_FSCR); +} - /* - * cp_abort is required if the processor supports local copy-paste - * to clear the copy buffer that was under control of the guest. - */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - asm volatile(PPC_CP_ABORT); +/* vcpu guest regs must already be saved */ +static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + mtspr(SPRN_PSPB, 0); + mtspr(SPRN_WORT, 0); + mtspr(SPRN_UAMOR, 0); - mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */ - isync(); + mtspr(SPRN_DSCR, host_os_sprs->dscr); + mtspr(SPRN_TIDR, host_os_sprs->tidr); + mtspr(SPRN_IAMR, host_os_sprs->iamr); - vc->dpdes = mfspr(SPRN_DPDES); - vc->vtb = mfspr(SPRN_VTB); - mtspr(SPRN_DPDES, 0); - if (vc->pcr) - mtspr(SPRN_PCR, PCR_MASK); + if (host_os_sprs->amr != vcpu->arch.amr) + mtspr(SPRN_AMR, host_os_sprs->amr); - if (vc->tb_offset_applied) { - u64 new_tb = mftb() - vc->tb_offset_applied; - mtspr(SPRN_TBU40, new_tb); - tb = mftb(); - if ((tb & 0xffffff) < (new_tb & 0xffffff)) - mtspr(SPRN_TBU40, new_tb + 0x1000000); - vc->tb_offset_applied = 0; - } + if (host_os_sprs->fscr != vcpu->arch.fscr) + mtspr(SPRN_FSCR, host_os_sprs->fscr); - mtspr(SPRN_HDEC, 0x7fffffff); - mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr); + /* Save guest CTRL register, set runlatch to 1 */ + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, 1); +} - return trap; +static inline bool hcall_is_xics(unsigned long req) +{ + return req == H_EOI || req == H_CPPR || req == H_IPI || + req == H_IPOLL || req == H_XIRR || req == H_XIRR_X; } /* - * Virtual-mode guest entry for POWER9 and later when the host and - * guest are both using the radix MMU. The LPIDR has already been set. + * Guest entry for POWER9 and later CPUs. */ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) { struct kvmppc_vcore *vc = vcpu->arch.vcore; - unsigned long host_dscr = mfspr(SPRN_DSCR); - unsigned long host_tidr = mfspr(SPRN_TIDR); - unsigned long host_iamr = mfspr(SPRN_IAMR); - unsigned long host_amr = mfspr(SPRN_AMR); - unsigned long host_fscr = mfspr(SPRN_FSCR); + struct p9_host_os_sprs host_os_sprs; s64 dec; u64 tb; int trap, save_pmu; + WARN_ON_ONCE(vcpu->arch.ceded); + dec = mfspr(SPRN_DEC); tb = mftb(); if (dec < 0) @@ -3664,7 +3830,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, if (local_paca->kvm_hstate.dec_expires < time_limit) time_limit = local_paca->kvm_hstate.dec_expires; - vcpu->arch.ceded = 0; + save_p9_host_os_sprs(&host_os_sprs); kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */ @@ -3693,24 +3859,20 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, #endif mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); - mtspr(SPRN_DSCR, vcpu->arch.dscr); - mtspr(SPRN_IAMR, vcpu->arch.iamr); - mtspr(SPRN_PSPB, vcpu->arch.pspb); - mtspr(SPRN_FSCR, vcpu->arch.fscr); - mtspr(SPRN_TAR, vcpu->arch.tar); - mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); - mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); - mtspr(SPRN_BESCR, vcpu->arch.bescr); - mtspr(SPRN_WORT, vcpu->arch.wort); - mtspr(SPRN_TIDR, vcpu->arch.tid); - mtspr(SPRN_DAR, vcpu->arch.shregs.dar); - mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); - mtspr(SPRN_AMR, vcpu->arch.amr); - mtspr(SPRN_UAMOR, vcpu->arch.uamor); - - if (!(vcpu->arch.ctrl & 1)) - mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1); + load_spr_state(vcpu); + /* + * When setting DEC, we must always deal with irq_work_raise via NMI vs + * setting DEC. The problem occurs right as we switch into guest mode + * if a NMI hits and sets pending work and sets DEC, then that will + * apply to the guest and not bring us back to the host. + * + * irq_work_raise could check a flag (or possibly LPCR[HDICE] for + * example) and set HDEC to 1? That wouldn't solve the nested hv + * case which needs to abort the hcall or zero the time limit. + * + * XXX: Another day's problem. + */ mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb()); if (kvmhv_on_pseries()) { @@ -3718,7 +3880,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, * We need to save and restore the guest visible part of the * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor * doesn't do this for us. Note only required if pseries since - * this is done in kvmhv_load_hv_regs_and_go() below otherwise. + * this is done in kvmhv_vcpu_entry_p9() below otherwise. */ unsigned long host_psscr; /* call our hypervisor to load up HV regs and go */ @@ -3738,6 +3900,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, hvregs.vcpu_token = vcpu->vcpu_id; } hvregs.hdec_expiry = time_limit; + mtspr(SPRN_DAR, vcpu->arch.shregs.dar); + mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), __pa(&vcpu->arch.regs)); kvmhv_restore_hv_return_state(vcpu, &hvregs); @@ -3750,15 +3914,41 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, /* H_CEDE has to be handled now, not later */ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && kvmppc_get_gpr(vcpu, 3) == H_CEDE) { - kvmppc_nested_cede(vcpu); + kvmppc_cede(vcpu); kvmppc_set_gpr(vcpu, 3, 0); trap = 0; } } else { - trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr); + kvmppc_xive_push_vcpu(vcpu); + trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr); + if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && + !(vcpu->arch.shregs.msr & MSR_PR)) { + unsigned long req = kvmppc_get_gpr(vcpu, 3); + + /* H_CEDE has to be handled now, not later */ + if (req == H_CEDE) { + kvmppc_cede(vcpu); + kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */ + kvmppc_set_gpr(vcpu, 3, 0); + trap = 0; + + /* XICS hcalls must be handled before xive is pulled */ + } else if (hcall_is_xics(req)) { + int ret; + + ret = kvmppc_xive_xics_hcall(vcpu, req); + if (ret != H_TOO_HARD) { + kvmppc_set_gpr(vcpu, 3, ret); + trap = 0; + } + } + } + kvmppc_xive_pull_vcpu(vcpu); + + if (kvm_is_radix(vcpu->kvm)) + vcpu->arch.slb_max = 0; } - vcpu->arch.slb_max = 0; dec = mfspr(SPRN_DEC); if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ dec = (s32) dec; @@ -3766,36 +3956,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.dec_expires = dec + tb; vcpu->cpu = -1; vcpu->arch.thread_cpu = -1; - /* Save guest CTRL register, set runlatch to 1 */ - vcpu->arch.ctrl = mfspr(SPRN_CTRLF); - if (!(vcpu->arch.ctrl & 1)) - mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1); - - vcpu->arch.iamr = mfspr(SPRN_IAMR); - vcpu->arch.pspb = mfspr(SPRN_PSPB); - vcpu->arch.fscr = mfspr(SPRN_FSCR); - vcpu->arch.tar = mfspr(SPRN_TAR); - vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); - vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); - vcpu->arch.bescr = mfspr(SPRN_BESCR); - vcpu->arch.wort = mfspr(SPRN_WORT); - vcpu->arch.tid = mfspr(SPRN_TIDR); - vcpu->arch.amr = mfspr(SPRN_AMR); - vcpu->arch.uamor = mfspr(SPRN_UAMOR); - vcpu->arch.dscr = mfspr(SPRN_DSCR); - mtspr(SPRN_PSPB, 0); - mtspr(SPRN_WORT, 0); - mtspr(SPRN_UAMOR, 0); - mtspr(SPRN_DSCR, host_dscr); - mtspr(SPRN_TIDR, host_tidr); - mtspr(SPRN_IAMR, host_iamr); + store_spr_state(vcpu); - if (host_amr != vcpu->arch.amr) - mtspr(SPRN_AMR, host_amr); - - if (host_fscr != vcpu->arch.fscr) - mtspr(SPRN_FSCR, host_fscr); + restore_p9_host_os_sprs(vcpu, &host_os_sprs); msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); store_fp_state(&vcpu->arch.fp); @@ -3825,6 +3989,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vc->in_guest = 0; mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb()); + /* We may have raced with new irq work */ + if (test_irq_work_pending()) + set_dec(1); mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); kvmhv_load_host_pmu(); @@ -4014,7 +4181,6 @@ out: /* * This never fails for a radix guest, as none of the operations it does * for a radix guest can fail or have a way to report failure. - * kvmhv_run_single_vcpu() relies on this fact. */ static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu) { @@ -4170,7 +4336,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, { struct kvm_run *run = vcpu->run; int trap, r, pcpu; - int srcu_idx, lpid; + int srcu_idx; struct kvmppc_vcore *vc; struct kvm *kvm = vcpu->kvm; struct kvm_nested_guest *nested = vcpu->arch.nested; @@ -4193,8 +4359,15 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, vc->runner = vcpu; /* See if the MMU is ready to go */ - if (!kvm->arch.mmu_ready) - kvmhv_setup_mmu(vcpu); + if (!kvm->arch.mmu_ready) { + r = kvmhv_setup_mmu(vcpu); + if (r) { + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + run->fail_entry.hardware_entry_failure_reason = 0; + vcpu->arch.ret = r; + return r; + } + } if (need_resched()) cond_resched(); @@ -4207,7 +4380,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, preempt_disable(); pcpu = smp_processor_id(); vc->pcpu = pcpu; - kvmppc_prepare_radix_vcpu(vcpu, pcpu); + if (kvm_is_radix(kvm)) + kvmppc_prepare_radix_vcpu(vcpu, pcpu); local_irq_disable(); hard_irq_disable(); @@ -4244,13 +4418,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, vc->vcore_state = VCORE_RUNNING; trace_kvmppc_run_core(vc, 0); - if (cpu_has_feature(CPU_FTR_HVMODE)) { - lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; - mtspr(SPRN_LPID, lpid); - isync(); - kvmppc_check_need_tlb_flush(kvm, pcpu, nested); - } - guest_enter_irqoff(); srcu_idx = srcu_read_lock(&kvm->srcu); @@ -4269,11 +4436,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, srcu_read_unlock(&kvm->srcu, srcu_idx); - if (cpu_has_feature(CPU_FTR_HVMODE)) { - mtspr(SPRN_LPID, kvm->arch.host_lpid); - isync(); - } - set_irq_happened(trap); kvmppc_set_host_core(pcpu); @@ -4419,19 +4581,23 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; do { - /* - * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu - * path, which also handles hash and dependent threads mode. - */ - if (kvm->arch.threads_indep && kvm_is_radix(kvm) && - !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) + if (cpu_has_feature(CPU_FTR_ARCH_300)) r = kvmhv_run_single_vcpu(vcpu, ~(u64)0, vcpu->arch.vcore->lpcr); else r = kvmppc_run_vcpu(vcpu); - if (run->exit_reason == KVM_EXIT_PAPR_HCALL && - !(vcpu->arch.shregs.msr & MSR_PR)) { + if (run->exit_reason == KVM_EXIT_PAPR_HCALL) { + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) { + /* + * These should have been caught reflected + * into the guest by now. Final sanity check: + * don't allow userspace to execute hcalls in + * the hypervisor. + */ + r = RESUME_GUEST; + continue; + } trace_kvm_hcall_enter(vcpu); r = kvmppc_pseries_do_hcall(vcpu); trace_kvm_hcall_exit(vcpu, r); @@ -4455,7 +4621,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) mtspr(SPRN_EBBRR, ebb_regs[1]); mtspr(SPRN_BESCR, ebb_regs[2]); mtspr(SPRN_TAR, user_tar); - mtspr(SPRN_FSCR, current->thread.fscr); } mtspr(SPRN_VRSAVE, user_vrsave); @@ -5039,18 +5204,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) /* * Track that we now have a HV mode VM active. This blocks secondary * CPU threads from coming online. - * On POWER9, we only need to do this if the "indep_threads_mode" - * module parameter has been set to N. */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) { - pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n"); - kvm->arch.threads_indep = true; - } else { - kvm->arch.threads_indep = indep_threads_mode; - } - } - if (!kvm->arch.threads_indep) + if (!cpu_has_feature(CPU_FTR_ARCH_300)) kvm_hv_vm_activated(); /* @@ -5091,7 +5246,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) { debugfs_remove_recursive(kvm->arch.debugfs_dir); - if (!kvm->arch.threads_indep) + if (!cpu_has_feature(CPU_FTR_ARCH_300)) kvm_hv_vm_deactivated(); kvmppc_free_vcores(kvm); @@ -5512,7 +5667,9 @@ static int kvmhv_enable_nested(struct kvm *kvm) { if (!nested) return -EPERM; - if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix) + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -ENODEV; + if (!radix_enabled()) return -ENODEV; /* kvm == NULL means the caller is testing if the capability exists */ @@ -5675,11 +5832,25 @@ static int kvmhv_enable_dawr1(struct kvm *kvm) static bool kvmppc_hash_v3_possible(void) { - if (radix_enabled() && no_mixing_hpt_and_radix) + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return false; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) return false; - return cpu_has_feature(CPU_FTR_ARCH_300) && - cpu_has_feature(CPU_FTR_HVMODE); + /* + * POWER9 chips before version 2.02 can't have some threads in + * HPT mode and some in radix mode on the same core. + */ + if (radix_enabled()) { + unsigned int pvr = mfspr(SPRN_PVR); + if ((pvr >> 16) == PVR_POWER9 && + (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) || + ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101))) + return false; + } + + return true; } static struct kvmppc_ops kvm_ops_hv = { @@ -5823,18 +5994,6 @@ static int kvmppc_book3s_init_hv(void) if (kvmppc_radix_possible()) r = kvmppc_radix_init(); - /* - * POWER9 chips before version 2.02 can't have some threads in - * HPT mode and some in radix mode on the same core. - */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - unsigned int pvr = mfspr(SPRN_PVR); - if ((pvr >> 16) == PVR_POWER9 && - (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) || - ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101))) - no_mixing_hpt_and_radix = true; - } - r = kvmppc_uvmem_init(); if (r < 0) pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7a0e33a9c980..be8ef1c5b1bf 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -35,21 +35,6 @@ #include "book3s_xive.h" /* - * The XIVE module will populate these when it loads - */ -unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu); -unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server); -int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, - unsigned long mfrr); -int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr); -int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); -EXPORT_SYMBOL_GPL(__xive_vm_h_xirr); -EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll); -EXPORT_SYMBOL_GPL(__xive_vm_h_ipi); -EXPORT_SYMBOL_GPL(__xive_vm_h_cppr); -EXPORT_SYMBOL_GPL(__xive_vm_h_eoi); - -/* * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) * should be power of 2. */ @@ -196,16 +181,9 @@ int kvmppc_hwrng_present(void) } EXPORT_SYMBOL_GPL(kvmppc_hwrng_present); -long kvmppc_h_random(struct kvm_vcpu *vcpu) +long kvmppc_rm_h_random(struct kvm_vcpu *vcpu) { - int r; - - /* Only need to do the expensive mfmsr() on radix */ - if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR)) - r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]); - else - r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]); - if (r) + if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4])) return H_SUCCESS; return H_HARDWARE; @@ -221,15 +199,6 @@ void kvmhv_rm_send_ipi(int cpu) void __iomem *xics_phys; unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); - /* For a nested hypervisor, use the XICS via hcall */ - if (kvmhv_on_pseries()) { - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu), - IPI_PRIORITY); - return; - } - /* On POWER9 we can use msgsnd for any destination cpu. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) { msg |= get_hard_smp_processor_id(cpu); @@ -442,19 +411,12 @@ static long kvmppc_read_one_intr(bool *again) return 1; /* Now read the interrupt from the ICP */ - if (kvmhv_on_pseries()) { - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF); - xirr = cpu_to_be32(retbuf[0]); - } else { - xics_phys = local_paca->kvm_hstate.xics_phys; - rc = 0; - if (!xics_phys) - rc = opal_int_get_xirr(&xirr, false); - else - xirr = __raw_rm_readl(xics_phys + XICS_XIRR); - } + xics_phys = local_paca->kvm_hstate.xics_phys; + rc = 0; + if (!xics_phys) + rc = opal_int_get_xirr(&xirr, false); + else + xirr = __raw_rm_readl(xics_phys + XICS_XIRR); if (rc < 0) return 1; @@ -483,13 +445,7 @@ static long kvmppc_read_one_intr(bool *again) */ if (xisr == XICS_IPI) { rc = 0; - if (kvmhv_on_pseries()) { - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - plpar_hcall_raw(H_IPI, retbuf, - hard_smp_processor_id(), 0xff); - plpar_hcall_raw(H_EOI, retbuf, h_xirr); - } else if (xics_phys) { + if (xics_phys) { __raw_rm_writeb(0xff, xics_phys + XICS_MFRR); __raw_rm_writel(xirr, xics_phys + XICS_XIRR); } else { @@ -515,13 +471,7 @@ static long kvmppc_read_one_intr(bool *again) /* We raced with the host, * we need to resend that IPI, bummer */ - if (kvmhv_on_pseries()) { - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - plpar_hcall_raw(H_IPI, retbuf, - hard_smp_processor_id(), - IPI_PRIORITY); - } else if (xics_phys) + if (xics_phys) __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR); else @@ -541,22 +491,13 @@ static long kvmppc_read_one_intr(bool *again) } #ifdef CONFIG_KVM_XICS -static inline bool is_rm(void) -{ - return !(mfmsr() & MSR_DR); -} - unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xics_on_xive()) { - if (is_rm()) - return xive_rm_h_xirr(vcpu); - if (unlikely(!__xive_vm_h_xirr)) - return H_NOT_AVAILABLE; - return __xive_vm_h_xirr(vcpu); - } else + if (xics_on_xive()) + return xive_rm_h_xirr(vcpu); + else return xics_rm_h_xirr(vcpu); } @@ -565,13 +506,9 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu) if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; vcpu->arch.regs.gpr[5] = get_tb(); - if (xics_on_xive()) { - if (is_rm()) - return xive_rm_h_xirr(vcpu); - if (unlikely(!__xive_vm_h_xirr)) - return H_NOT_AVAILABLE; - return __xive_vm_h_xirr(vcpu); - } else + if (xics_on_xive()) + return xive_rm_h_xirr(vcpu); + else return xics_rm_h_xirr(vcpu); } @@ -579,13 +516,9 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xics_on_xive()) { - if (is_rm()) - return xive_rm_h_ipoll(vcpu, server); - if (unlikely(!__xive_vm_h_ipoll)) - return H_NOT_AVAILABLE; - return __xive_vm_h_ipoll(vcpu, server); - } else + if (xics_on_xive()) + return xive_rm_h_ipoll(vcpu, server); + else return H_TOO_HARD; } @@ -594,13 +527,9 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xics_on_xive()) { - if (is_rm()) - return xive_rm_h_ipi(vcpu, server, mfrr); - if (unlikely(!__xive_vm_h_ipi)) - return H_NOT_AVAILABLE; - return __xive_vm_h_ipi(vcpu, server, mfrr); - } else + if (xics_on_xive()) + return xive_rm_h_ipi(vcpu, server, mfrr); + else return xics_rm_h_ipi(vcpu, server, mfrr); } @@ -608,13 +537,9 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xics_on_xive()) { - if (is_rm()) - return xive_rm_h_cppr(vcpu, cppr); - if (unlikely(!__xive_vm_h_cppr)) - return H_NOT_AVAILABLE; - return __xive_vm_h_cppr(vcpu, cppr); - } else + if (xics_on_xive()) + return xive_rm_h_cppr(vcpu, cppr); + else return xics_rm_h_cppr(vcpu, cppr); } @@ -622,13 +547,9 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xics_on_xive()) { - if (is_rm()) - return xive_rm_h_eoi(vcpu, xirr); - if (unlikely(!__xive_vm_h_eoi)) - return H_NOT_AVAILABLE; - return __xive_vm_h_eoi(vcpu, xirr); - } else + if (xics_on_xive()) + return xive_rm_h_eoi(vcpu, xirr); + else return xics_rm_h_eoi(vcpu, xirr); } #endif /* CONFIG_KVM_XICS */ @@ -800,7 +721,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, * Thus we make all 4 threads use the same bit. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) - pcpu = cpu_first_thread_sibling(pcpu); + pcpu = cpu_first_tlb_thread_sibling(pcpu); if (nested) need_tlb_flush = &nested->need_tlb_flush; diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 327417d79eac..4444f83cb133 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -58,7 +58,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* * Put whatever is in the decrementer into the * hypervisor decrementer. - * Because of a hardware deviation in P8 and P9, + * Because of a hardware deviation in P8, * we need to set LPCR[HDICE] before writing HDEC. */ ld r5, HSTATE_KVM_VCORE(r13) @@ -67,15 +67,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ori r8, r9, LPCR_HDICE mtspr SPRN_LPCR, r8 isync - andis. r0, r9, LPCR_LD@h mfspr r8,SPRN_DEC mftb r7 -BEGIN_FTR_SECTION - /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */ - bne 32f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) extsw r8,r8 -32: mtspr SPRN_HDEC,r8 + mtspr SPRN_HDEC,r8 add r8,r8,r7 std r8,HSTATE_DECEXP(r13) diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 60724f674421..8543ad538b0c 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -19,6 +19,7 @@ #include <asm/pgalloc.h> #include <asm/pte-walk.h> #include <asm/reg.h> +#include <asm/plpar_wrappers.h> static struct patb_entry *pseries_partition_tb; @@ -53,7 +54,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) hr->dawrx1 = vcpu->arch.dawrx1; } -static void byteswap_pt_regs(struct pt_regs *regs) +/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */ +static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs) { unsigned long *addr = (unsigned long *) regs; @@ -467,8 +469,15 @@ static void kvmhv_flush_lpid(unsigned int lpid) return; } - rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1), - lpid, TLBIEL_INVAL_SET_LPID); + if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) + rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1), + lpid, TLBIEL_INVAL_SET_LPID); + else + rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU, + H_RPTI_TYPE_NESTED | + H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | + H_RPTI_TYPE_PAT, + H_RPTI_PAGE_ALL, 0, -1UL); if (rc) pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc); } @@ -1214,6 +1223,113 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu) return H_SUCCESS; } +static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu, + unsigned long lpid, unsigned long ric) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_nested_guest *gp; + + gp = kvmhv_get_nested(kvm, lpid, false); + if (gp) { + kvmhv_emulate_tlbie_lpid(vcpu, gp, ric); + kvmhv_put_nested(gp); + } + return H_SUCCESS; +} + +/* + * Number of pages above which we invalidate the entire LPID rather than + * flush individual pages. + */ +static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33; + +static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu, + unsigned long lpid, + unsigned long pg_sizes, + unsigned long start, + unsigned long end) +{ + int ret = H_P4; + unsigned long addr, nr_pages; + struct mmu_psize_def *def; + unsigned long psize, ap, page_size; + bool flush_lpid; + + for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { + def = &mmu_psize_defs[psize]; + if (!(pg_sizes & def->h_rpt_pgsize)) + continue; + + nr_pages = (end - start) >> def->shift; + flush_lpid = nr_pages > tlb_range_flush_page_ceiling; + if (flush_lpid) + return do_tlb_invalidate_nested_all(vcpu, lpid, + RIC_FLUSH_TLB); + addr = start; + ap = mmu_get_ap(psize); + page_size = 1UL << def->shift; + do { + ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, + get_epn(addr)); + if (ret) + return H_P4; + addr += page_size; + } while (addr < end); + } + return ret; +} + +/* + * Performs partition-scoped invalidations for nested guests + * as part of H_RPT_INVALIDATE hcall. + */ +long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid, + unsigned long type, unsigned long pg_sizes, + unsigned long start, unsigned long end) +{ + /* + * If L2 lpid isn't valid, we need to return H_PARAMETER. + * + * However, nested KVM issues a L2 lpid flush call when creating + * partition table entries for L2. This happens even before the + * corresponding shadow lpid is created in HV which happens in + * H_ENTER_NESTED call. Since we can't differentiate this case from + * the invalid case, we ignore such flush requests and return success. + */ + if (!kvmhv_find_nested(vcpu->kvm, lpid)) + return H_SUCCESS; + + /* + * A flush all request can be handled by a full lpid flush only. + */ + if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL) + return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL); + + /* + * We don't need to handle a PWC flush like process table here, + * because intermediate partition scoped table in nested guest doesn't + * really have PWC. Only level we have PWC is in L0 and for nested + * invalidate at L0 we always do kvm_flush_lpid() which does + * radix__flush_all_lpid(). For range invalidate at any level, we + * are not removing the higher level page tables and hence there is + * no PWC invalidate needed. + * + * if (type & H_RPTI_TYPE_PWC) { + * ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC); + * if (ret) + * return H_P4; + * } + */ + + if (start == 0 && end == -1) + return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB); + + if (type & H_RPTI_TYPE_TLB) + return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes, + start, end); + return H_SUCCESS; +} + /* Used to convert a nested guest real address to a L1 guest real address */ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu, struct kvm_nested_guest *gp, diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c new file mode 100644 index 000000000000..83f592eadcd2 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -0,0 +1,508 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <asm/asm-prototypes.h> +#include <asm/dbell.h> +#include <asm/kvm_ppc.h> +#include <asm/ppc-opcode.h> + +#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + u64 tb = mftb() - vc->tb_offset_applied; + + vcpu->arch.cur_activity = next; + vcpu->arch.cur_tb_start = tb; +} + +static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + struct kvmhv_tb_accumulator *curr; + u64 tb = mftb() - vc->tb_offset_applied; + u64 prev_tb; + u64 delta; + u64 seq; + + curr = vcpu->arch.cur_activity; + vcpu->arch.cur_activity = next; + prev_tb = vcpu->arch.cur_tb_start; + vcpu->arch.cur_tb_start = tb; + + if (!curr) + return; + + delta = tb - prev_tb; + + seq = curr->seqcount; + curr->seqcount = seq + 1; + smp_wmb(); + curr->tb_total += delta; + if (seq == 0 || delta < curr->tb_min) + curr->tb_min = delta; + if (delta > curr->tb_max) + curr->tb_max = delta; + smp_wmb(); + curr->seqcount = seq + 2; +} + +#define start_timing(vcpu, next) __start_timing(vcpu, next) +#define end_timing(vcpu) __start_timing(vcpu, NULL) +#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next) +#else +#define start_timing(vcpu, next) do {} while (0) +#define end_timing(vcpu) do {} while (0) +#define accumulate_time(vcpu, next) do {} while (0) +#endif + +static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev) +{ + asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx)); + asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx)); +} + +static inline void mtslb(u64 slbee, u64 slbev) +{ + asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee)); +} + +static inline void clear_slb_entry(unsigned int idx) +{ + mtslb(idx, 0); +} + +static inline void slb_clear_invalidate_partition(void) +{ + clear_slb_entry(0); + asm volatile(PPC_SLBIA(6)); +} + +/* + * Malicious or buggy radix guests may have inserted SLB entries + * (only 0..3 because radix always runs with UPRT=1), so these must + * be cleared here to avoid side-channels. slbmte is used rather + * than slbia, as it won't clear cached translations. + */ +static void radix_clear_slb(void) +{ + int i; + + for (i = 0; i < 4; i++) + clear_slb_entry(i); +} + +static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr) +{ + struct kvm_nested_guest *nested = vcpu->arch.nested; + u32 lpid; + + lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; + + /* + * All the isync()s are overkill but trivially follow the ISA + * requirements. Some can likely be replaced with justification + * comment for why they are not needed. + */ + isync(); + mtspr(SPRN_LPID, lpid); + isync(); + mtspr(SPRN_LPCR, lpcr); + isync(); + mtspr(SPRN_PID, vcpu->arch.pid); + isync(); +} + +static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr) +{ + u32 lpid; + int i; + + lpid = kvm->arch.lpid; + + mtspr(SPRN_LPID, lpid); + mtspr(SPRN_LPCR, lpcr); + mtspr(SPRN_PID, vcpu->arch.pid); + + for (i = 0; i < vcpu->arch.slb_max; i++) + mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv); + + isync(); +} + +static void switch_mmu_to_host(struct kvm *kvm, u32 pid) +{ + isync(); + mtspr(SPRN_PID, pid); + isync(); + mtspr(SPRN_LPID, kvm->arch.host_lpid); + isync(); + mtspr(SPRN_LPCR, kvm->arch.host_lpcr); + isync(); + + if (!radix_enabled()) + slb_restore_bolted_realmode(); +} + +static void save_clear_host_mmu(struct kvm *kvm) +{ + if (!radix_enabled()) { + /* + * Hash host could save and restore host SLB entries to + * reduce SLB fault overheads of VM exits, but for now the + * existing code clears all entries and restores just the + * bolted ones when switching back to host. + */ + slb_clear_invalidate_partition(); + } +} + +static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu) +{ + if (kvm_is_radix(kvm)) { + radix_clear_slb(); + } else { + int i; + int nr = 0; + + /* + * This must run before switching to host (radix host can't + * access all SLBs). + */ + for (i = 0; i < vcpu->arch.slb_nr; i++) { + u64 slbee, slbev; + mfslb(i, &slbee, &slbev); + if (slbee & SLB_ESID_V) { + vcpu->arch.slb[nr].orige = slbee | i; + vcpu->arch.slb[nr].origv = slbev; + nr++; + } + } + vcpu->arch.slb_max = nr; + slb_clear_invalidate_partition(); + } +} + +int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_nested_guest *nested = vcpu->arch.nested; + struct kvmppc_vcore *vc = vcpu->arch.vcore; + s64 hdec; + u64 tb, purr, spurr; + u64 *exsave; + bool ri_set; + int trap; + unsigned long msr; + unsigned long host_hfscr; + unsigned long host_ciabr; + unsigned long host_dawr0; + unsigned long host_dawrx0; + unsigned long host_psscr; + unsigned long host_pidr; + unsigned long host_dawr1; + unsigned long host_dawrx1; + + hdec = time_limit - mftb(); + if (hdec < 0) + return BOOK3S_INTERRUPT_HV_DECREMENTER; + + WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV); + WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME)); + + start_timing(vcpu, &vcpu->arch.rm_entry); + + vcpu->arch.ceded = 0; + + if (vc->tb_offset) { + u64 new_tb = mftb() + vc->tb_offset; + mtspr(SPRN_TBU40, new_tb); + tb = mftb(); + if ((tb & 0xffffff) < (new_tb & 0xffffff)) + mtspr(SPRN_TBU40, new_tb + 0x1000000); + vc->tb_offset_applied = vc->tb_offset; + } + + msr = mfmsr(); + + host_hfscr = mfspr(SPRN_HFSCR); + host_ciabr = mfspr(SPRN_CIABR); + host_dawr0 = mfspr(SPRN_DAWR0); + host_dawrx0 = mfspr(SPRN_DAWRX0); + host_psscr = mfspr(SPRN_PSSCR); + host_pidr = mfspr(SPRN_PID); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + host_dawr1 = mfspr(SPRN_DAWR1); + host_dawrx1 = mfspr(SPRN_DAWRX1); + } + + if (vc->pcr) + mtspr(SPRN_PCR, vc->pcr | PCR_MASK); + mtspr(SPRN_DPDES, vc->dpdes); + mtspr(SPRN_VTB, vc->vtb); + + local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR); + local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR); + mtspr(SPRN_PURR, vcpu->arch.purr); + mtspr(SPRN_SPURR, vcpu->arch.spurr); + + if (dawr_enabled()) { + mtspr(SPRN_DAWR0, vcpu->arch.dawr0); + mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + mtspr(SPRN_DAWR1, vcpu->arch.dawr1); + mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); + } + } + mtspr(SPRN_CIABR, vcpu->arch.ciabr); + mtspr(SPRN_IC, vcpu->arch.ic); + + mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC | + (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); + + mtspr(SPRN_HFSCR, vcpu->arch.hfscr); + + mtspr(SPRN_HSRR0, vcpu->arch.regs.nip); + mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME); + + /* + * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage + * Interrupt (HDSI) the HDSISR is not be updated at all. + * + * To work around this we put a canary value into the HDSISR before + * returning to a guest and then check for this canary when we take a + * HDSI. If we find the canary on a HDSI, we know the hardware didn't + * update the HDSISR. In this case we return to the guest to retake the + * HDSI which should correctly update the HDSISR the second time HDSI + * entry. + * + * Just do this on all p9 processors for now. + */ + mtspr(SPRN_HDSISR, HDSISR_CANARY); + + mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0); + mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1); + mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2); + mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3); + + mtspr(SPRN_AMOR, ~0UL); + + local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9; + + /* + * Hash host, hash guest, or radix guest with prefetch bug, all have + * to disable the MMU before switching to guest MMU state. + */ + if (!radix_enabled() || !kvm_is_radix(kvm) || + cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) + __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0); + + save_clear_host_mmu(kvm); + + if (kvm_is_radix(kvm)) { + switch_mmu_to_guest_radix(kvm, vcpu, lpcr); + if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) + __mtmsrd(0, 1); /* clear RI */ + + } else { + switch_mmu_to_guest_hpt(kvm, vcpu, lpcr); + } + + /* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */ + kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested); + + /* + * P9 suppresses the HDEC exception when LPCR[HDICE] = 0, + * so set guest LPCR (with HDICE) before writing HDEC. + */ + mtspr(SPRN_HDEC, hdec); + + mtspr(SPRN_DAR, vcpu->arch.shregs.dar); + mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); + mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); + mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1); + + accumulate_time(vcpu, &vcpu->arch.guest_time); + + kvmppc_p9_enter_guest(vcpu); + + accumulate_time(vcpu, &vcpu->arch.rm_intr); + + /* XXX: Could get these from r11/12 and paca exsave instead */ + vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0); + vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1); + vcpu->arch.shregs.dar = mfspr(SPRN_DAR); + vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); + + /* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */ + trap = local_paca->kvm_hstate.scratch0 & ~0x2; + + /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */ + ri_set = false; + if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) { + if (trap != BOOK3S_INTERRUPT_SYSCALL && + (vcpu->arch.shregs.msr & MSR_RI)) + ri_set = true; + exsave = local_paca->exgen; + } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) { + exsave = local_paca->exnmi; + } else { /* trap == 0x200 */ + exsave = local_paca->exmc; + } + + vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1; + vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2; + + /* + * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1) + * and hstate scratch (which we need to move into exsave to make + * re-entrant vs SRESET/MCE) + */ + if (ri_set) { + if (unlikely(!(mfmsr() & MSR_RI))) { + __mtmsrd(MSR_RI, 1); + WARN_ON_ONCE(1); + } + } else { + WARN_ON_ONCE(mfmsr() & MSR_RI); + __mtmsrd(MSR_RI, 1); + } + + vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)]; + vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)]; + vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)]; + vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)]; + vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)]; + vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)]; + vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)]; + vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)]; + + vcpu->arch.last_inst = KVM_INST_FETCH_FAILED; + + if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) { + vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)]; + vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)]; + kvmppc_realmode_machine_check(vcpu); + + } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) { + kvmppc_realmode_hmi_handler(); + + } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) { + vcpu->arch.emul_inst = mfspr(SPRN_HEIR); + + } else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) { + vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)]; + vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)]; + vcpu->arch.fault_gpa = mfspr(SPRN_ASDR); + + } else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { + vcpu->arch.fault_gpa = mfspr(SPRN_ASDR); + + } else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) { + vcpu->arch.hfscr = mfspr(SPRN_HFSCR); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Softpatch interrupt for transactional memory emulation cases + * on POWER9 DD2.2. This is early in the guest exit path - we + * haven't saved registers or done a treclaim yet. + */ + } else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) { + vcpu->arch.emul_inst = mfspr(SPRN_HEIR); + + /* + * The cases we want to handle here are those where the guest + * is in real suspend mode and is trying to transition to + * transactional mode. + */ + if (local_paca->kvm_hstate.fake_suspend && + (vcpu->arch.shregs.msr & MSR_TS_S)) { + if (kvmhv_p9_tm_emulation_early(vcpu)) { + /* Prevent it being handled again. */ + trap = 0; + } + } +#endif + } + + accumulate_time(vcpu, &vcpu->arch.rm_exit); + + /* Advance host PURR/SPURR by the amount used by guest */ + purr = mfspr(SPRN_PURR); + spurr = mfspr(SPRN_SPURR); + mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr + + purr - vcpu->arch.purr); + mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr + + spurr - vcpu->arch.spurr); + vcpu->arch.purr = purr; + vcpu->arch.spurr = spurr; + + vcpu->arch.ic = mfspr(SPRN_IC); + vcpu->arch.pid = mfspr(SPRN_PID); + vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS; + + vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0); + vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1); + vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); + vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); + + /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ + mtspr(SPRN_PSSCR, host_psscr | + (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); + mtspr(SPRN_HFSCR, host_hfscr); + mtspr(SPRN_CIABR, host_ciabr); + mtspr(SPRN_DAWR0, host_dawr0); + mtspr(SPRN_DAWRX0, host_dawrx0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + mtspr(SPRN_DAWR1, host_dawr1); + mtspr(SPRN_DAWRX1, host_dawrx1); + } + + if (kvm_is_radix(kvm)) { + /* + * Since this is radix, do a eieio; tlbsync; ptesync sequence + * in case we interrupted the guest between a tlbie and a + * ptesync. + */ + asm volatile("eieio; tlbsync; ptesync"); + } + + /* + * cp_abort is required if the processor supports local copy-paste + * to clear the copy buffer that was under control of the guest. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + asm volatile(PPC_CP_ABORT); + + vc->dpdes = mfspr(SPRN_DPDES); + vc->vtb = mfspr(SPRN_VTB); + mtspr(SPRN_DPDES, 0); + if (vc->pcr) + mtspr(SPRN_PCR, PCR_MASK); + + if (vc->tb_offset_applied) { + u64 new_tb = mftb() - vc->tb_offset_applied; + mtspr(SPRN_TBU40, new_tb); + tb = mftb(); + if ((tb & 0xffffff) < (new_tb & 0xffffff)) + mtspr(SPRN_TBU40, new_tb + 0x1000000); + vc->tb_offset_applied = 0; + } + + mtspr(SPRN_HDEC, 0x7fffffff); + + save_clear_guest_mmu(kvm, vcpu); + switch_mmu_to_host(kvm, host_pidr); + local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE; + + /* + * If we are in real mode, only switch MMU on after the MMU is + * switched to host, to avoid the P9_RADIX_PREFETCH_BUG. + */ + __mtmsrd(msr, 0); + + end_timing(vcpu); + + return trap; +} +EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 7af7c70f1468..8b70de4595f0 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -57,6 +57,10 @@ static int global_invalidates(struct kvm *kvm) else global = 1; + /* LPID has been switched to host if in virt mode so can't do local */ + if (!global && (mfmsr() & (MSR_IR|MSR_DR))) + global = 1; + if (!global) { /* any other core might now have stale TLB entries... */ smp_wmb(); @@ -67,7 +71,7 @@ static int global_invalidates(struct kvm *kvm) * so use the bit for the first thread to represent the core. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) - cpu = cpu_first_thread_sibling(cpu); + cpu = cpu_first_tlb_thread_sibling(cpu); cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); } @@ -409,6 +413,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, vcpu->arch.pgdir, true, &vcpu->arch.regs.gpr[4]); } +EXPORT_SYMBOL_GPL(kvmppc_h_enter); #ifdef __BIG_ENDIAN__ #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) @@ -553,6 +558,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn, &vcpu->arch.regs.gpr[4]); } +EXPORT_SYMBOL_GPL(kvmppc_h_remove); long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) { @@ -671,6 +677,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) return ret; } +EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove); long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index, unsigned long avpn) @@ -741,6 +748,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, return H_SUCCESS; } +EXPORT_SYMBOL_GPL(kvmppc_h_protect); long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index) @@ -781,6 +789,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, } return H_SUCCESS; } +EXPORT_SYMBOL_GPL(kvmppc_h_read); long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index) @@ -829,6 +838,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); return ret; } +EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref); long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index) @@ -876,6 +886,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); return ret; } +EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod); static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, unsigned long gpa, int writing, unsigned long *hpa, @@ -1294,3 +1305,4 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, return -1; /* send fault up to host kernel mode */ } +EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault); diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index c2c9c733f359..0a11ec88a0ae 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -141,13 +141,6 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, return; } - if (xive_enabled() && kvmhv_on_pseries()) { - /* No XICS access or hypercalls available, too hard */ - this_icp->rm_action |= XICS_RM_KICK_VCPU; - this_icp->rm_kick_target = vcpu; - return; - } - /* * Check if the core is loaded, * if not, find an available host core to post to wake the VCPU, @@ -771,14 +764,6 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) void __iomem *xics_phys; int64_t rc; - if (kvmhv_on_pseries()) { - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - iosync(); - plpar_hcall_raw(H_EOI, retbuf, hwirq); - return; - } - rc = pnv_opal_pci_msi_eoi(c, hwirq); if (rc) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 5e634db4809b..8dd437d7a2c6 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -25,18 +25,10 @@ #include <asm/export.h> #include <asm/tm.h> #include <asm/opal.h> -#include <asm/xive-regs.h> #include <asm/thread_info.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> #include <asm/cpuidle.h> -#include <asm/ultravisor-api.h> - -/* Sign-extend HDEC if not on POWER9 */ -#define EXTEND_HDEC(reg) \ -BEGIN_FTR_SECTION; \ - extsw reg, reg; \ -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 @@ -44,9 +36,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define NAPPING_UNSPLIT 3 /* Stack frame offsets for kvmppc_hv_entry */ -#define SFS 208 +#define SFS 160 #define STACK_SLOT_TRAP (SFS-4) -#define STACK_SLOT_SHORT_PATH (SFS-8) #define STACK_SLOT_TID (SFS-16) #define STACK_SLOT_PSSCR (SFS-24) #define STACK_SLOT_PID (SFS-32) @@ -57,10 +48,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_HFSCR (SFS-72) #define STACK_SLOT_AMR (SFS-80) #define STACK_SLOT_UAMOR (SFS-88) -#define STACK_SLOT_DAWR1 (SFS-96) -#define STACK_SLOT_DAWRX1 (SFS-104) -/* the following is used by the P9 short path */ -#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */ +#define STACK_SLOT_FSCR (SFS-96) /* * Call kvmppc_hv_entry in real mode. @@ -136,15 +124,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* Return the trap number on this thread as the return value */ mr r3, r12 - /* - * If we came back from the guest via a relocation-on interrupt, - * we will be in virtual mode at this point, which makes it a - * little easier to get back to the caller. - */ - mfmsr r0 - andi. r0, r0, MSR_IR /* in real mode? */ - bne .Lvirt_return - /* RFI into the highmem handler */ mfmsr r6 li r0, MSR_RI @@ -154,11 +133,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtsrr1 r7 RFI_TO_KERNEL - /* Virtual-mode return */ -.Lvirt_return: - mtlr r8 - blr - kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ @@ -245,7 +219,7 @@ kvm_novcpu_wakeup: /* See if our timeslice has expired (HDEC is negative) */ mfspr r0, SPRN_HDEC - EXTEND_HDEC(r0) + extsw r0, r0 li r12, BOOK3S_INTERRUPT_HV_DECREMENTER cmpdi r0, 0 blt kvm_novcpu_exit @@ -347,10 +321,8 @@ kvm_secondary_got_guest: lbz r4, HSTATE_PTID(r13) cmpwi r4, 0 bne 63f - LOAD_REG_ADDR(r6, decrementer_max) - ld r6, 0(r6) + lis r6,0x7fff /* MAX_INT@h */ mtspr SPRN_HDEC, r6 -BEGIN_FTR_SECTION /* and set per-LPAR registers, if doing dynamic micro-threading */ ld r6, HSTATE_SPLIT_MODE(r13) cmpdi r6, 0 @@ -362,7 +334,6 @@ BEGIN_FTR_SECTION ld r0, KVM_SPLIT_LDBAR(r6) mtspr SPRN_LDBAR, r0 isync -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 63: /* Order load of vcpu after load of vcore */ lwsync @@ -433,7 +404,6 @@ kvm_no_guest: blr 53: -BEGIN_FTR_SECTION HMT_LOW ld r5, HSTATE_KVM_VCORE(r13) cmpdi r5, 0 @@ -448,14 +418,6 @@ BEGIN_FTR_SECTION b kvm_unsplit_nap 60: HMT_MEDIUM b kvm_secondary_got_guest -FTR_SECTION_ELSE - HMT_LOW - ld r5, HSTATE_KVM_VCORE(r13) - cmpdi r5, 0 - beq kvm_no_guest - HMT_MEDIUM - b kvm_secondary_got_guest -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 54: li r0, KVM_HWTHREAD_IN_KVM stb r0, HSTATE_HWTHREAD_STATE(r13) @@ -581,13 +543,11 @@ kvmppc_hv_entry: bne 10f lwz r7,KVM_LPID(r9) -BEGIN_FTR_SECTION ld r6,KVM_SDR1(r9) li r0,LPID_RSVD /* switch to reserved LPID */ mtspr SPRN_LPID,r0 ptesync mtspr SPRN_SDR1,r6 /* switch to partition page table */ -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync @@ -668,16 +628,6 @@ kvmppc_got_guest: /* Save host values of some registers */ BEGIN_FTR_SECTION - mfspr r5, SPRN_TIDR - mfspr r6, SPRN_PSSCR - mfspr r7, SPRN_PID - std r5, STACK_SLOT_TID(r1) - std r6, STACK_SLOT_PSSCR(r1) - std r7, STACK_SLOT_PID(r1) - mfspr r5, SPRN_HFSCR - std r5, STACK_SLOT_HFSCR(r1) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) -BEGIN_FTR_SECTION mfspr r5, SPRN_CIABR mfspr r6, SPRN_DAWR0 mfspr r7, SPRN_DAWRX0 @@ -686,13 +636,9 @@ BEGIN_FTR_SECTION std r6, STACK_SLOT_DAWR0(r1) std r7, STACK_SLOT_DAWRX0(r1) std r8, STACK_SLOT_IAMR(r1) + mfspr r5, SPRN_FSCR + std r5, STACK_SLOT_FSCR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -BEGIN_FTR_SECTION - mfspr r6, SPRN_DAWR1 - mfspr r7, SPRN_DAWRX1 - std r6, STACK_SLOT_DAWR1(r1) - std r7, STACK_SLOT_DAWRX1(r1) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1) mfspr r5, SPRN_AMR std r5, STACK_SLOT_AMR(r1) @@ -710,13 +656,9 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Branch around the call if both CPU_FTR_TM and - * CPU_FTR_P9_TM_HV_ASSIST are off. - */ BEGIN_FTR_SECTION b 91f -END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) +END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR) */ @@ -783,12 +725,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r6, VCPU_DAWRX0(r4) mtspr SPRN_DAWR0, r5 mtspr SPRN_DAWRX0, r6 -BEGIN_FTR_SECTION - ld r5, VCPU_DAWR1(r4) - ld r6, VCPU_DAWRX1(r4) - mtspr SPRN_DAWR1, r5 - mtspr SPRN_DAWRX1, r6 -END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) 1: ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) @@ -806,7 +742,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) mtspr SPRN_BESCR, r6 mtspr SPRN_PID, r7 mtspr SPRN_WORT, r8 -BEGIN_FTR_SECTION /* POWER8-only registers */ ld r5, VCPU_TCSCR(r4) ld r6, VCPU_ACOP(r4) @@ -817,18 +752,6 @@ BEGIN_FTR_SECTION mtspr SPRN_CSIGR, r7 mtspr SPRN_TACR, r8 nop -FTR_SECTION_ELSE - /* POWER9-only registers */ - ld r5, VCPU_TID(r4) - ld r6, VCPU_PSSCR(r4) - lbz r8, HSTATE_FAKE_SUSPEND(r13) - oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ - rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG - ld r7, VCPU_HFSCR(r4) - mtspr SPRN_TIDR, r5 - mtspr SPRN_PSSCR, r6 - mtspr SPRN_HFSCR, r7 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 8: ld r5, VCPU_SPRG0(r4) @@ -898,23 +821,15 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) /* Check if HDEC expires soon */ mfspr r3, SPRN_HDEC - EXTEND_HDEC(r3) + extsw r3, r3 cmpdi r3, 512 /* 1 microsecond */ blt hdec_soon - ld r6, VCPU_KVM(r4) - lbz r0, KVM_RADIX(r6) - cmpwi r0, 0 - bne 9f - - /* For hash guest, clear out and reload the SLB */ -BEGIN_MMU_FTR_SECTION - /* Radix host won't have populated the SLB, so no need to clear */ + /* Clear out and reload the SLB */ li r6, 0 slbmte r6, r6 PPC_SLBIA(6) ptesync -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */ lwz r5,VCPU_SLB_MAX(r4) @@ -929,96 +844,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) bdnz 1b 9: -#ifdef CONFIG_KVM_XICS - /* We are entering the guest on that thread, push VCPU to XIVE */ - ld r11, VCPU_XIVE_SAVED_STATE(r4) - li r9, TM_QW1_OS - lwz r8, VCPU_XIVE_CAM_WORD(r4) - cmpwi r8, 0 - beq no_xive - li r7, TM_QW1_OS + TM_WORD2 - mfmsr r0 - andi. r0, r0, MSR_DR /* in real mode? */ - beq 2f - ld r10, HSTATE_XIVE_TIMA_VIRT(r13) - cmpldi cr1, r10, 0 - beq cr1, no_xive - eieio - stdx r11,r9,r10 - stwx r8,r7,r10 - b 3f -2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13) - cmpldi cr1, r10, 0 - beq cr1, no_xive - eieio - stdcix r11,r9,r10 - stwcix r8,r7,r10 -3: li r9, 1 - stb r9, VCPU_XIVE_PUSHED(r4) - eieio - - /* - * We clear the irq_pending flag. There is a small chance of a - * race vs. the escalation interrupt happening on another - * processor setting it again, but the only consequence is to - * cause a spurrious wakeup on the next H_CEDE which is not an - * issue. - */ - li r0,0 - stb r0, VCPU_IRQ_PENDING(r4) - - /* - * In single escalation mode, if the escalation interrupt is - * on, we mask it. - */ - lbz r0, VCPU_XIVE_ESC_ON(r4) - cmpwi cr1, r0,0 - beq cr1, 1f - li r9, XIVE_ESB_SET_PQ_01 - beq 4f /* in real mode? */ - ld r10, VCPU_XIVE_ESC_VADDR(r4) - ldx r0, r10, r9 - b 5f -4: ld r10, VCPU_XIVE_ESC_RADDR(r4) - ldcix r0, r10, r9 -5: sync - - /* We have a possible subtle race here: The escalation interrupt might - * have fired and be on its way to the host queue while we mask it, - * and if we unmask it early enough (re-cede right away), there is - * a theorical possibility that it fires again, thus landing in the - * target queue more than once which is a big no-no. - * - * Fortunately, solving this is rather easy. If the above load setting - * PQ to 01 returns a previous value where P is set, then we know the - * escalation interrupt is somewhere on its way to the host. In that - * case we simply don't clear the xive_esc_on flag below. It will be - * eventually cleared by the handler for the escalation interrupt. - * - * Then, when doing a cede, we check that flag again before re-enabling - * the escalation interrupt, and if set, we abort the cede. - */ - andi. r0, r0, XIVE_ESB_VAL_P - bne- 1f - - /* Now P is 0, we can clear the flag */ - li r0, 0 - stb r0, VCPU_XIVE_ESC_ON(r4) -1: -no_xive: -#endif /* CONFIG_KVM_XICS */ - - li r0, 0 - stw r0, STACK_SLOT_SHORT_PATH(r1) - deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */ /* Check if we can deliver an external or decrementer interrupt now */ ld r0, VCPU_PENDING_EXC(r4) -BEGIN_FTR_SECTION - /* On POWER9, also check for emulated doorbell interrupt */ - lbz r3, VCPU_DBELL_REQ(r4) - or r0, r0, r3 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) cmpdi r0, 0 beq 71f mr r3, r4 @@ -1030,7 +858,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mtspr SPRN_SRR0, r6 mtspr SPRN_SRR1, r7 -fast_guest_entry_c: ld r10, VCPU_PC(r4) ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */ @@ -1092,18 +919,8 @@ BEGIN_FTR_SECTION mtspr SPRN_PPR, r0 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) -/* Move canary into DSISR to check for later */ -BEGIN_FTR_SECTION - li r0, 0x7fff - mtspr SPRN_HDSISR, r0 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - - ld r6, VCPU_KVM(r4) - lbz r7, KVM_SECURE_GUEST(r6) - cmpdi r7, 0 ld r6, VCPU_GPR(R6)(r4) ld r7, VCPU_GPR(R7)(r4) - bne ret_to_ultra ld r0, VCPU_CR(r4) mtcr r0 @@ -1114,117 +931,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r4, VCPU_GPR(R4)(r4) HRFI_TO_GUEST b . -/* - * Use UV_RETURN ultracall to return control back to the Ultravisor after - * processing an hypercall or interrupt that was forwarded (a.k.a. reflected) - * to the Hypervisor. - * - * All registers have already been loaded, except: - * R0 = hcall result - * R2 = SRR1, so UV can detect a synthesized interrupt (if any) - * R3 = UV_RETURN - */ -ret_to_ultra: - ld r0, VCPU_CR(r4) - mtcr r0 - - ld r0, VCPU_GPR(R3)(r4) - mfspr r2, SPRN_SRR1 - li r3, 0 - ori r3, r3, UV_RETURN - ld r4, VCPU_GPR(R4)(r4) - sc 2 - -/* - * Enter the guest on a P9 or later system where we have exactly - * one vcpu per vcore and we don't need to go to real mode - * (which implies that host and guest are both using radix MMU mode). - * r3 = vcpu pointer - * Most SPRs and all the VSRs have been loaded already. - */ -_GLOBAL(__kvmhv_vcpu_entry_p9) -EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9) - mflr r0 - std r0, PPC_LR_STKOFF(r1) - stdu r1, -SFS(r1) - - li r0, 1 - stw r0, STACK_SLOT_SHORT_PATH(r1) - - std r3, HSTATE_KVM_VCPU(r13) - mfcr r4 - stw r4, SFS+8(r1) - - std r1, HSTATE_HOST_R1(r13) - - reg = 14 - .rept 18 - std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1) - reg = reg + 1 - .endr - - reg = 14 - .rept 18 - ld reg, __VCPU_GPR(reg)(r3) - reg = reg + 1 - .endr - - mfmsr r10 - std r10, HSTATE_HOST_MSR(r13) - - mr r4, r3 - b fast_guest_entry_c -guest_exit_short_path: - /* - * Malicious or buggy radix guests may have inserted SLB entries - * (only 0..3 because radix always runs with UPRT=1), so these must - * be cleared here to avoid side-channels. slbmte is used rather - * than slbia, as it won't clear cached translations. - */ - li r0,0 - slbmte r0,r0 - li r4,1 - slbmte r0,r4 - li r4,2 - slbmte r0,r4 - li r4,3 - slbmte r0,r4 - - li r0, KVM_GUEST_MODE_NONE - stb r0, HSTATE_IN_GUEST(r13) - - reg = 14 - .rept 18 - std reg, __VCPU_GPR(reg)(r9) - reg = reg + 1 - .endr - - reg = 14 - .rept 18 - ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1) - reg = reg + 1 - .endr - - lwz r4, SFS+8(r1) - mtcr r4 - - mr r3, r12 /* trap number */ - - addi r1, r1, SFS - ld r0, PPC_LR_STKOFF(r1) - mtlr r0 - - /* If we are in real mode, do a rfid to get back to the caller */ - mfmsr r4 - andi. r5, r4, MSR_IR - bnelr - rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */ - mtspr SPRN_SRR0, r0 - ld r10, HSTATE_HOST_MSR(r13) - rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG - mtspr SPRN_SRR1, r10 - RFI_TO_KERNEL - b . secondary_too_late: li r12, 0 @@ -1265,21 +971,16 @@ hdec_soon: kvmppc_interrupt_hv: /* * Register contents: + * R9 = HSTATE_IN_GUEST * R12 = (guest CR << 32) | interrupt vector * R13 = PACA * guest R12 saved in shadow VCPU SCRATCH0 * guest R13 saved in SPRN_SCRATCH0 + * guest R9 saved in HSTATE_SCRATCH2 */ - std r9, HSTATE_SCRATCH2(r13) - lbz r9, HSTATE_IN_GUEST(r13) - cmpwi r9, KVM_GUEST_MODE_HOST_HV - beq kvmppc_bad_host_intr -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE - cmpwi r9, KVM_GUEST_MODE_GUEST - ld r9, HSTATE_SCRATCH2(r13) - beq kvmppc_interrupt_pr -#endif /* We're now back in the host but in guest MMU context */ + cmpwi r9,KVM_GUEST_MODE_HOST_HV + beq kvmppc_bad_host_intr li r9, KVM_GUEST_MODE_HOST_HV stb r9, HSTATE_IN_GUEST(r13) @@ -1397,7 +1098,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER bne 2f mfspr r3,SPRN_HDEC - EXTEND_HDEC(r3) + extsw r3, r3 cmpdi r3,0 mr r4,r9 bge fast_guest_return @@ -1409,14 +1110,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Hypervisor doorbell - exit only if host IPI flag set */ cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL bne 3f -BEGIN_FTR_SECTION - PPC_MSGSYNC - lwsync - /* always exit if we're running a nested guest */ - ld r0, VCPU_NESTED(r9) - cmpdi r0, 0 - bne guest_exit_cont -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 beq maybe_reenter_guest @@ -1446,62 +1139,16 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ mr r4, r9 bl kvmhv_accumulate_time #endif -#ifdef CONFIG_KVM_XICS - /* We are exiting, pull the VP from the XIVE */ - lbz r0, VCPU_XIVE_PUSHED(r9) - cmpwi cr0, r0, 0 - beq 1f - li r7, TM_SPC_PULL_OS_CTX - li r6, TM_QW1_OS - mfmsr r0 - andi. r0, r0, MSR_DR /* in real mode? */ - beq 2f - ld r10, HSTATE_XIVE_TIMA_VIRT(r13) - cmpldi cr0, r10, 0 - beq 1f - /* First load to pull the context, we ignore the value */ - eieio - lwzx r11, r7, r10 - /* Second load to recover the context state (Words 0 and 1) */ - ldx r11, r6, r10 - b 3f -2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13) - cmpldi cr0, r10, 0 - beq 1f - /* First load to pull the context, we ignore the value */ - eieio - lwzcix r11, r7, r10 - /* Second load to recover the context state (Words 0 and 1) */ - ldcix r11, r6, r10 -3: std r11, VCPU_XIVE_SAVED_STATE(r9) - /* Fixup some of the state for the next load */ - li r10, 0 - li r0, 0xff - stb r10, VCPU_XIVE_PUSHED(r9) - stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9) - stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9) - eieio -1: -#endif /* CONFIG_KVM_XICS */ /* * Possibly flush the link stack here, before we do a blr in - * guest_exit_short_path. + * kvmhv_switch_to_host. */ 1: nop patch_site 1b patch__call_kvm_flush_link_stack - /* If we came in through the P9 short path, go back out to C now */ - lwz r0, STACK_SLOT_SHORT_PATH(r1) - cmpwi r0, 0 - bne guest_exit_short_path - /* For hash guest, read the guest SLB and save it away */ - ld r5, VCPU_KVM(r9) - lbz r0, KVM_RADIX(r5) li r5, 0 - cmpwi r0, 0 - bne 0f /* for radix, save 0 entries */ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ mtctr r0 li r6,0 @@ -1525,9 +1172,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ stw r5,VCPU_SLB_MAX(r9) /* load host SLB entries */ -BEGIN_MMU_FTR_SECTION - b guest_bypass -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED @@ -1540,21 +1184,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) slbmte r6,r5 1: addi r8,r8,16 .endr - b guest_bypass - -0: /* - * Sanitise radix guest SLB, see guest_exit_short_path comment. - * We clear vcpu->arch.slb_max to match earlier behaviour. - */ - li r0,0 - stw r0,VCPU_SLB_MAX(r9) - slbmte r0,r0 - li r4,1 - slbmte r0,r4 - li r4,2 - slbmte r0,r4 - li r4,3 - slbmte r0,r4 guest_bypass: stw r12, STACK_SLOT_TRAP(r1) @@ -1564,12 +1193,6 @@ guest_bypass: ld r3, HSTATE_KVM_VCORE(r13) mfspr r5,SPRN_DEC mftb r6 - /* On P9, if the guest has large decr enabled, don't sign extend */ -BEGIN_FTR_SECTION - ld r4, VCORE_LPCR(r3) - andis. r4, r4, LPCR_LD@h - bne 16f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) extsw r5,r5 16: add r5,r5,r6 /* r5 is a guest timebase value here, convert to host TB */ @@ -1643,7 +1266,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r6, VCPU_BESCR(r9) stw r7, VCPU_GUEST_PID(r9) std r8, VCPU_WORT(r9) -BEGIN_FTR_SECTION mfspr r5, SPRN_TCSCR mfspr r6, SPRN_ACOP mfspr r7, SPRN_CSIGR @@ -1652,17 +1274,10 @@ BEGIN_FTR_SECTION std r6, VCPU_ACOP(r9) std r7, VCPU_CSIGR(r9) std r8, VCPU_TACR(r9) -FTR_SECTION_ELSE - mfspr r5, SPRN_TIDR - mfspr r6, SPRN_PSSCR - std r5, VCPU_TID(r9) - rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */ - rotldi r6, r6, 60 - std r6, VCPU_PSSCR(r9) - /* Restore host HFSCR value */ - ld r7, STACK_SLOT_HFSCR(r1) - mtspr SPRN_HFSCR, r7 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_FSCR(r1) + mtspr SPRN_FSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* * Restore various registers to 0, where non-zero values * set by the guest could disrupt the host. @@ -1670,13 +1285,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) li r0, 0 mtspr SPRN_PSPB, r0 mtspr SPRN_WORT, r0 -BEGIN_FTR_SECTION mtspr SPRN_TCSCR, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ li r0, 1 sldi r0, r0, 31 mtspr SPRN_MMCRS, r0 -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) /* Save and restore AMR, IAMR and UAMOR before turning on the MMU */ ld r8, STACK_SLOT_IAMR(r1) @@ -1733,13 +1346,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) bl kvmppc_save_fp #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Branch around the call if both CPU_FTR_TM and - * CPU_FTR_P9_TM_HV_ASSIST are off. - */ BEGIN_FTR_SECTION b 91f -END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) +END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR) */ @@ -1785,80 +1394,6 @@ BEGIN_FTR_SECTION mtspr SPRN_DAWR0, r6 mtspr SPRN_DAWRX0, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -BEGIN_FTR_SECTION - ld r6, STACK_SLOT_DAWR1(r1) - ld r7, STACK_SLOT_DAWRX1(r1) - mtspr SPRN_DAWR1, r6 - mtspr SPRN_DAWRX1, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1) -BEGIN_FTR_SECTION - ld r5, STACK_SLOT_TID(r1) - ld r6, STACK_SLOT_PSSCR(r1) - ld r7, STACK_SLOT_PID(r1) - mtspr SPRN_TIDR, r5 - mtspr SPRN_PSSCR, r6 - mtspr SPRN_PID, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - -#ifdef CONFIG_PPC_RADIX_MMU - /* - * Are we running hash or radix ? - */ - ld r5, VCPU_KVM(r9) - lbz r0, KVM_RADIX(r5) - cmpwi cr2, r0, 0 - beq cr2, 2f - - /* - * Radix: do eieio; tlbsync; ptesync sequence in case we - * interrupted the guest between a tlbie and a ptesync. - */ - eieio - tlbsync - ptesync - -BEGIN_FTR_SECTION - /* Radix: Handle the case where the guest used an illegal PID */ - LOAD_REG_ADDR(r4, mmu_base_pid) - lwz r3, VCPU_GUEST_PID(r9) - lwz r5, 0(r4) - cmpw cr0,r3,r5 - blt 2f - - /* - * Illegal PID, the HW might have prefetched and cached in the TLB - * some translations for the LPID 0 / guest PID combination which - * Linux doesn't know about, so we need to flush that PID out of - * the TLB. First we need to set LPIDR to 0 so tlbiel applies to - * the right context. - */ - li r0,0 - mtspr SPRN_LPID,r0 - isync - - /* Then do a congruence class local flush */ - ld r6,VCPU_KVM(r9) - lwz r0,KVM_TLB_SETS(r6) - mtctr r0 - li r7,0x400 /* IS field = 0b01 */ - ptesync - sldi r0,r3,32 /* RS has PID */ -1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */ - addi r7,r7,0x1000 - bdnz 1b - ptesync -END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG) - -2: -#endif /* CONFIG_PPC_RADIX_MMU */ - - /* - * cp_abort is required if the processor supports local copy-paste - * to clear the copy buffer that was under control of the guest. - */ -BEGIN_FTR_SECTION - PPC_CP_ABORT -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) /* * POWER7/POWER8 guest -> host partition switch code. @@ -1895,13 +1430,11 @@ kvmhv_switch_to_host: /* Primary thread switches back to host partition */ lwz r7,KVM_HOST_LPID(r4) -BEGIN_FTR_SECTION ld r6,KVM_HOST_SDR1(r4) li r8,LPID_RSVD /* switch to reserved LPID */ mtspr SPRN_LPID,r8 ptesync mtspr SPRN_SDR1,r6 /* switch to host page table */ -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync @@ -2110,26 +1643,13 @@ kvmppc_tm_emul: * reflect the HDSI to the guest as a DSI. */ kvmppc_hdsi: - ld r3, VCPU_KVM(r9) - lbz r0, KVM_RADIX(r3) mfspr r4, SPRN_HDAR mfspr r6, SPRN_HDSISR -BEGIN_FTR_SECTION - /* Look for DSISR canary. If we find it, retry instruction */ - cmpdi r6, 0x7fff - beq 6f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - cmpwi r0, 0 - bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ /* HPTE not found fault or protection fault? */ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h beq 1f /* if not, send it to the guest */ andi. r0, r11, MSR_DR /* data relocation enabled? */ beq 3f -BEGIN_FTR_SECTION - mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ - b 4f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) clrrdi r0, r4, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ li r0, BOOK3S_INTERRUPT_DATA_SEGMENT @@ -2197,31 +1717,15 @@ fast_interrupt_c_return: stb r0, HSTATE_IN_GUEST(r13) b guest_exit_cont -.Lradix_hdsi: - std r4, VCPU_FAULT_DAR(r9) - stw r6, VCPU_FAULT_DSISR(r9) -.Lradix_hisi: - mfspr r5, SPRN_ASDR - std r5, VCPU_FAULT_GPA(r9) - b guest_exit_cont - /* * Similarly for an HISI, reflect it to the guest as an ISI unless * it is an HPTE not found fault for a page that we have paged out. */ kvmppc_hisi: - ld r3, VCPU_KVM(r9) - lbz r0, KVM_RADIX(r3) - cmpwi r0, 0 - bne .Lradix_hisi /* for radix, just save ASDR */ andis. r0, r11, SRR1_ISI_NOPT@h beq 1f andi. r0, r11, MSR_IR /* instruction relocation enabled? */ beq 3f -BEGIN_FTR_SECTION - mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ - b 4f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) clrrdi r0, r10, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ li r0, BOOK3S_INTERRUPT_INST_SEGMENT @@ -2269,10 +1773,6 @@ hcall_try_real_mode: andi. r0,r11,MSR_PR /* sc 1 from userspace - reflect to guest syscall */ bne sc_1_fast_return - /* sc 1 from nested guest - give it to L1 to handle */ - ld r0, VCPU_NESTED(r9) - cmpdi r0, 0 - bne guest_exit_cont clrrdi r3,r3,2 cmpldi r3,hcall_real_table_end - hcall_real_table bge guest_exit_cont @@ -2537,7 +2037,7 @@ hcall_real_table: #else .long 0 /* 0x2fc - H_XIRR_X*/ #endif - .long DOTSYM(kvmppc_h_random) - hcall_real_table + .long DOTSYM(kvmppc_rm_h_random) - hcall_real_table .globl hcall_real_table_end hcall_real_table_end: @@ -2668,13 +2168,9 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */ bl kvmppc_save_fp #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Branch around the call if both CPU_FTR_TM and - * CPU_FTR_P9_TM_HV_ASSIST are off. - */ BEGIN_FTR_SECTION b 91f -END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) +END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR) */ @@ -2694,15 +2190,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) mfspr r3, SPRN_DEC mfspr r4, SPRN_HDEC mftb r5 -BEGIN_FTR_SECTION - /* On P9 check whether the guest has large decrementer mode enabled */ - ld r6, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_LPCR(r6) - andis. r6, r6, LPCR_LD@h - bne 68f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) extsw r3, r3 -68: EXTEND_HDEC(r4) + extsw r4, r4 cmpd r3, r4 ble 67f mtspr SPRN_DEC, r4 @@ -2747,28 +2236,11 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) kvm_nap_sequence: /* desired LPCR value in r5 */ -BEGIN_FTR_SECTION - /* - * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset) - * enable state loss = 1 (allow SMT mode switch) - * requested level = 0 (just stop dispatching) - */ - lis r3, (PSSCR_EC | PSSCR_ESL)@h - /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */ - li r4, LPCR_PECE_HVEE@higher - sldi r4, r4, 32 - or r5, r5, r4 -FTR_SECTION_ELSE li r3, PNV_THREAD_NAP -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) mtspr SPRN_LPCR,r5 isync -BEGIN_FTR_SECTION - bl isa300_idle_stop_mayloss -FTR_SECTION_ELSE bl isa206_idle_insn_mayloss -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) mfspr r0, SPRN_CTRLF ori r0, r0, 1 @@ -2787,10 +2259,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) beq kvm_end_cede cmpwi r0, NAPPING_NOVCPU beq kvm_novcpu_wakeup -BEGIN_FTR_SECTION cmpwi r0, NAPPING_UNSPLIT beq kvm_unsplit_wakeup -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) twi 31,0,0 /* Nap state must not be zero */ 33: mr r4, r3 @@ -2810,13 +2280,9 @@ kvm_end_cede: #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Branch around the call if both CPU_FTR_TM and - * CPU_FTR_P9_TM_HV_ASSIST are off. - */ BEGIN_FTR_SECTION b 91f -END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) +END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR) */ @@ -2906,47 +2372,7 @@ kvm_cede_prodded: /* we've ceded but we want to give control to the host */ kvm_cede_exit: ld r9, HSTATE_KVM_VCPU(r13) -#ifdef CONFIG_KVM_XICS - /* are we using XIVE with single escalation? */ - ld r10, VCPU_XIVE_ESC_VADDR(r9) - cmpdi r10, 0 - beq 3f - li r6, XIVE_ESB_SET_PQ_00 - /* - * If we still have a pending escalation, abort the cede, - * and we must set PQ to 10 rather than 00 so that we don't - * potentially end up with two entries for the escalation - * interrupt in the XIVE interrupt queue. In that case - * we also don't want to set xive_esc_on to 1 here in - * case we race with xive_esc_irq(). - */ - lbz r5, VCPU_XIVE_ESC_ON(r9) - cmpwi r5, 0 - beq 4f - li r0, 0 - stb r0, VCPU_CEDED(r9) - /* - * The escalation interrupts are special as we don't EOI them. - * There is no need to use the load-after-store ordering offset - * to set PQ to 10 as we won't use StoreEOI. - */ - li r6, XIVE_ESB_SET_PQ_10 - b 5f -4: li r0, 1 - stb r0, VCPU_XIVE_ESC_ON(r9) - /* make sure store to xive_esc_on is seen before xive_esc_irq runs */ - sync -5: /* Enable XIVE escalation */ - mfmsr r0 - andi. r0, r0, MSR_DR /* in real mode? */ - beq 1f - ldx r0, r10, r6 - b 2f -1: ld r10, VCPU_XIVE_ESC_RADDR(r9) - ldcix r0, r10, r6 -2: sync -#endif /* CONFIG_KVM_XICS */ -3: b guest_exit_cont + b guest_exit_cont /* Try to do machine check recovery in real mode */ machine_check_realmode: @@ -3023,10 +2449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) PPC_MSGCLR(6) /* see if it's a host IPI */ li r3, 1 -BEGIN_FTR_SECTION - PPC_MSGSYNC - lwsync -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 bnelr @@ -3335,73 +2757,12 @@ kvmppc_bad_host_intr: std r3, STACK_FRAME_OVERHEAD-16(r1) /* - * On POWER9 do a minimal restore of the MMU and call C code, - * which will print a message and panic. * XXX On POWER7 and POWER8, we just spin here since we don't * know what the other threads are doing (and we don't want to * coordinate with them) - but at least we now have register state * in memory that we might be able to look at from another CPU. */ -BEGIN_FTR_SECTION b . -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - ld r9, HSTATE_KVM_VCPU(r13) - ld r10, VCPU_KVM(r9) - - li r0, 0 - mtspr SPRN_AMR, r0 - mtspr SPRN_IAMR, r0 - mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX0, r0 -BEGIN_FTR_SECTION - mtspr SPRN_DAWRX1, r0 -END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) - - /* Clear hash and radix guest SLB, see guest_exit_short_path comment. */ - slbmte r0, r0 - PPC_SLBIA(6) - -BEGIN_MMU_FTR_SECTION - b 4f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) - - ptesync - ld r8, PACA_SLBSHADOWPTR(r13) - .rept SLB_NUM_BOLTED - li r3, SLBSHADOW_SAVEAREA - LDX_BE r5, r8, r3 - addi r3, r3, 8 - LDX_BE r6, r8, r3 - andis. r7, r5, SLB_ESID_V@h - beq 3f - slbmte r6, r5 -3: addi r8, r8, 16 - .endr - -4: lwz r7, KVM_HOST_LPID(r10) - mtspr SPRN_LPID, r7 - mtspr SPRN_PID, r0 - ld r8, KVM_HOST_LPCR(r10) - mtspr SPRN_LPCR, r8 - isync - li r0, KVM_GUEST_MODE_NONE - stb r0, HSTATE_IN_GUEST(r13) - - /* - * Turn on the MMU and jump to C code - */ - bcl 20, 31, .+4 -5: mflr r3 - addi r3, r3, 9f - 5b - li r4, -1 - rldimi r3, r4, 62, 0 /* ensure 0xc000000000000000 bits are set */ - ld r4, PACAKMSR(r13) - mtspr SPRN_SRR0, r3 - mtspr SPRN_SRR1, r4 - RFI_TO_KERNEL -9: addi r3, r1, STACK_FRAME_OVERHEAD - bl kvmppc_bad_interrupt - b 9b /* * This mimics the MSR transition on IRQ delivery. The new guest MSR is taken diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 1f492aa4c8d6..202046a83fc1 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -164,12 +164,15 @@ kvmppc_interrupt_pr: /* 64-bit entry. Register usage at this point: * * SPRG_SCRATCH0 = guest R13 + * R9 = HSTATE_IN_GUEST * R12 = (guest CR << 32) | exit handler id * R13 = PACA * HSTATE.SCRATCH0 = guest R12 + * HSTATE.SCRATCH2 = guest R9 */ #ifdef CONFIG_PPC64 /* Match 32-bit entry */ + ld r9,HSTATE_SCRATCH2(r13) rotldi r12, r12, 32 /* Flip R12 halves for stw */ stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */ srdi r12, r12, 32 /* shift trap into low half */ diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index e7219b6f5f9a..9268d386b128 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -128,6 +128,71 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu); /* + * Pull a vcpu's context from the XIVE on guest exit. + * This assumes we are in virtual mode (MMU on) + */ +void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) +{ + void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt; + + if (!vcpu->arch.xive_pushed) + return; + + /* + * Should not have been pushed if there is no tima + */ + if (WARN_ON(!tima)) + return; + + eieio(); + /* First load to pull the context, we ignore the value */ + __raw_readl(tima + TM_SPC_PULL_OS_CTX); + /* Second load to recover the context state (Words 0 and 1) */ + vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS); + + /* Fixup some of the state for the next load */ + vcpu->arch.xive_saved_state.lsmfb = 0; + vcpu->arch.xive_saved_state.ack = 0xff; + vcpu->arch.xive_pushed = 0; + eieio(); +} +EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu); + +void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) +{ + void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr; + + if (!esc_vaddr) + return; + + /* we are using XIVE with single escalation */ + + if (vcpu->arch.xive_esc_on) { + /* + * If we still have a pending escalation, abort the cede, + * and we must set PQ to 10 rather than 00 so that we don't + * potentially end up with two entries for the escalation + * interrupt in the XIVE interrupt queue. In that case + * we also don't want to set xive_esc_on to 1 here in + * case we race with xive_esc_irq(). + */ + vcpu->arch.ceded = 0; + /* + * The escalation interrupts are special as we don't EOI them. + * There is no need to use the load-after-store ordering offset + * to set PQ to 10 as we won't use StoreEOI. + */ + __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10); + } else { + vcpu->arch.xive_esc_on = true; + mb(); + __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00); + } + mb(); +} +EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation); + +/* * This is a simple trigger for a generic XIVE IRQ. This must * only be called for interrupts that support a trigger page */ @@ -2075,6 +2140,36 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) return 0; } +int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + /* The VM should have configured XICS mode before doing XICS hcalls. */ + if (!kvmppc_xics_enabled(vcpu)) + return H_TOO_HARD; + + switch (req) { + case H_XIRR: + return xive_vm_h_xirr(vcpu); + case H_CPPR: + return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4)); + case H_EOI: + return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4)); + case H_IPI: + return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + case H_IPOLL: + return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4)); + case H_XIRR_X: + xive_vm_h_xirr(vcpu); + kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset); + return H_SUCCESS; + } + + return H_UNSUPPORTED; +} +EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall); + int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -2257,21 +2352,3 @@ struct kvm_device_ops kvm_xive_ops = { .get_attr = xive_get_attr, .has_attr = xive_has_attr, }; - -void kvmppc_xive_init_module(void) -{ - __xive_vm_h_xirr = xive_vm_h_xirr; - __xive_vm_h_ipoll = xive_vm_h_ipoll; - __xive_vm_h_ipi = xive_vm_h_ipi; - __xive_vm_h_cppr = xive_vm_h_cppr; - __xive_vm_h_eoi = xive_vm_h_eoi; -} - -void kvmppc_xive_exit_module(void) -{ - __xive_vm_h_xirr = NULL; - __xive_vm_h_ipoll = NULL; - __xive_vm_h_ipi = NULL; - __xive_vm_h_cppr = NULL; - __xive_vm_h_eoi = NULL; -} diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 86c24a4ad809..afe9eeac6d56 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -289,13 +289,6 @@ extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr); -extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu); -extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server); -extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, - unsigned long mfrr); -extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr); -extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); - /* * Common Xive routines for XICS-over-XIVE and XIVE native */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 76800c84f2a3..1253666dd4d8 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1281,13 +1281,3 @@ struct kvm_device_ops kvm_xive_native_ops = { .has_attr = kvmppc_xive_native_has_attr, .mmap = kvmppc_xive_native_mmap, }; - -void kvmppc_xive_native_init_module(void) -{ - ; -} - -void kvmppc_xive_native_exit_module(void) -{ - ; -} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a2a68a958fa0..be33b5321a76 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -682,6 +682,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 && !kvmppc_hv_ops->enable_dawr1(NULL)); break; + case KVM_CAP_PPC_RPT_INVALIDATE: + r = 1; + break; #endif default: r = 0; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 1fd31b4b0e13..fe26f2fa0f3f 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -14,6 +14,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/sched/mm.h> +#include <linux/stop_machine.h> #include <asm/cputable.h> #include <asm/code-patching.h> #include <asm/page.h> @@ -149,17 +150,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - - if (types & STF_BARRIER_FALLBACK) + // See comment in do_entry_flush_fixups() RE order of patching + if (types & STF_BARRIER_FALLBACK) { + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); patch_branch((struct ppc_inst *)(dest + 1), - (unsigned long)&stf_barrier_fallback, - BRANCH_SET_LINK); - else - patch_instruction((struct ppc_inst *)(dest + 1), - ppc_inst(instrs[1])); - - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); + } else { + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + } } printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, @@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) : "unknown"); } +static int __do_stf_barrier_fixups(void *data) +{ + enum stf_barrier_type *types = data; + + do_stf_entry_barrier_fixups(*types); + do_stf_exit_barrier_fixups(*types); + + return 0; +} void do_stf_barrier_fixups(enum stf_barrier_type types) { - do_stf_entry_barrier_fixups(types); - do_stf_exit_barrier_fixups(types); + /* + * The call to the fallback entry flush, and the fallback/sync-ori exit + * flush can not be safely patched in/out while other CPUs are executing + * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs + * spin in the stop machine core with interrupts hard disabled. + */ + stop_machine(__do_stf_barrier_fixups, &types, NULL); } void do_uaccess_flush_fixups(enum l1d_flush_type types) @@ -284,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) : "unknown"); } -void do_entry_flush_fixups(enum l1d_flush_type types) +static int __do_entry_flush_fixups(void *data) { + enum l1d_flush_type types = *(enum l1d_flush_type *)data; unsigned int instrs[3], *dest; long *start, *end; int i; @@ -309,6 +325,31 @@ void do_entry_flush_fixups(enum l1d_flush_type types) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + /* + * If we're patching in or out the fallback flush we need to be careful about the + * order in which we patch instructions. That's because it's possible we could + * take a page fault after patching one instruction, so the sequence of + * instructions must be safe even in a half patched state. + * + * To make that work, when patching in the fallback flush we patch in this order: + * - the mflr (dest) + * - the mtlr (dest + 2) + * - the branch (dest + 1) + * + * That ensures the sequence is safe to execute at any point. In contrast if we + * patch the mtlr last, it's possible we could return from the branch and not + * restore LR, leading to a crash later. + * + * When patching out the fallback flush (either with nops or another flush type), + * we patch in this order: + * - the branch (dest + 1) + * - the mtlr (dest + 2) + * - the mflr (dest) + * + * Note we are protected by stop_machine() from other CPUs executing the code in a + * semi-patched state. + */ + start = PTRRELOC(&__start___entry_flush_fixup); end = PTRRELOC(&__stop___entry_flush_fixup); for (i = 0; start < end; start++, i++) { @@ -316,15 +357,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - - if (types == L1D_FLUSH_FALLBACK) - patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback, - BRANCH_SET_LINK); - else + if (types == L1D_FLUSH_FALLBACK) { + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_branch((struct ppc_inst *)(dest + 1), + (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK); + } else { patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + } } start = PTRRELOC(&__start___scv_entry_flush_fixup); @@ -334,15 +376,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - - if (types == L1D_FLUSH_FALLBACK) - patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback, - BRANCH_SET_LINK); - else + if (types == L1D_FLUSH_FALLBACK) { + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_branch((struct ppc_inst *)(dest + 1), + (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK); + } else { patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + } } @@ -354,6 +397,19 @@ void do_entry_flush_fixups(enum l1d_flush_type types) : "ori type" : (types & L1D_FLUSH_MTTRIG) ? "mttrig type" : "unknown"); + + return 0; +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + /* + * The call to the fallback flush can not be safely patched in/out while + * other CPUs are executing it. So call __do_entry_flush_fixups() on one + * CPU while all other CPUs spin in the stop machine core with interrupts + * hard disabled. + */ + stop_machine(__do_entry_flush_fixups, &types, NULL); } void do_rfi_flush_fixups(enum l1d_flush_type types) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 5fef8db3b463..6e3495221ab7 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -357,30 +357,19 @@ static void __init radix_init_pgtable(void) } /* Find out how many PID bits are supported */ - if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { - if (!mmu_pid_bits) - mmu_pid_bits = 20; - mmu_base_pid = 1; - } else if (cpu_has_feature(CPU_FTR_HVMODE)) { - if (!mmu_pid_bits) - mmu_pid_bits = 20; -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + if (!cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { /* - * When KVM is possible, we only use the top half of the - * PID space to avoid collisions between host and guest PIDs - * which can cause problems due to prefetch when exiting the - * guest with AIL=3 + * Older versions of KVM on these machines perfer if the + * guest only uses the low 19 PID bits. */ - mmu_base_pid = 1 << (mmu_pid_bits - 1); -#else - mmu_base_pid = 1; -#endif - } else { - /* The guest uses the bottom half of the PID space */ if (!mmu_pid_bits) mmu_pid_bits = 19; - mmu_base_pid = 1; + } else { + if (!mmu_pid_bits) + mmu_pid_bits = 20; } + mmu_base_pid = 1; /* * Allocate Partition table and process table for the @@ -486,6 +475,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, def = &mmu_psize_defs[idx]; def->shift = shift; def->ap = ap; + def->h_rpt_pgsize = psize_to_rpti_pgsize(idx); } /* needed ? */ @@ -560,9 +550,13 @@ void __init radix__early_init_devtree(void) */ mmu_psize_defs[MMU_PAGE_4K].shift = 12; mmu_psize_defs[MMU_PAGE_4K].ap = 0x0; + mmu_psize_defs[MMU_PAGE_4K].h_rpt_pgsize = + psize_to_rpti_pgsize(MMU_PAGE_4K); mmu_psize_defs[MMU_PAGE_64K].shift = 16; mmu_psize_defs[MMU_PAGE_64K].ap = 0x5; + mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize = + psize_to_rpti_pgsize(MMU_PAGE_64K); } /* diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 409e61210789..318ec4f33661 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -20,10 +20,6 @@ #include "internal.h" -#define RIC_FLUSH_TLB 0 -#define RIC_FLUSH_PWC 1 -#define RIC_FLUSH_ALL 2 - /* * tlbiel instruction for radix, set invalidation * i.e., r=1 and is=01 or is=10 or is=11 @@ -130,6 +126,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static __always_inline void __tlbie_pid_lpid(unsigned long pid, + unsigned long lpid, + unsigned long ric) +{ + unsigned long rb, rs, prs, r; + + rb = PPC_BIT(53); /* IS = 1 */ + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -190,6 +201,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid, trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, + unsigned long lpid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb, rs, prs, r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, unsigned long ap, unsigned long ric) { @@ -235,6 +263,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, } } +static inline void fixup_tlbie_va_range_lpid(unsigned long va, + unsigned long pid, + unsigned long lpid, + unsigned long ap) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); + } +} + static inline void fixup_tlbie_pid(unsigned long pid) { /* @@ -254,6 +298,25 @@ static inline void fixup_tlbie_pid(unsigned long pid) } } +static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) +{ + /* + * We can use any address for the invalidation, pick one which is + * probably unused as an optimisation. + */ + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), + RIC_FLUSH_TLB); + } +} static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, unsigned long ap) @@ -344,6 +407,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } +static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, + unsigned long ric) +{ + asm volatile("ptesync" : : : "memory"); + + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); + fixup_tlbie_pid_lpid(pid, lpid); + break; + case RIC_FLUSH_PWC: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); + fixup_tlbie_pid_lpid(pid, lpid); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} struct tlbiel_pid { unsigned long pid; unsigned long ric; @@ -469,6 +557,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end, fixup_tlbie_va_range(addr - page_size, pid, ap); } +static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, + unsigned long pid, unsigned long lpid, + unsigned long page_size, + unsigned long psize) +{ + unsigned long addr; + unsigned long ap = mmu_get_ap(psize); + + for (addr = start; addr < end; addr += page_size) + __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); + + fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); +} + static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, unsigned long psize, unsigned long ric) { @@ -549,6 +651,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, asm volatile("eieio; tlbsync; ptesync": : :"memory"); } +static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, + unsigned long pid, unsigned long lpid, + unsigned long page_size, + unsigned long psize, bool also_pwc) +{ + asm volatile("ptesync" : : : "memory"); + if (also_pwc) + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); + __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} + static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, @@ -1338,47 +1452,57 @@ void radix__flush_tlb_all(void) } #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) +/* + * Performs process-scoped invalidations for a given LPID + * as part of H_RPT_INVALIDATE hcall. + */ +void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, + unsigned long type, unsigned long pg_sizes, + unsigned long start, unsigned long end) { - unsigned long pid = mm->context.id; + unsigned long psize, nr_pages; + struct mmu_psize_def *def; + bool flush_pid; - if (unlikely(pid == MMU_NO_CONTEXT)) + /* + * A H_RPTI_TYPE_ALL request implies RIC=3, hence + * do a single IS=1 based flush. + */ + if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) { + _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); return; + } - if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) - return; + if (type & H_RPTI_TYPE_PWC) + _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); - /* - * If this context hasn't run on that CPU before and KVM is - * around, there's a slim chance that the guest on another - * CPU just brought in obsolete translation into the TLB of - * this CPU due to a bad prefetch using the guest PID on - * the way into the hypervisor. - * - * We work around this here. If KVM is possible, we check if - * any sibling thread is in KVM. If it is, the window may exist - * and thus we flush that PID from the core. - * - * A potential future improvement would be to mark which PIDs - * have never been used on the system and avoid it if the PID - * is new and the process has no other cpumask bit set. - */ - if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) { - int cpu = smp_processor_id(); - int sib = cpu_first_thread_sibling(cpu); - bool flush = false; - - for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { - if (sib == cpu) - continue; - if (!cpu_possible(sib)) - continue; - if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu) - flush = true; + /* Full PID flush */ + if (start == 0 && end == -1) + return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); + + /* Do range invalidation for all the valid page sizes */ + for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { + def = &mmu_psize_defs[psize]; + if (!(pg_sizes & def->h_rpt_pgsize)) + continue; + + nr_pages = (end - start) >> def->shift; + flush_pid = nr_pages > tlb_single_page_flush_ceiling; + + /* + * If the number of pages spanning the range is above + * the ceiling, convert the request into a full PID flush. + * And since PID flush takes out all the page sizes, there + * is no need to consider remaining page sizes. + */ + if (flush_pid) { + _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); + return; } - if (flush) - _tlbiel_pid(pid, RIC_FLUSH_ALL); + _tlbie_va_range_lpid(start, end, pid, lpid, + (1UL << def->shift), psize, false); } } -EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround); +EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index a857af401738..74246536b832 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -83,9 +83,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (cpu_has_feature(CPU_FTR_ALTIVEC)) asm volatile ("dssall"); - if (new_on_cpu) - radix_kvm_prefetch_workaround(next); - else + if (!new_on_cpu) membarrier_arch_switch_mm(prev, next, tsk); /* diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 999997d9e9a9..528a7e0cf83a 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -604,7 +604,7 @@ struct p9_sprs { u64 uamor; }; -static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) +static unsigned long power9_idle_stop(unsigned long psscr) { int cpu = raw_smp_processor_id(); int first = cpu_first_thread_sibling(cpu); @@ -620,8 +620,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { /* EC=ESL=0 case */ - BUG_ON(!mmu_on); - /* * Wake synchronously. SRESET via xscom may still cause * a 0x100 powersave wakeup with SRR1 reason! @@ -803,8 +801,7 @@ core_woken: __slb_restore_bolted_realmode(); out: - if (mmu_on) - mtmsr(MSR_KERNEL); + mtmsr(MSR_KERNEL); return srr1; } @@ -895,7 +892,7 @@ struct p10_sprs { */ }; -static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on) +static unsigned long power10_idle_stop(unsigned long psscr) { int cpu = raw_smp_processor_id(); int first = cpu_first_thread_sibling(cpu); @@ -909,8 +906,6 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on) if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { /* EC=ESL=0 case */ - BUG_ON(!mmu_on); - /* * Wake synchronously. SRESET via xscom may still cause * a 0x100 powersave wakeup with SRR1 reason! @@ -991,8 +986,7 @@ core_woken: __slb_restore_bolted_realmode(); out: - if (mmu_on) - mtmsr(MSR_KERNEL); + mtmsr(MSR_KERNEL); return srr1; } @@ -1002,40 +996,10 @@ static unsigned long arch300_offline_stop(unsigned long psscr) { unsigned long srr1; -#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE - __ppc64_runlatch_off(); if (cpu_has_feature(CPU_FTR_ARCH_31)) - srr1 = power10_idle_stop(psscr, true); + srr1 = power10_idle_stop(psscr); else - srr1 = power9_idle_stop(psscr, true); - __ppc64_runlatch_on(); -#else - /* - * Tell KVM we're entering idle. - * This does not have to be done in real mode because the P9 MMU - * is independent per-thread. Some steppings share radix/hash mode - * between threads, but in that case KVM has a barrier sync in real - * mode before and after switching between radix and hash. - * - * kvm_start_guest must still be called in real mode though, hence - * the false argument. - */ - local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; - - __ppc64_runlatch_off(); - if (cpu_has_feature(CPU_FTR_ARCH_31)) - srr1 = power10_idle_stop(psscr, false); - else - srr1 = power9_idle_stop(psscr, false); - __ppc64_runlatch_on(); - - local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; - /* Order setting hwthread_state vs. testing hwthread_req */ - smp_mb(); - if (local_paca->kvm_hstate.hwthread_req) - srr1 = idle_kvm_start_guest(srr1); - mtmsr(MSR_KERNEL); -#endif + srr1 = power9_idle_stop(psscr); return srr1; } @@ -1055,9 +1019,9 @@ void arch300_idle_type(unsigned long stop_psscr_val, __ppc64_runlatch_off(); if (cpu_has_feature(CPU_FTR_ARCH_31)) - srr1 = power10_idle_stop(psscr, true); + srr1 = power10_idle_stop(psscr); else - srr1 = power9_idle_stop(psscr, true); + srr1 = power9_idle_stop(psscr); __ppc64_runlatch_on(); fini_irq_for_idle_irqsoff(); diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 2136e42833af..8a2b8d64265b 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -102,6 +102,16 @@ END_FTR_SECTION(0, 1); \ #define HCALL_BRANCH(LABEL) #endif +_GLOBAL_TOC(plpar_hcall_norets_notrace) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + HVSC /* invoke the hypervisor */ + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + _GLOBAL_TOC(plpar_hcall_norets) HMT_MEDIUM diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 1f3152ad7213..dab356e3ff87 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1829,30 +1829,28 @@ void hcall_tracepoint_unregfunc(void) #endif /* - * Since the tracing code might execute hcalls we need to guard against - * recursion. One example of this are spinlocks calling H_YIELD on - * shared processor partitions. + * Keep track of hcall tracing depth and prevent recursion. Warn if any is + * detected because it may indicate a problem. This will not catch all + * problems with tracing code making hcalls, because the tracing might have + * been invoked from a non-hcall, so the first hcall could recurse into it + * without warning here, but this better than nothing. + * + * Hcalls with specific problems being traced should use the _notrace + * plpar_hcall variants. */ static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); -void __trace_hcall_entry(unsigned long opcode, unsigned long *args) +notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args) { unsigned long flags; unsigned int *depth; - /* - * We cannot call tracepoints inside RCU idle regions which - * means we must not trace H_CEDE. - */ - if (opcode == H_CEDE) - return; - local_irq_save(flags); depth = this_cpu_ptr(&hcall_trace_depth); - if (*depth) + if (WARN_ON_ONCE(*depth)) goto out; (*depth)++; @@ -1864,19 +1862,16 @@ out: local_irq_restore(flags); } -void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) +notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) { unsigned long flags; unsigned int *depth; - if (opcode == H_CEDE) - return; - local_irq_save(flags); depth = this_cpu_ptr(&hcall_trace_depth); - if (*depth) + if (*depth) /* Don't warn again on the way out */ goto out; (*depth)++; diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index f5beecdac693..e76b22157099 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit(void) { } BUILD_TRAP_HANDLER(nmi) { - unsigned int cpu = smp_processor_id(); TRAP_HANDLER_DECL; arch_ftrace_nmi_enter(); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6e5522aebbbd..431bf7f846c3 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -30,6 +30,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ KBUILD_CFLAGS := -m$(BITS) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE +KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone @@ -48,10 +49,10 @@ KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no) KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h KBUILD_CFLAGS += $(CLANG_FLAGS) -# sev-es.c indirectly inludes inat-table.h which is generated during +# sev.c indirectly inludes inat-table.h which is generated during # compilation and stored in $(objtree). Add the directory to the includes so # that the compiler finds it even with out-of-tree builds (make O=/some/path). -CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/ +CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n @@ -93,7 +94,7 @@ ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o vmlinux-objs-y += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o - vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index dde042f64cca..743f13ea25c1 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -172,7 +172,7 @@ void __puthex(unsigned long value) } } -#if CONFIG_X86_NEED_RELOCS +#ifdef CONFIG_X86_NEED_RELOCS static void handle_relocations(void *output, unsigned long output_len, unsigned long virt_addr) { diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index e5612f035498..31139256859f 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -79,7 +79,7 @@ struct mem_vector { u64 size; }; -#if CONFIG_RANDOMIZE_BASE +#ifdef CONFIG_RANDOMIZE_BASE /* kaslr.c */ void choose_random_location(unsigned long input, unsigned long input_size, diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev.c index 82041bd380e5..670e998fe930 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev.c @@ -13,7 +13,7 @@ #include "misc.h" #include <asm/pgtable_types.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #include <asm/trapnr.h> #include <asm/trap_pf.h> #include <asm/msr-index.h> @@ -117,7 +117,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, #include "../../lib/insn.c" /* Include code for early handlers */ -#include "../../kernel/sev-es-shared.c" +#include "../../kernel/sev-shared.c" static bool early_setup_sev_es(void) { diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 742d89a00721..211ba3375ee9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -537,9 +537,9 @@ /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 154321d29050..556b2b17c3e2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -787,8 +787,10 @@ DECLARE_PER_CPU(u64, msr_misc_features_shadow); #ifdef CONFIG_CPU_SUP_AMD extern u32 amd_get_nodes_per_socket(void); +extern u32 amd_get_highest_perf(void); #else static inline u32 amd_get_nodes_per_socket(void) { return 0; } +static inline u32 amd_get_highest_perf(void) { return 0; } #endif static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h new file mode 100644 index 000000000000..629c3df243f0 --- /dev/null +++ b/arch/x86/include/asm/sev-common.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD SEV header common between the guest and the hypervisor. + * + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#ifndef __ASM_X86_SEV_COMMON_H +#define __ASM_X86_SEV_COMMON_H + +#define GHCB_MSR_INFO_POS 0 +#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) + +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 +#define GHCB_MSR_VER_MAX_POS 48 +#define GHCB_MSR_VER_MAX_MASK 0xffff +#define GHCB_MSR_VER_MIN_POS 32 +#define GHCB_MSR_VER_MIN_MASK 0xffff +#define GHCB_MSR_CBIT_POS 24 +#define GHCB_MSR_CBIT_MASK 0xff +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ + (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ + (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ + GHCB_MSR_SEV_INFO_RESP) +#define GHCB_MSR_INFO(v) ((v) & 0xfffUL) +#define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK) +#define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK) + +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 +#define GHCB_CPUID_REQ_EAX 0 +#define GHCB_CPUID_REQ_EBX 1 +#define GHCB_CPUID_REQ_ECX 2 +#define GHCB_CPUID_REQ_EDX 3 +#define GHCB_CPUID_REQ(fn, reg) \ + (GHCB_MSR_CPUID_REQ | \ + (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ + (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) + +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff +#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \ + (((((u64)reason_set) & GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \ + ((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS)) + +#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 +#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 + +#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) + +#endif diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev.h index cf1d957c7091..fa5cd05d3b5b 100644 --- a/arch/x86/include/asm/sev-es.h +++ b/arch/x86/include/asm/sev.h @@ -10,34 +10,12 @@ #include <linux/types.h> #include <asm/insn.h> +#include <asm/sev-common.h> -#define GHCB_SEV_INFO 0x001UL -#define GHCB_SEV_INFO_REQ 0x002UL -#define GHCB_INFO(v) ((v) & 0xfffUL) -#define GHCB_PROTO_MAX(v) (((v) >> 48) & 0xffffUL) -#define GHCB_PROTO_MIN(v) (((v) >> 32) & 0xffffUL) -#define GHCB_PROTO_OUR 0x0001UL -#define GHCB_SEV_CPUID_REQ 0x004UL -#define GHCB_CPUID_REQ_EAX 0 -#define GHCB_CPUID_REQ_EBX 1 -#define GHCB_CPUID_REQ_ECX 2 -#define GHCB_CPUID_REQ_EDX 3 -#define GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \ - (((unsigned long)reg & 3) << 30) | \ - (((unsigned long)fn) << 32)) +#define GHCB_PROTO_OUR 0x0001UL +#define GHCB_PROTOCOL_MAX 1ULL +#define GHCB_DEFAULT_USAGE 0ULL -#define GHCB_PROTOCOL_MAX 0x0001UL -#define GHCB_DEFAULT_USAGE 0x0000UL - -#define GHCB_SEV_CPUID_RESP 0x005UL -#define GHCB_SEV_TERMINATE 0x100UL -#define GHCB_SEV_TERMINATE_REASON(reason_set, reason_val) \ - (((((u64)reason_set) & 0x7) << 12) | \ - ((((u64)reason_val) & 0xff) << 16)) -#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 -#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 - -#define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff) #define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } enum es_result { diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h index 119ac8612d89..136e5e57cfe1 100644 --- a/arch/x86/include/asm/vdso/clocksource.h +++ b/arch/x86/include/asm/vdso/clocksource.h @@ -7,4 +7,6 @@ VDSO_CLOCKMODE_PVCLOCK, \ VDSO_CLOCKMODE_HVCLOCK +#define HAVE_VDSO_CLOCKMODE_HVCLOCK + #endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0704c2a94272..0f66682ac02a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -20,7 +20,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg -CFLAGS_REMOVE_sev-es.o = -pg +CFLAGS_REMOVE_sev.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n @@ -28,7 +28,7 @@ KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n -KASAN_SANITIZE_sev-es.o := n +KASAN_SANITIZE_sev.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. @@ -148,7 +148,7 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o -obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev-es.o +obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2d11384dc9ab..c06ac56eae4d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -593,8 +593,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) */ if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) { /* Check if memory encryption is enabled */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) goto clear_all; /* @@ -1165,3 +1165,19 @@ void set_dr_addr_mask(unsigned long mask, int dr) break; } } + +u32 amd_get_highest_perf(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) || + (c->x86_model >= 0x70 && c->x86_model < 0x80))) + return 166; + + if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) || + (c->x86_model >= 0x40 && c->x86_model < 0x70))) + return 166; + + return 255; +} +EXPORT_SYMBOL_GPL(amd_get_highest_perf); diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 0c3b372318b7..b5f43049fa5f 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -836,7 +836,7 @@ int __init amd_special_default_mtrr(void) if (boot_cpu_data.x86 < 0xf) return 0; /* In case some hypervisor doesn't pass SYSCFG through: */ - if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) + if (rdmsr_safe(MSR_AMD64_SYSCFG, &l, &h) < 0) return 0; /* * Memory between 4GB and top of mem is forced WB by this magic bit. diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index b90f3f437765..558108296f3c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -53,13 +53,13 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - rdmsr(MSR_K8_SYSCFG, lo, hi); + rdmsr(MSR_AMD64_SYSCFG, lo, hi); if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" " not cleared by BIOS, clearing this bit\n", smp_processor_id()); lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; - mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi); + mtrr_wrmsr(MSR_AMD64_SYSCFG, lo, hi); } } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 18be44163a50..de01903c3735 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -39,7 +39,7 @@ #include <asm/realmode.h> #include <asm/extable.h> #include <asm/trapnr.h> -#include <asm/sev-es.h> +#include <asm/sev.h> /* * Manage page tables very early on. diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index b5cb49e57df8..c94dec6a1834 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -95,7 +95,7 @@ static void get_fam10h_pci_mmconf_base(void) return; /* SYS_CFG */ - address = MSR_K8_SYSCFG; + address = MSR_AMD64_SYSCFG; rdmsrl(address, val); /* TOP_MEM2 is not enabled? */ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2ef961cf4cfc..4bce802d25fb 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -33,7 +33,7 @@ #include <asm/reboot.h> #include <asm/cache.h> #include <asm/nospec-branch.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-shared.c index 0aa9f13efd57..6ec8b3bfd76e 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -26,13 +26,13 @@ static bool __init sev_es_check_cpu_features(void) static void __noreturn sev_es_terminate(unsigned int reason) { - u64 val = GHCB_SEV_TERMINATE; + u64 val = GHCB_MSR_TERM_REQ; /* * Tell the hypervisor what went wrong - only reason-set 0 is * currently supported. */ - val |= GHCB_SEV_TERMINATE_REASON(0, reason); + val |= GHCB_SEV_TERM_REASON(0, reason); /* Request Guest Termination from Hypvervisor */ sev_es_wr_ghcb_msr(val); @@ -47,15 +47,15 @@ static bool sev_es_negotiate_protocol(void) u64 val; /* Do the GHCB protocol version negotiation */ - sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ); + sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_INFO(val) != GHCB_SEV_INFO) + if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) return false; - if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR || - GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR) + if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR || + GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR) return false; return true; @@ -153,28 +153,28 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->ax = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->bx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->cx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->dx = val >> 32; diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev.c index 73873b007838..9578c82832aa 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev.c @@ -22,7 +22,7 @@ #include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #include <asm/insn-eval.h> #include <asm/fpu/internal.h> #include <asm/processor.h> @@ -459,7 +459,7 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt } /* Include code shared with pre-decompression boot stage */ -#include "sev-es-shared.c" +#include "sev-shared.c" void noinstr __sev_es_nmi_complete(void) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0ad5214f598a..7770245cc7fa 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -2043,7 +2043,7 @@ static bool amd_set_max_freq_ratio(void) return false; } - highest_perf = perf_caps.highest_perf; + highest_perf = amd_get_highest_perf(); nominal_perf = perf_caps.nominal_perf; if (!highest_perf || !nominal_perf) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 12c06ea28f5c..4cee285b0185 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -876,8 +876,8 @@ static __init void svm_adjust_mmio_mask(void) return; /* If memory encryption is not enabled, use existing mask */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; enc_bit = cpuid_ebx(0x8000001f) & 0x3f; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 670f0c0ed73b..f89b623bb591 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -20,6 +20,7 @@ #include <linux/bits.h> #include <asm/svm.h> +#include <asm/sev-common.h> #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) @@ -541,40 +542,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ -#define GHCB_VERSION_MAX 1ULL -#define GHCB_VERSION_MIN 1ULL - -#define GHCB_MSR_INFO_POS 0 -#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) - -#define GHCB_MSR_SEV_INFO_RESP 0x001 -#define GHCB_MSR_SEV_INFO_REQ 0x002 -#define GHCB_MSR_VER_MAX_POS 48 -#define GHCB_MSR_VER_MAX_MASK 0xffff -#define GHCB_MSR_VER_MIN_POS 32 -#define GHCB_MSR_VER_MIN_MASK 0xffff -#define GHCB_MSR_CBIT_POS 24 -#define GHCB_MSR_CBIT_MASK 0xff -#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ - ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ - (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ - (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ - GHCB_MSR_SEV_INFO_RESP) - -#define GHCB_MSR_CPUID_REQ 0x004 -#define GHCB_MSR_CPUID_RESP 0x005 -#define GHCB_MSR_CPUID_FUNC_POS 32 -#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff -#define GHCB_MSR_CPUID_VALUE_POS 32 -#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff -#define GHCB_MSR_CPUID_REG_POS 30 -#define GHCB_MSR_CPUID_REG_MASK 0x3 - -#define GHCB_MSR_TERM_REQ 0x100 -#define GHCB_MSR_TERM_REASON_SET_POS 12 -#define GHCB_MSR_TERM_REASON_SET_MASK 0xf -#define GHCB_MSR_TERM_REASON_POS 16 -#define GHCB_MSR_TERM_REASON_MASK 0xff +#define GHCB_VERSION_MAX 1ULL +#define GHCB_VERSION_MIN 1ULL + extern unsigned int max_sev_asid; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 76dae88cf524..38c003b60339 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3563,7 +3563,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_LASTBRANCHTOIP: case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: - case MSR_K8_SYSCFG: + case MSR_AMD64_SYSCFG: case MSR_K8_TSEG_ADDR: case MSR_K8_TSEG_MASK: case MSR_VM_HSAVE_PA: diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index b93d6cd08a7f..121921b2927c 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -5,7 +5,7 @@ #include <xen/xen.h> #include <asm/fpu/internal.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #include <asm/traps.h> #include <asm/kdebug.h> diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 04aba7e80a36..a9639f663d25 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -529,7 +529,7 @@ void __init sme_enable(struct boot_params *bp) /* * No SME if Hypervisor bit is set. This check is here to * prevent a guest from trying to enable SME. For running as a - * KVM guest the MSR_K8_SYSCFG will be sufficient, but there + * KVM guest the MSR_AMD64_SYSCFG will be sufficient, but there * might be other hypervisors which emulate that MSR as non-zero * or even pass it through to the guest. * A malicious hypervisor can still trick a guest into this @@ -542,8 +542,8 @@ void __init sme_enable(struct boot_params *bp) return; /* For SME, check the SYSCFG MSR */ - msr = __rdmsr(MSR_K8_SYSCFG); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + msr = __rdmsr(MSR_AMD64_SYSCFG); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; } else { /* SEV state cannot be controlled by a command line option */ diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index ae744b6a0785..dd40d3fea74e 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -284,7 +284,7 @@ static int __init early_root_info_init(void) /* need to take out [4G, TOM2) for RAM*/ /* SYS_CFG */ - address = MSR_K8_SYSCFG; + address = MSR_AMD64_SYSCFG; rdmsrl(address, val); /* TOP_MEM2 is enabled? */ if (val & (1<<21)) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index df7b5477fc4f..7515e78ef898 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -47,7 +47,7 @@ #include <asm/realmode.h> #include <asm/time.h> #include <asm/pgalloc.h> -#include <asm/sev-es.h> +#include <asm/sev.h> /* * We allocate runtime services regions top-down, starting from -4G, i.e. diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 1be71ef5e4c4..2e1c1bec0f9e 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -9,7 +9,7 @@ #include <asm/realmode.h> #include <asm/tlbflush.h> #include <asm/crash.h> -#include <asm/sev-es.h> +#include <asm/sev.h> struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 84c5d1b33d10..cc8391f86cdb 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -123,9 +123,9 @@ SYM_CODE_START(startup_32) */ btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags jnc .Ldone - movl $MSR_K8_SYSCFG, %ecx + movl $MSR_AMD64_SYSCFG, %ecx rdmsr - bts $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax + bts $MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT, %eax jc .Ldone /* diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0270cd7ca165..acd1f881273e 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -372,9 +372,38 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync) return bic->bfqq[is_sync]; } +static void bfq_put_stable_ref(struct bfq_queue *bfqq); + void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync) { + /* + * If bfqq != NULL, then a non-stable queue merge between + * bic->bfqq and bfqq is happening here. This causes troubles + * in the following case: bic->bfqq has also been scheduled + * for a possible stable merge with bic->stable_merge_bfqq, + * and bic->stable_merge_bfqq == bfqq happens to + * hold. Troubles occur because bfqq may then undergo a split, + * thereby becoming eligible for a stable merge. Yet, if + * bic->stable_merge_bfqq points exactly to bfqq, then bfqq + * would be stably merged with itself. To avoid this anomaly, + * we cancel the stable merge if + * bic->stable_merge_bfqq == bfqq. + */ bic->bfqq[is_sync] = bfqq; + + if (bfqq && bic->stable_merge_bfqq == bfqq) { + /* + * Actually, these same instructions are executed also + * in bfq_setup_cooperator, in case of abort or actual + * execution of a stable merge. We could avoid + * repeating these instructions there too, but if we + * did so, we would nest even more complexity in this + * function. + */ + bfq_put_stable_ref(bic->stable_merge_bfqq); + + bic->stable_merge_bfqq = NULL; + } } struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) @@ -2263,10 +2292,9 @@ static void bfq_remove_request(struct request_queue *q, } -static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, +static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { - struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; struct request *free = NULL; /* @@ -2631,8 +2659,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, struct bfq_queue *bfqq); -static void bfq_put_stable_ref(struct bfq_queue *bfqq); - /* * Attempt to schedule a merge of bfqq with the currently in-service * queue or with a close queue among the scheduled queues. Return diff --git a/block/blk-iocost.c b/block/blk-iocost.c index e0c4baa01857..c2d6bc88d3f1 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1069,7 +1069,17 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, lockdep_assert_held(&ioc->lock); - inuse = clamp_t(u32, inuse, 1, active); + /* + * For an active leaf node, its inuse shouldn't be zero or exceed + * @active. An active internal node's inuse is solely determined by the + * inuse to active ratio of its children regardless of @inuse. + */ + if (list_empty(&iocg->active_list) && iocg->child_active_sum) { + inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum, + iocg->child_active_sum); + } else { + inuse = clamp_t(u32, inuse, 1, active); + } iocg->last_inuse = iocg->inuse; if (save) @@ -1086,7 +1096,7 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, /* update the level sums */ parent->child_active_sum += (s32)(active - child->active); parent->child_inuse_sum += (s32)(inuse - child->inuse); - /* apply the udpates */ + /* apply the updates */ child->active = active; child->inuse = inuse; diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 42a365b1b9c0..996a4b2f73aa 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -358,14 +358,16 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { struct elevator_queue *e = q->elevator; - struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); + struct blk_mq_ctx *ctx; + struct blk_mq_hw_ctx *hctx; bool ret = false; enum hctx_type type; if (e && e->type->ops.bio_merge) - return e->type->ops.bio_merge(hctx, bio, nr_segs); + return e->type->ops.bio_merge(q, bio, nr_segs); + ctx = blk_mq_get_ctx(q); + hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); type = hctx->type; if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) || list_empty_careful(&ctx->rq_lists[type])) diff --git a/block/blk-mq.c b/block/blk-mq.c index 466676bc2f0b..c86c01bfecdb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2232,8 +2232,9 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) /* Bypass scheduler for flush requests */ blk_insert_flush(rq); blk_mq_run_hw_queue(data.hctx, true); - } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs || - !blk_queue_nonrot(q))) { + } else if (plug && (q->nr_hw_queues == 1 || + blk_mq_is_sbitmap_shared(rq->mq_hctx->flags) || + q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) { /* * Use plugging if we have a ->commit_rqs() hook as well, as * we know the driver uses bd->last in a smart fashion. @@ -3285,10 +3286,12 @@ EXPORT_SYMBOL(blk_mq_init_allocated_queue); /* tags can _not_ be used after returning from blk_mq_exit_queue */ void blk_mq_exit_queue(struct request_queue *q) { - struct blk_mq_tag_set *set = q->tag_set; + struct blk_mq_tag_set *set = q->tag_set; - blk_mq_del_queue_tag_set(q); + /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */ blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); + /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */ + blk_mq_del_queue_tag_set(q); } static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 8969e122f081..81e3279ecd57 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -561,11 +561,12 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) } } -static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, +static bool kyber_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); struct kyber_hctx_data *khd = hctx->sched_data; - struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue); struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]]; unsigned int sched_domain = kyber_sched_domain(bio->bi_opf); struct list_head *rq_list = &kcq->rq_list[sched_domain]; diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 04aded71ead2..8eea2cbf2bf4 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -461,10 +461,9 @@ static int dd_request_merge(struct request_queue *q, struct request **rq, return ELEVATOR_NO_MERGE; } -static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, +static bool dd_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { - struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; struct request *free = NULL; bool ret; diff --git a/block/partitions/efi.c b/block/partitions/efi.c index b64bfdd4326c..e2716792ecc1 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -682,7 +682,7 @@ static void utf16_le_to_7bit(const __le16 *in, unsigned int size, u8 *out) } /** - * efi_partition(struct parsed_partitions *state) + * efi_partition - scan for GPT partitions * @state: disk parsed partitions * * Description: called from check.c, if the disk contains GPT diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 16c0fe8a72a7..d260bc1f3e6e 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1313,6 +1313,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on) {"PNP0C0B", }, /* Generic ACPI fan */ {"INT3404", }, /* Fan */ {"INTC1044", }, /* Fan for Tiger Lake generation */ + {"INTC1048", }, /* Fan for Alder Lake generation */ {} }; struct acpi_device *adev = ACPI_COMPANION(dev); diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index b852cff80287..f973bbe90e5e 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -142,6 +142,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev, int acpi_power_get_inferred_state(struct acpi_device *device, int *state); int acpi_power_on_resources(struct acpi_device *device, int state); int acpi_power_transition(struct acpi_device *device, int state); +void acpi_turn_off_unused_power_resources(void); /* -------------------------------------------------------------------------- Device Power Management diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 958aaac869e8..23d9a09d7060 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -686,6 +686,13 @@ int nfit_spa_type(struct acpi_nfit_system_address *spa) return -1; } +static size_t sizeof_spa(struct acpi_nfit_system_address *spa) +{ + if (spa->flags & ACPI_NFIT_LOCATION_COOKIE_VALID) + return sizeof(*spa); + return sizeof(*spa) - 8; +} + static bool add_spa(struct acpi_nfit_desc *acpi_desc, struct nfit_table_prev *prev, struct acpi_nfit_system_address *spa) @@ -693,22 +700,22 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc, struct device *dev = acpi_desc->dev; struct nfit_spa *nfit_spa; - if (spa->header.length != sizeof(*spa)) + if (spa->header.length != sizeof_spa(spa)) return false; list_for_each_entry(nfit_spa, &prev->spas, list) { - if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) { + if (memcmp(nfit_spa->spa, spa, sizeof_spa(spa)) == 0) { list_move_tail(&nfit_spa->list, &acpi_desc->spas); return true; } } - nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa), + nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof_spa(spa), GFP_KERNEL); if (!nfit_spa) return false; INIT_LIST_HEAD(&nfit_spa->list); - memcpy(nfit_spa->spa, spa, sizeof(*spa)); + memcpy(nfit_spa->spa, spa, sizeof_spa(spa)); list_add_tail(&nfit_spa->list, &acpi_desc->spas); dev_dbg(dev, "spa index: %d type: %s\n", spa->range_index, diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 32974b575e46..56102eaaa2da 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -995,6 +995,7 @@ void acpi_resume_power_resources(void) mutex_unlock(&power_resource_list_lock); } +#endif void acpi_turn_off_unused_power_resources(void) { @@ -1015,4 +1016,3 @@ void acpi_turn_off_unused_power_resources(void) mutex_unlock(&power_resource_list_lock); } -#endif diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index a22778e880c2..453eff8ec8c3 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -700,6 +700,7 @@ int acpi_device_add(struct acpi_device *device, result = acpi_device_set_name(device, acpi_device_bus_id); if (result) { + kfree_const(acpi_device_bus_id->bus_id); kfree(acpi_device_bus_id); goto err_unlock; } @@ -2359,6 +2360,8 @@ int __init acpi_scan_init(void) } } + acpi_turn_off_unused_power_resources(); + acpi_scan_initialized = true; out: diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h index 1856f76ac83f..7fe41ee489d6 100644 --- a/drivers/acpi/sleep.h +++ b/drivers/acpi/sleep.h @@ -8,7 +8,6 @@ extern struct list_head acpi_wakeup_device_list; extern struct mutex acpi_device_lock; extern void acpi_resume_power_resources(void); -extern void acpi_turn_off_unused_power_resources(void); static inline acpi_status acpi_set_waking_vector(u32 wakeup_address) { diff --git a/drivers/base/core.c b/drivers/base/core.c index 4a8bf8cda52b..628e33939aca 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -150,7 +150,7 @@ void fwnode_links_purge(struct fwnode_handle *fwnode) fwnode_links_purge_consumers(fwnode); } -static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) +void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) { struct fwnode_handle *child; @@ -164,6 +164,7 @@ static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) fwnode_for_each_available_child_node(fwnode, child) fw_devlink_purge_absent_suppliers(child); } +EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers); #ifdef CONFIG_SRCU static DEFINE_MUTEX(device_links_lock); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 1fc1a992f90c..b570848d23e0 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1637,6 +1637,7 @@ void pm_runtime_init(struct device *dev) dev->power.request_pending = false; dev->power.request = RPM_REQ_NONE; dev->power.deferred_resume = false; + dev->power.needs_force_resume = 0; INIT_WORK(&dev->power.work, pm_runtime_work); dev->power.timer_expires = 0; @@ -1804,10 +1805,12 @@ int pm_runtime_force_suspend(struct device *dev) * its parent, but set its status to RPM_SUSPENDED anyway in case this * function will be called again for it in the meantime. */ - if (pm_runtime_need_not_resume(dev)) + if (pm_runtime_need_not_resume(dev)) { pm_runtime_set_suspended(dev); - else + } else { __update_runtime_status(dev, RPM_SUSPENDED); + dev->power.needs_force_resume = 1; + } return 0; @@ -1834,7 +1837,7 @@ int pm_runtime_force_resume(struct device *dev) int (*callback)(struct device *); int ret = 0; - if (!pm_runtime_status_suspended(dev) || pm_runtime_need_not_resume(dev)) + if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume) goto out; /* @@ -1853,6 +1856,7 @@ int pm_runtime_force_resume(struct device *dev) pm_runtime_mark_last_busy(dev); out: + dev->power.needs_force_resume = 0; pm_runtime_enable(dev); return ret; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4ff71b579cfc..45d2c28c8fc8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1980,7 +1980,8 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) * config ref and try to destroy the workqueue from inside the work * queue. */ - flush_workqueue(nbd->recv_workq); + if (nbd->recv_workq) + flush_workqueue(nbd->recv_workq); if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, &nbd->config->runtime_flags)) nbd_config_put(nbd); @@ -2014,12 +2015,11 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } mutex_unlock(&nbd_index_mutex); - if (!refcount_inc_not_zero(&nbd->config_refs)) { - nbd_put(nbd); - return 0; - } + if (!refcount_inc_not_zero(&nbd->config_refs)) + goto put_nbd; nbd_disconnect_and_put(nbd); nbd_config_put(nbd); +put_nbd: nbd_put(nbd); return 0; } diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index eff1f12d981a..c84d23951219 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -656,6 +656,7 @@ int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip) if (nr_commands != be32_to_cpup((__be32 *)&buf.data[TPM_HEADER_SIZE + 5])) { + rc = -EFAULT; tpm_buf_destroy(&buf); goto out; } diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index a2e0395cbe61..55b9d3965ae1 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -709,16 +709,14 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip) cap_t cap; int ret; - /* TPM 2.0 */ - if (chip->flags & TPM_CHIP_FLAG_TPM2) - return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc); - - /* TPM 1.2 */ ret = request_locality(chip, 0); if (ret < 0) return ret; - ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0); + if (chip->flags & TPM_CHIP_FLAG_TPM2) + ret = tpm2_get_tpm_pt(chip, 0x100, &cap2, desc); + else + ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0); release_locality(chip, 0); @@ -1127,12 +1125,20 @@ int tpm_tis_resume(struct device *dev) if (ret) return ret; - /* TPM 1.2 requires self-test on resume. This function actually returns + /* + * TPM 1.2 requires self-test on resume. This function actually returns * an error code but for unknown reason it isn't handled. */ - if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) + if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) { + ret = request_locality(chip, 0); + if (ret < 0) + return ret; + tpm1_do_selftest(chip); + release_locality(chip, 0); + } + return 0; } EXPORT_SYMBOL_GPL(tpm_tis_resume); diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index e2ec1b745243..65508eb89ec9 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4540,6 +4540,9 @@ int of_clk_add_provider(struct device_node *np, struct of_clk_provider *cp; int ret; + if (!np) + return 0; + cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; @@ -4579,6 +4582,9 @@ int of_clk_add_hw_provider(struct device_node *np, struct of_clk_provider *cp; int ret; + if (!np) + return 0; + cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; @@ -4676,6 +4682,9 @@ void of_clk_del_provider(struct device_node *np) { struct of_clk_provider *cp; + if (!np) + return; + mutex_lock(&of_clk_mutex); list_for_each_entry(cp, &of_clk_providers, link) { if (cp->node == np) { diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 977fd05ac35f..d6ece7bbce89 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -419,7 +419,7 @@ static void resume_hv_clock_tsc(struct clocksource *arg) hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); } -#ifdef VDSO_CLOCKMODE_HVCLOCK +#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK static int hv_cs_enable(struct clocksource *cs) { vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); @@ -435,7 +435,7 @@ static struct clocksource hyperv_cs_tsc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, .suspend= suspend_hv_clock_tsc, .resume = resume_hv_clock_tsc, -#ifdef VDSO_CLOCKMODE_HVCLOCK +#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK .enable = hv_cs_enable, .vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK, #else diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index d1bbc16fba4b..7e7450453714 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -646,7 +646,11 @@ static u64 get_max_boost_ratio(unsigned int cpu) return 0; } - highest_perf = perf_caps.highest_perf; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + highest_perf = amd_get_highest_perf(); + else + highest_perf = perf_caps.highest_perf; + nominal_perf = perf_caps.nominal_perf; if (!highest_perf || !nominal_perf) { diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f0401064d7aa..0e69dffd5a76 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3033,6 +3033,14 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = { {} }; +static bool intel_pstate_hwp_is_enabled(void) +{ + u64 value; + + rdmsrl(MSR_PM_ENABLE, value); + return !!(value & 0x1); +} + static int __init intel_pstate_init(void) { const struct x86_cpu_id *id; @@ -3051,8 +3059,12 @@ static int __init intel_pstate_init(void) * Avoid enabling HWP for processors without EPP support, * because that means incomplete HWP implementation which is a * corner case and supporting it is generally problematic. + * + * If HWP is enabled already, though, there is no choice but to + * deal with it. */ - if (!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) { + if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || + intel_pstate_hwp_is_enabled()) { hwp_active++; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 9fa4dfc6ebee..f0d8f60acee1 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3083,7 +3083,7 @@ static void read_mc_regs(struct amd64_pvt *pvt) edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem); /* Check first whether TOP_MEM2 is enabled: */ - rdmsrl(MSR_K8_SYSCFG, msr_val); + rdmsrl(MSR_AMD64_SYSCFG, msr_val); if (msr_val & BIT(21)) { rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2); edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index dc3a69296321..264176a01e16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1006,6 +1006,7 @@ struct amdgpu_device { struct amdgpu_df df; struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; + uint32_t harvest_ip_mask; int num_ip_blocks; struct mutex mn_lock; DECLARE_HASHTABLE(mn_hash, 7); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7d3b54615147..8b2a37bf2adf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1683,6 +1683,19 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, if (!ip_block_version) return -EINVAL; + switch (ip_block_version->type) { + case AMD_IP_BLOCK_TYPE_VCN: + if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK) + return 0; + break; + case AMD_IP_BLOCK_TYPE_JPEG: + if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK) + return 0; + break; + default: + break; + } + DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks, ip_block_version->funcs->name); @@ -3111,7 +3124,6 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) return amdgpu_device_asic_has_dc_support(adev->asic_type); } - static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) { struct amdgpu_device *adev = @@ -3276,6 +3288,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->vm_manager.vm_pte_funcs = NULL; adev->vm_manager.vm_pte_num_scheds = 0; adev->gmc.gmc_funcs = NULL; + adev->harvest_ip_mask = 0x0; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b2dbcb4df020..e1b6f5891759 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -373,6 +373,34 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, return -EINVAL; } +void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) +{ + struct binary_header *bhdr; + struct harvest_table *harvest_info; + int i; + + bhdr = (struct binary_header *)adev->mman.discovery_bin; + harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + + le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset)); + + for (i = 0; i < 32; i++) { + if (le32_to_cpu(harvest_info->list[i].hw_id) == 0) + break; + + switch (le32_to_cpu(harvest_info->list[i].hw_id)) { + case VCN_HWID: + adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; + adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; + break; + case DMU_HWID: + adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; + break; + default: + break; + } + } +} + int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) { struct binary_header *bhdr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 8f6183801cb3..1b1ae21b1037 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -29,6 +29,7 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); +void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev); int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int *major, int *minor, int *revision); int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index d54af7f8801b..d290ca0b06da 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -623,6 +623,16 @@ static const struct amdgpu_ip_block_version nv_common_ip_block = .funcs = &nv_common_ip_funcs, }; +static bool nv_is_headless_sku(struct pci_dev *pdev) +{ + if ((pdev->device == 0x731E && + (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || + (pdev->device == 0x7340 && pdev->revision == 0xC9) || + (pdev->device == 0x7360 && pdev->revision == 0xC7)) + return true; + return false; +} + static int nv_reg_base_init(struct amdgpu_device *adev) { int r; @@ -635,6 +645,12 @@ static int nv_reg_base_init(struct amdgpu_device *adev) goto legacy_init; } + amdgpu_discovery_harvest_ip(adev); + if (nv_is_headless_sku(adev->pdev)) { + adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; + adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; + } + return 0; } @@ -671,16 +687,6 @@ void nv_set_virt_ops(struct amdgpu_device *adev) adev->virt.ops = &xgpu_nv_virt_ops; } -static bool nv_is_headless_sku(struct pci_dev *pdev) -{ - if ((pdev->device == 0x731E && - (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || - (pdev->device == 0x7340 && pdev->revision == 0xC9) || - (pdev->device == 0x7360 && pdev->revision == 0xC7)) - return true; - return false; -} - int nv_set_ip_blocks(struct amdgpu_device *adev) { int r; @@ -728,8 +734,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - if (!nv_is_headless_sku(adev->pdev)) - amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); if (adev->enable_mes) amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); @@ -752,8 +757,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - if (!nv_is_headless_sku(adev->pdev)) - amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); break; @@ -777,7 +781,6 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); - if (adev->enable_mes) amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; @@ -1149,6 +1152,11 @@ static int nv_common_early_init(void *handle) return -EINVAL; } + if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK) + adev->pg_flags &= ~(AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG); + if (amdgpu_sriov_vf(adev)) { amdgpu_virt_init_setting(adev); xgpu_nv_mailbox_set_irq_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index d80e12b80c7e..8e1b9a40839f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1401,7 +1401,8 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | - AMD_CG_SUPPORT_SDMA_LS; + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_VCN_MGCG; adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_MMHUB | diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 51a773a37a35..0c1beefa3e49 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1119,10 +1119,10 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev) UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp); - /* put VCPU into reset */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, - ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + /* stall UMC channel */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; @@ -1141,6 +1141,11 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev) UVD_SOFT_RESET__LMI_SOFT_RESET_MASK, ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK); + /* put VCPU into reset */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0); vcn_v1_0_enable_clock_gating(adev); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 616f5b1ea3a8..666796a0067c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -650,6 +650,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct /* File created at /sys/class/drm/card0/device/hdcp_srm*/ hdcp_work[0].attr = data_attr; + sysfs_bin_attr_init(&hdcp_work[0].attr); if (sysfs_create_bin_file(&adev->dev->kobj, &hdcp_work[0].attr)) DRM_WARN("Failed to create device file hdcp_srm"); diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 43ed6291b2b8..9ab706cd07ff 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -216,6 +216,12 @@ enum PP_FEATURE_MASK { PP_GFX_DCS_MASK = 0x80000, }; +enum amd_harvest_ip_mask { + AMD_HARVEST_IP_VCN_MASK = 0x1, + AMD_HARVEST_IP_JPEG_MASK = 0x2, + AMD_HARVEST_IP_DMU_MASK = 0x4, +}; + enum DC_FEATURE_MASK { DC_FBC_MASK = 0x1, DC_MULTI_MON_PP_MCLK_SWITCH_MASK = 0x2, diff --git a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c index 26a5321e621b..15c0b8af376f 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c @@ -4817,70 +4817,70 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev, u32 reg; int ret; - table->initialState.levels[0].mclk.vDLL_CNTL = + table->initialState.level.mclk.vDLL_CNTL = cpu_to_be32(si_pi->clock_registers.dll_cntl); - table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL = + table->initialState.level.mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(si_pi->clock_registers.mclk_pwrmgt_cntl); - table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = + table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(si_pi->clock_registers.mpll_ad_func_cntl); - table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL = + table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(si_pi->clock_registers.mpll_dq_func_cntl); - table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL = + table->initialState.level.mclk.vMPLL_FUNC_CNTL = cpu_to_be32(si_pi->clock_registers.mpll_func_cntl); - table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_1 = + table->initialState.level.mclk.vMPLL_FUNC_CNTL_1 = cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_1); - table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_2 = + table->initialState.level.mclk.vMPLL_FUNC_CNTL_2 = cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_2); - table->initialState.levels[0].mclk.vMPLL_SS = + table->initialState.level.mclk.vMPLL_SS = cpu_to_be32(si_pi->clock_registers.mpll_ss1); - table->initialState.levels[0].mclk.vMPLL_SS2 = + table->initialState.level.mclk.vMPLL_SS2 = cpu_to_be32(si_pi->clock_registers.mpll_ss2); - table->initialState.levels[0].mclk.mclk_value = + table->initialState.level.mclk.mclk_value = cpu_to_be32(initial_state->performance_levels[0].mclk); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_2); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_3); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_4); - table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM = + table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM = cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum); - table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2 = + table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2 = cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum_2); - table->initialState.levels[0].sclk.sclk_value = + table->initialState.level.sclk.sclk_value = cpu_to_be32(initial_state->performance_levels[0].sclk); - table->initialState.levels[0].arbRefreshState = + table->initialState.level.arbRefreshState = SISLANDS_INITIAL_STATE_ARB_INDEX; - table->initialState.levels[0].ACIndex = 0; + table->initialState.level.ACIndex = 0; ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table, initial_state->performance_levels[0].vddc, - &table->initialState.levels[0].vddc); + &table->initialState.level.vddc); if (!ret) { u16 std_vddc; ret = si_get_std_voltage_value(adev, - &table->initialState.levels[0].vddc, + &table->initialState.level.vddc, &std_vddc); if (!ret) si_populate_std_voltage_value(adev, std_vddc, - table->initialState.levels[0].vddc.index, - &table->initialState.levels[0].std_vddc); + table->initialState.level.vddc.index, + &table->initialState.level.std_vddc); } if (eg_pi->vddci_control) si_populate_voltage_value(adev, &eg_pi->vddci_voltage_table, initial_state->performance_levels[0].vddci, - &table->initialState.levels[0].vddci); + &table->initialState.level.vddci); if (si_pi->vddc_phase_shed_control) si_populate_phase_shedding_value(adev, @@ -4888,41 +4888,41 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev, initial_state->performance_levels[0].vddc, initial_state->performance_levels[0].sclk, initial_state->performance_levels[0].mclk, - &table->initialState.levels[0].vddc); + &table->initialState.level.vddc); - si_populate_initial_mvdd_value(adev, &table->initialState.levels[0].mvdd); + si_populate_initial_mvdd_value(adev, &table->initialState.level.mvdd); reg = CG_R(0xffff) | CG_L(0); - table->initialState.levels[0].aT = cpu_to_be32(reg); - table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp); - table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen; + table->initialState.level.aT = cpu_to_be32(reg); + table->initialState.level.bSP = cpu_to_be32(pi->dsp); + table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen; if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { - table->initialState.levels[0].strobeMode = + table->initialState.level.strobeMode = si_get_strobe_mode_settings(adev, initial_state->performance_levels[0].mclk); if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold) - table->initialState.levels[0].mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG; + table->initialState.level.mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG; else - table->initialState.levels[0].mcFlags = 0; + table->initialState.level.mcFlags = 0; } table->initialState.levelCount = 1; table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC; - table->initialState.levels[0].dpm2.MaxPS = 0; - table->initialState.levels[0].dpm2.NearTDPDec = 0; - table->initialState.levels[0].dpm2.AboveSafeInc = 0; - table->initialState.levels[0].dpm2.BelowSafeInc = 0; - table->initialState.levels[0].dpm2.PwrEfficiencyRatio = 0; + table->initialState.level.dpm2.MaxPS = 0; + table->initialState.level.dpm2.NearTDPDec = 0; + table->initialState.level.dpm2.AboveSafeInc = 0; + table->initialState.level.dpm2.BelowSafeInc = 0; + table->initialState.level.dpm2.PwrEfficiencyRatio = 0; reg = MIN_POWER_MASK | MAX_POWER_MASK; - table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg); + table->initialState.level.SQPowerThrottle = cpu_to_be32(reg); reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; - table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg); + table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; } @@ -4953,18 +4953,18 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev, if (pi->acpi_vddc) { ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table, - pi->acpi_vddc, &table->ACPIState.levels[0].vddc); + pi->acpi_vddc, &table->ACPIState.level.vddc); if (!ret) { u16 std_vddc; ret = si_get_std_voltage_value(adev, - &table->ACPIState.levels[0].vddc, &std_vddc); + &table->ACPIState.level.vddc, &std_vddc); if (!ret) si_populate_std_voltage_value(adev, std_vddc, - table->ACPIState.levels[0].vddc.index, - &table->ACPIState.levels[0].std_vddc); + table->ACPIState.level.vddc.index, + &table->ACPIState.level.std_vddc); } - table->ACPIState.levels[0].gen2PCIE = si_pi->acpi_pcie_gen; + table->ACPIState.level.gen2PCIE = si_pi->acpi_pcie_gen; if (si_pi->vddc_phase_shed_control) { si_populate_phase_shedding_value(adev, @@ -4972,23 +4972,23 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev, pi->acpi_vddc, 0, 0, - &table->ACPIState.levels[0].vddc); + &table->ACPIState.level.vddc); } } else { ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table, - pi->min_vddc_in_table, &table->ACPIState.levels[0].vddc); + pi->min_vddc_in_table, &table->ACPIState.level.vddc); if (!ret) { u16 std_vddc; ret = si_get_std_voltage_value(adev, - &table->ACPIState.levels[0].vddc, &std_vddc); + &table->ACPIState.level.vddc, &std_vddc); if (!ret) si_populate_std_voltage_value(adev, std_vddc, - table->ACPIState.levels[0].vddc.index, - &table->ACPIState.levels[0].std_vddc); + table->ACPIState.level.vddc.index, + &table->ACPIState.level.std_vddc); } - table->ACPIState.levels[0].gen2PCIE = + table->ACPIState.level.gen2PCIE = (u8)amdgpu_get_pcie_gen_support(adev, si_pi->sys_pcie_mask, si_pi->boot_pcie_gen, @@ -5000,14 +5000,14 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev, pi->min_vddc_in_table, 0, 0, - &table->ACPIState.levels[0].vddc); + &table->ACPIState.level.vddc); } if (pi->acpi_vddc) { if (eg_pi->acpi_vddci) si_populate_voltage_value(adev, &eg_pi->vddci_voltage_table, eg_pi->acpi_vddci, - &table->ACPIState.levels[0].vddci); + &table->ACPIState.level.vddci); } mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET; @@ -5018,59 +5018,59 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev, spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; spll_func_cntl_2 |= SCLK_MUX_SEL(4); - table->ACPIState.levels[0].mclk.vDLL_CNTL = + table->ACPIState.level.mclk.vDLL_CNTL = cpu_to_be32(dll_cntl); - table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL = + table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(mclk_pwrmgt_cntl); - table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = + table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl); - table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL = + table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(mpll_dq_func_cntl); - table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL = + table->ACPIState.level.mclk.vMPLL_FUNC_CNTL = cpu_to_be32(mpll_func_cntl); - table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_1 = + table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_1 = cpu_to_be32(mpll_func_cntl_1); - table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_2 = + table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_2 = cpu_to_be32(mpll_func_cntl_2); - table->ACPIState.levels[0].mclk.vMPLL_SS = + table->ACPIState.level.mclk.vMPLL_SS = cpu_to_be32(si_pi->clock_registers.mpll_ss1); - table->ACPIState.levels[0].mclk.vMPLL_SS2 = + table->ACPIState.level.mclk.vMPLL_SS2 = cpu_to_be32(si_pi->clock_registers.mpll_ss2); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL = + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(spll_func_cntl); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 = + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(spll_func_cntl_2); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 = + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(spll_func_cntl_3); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 = + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(spll_func_cntl_4); - table->ACPIState.levels[0].mclk.mclk_value = 0; - table->ACPIState.levels[0].sclk.sclk_value = 0; + table->ACPIState.level.mclk.mclk_value = 0; + table->ACPIState.level.sclk.sclk_value = 0; - si_populate_mvdd_value(adev, 0, &table->ACPIState.levels[0].mvdd); + si_populate_mvdd_value(adev, 0, &table->ACPIState.level.mvdd); if (eg_pi->dynamic_ac_timing) - table->ACPIState.levels[0].ACIndex = 0; + table->ACPIState.level.ACIndex = 0; - table->ACPIState.levels[0].dpm2.MaxPS = 0; - table->ACPIState.levels[0].dpm2.NearTDPDec = 0; - table->ACPIState.levels[0].dpm2.AboveSafeInc = 0; - table->ACPIState.levels[0].dpm2.BelowSafeInc = 0; - table->ACPIState.levels[0].dpm2.PwrEfficiencyRatio = 0; + table->ACPIState.level.dpm2.MaxPS = 0; + table->ACPIState.level.dpm2.NearTDPDec = 0; + table->ACPIState.level.dpm2.AboveSafeInc = 0; + table->ACPIState.level.dpm2.BelowSafeInc = 0; + table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0; reg = MIN_POWER_MASK | MAX_POWER_MASK; - table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg); + table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg); reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; - table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg); + table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; } static int si_populate_ulv_state(struct amdgpu_device *adev, - SISLANDS_SMC_SWSTATE *state) + struct SISLANDS_SMC_SWSTATE_SINGLE *state) { struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); struct si_power_info *si_pi = si_get_pi(adev); @@ -5079,19 +5079,19 @@ static int si_populate_ulv_state(struct amdgpu_device *adev, int ret; ret = si_convert_power_level_to_smc(adev, &ulv->pl, - &state->levels[0]); + &state->level); if (!ret) { if (eg_pi->sclk_deep_sleep) { if (sclk_in_sr <= SCLK_MIN_DEEPSLEEP_FREQ) - state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS; + state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS; else - state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE; + state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE; } if (ulv->one_pcie_lane_in_ulv) state->flags |= PPSMC_SWSTATE_FLAG_PCIE_X1; - state->levels[0].arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX); - state->levels[0].ACIndex = 1; - state->levels[0].std_vddc = state->levels[0].vddc; + state->level.arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX); + state->level.ACIndex = 1; + state->level.std_vddc = state->level.vddc; state->levelCount = 1; state->flags |= PPSMC_SWSTATE_FLAG_DC; @@ -5190,7 +5190,9 @@ static int si_init_smc_table(struct amdgpu_device *adev) if (ret) return ret; - table->driverState = table->initialState; + table->driverState.flags = table->initialState.flags; + table->driverState.levelCount = table->initialState.levelCount; + table->driverState.levels[0] = table->initialState.level; ret = si_do_program_memory_timing_parameters(adev, amdgpu_boot_state, SISLANDS_INITIAL_STATE_ARB_INDEX); @@ -5737,8 +5739,8 @@ static int si_upload_ulv_state(struct amdgpu_device *adev) if (ulv->supported && ulv->pl.vddc) { u32 address = si_pi->state_table_start + offsetof(SISLANDS_SMC_STATETABLE, ULVState); - SISLANDS_SMC_SWSTATE *smc_state = &si_pi->smc_statetable.ULVState; - u32 state_size = sizeof(SISLANDS_SMC_SWSTATE); + struct SISLANDS_SMC_SWSTATE_SINGLE *smc_state = &si_pi->smc_statetable.ULVState; + u32 state_size = sizeof(struct SISLANDS_SMC_SWSTATE_SINGLE); memset(smc_state, 0, state_size); diff --git a/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h b/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h index 0f7554052c90..c7dc117a688c 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h +++ b/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h @@ -191,6 +191,14 @@ struct SISLANDS_SMC_SWSTATE typedef struct SISLANDS_SMC_SWSTATE SISLANDS_SMC_SWSTATE; +struct SISLANDS_SMC_SWSTATE_SINGLE { + uint8_t flags; + uint8_t levelCount; + uint8_t padding2; + uint8_t padding3; + SISLANDS_SMC_HW_PERFORMANCE_LEVEL level; +}; + #define SISLANDS_SMC_VOLTAGEMASK_VDDC 0 #define SISLANDS_SMC_VOLTAGEMASK_MVDD 1 #define SISLANDS_SMC_VOLTAGEMASK_VDDCI 2 @@ -208,19 +216,19 @@ typedef struct SISLANDS_SMC_VOLTAGEMASKTABLE SISLANDS_SMC_VOLTAGEMASKTABLE; struct SISLANDS_SMC_STATETABLE { - uint8_t thermalProtectType; - uint8_t systemFlags; - uint8_t maxVDDCIndexInPPTable; - uint8_t extraFlags; - uint32_t lowSMIO[SISLANDS_MAX_NO_VREG_STEPS]; - SISLANDS_SMC_VOLTAGEMASKTABLE voltageMaskTable; - SISLANDS_SMC_VOLTAGEMASKTABLE phaseMaskTable; - PP_SIslands_DPM2Parameters dpm2Params; - SISLANDS_SMC_SWSTATE initialState; - SISLANDS_SMC_SWSTATE ACPIState; - SISLANDS_SMC_SWSTATE ULVState; - SISLANDS_SMC_SWSTATE driverState; - SISLANDS_SMC_HW_PERFORMANCE_LEVEL dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1]; + uint8_t thermalProtectType; + uint8_t systemFlags; + uint8_t maxVDDCIndexInPPTable; + uint8_t extraFlags; + uint32_t lowSMIO[SISLANDS_MAX_NO_VREG_STEPS]; + SISLANDS_SMC_VOLTAGEMASKTABLE voltageMaskTable; + SISLANDS_SMC_VOLTAGEMASKTABLE phaseMaskTable; + PP_SIslands_DPM2Parameters dpm2Params; + struct SISLANDS_SMC_SWSTATE_SINGLE initialState; + struct SISLANDS_SMC_SWSTATE_SINGLE ACPIState; + struct SISLANDS_SMC_SWSTATE_SINGLE ULVState; + SISLANDS_SMC_SWSTATE driverState; + SISLANDS_SMC_HW_PERFORMANCE_LEVEL dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE]; }; typedef struct SISLANDS_SMC_STATETABLE SISLANDS_SMC_STATETABLE; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 6a2dee8cef1f..642c60f3d9b1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1095,44 +1095,6 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, return -EINVAL; } -/* Optimize link config in order: max bpp, min lanes, min clock */ -static int -intel_dp_compute_link_config_fast(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config, - const struct link_config_limits *limits) -{ - const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - int bpp, clock, lane_count; - int mode_rate, link_clock, link_avail; - - for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { - int output_bpp = intel_dp_output_bpp(pipe_config->output_format, bpp); - - mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, - output_bpp); - - for (lane_count = limits->min_lane_count; - lane_count <= limits->max_lane_count; - lane_count <<= 1) { - for (clock = limits->min_clock; clock <= limits->max_clock; clock++) { - link_clock = intel_dp->common_rates[clock]; - link_avail = intel_dp_max_data_rate(link_clock, - lane_count); - - if (mode_rate <= link_avail) { - pipe_config->lane_count = lane_count; - pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = link_clock; - - return 0; - } - } - } - } - - return -EINVAL; -} - static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc) { int i, num_bpc; @@ -1382,22 +1344,11 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, intel_dp_can_bigjoiner(intel_dp)) pipe_config->bigjoiner = true; - if (intel_dp_is_edp(intel_dp)) - /* - * Optimize for fast and narrow. eDP 1.3 section 3.3 and eDP 1.4 - * section A.1: "It is recommended that the minimum number of - * lanes be used, using the minimum link rate allowed for that - * lane configuration." - * - * Note that we fall back to the max clock and lane count for eDP - * panels that fail with the fast optimal settings (see - * intel_dp->use_max_params), in which case the fast vs. wide - * choice doesn't matter. - */ - ret = intel_dp_compute_link_config_fast(intel_dp, pipe_config, &limits); - else - /* Optimize for slow and wide. */ - ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits); + /* + * Optimize for slow and wide for everything, because there are some + * eDP 1.3 and 1.4 panels don't work well with fast and narrow. + */ + ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits); /* enable compression if the mode doesn't fit available BW */ drm_dbg_kms(&i915->drm, "Force DSC en = %d\n", intel_dp->force_dsc_en); @@ -2160,7 +2111,7 @@ void intel_dp_check_frl_training(struct intel_dp *intel_dp) * -PCON supports SRC_CTL_MODE (VESA DP2.0-HDMI2.1 PCON Spec Draft-1 Sec-7) * -sink is HDMI2.1 */ - if (!(intel_dp->dpcd[2] & DP_PCON_SOURCE_CTL_MODE) || + if (!(intel_dp->downstream_ports[2] & DP_PCON_SOURCE_CTL_MODE) || !intel_dp_is_hdmi_2_1_sink(intel_dp) || intel_dp->frl.is_trained) return; diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index e5dadde422f7..bbaf05515e88 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -383,7 +383,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) i830_overlay_clock_gating(dev_priv, true); } -static void +__i915_active_call static void intel_overlay_last_flip_retire(struct i915_active *active) { struct intel_overlay *overlay = diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 23f6b00e08e2..f6fe5cb01438 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -189,7 +189,7 @@ compute_partial_view(const struct drm_i915_gem_object *obj, struct i915_ggtt_view view; if (i915_gem_object_is_tiled(obj)) - chunk = roundup(chunk, tile_row_pages(obj)); + chunk = roundup(chunk, tile_row_pages(obj) ?: 1); view.type = I915_GGTT_VIEW_PARTIAL; view.partial.offset = rounddown(page_offset, chunk); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 176c19633412..74bf6fc8461f 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -641,7 +641,6 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) err = pin_pt_dma(vm, pde->pt.base); if (err) { - i915_gem_object_put(pde->pt.base); free_pd(vm, pde); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index e72b7a0dc316..8a322594210c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -653,8 +653,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) * banks of memory are paired and unswizzled on the * uneven portion, so leave that as unknown. */ - if (intel_uncore_read(uncore, C0DRB3) == - intel_uncore_read(uncore, C1DRB3)) { + if (intel_uncore_read16(uncore, C0DRB3) == + intel_uncore_read16(uncore, C1DRB3)) { swizzle_x = I915_BIT_6_SWIZZLE_9_10; swizzle_y = I915_BIT_6_SWIZZLE_9; } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index cf9a3d384971..aa573b078ae7 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -1156,7 +1156,8 @@ static int auto_active(struct i915_active *ref) return 0; } -static void auto_retire(struct i915_active *ref) +__i915_active_call static void +auto_retire(struct i915_active *ref) { i915_active_put(ref); } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index d553f62f4eeb..b4d8e1b01ee4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1153,10 +1153,6 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, { struct device_node *phandle; - a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); - if (IS_ERR(a6xx_gpu->llc_mmio)) - return; - /* * There is a different programming path for targets with an mmu500 * attached, so detect if that is the case @@ -1166,6 +1162,11 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, of_device_is_compatible(phandle, "arm,mmu-500")); of_node_put(phandle); + if (a6xx_gpu->have_mmu500) + a6xx_gpu->llc_mmio = NULL; + else + a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); + a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c index 82a8673ab8da..d7e4a39a904e 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.c +++ b/drivers/gpu/drm/msm/dp/dp_audio.c @@ -527,6 +527,7 @@ int dp_audio_hw_params(struct device *dev, dp_audio_setup_acr(audio); dp_audio_safe_to_exit_level(audio); dp_audio_enable(audio, true); + dp_display_signal_audio_start(dp_display); dp_display->audio_enabled = true; end: diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 5a39da6e1eaf..1784e119269b 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -178,6 +178,15 @@ static int dp_del_event(struct dp_display_private *dp_priv, u32 event) return 0; } +void dp_display_signal_audio_start(struct msm_dp *dp_display) +{ + struct dp_display_private *dp; + + dp = container_of(dp_display, struct dp_display_private, dp_display); + + reinit_completion(&dp->audio_comp); +} + void dp_display_signal_audio_complete(struct msm_dp *dp_display) { struct dp_display_private *dp; @@ -586,10 +595,8 @@ static int dp_connect_pending_timeout(struct dp_display_private *dp, u32 data) mutex_lock(&dp->event_mutex); state = dp->hpd_state; - if (state == ST_CONNECT_PENDING) { - dp_display_enable(dp, 0); + if (state == ST_CONNECT_PENDING) dp->hpd_state = ST_CONNECTED; - } mutex_unlock(&dp->event_mutex); @@ -651,7 +658,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) dp_add_event(dp, EV_DISCONNECT_PENDING_TIMEOUT, 0, DP_TIMEOUT_5_SECOND); /* signal the disconnect event early to ensure proper teardown */ - reinit_completion(&dp->audio_comp); dp_display_handle_plugged_change(g_dp_display, false); dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK | @@ -669,10 +675,8 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data mutex_lock(&dp->event_mutex); state = dp->hpd_state; - if (state == ST_DISCONNECT_PENDING) { - dp_display_disable(dp, 0); + if (state == ST_DISCONNECT_PENDING) dp->hpd_state = ST_DISCONNECTED; - } mutex_unlock(&dp->event_mutex); @@ -898,7 +902,6 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) /* wait only if audio was enabled */ if (dp_display->audio_enabled) { /* signal the disconnect event */ - reinit_completion(&dp->audio_comp); dp_display_handle_plugged_change(dp_display, false); if (!wait_for_completion_timeout(&dp->audio_comp, HZ * 5)) @@ -1272,7 +1275,12 @@ static int dp_pm_resume(struct device *dev) status = dp_catalog_link_is_connected(dp->catalog); - if (status) + /* + * can not declared display is connected unless + * HDMI cable is plugged in and sink_count of + * dongle become 1 + */ + if (status && dp->link->sink_count) dp->dp_display.is_connected = true; else dp->dp_display.is_connected = false; diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h index 6092ba1ed85e..5173c89eedf7 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.h +++ b/drivers/gpu/drm/msm/dp/dp_display.h @@ -34,6 +34,7 @@ int dp_display_get_modes(struct msm_dp *dp_display, int dp_display_request_irq(struct msm_dp *dp_display); bool dp_display_check_video_test(struct msm_dp *dp_display); int dp_display_get_test_bpp(struct msm_dp *dp_display); +void dp_display_signal_audio_start(struct msm_dp *dp_display); void dp_display_signal_audio_complete(struct msm_dp *dp_display); #endif /* _DP_DISPLAY_H_ */ diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index f0a2ddf96a4b..ff7f2ec42030 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -843,7 +843,7 @@ int msm_dsi_phy_get_clk_provider(struct msm_dsi_phy *phy, if (pixel_clk_provider) *pixel_clk_provider = phy->provided_clocks->hws[DSI_PIXEL_PLL_CLK]->clk; - return -EINVAL; + return 0; } void msm_dsi_phy_pll_save_state(struct msm_dsi_phy *phy) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c index 582b1428f971..86e40a0d41a3 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c @@ -405,6 +405,10 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov if (!vco_name) return -ENOMEM; + parent_name = devm_kzalloc(dev, 32, GFP_KERNEL); + if (!parent_name) + return -ENOMEM; + clk_name = devm_kzalloc(dev, 32, GFP_KERNEL); if (!clk_name) return -ENOMEM; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index e1104d2454e2..fe7d17cd35ec 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -42,7 +42,7 @@ * - 1.7.0 - Add MSM_PARAM_SUSPENDS to access suspend count */ #define MSM_VERSION_MAJOR 1 -#define MSM_VERSION_MINOR 6 +#define MSM_VERSION_MINOR 7 #define MSM_VERSION_PATCHLEVEL 0 static const struct drm_mode_config_funcs mode_config_funcs = { diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index b199942266a2..56df86e5f740 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -190,13 +190,25 @@ struct page **msm_gem_get_pages(struct drm_gem_object *obj) } p = get_pages(obj); + + if (!IS_ERR(p)) { + msm_obj->pin_count++; + update_inactive(msm_obj); + } + msm_gem_unlock(obj); return p; } void msm_gem_put_pages(struct drm_gem_object *obj) { - /* when we start tracking the pin count, then do something here */ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + msm_gem_lock(obj); + msm_obj->pin_count--; + GEM_WARN_ON(msm_obj->pin_count < 0); + update_inactive(msm_obj); + msm_gem_unlock(obj); } int msm_gem_mmap_obj(struct drm_gem_object *obj, @@ -646,6 +658,8 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) ret = -ENOMEM; goto fail; } + + update_inactive(msm_obj); } return msm_obj->vaddr; diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index a6480d2c81b2..03e2cc2a2ce1 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -221,7 +221,7 @@ static inline bool is_active(struct msm_gem_object *msm_obj) /* imported/exported objects are not purgeable: */ static inline bool is_unpurgeable(struct msm_gem_object *msm_obj) { - return msm_obj->base.dma_buf && msm_obj->base.import_attach; + return msm_obj->base.import_attach || msm_obj->pin_count; } static inline bool is_purgeable(struct msm_gem_object *msm_obj) @@ -271,7 +271,7 @@ static inline void mark_unpurgeable(struct msm_gem_object *msm_obj) static inline bool is_unevictable(struct msm_gem_object *msm_obj) { - return is_unpurgeable(msm_obj) || msm_obj->pin_count || msm_obj->vaddr; + return is_unpurgeable(msm_obj) || msm_obj->vaddr; } static inline void mark_evictable(struct msm_gem_object *msm_obj) diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index dd5ef6493723..769f666335ac 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -1687,102 +1687,102 @@ static int ni_populate_smc_initial_state(struct radeon_device *rdev, u32 reg; int ret; - table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = + table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(ni_pi->clock_registers.mpll_ad_func_cntl); - table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL_2 = + table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(ni_pi->clock_registers.mpll_ad_func_cntl_2); - table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL = + table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(ni_pi->clock_registers.mpll_dq_func_cntl); - table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL_2 = + table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL_2 = cpu_to_be32(ni_pi->clock_registers.mpll_dq_func_cntl_2); - table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL = + table->initialState.level.mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(ni_pi->clock_registers.mclk_pwrmgt_cntl); - table->initialState.levels[0].mclk.vDLL_CNTL = + table->initialState.level.mclk.vDLL_CNTL = cpu_to_be32(ni_pi->clock_registers.dll_cntl); - table->initialState.levels[0].mclk.vMPLL_SS = + table->initialState.level.mclk.vMPLL_SS = cpu_to_be32(ni_pi->clock_registers.mpll_ss1); - table->initialState.levels[0].mclk.vMPLL_SS2 = + table->initialState.level.mclk.vMPLL_SS2 = cpu_to_be32(ni_pi->clock_registers.mpll_ss2); - table->initialState.levels[0].mclk.mclk_value = + table->initialState.level.mclk.mclk_value = cpu_to_be32(initial_state->performance_levels[0].mclk); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_2); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_3); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_4); - table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM = + table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM = cpu_to_be32(ni_pi->clock_registers.cg_spll_spread_spectrum); - table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2 = + table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2 = cpu_to_be32(ni_pi->clock_registers.cg_spll_spread_spectrum_2); - table->initialState.levels[0].sclk.sclk_value = + table->initialState.level.sclk.sclk_value = cpu_to_be32(initial_state->performance_levels[0].sclk); - table->initialState.levels[0].arbRefreshState = + table->initialState.level.arbRefreshState = NISLANDS_INITIAL_STATE_ARB_INDEX; - table->initialState.levels[0].ACIndex = 0; + table->initialState.level.ACIndex = 0; ret = ni_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table, initial_state->performance_levels[0].vddc, - &table->initialState.levels[0].vddc); + &table->initialState.level.vddc); if (!ret) { u16 std_vddc; ret = ni_get_std_voltage_value(rdev, - &table->initialState.levels[0].vddc, + &table->initialState.level.vddc, &std_vddc); if (!ret) ni_populate_std_voltage_value(rdev, std_vddc, - table->initialState.levels[0].vddc.index, - &table->initialState.levels[0].std_vddc); + table->initialState.level.vddc.index, + &table->initialState.level.std_vddc); } if (eg_pi->vddci_control) ni_populate_voltage_value(rdev, &eg_pi->vddci_voltage_table, initial_state->performance_levels[0].vddci, - &table->initialState.levels[0].vddci); + &table->initialState.level.vddci); - ni_populate_initial_mvdd_value(rdev, &table->initialState.levels[0].mvdd); + ni_populate_initial_mvdd_value(rdev, &table->initialState.level.mvdd); reg = CG_R(0xffff) | CG_L(0); - table->initialState.levels[0].aT = cpu_to_be32(reg); + table->initialState.level.aT = cpu_to_be32(reg); - table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp); + table->initialState.level.bSP = cpu_to_be32(pi->dsp); if (pi->boot_in_gen2) - table->initialState.levels[0].gen2PCIE = 1; + table->initialState.level.gen2PCIE = 1; else - table->initialState.levels[0].gen2PCIE = 0; + table->initialState.level.gen2PCIE = 0; if (pi->mem_gddr5) { - table->initialState.levels[0].strobeMode = + table->initialState.level.strobeMode = cypress_get_strobe_mode_settings(rdev, initial_state->performance_levels[0].mclk); if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold) - table->initialState.levels[0].mcFlags = NISLANDS_SMC_MC_EDC_RD_FLAG | NISLANDS_SMC_MC_EDC_WR_FLAG; + table->initialState.level.mcFlags = NISLANDS_SMC_MC_EDC_RD_FLAG | NISLANDS_SMC_MC_EDC_WR_FLAG; else - table->initialState.levels[0].mcFlags = 0; + table->initialState.level.mcFlags = 0; } table->initialState.levelCount = 1; table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC; - table->initialState.levels[0].dpm2.MaxPS = 0; - table->initialState.levels[0].dpm2.NearTDPDec = 0; - table->initialState.levels[0].dpm2.AboveSafeInc = 0; - table->initialState.levels[0].dpm2.BelowSafeInc = 0; + table->initialState.level.dpm2.MaxPS = 0; + table->initialState.level.dpm2.NearTDPDec = 0; + table->initialState.level.dpm2.AboveSafeInc = 0; + table->initialState.level.dpm2.BelowSafeInc = 0; reg = MIN_POWER_MASK | MAX_POWER_MASK; - table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg); + table->initialState.level.SQPowerThrottle = cpu_to_be32(reg); reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; - table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg); + table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; } @@ -1813,43 +1813,43 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev, if (pi->acpi_vddc) { ret = ni_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table, - pi->acpi_vddc, &table->ACPIState.levels[0].vddc); + pi->acpi_vddc, &table->ACPIState.level.vddc); if (!ret) { u16 std_vddc; ret = ni_get_std_voltage_value(rdev, - &table->ACPIState.levels[0].vddc, &std_vddc); + &table->ACPIState.level.vddc, &std_vddc); if (!ret) ni_populate_std_voltage_value(rdev, std_vddc, - table->ACPIState.levels[0].vddc.index, - &table->ACPIState.levels[0].std_vddc); + table->ACPIState.level.vddc.index, + &table->ACPIState.level.std_vddc); } if (pi->pcie_gen2) { if (pi->acpi_pcie_gen2) - table->ACPIState.levels[0].gen2PCIE = 1; + table->ACPIState.level.gen2PCIE = 1; else - table->ACPIState.levels[0].gen2PCIE = 0; + table->ACPIState.level.gen2PCIE = 0; } else { - table->ACPIState.levels[0].gen2PCIE = 0; + table->ACPIState.level.gen2PCIE = 0; } } else { ret = ni_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table, pi->min_vddc_in_table, - &table->ACPIState.levels[0].vddc); + &table->ACPIState.level.vddc); if (!ret) { u16 std_vddc; ret = ni_get_std_voltage_value(rdev, - &table->ACPIState.levels[0].vddc, + &table->ACPIState.level.vddc, &std_vddc); if (!ret) ni_populate_std_voltage_value(rdev, std_vddc, - table->ACPIState.levels[0].vddc.index, - &table->ACPIState.levels[0].std_vddc); + table->ACPIState.level.vddc.index, + &table->ACPIState.level.std_vddc); } - table->ACPIState.levels[0].gen2PCIE = 0; + table->ACPIState.level.gen2PCIE = 0; } if (eg_pi->acpi_vddci) { @@ -1857,7 +1857,7 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev, ni_populate_voltage_value(rdev, &eg_pi->vddci_voltage_table, eg_pi->acpi_vddci, - &table->ACPIState.levels[0].vddci); + &table->ACPIState.level.vddci); } @@ -1900,37 +1900,37 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev, spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; spll_func_cntl_2 |= SCLK_MUX_SEL(4); - table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl); - table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(mpll_ad_func_cntl_2); - table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(mpll_dq_func_cntl); - table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL_2 = cpu_to_be32(mpll_dq_func_cntl_2); - table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(mclk_pwrmgt_cntl); - table->ACPIState.levels[0].mclk.vDLL_CNTL = cpu_to_be32(dll_cntl); + table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl); + table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(mpll_ad_func_cntl_2); + table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(mpll_dq_func_cntl); + table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL_2 = cpu_to_be32(mpll_dq_func_cntl_2); + table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(mclk_pwrmgt_cntl); + table->ACPIState.level.mclk.vDLL_CNTL = cpu_to_be32(dll_cntl); - table->ACPIState.levels[0].mclk.mclk_value = 0; + table->ACPIState.level.mclk.mclk_value = 0; - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(spll_func_cntl); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(spll_func_cntl_2); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(spll_func_cntl_3); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(spll_func_cntl_4); + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(spll_func_cntl); + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(spll_func_cntl_2); + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(spll_func_cntl_3); + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(spll_func_cntl_4); - table->ACPIState.levels[0].sclk.sclk_value = 0; + table->ACPIState.level.sclk.sclk_value = 0; - ni_populate_mvdd_value(rdev, 0, &table->ACPIState.levels[0].mvdd); + ni_populate_mvdd_value(rdev, 0, &table->ACPIState.level.mvdd); if (eg_pi->dynamic_ac_timing) - table->ACPIState.levels[0].ACIndex = 1; + table->ACPIState.level.ACIndex = 1; - table->ACPIState.levels[0].dpm2.MaxPS = 0; - table->ACPIState.levels[0].dpm2.NearTDPDec = 0; - table->ACPIState.levels[0].dpm2.AboveSafeInc = 0; - table->ACPIState.levels[0].dpm2.BelowSafeInc = 0; + table->ACPIState.level.dpm2.MaxPS = 0; + table->ACPIState.level.dpm2.NearTDPDec = 0; + table->ACPIState.level.dpm2.AboveSafeInc = 0; + table->ACPIState.level.dpm2.BelowSafeInc = 0; reg = MIN_POWER_MASK | MAX_POWER_MASK; - table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg); + table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg); reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; - table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg); + table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; } @@ -1980,7 +1980,9 @@ static int ni_init_smc_table(struct radeon_device *rdev) if (ret) return ret; - table->driverState = table->initialState; + table->driverState.flags = table->initialState.flags; + table->driverState.levelCount = table->initialState.levelCount; + table->driverState.levels[0] = table->initialState.level; table->ULVState = table->initialState; diff --git a/drivers/gpu/drm/radeon/nislands_smc.h b/drivers/gpu/drm/radeon/nislands_smc.h index 7395cb6b3cac..42f3bab0f9ee 100644 --- a/drivers/gpu/drm/radeon/nislands_smc.h +++ b/drivers/gpu/drm/radeon/nislands_smc.h @@ -143,6 +143,14 @@ struct NISLANDS_SMC_SWSTATE typedef struct NISLANDS_SMC_SWSTATE NISLANDS_SMC_SWSTATE; +struct NISLANDS_SMC_SWSTATE_SINGLE { + uint8_t flags; + uint8_t levelCount; + uint8_t padding2; + uint8_t padding3; + NISLANDS_SMC_HW_PERFORMANCE_LEVEL level; +}; + #define NISLANDS_SMC_VOLTAGEMASK_VDDC 0 #define NISLANDS_SMC_VOLTAGEMASK_MVDD 1 #define NISLANDS_SMC_VOLTAGEMASK_VDDCI 2 @@ -160,19 +168,19 @@ typedef struct NISLANDS_SMC_VOLTAGEMASKTABLE NISLANDS_SMC_VOLTAGEMASKTABLE; struct NISLANDS_SMC_STATETABLE { - uint8_t thermalProtectType; - uint8_t systemFlags; - uint8_t maxVDDCIndexInPPTable; - uint8_t extraFlags; - uint8_t highSMIO[NISLANDS_MAX_NO_VREG_STEPS]; - uint32_t lowSMIO[NISLANDS_MAX_NO_VREG_STEPS]; - NISLANDS_SMC_VOLTAGEMASKTABLE voltageMaskTable; - PP_NIslands_DPM2Parameters dpm2Params; - NISLANDS_SMC_SWSTATE initialState; - NISLANDS_SMC_SWSTATE ACPIState; - NISLANDS_SMC_SWSTATE ULVState; - NISLANDS_SMC_SWSTATE driverState; - NISLANDS_SMC_HW_PERFORMANCE_LEVEL dpmLevels[NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1]; + uint8_t thermalProtectType; + uint8_t systemFlags; + uint8_t maxVDDCIndexInPPTable; + uint8_t extraFlags; + uint8_t highSMIO[NISLANDS_MAX_NO_VREG_STEPS]; + uint32_t lowSMIO[NISLANDS_MAX_NO_VREG_STEPS]; + NISLANDS_SMC_VOLTAGEMASKTABLE voltageMaskTable; + PP_NIslands_DPM2Parameters dpm2Params; + struct NISLANDS_SMC_SWSTATE_SINGLE initialState; + struct NISLANDS_SMC_SWSTATE_SINGLE ACPIState; + struct NISLANDS_SMC_SWSTATE_SINGLE ULVState; + NISLANDS_SMC_SWSTATE driverState; + NISLANDS_SMC_HW_PERFORMANCE_LEVEL dpmLevels[NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE]; }; typedef struct NISLANDS_SMC_STATETABLE NISLANDS_SMC_STATETABLE; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 42281fce552e..56ed5634cebe 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1549,6 +1549,7 @@ struct radeon_dpm { void *priv; u32 new_active_crtcs; int new_active_crtc_count; + int high_pixelclock_count; u32 current_active_crtcs; int current_active_crtc_count; bool single_display; diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 0c1950f4e146..3861c0b98fcf 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1767,6 +1767,7 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev) struct drm_device *ddev = rdev->ddev; struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; + struct radeon_connector *radeon_connector; if (!rdev->pm.dpm_enabled) return; @@ -1776,6 +1777,7 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev) /* update active crtc counts */ rdev->pm.dpm.new_active_crtcs = 0; rdev->pm.dpm.new_active_crtc_count = 0; + rdev->pm.dpm.high_pixelclock_count = 0; if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) { list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) { @@ -1783,6 +1785,12 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev) if (crtc->enabled) { rdev->pm.dpm.new_active_crtcs |= (1 << radeon_crtc->crtc_id); rdev->pm.dpm.new_active_crtc_count++; + if (!radeon_crtc->connector) + continue; + + radeon_connector = to_radeon_connector(radeon_crtc->connector); + if (radeon_connector->pixelclock_for_modeset > 297000) + rdev->pm.dpm.high_pixelclock_count++; } } } diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 918609551804..3add39c1a689 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2979,6 +2979,9 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->device == 0x6605)) { max_sclk = 75000; } + + if (rdev->pm.dpm.high_pixelclock_count > 1) + disable_sclk_switching = true; } if (rps->vce_active) { @@ -4350,70 +4353,70 @@ static int si_populate_smc_initial_state(struct radeon_device *rdev, u32 reg; int ret; - table->initialState.levels[0].mclk.vDLL_CNTL = + table->initialState.level.mclk.vDLL_CNTL = cpu_to_be32(si_pi->clock_registers.dll_cntl); - table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL = + table->initialState.level.mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(si_pi->clock_registers.mclk_pwrmgt_cntl); - table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = + table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(si_pi->clock_registers.mpll_ad_func_cntl); - table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL = + table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(si_pi->clock_registers.mpll_dq_func_cntl); - table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL = + table->initialState.level.mclk.vMPLL_FUNC_CNTL = cpu_to_be32(si_pi->clock_registers.mpll_func_cntl); - table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_1 = + table->initialState.level.mclk.vMPLL_FUNC_CNTL_1 = cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_1); - table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_2 = + table->initialState.level.mclk.vMPLL_FUNC_CNTL_2 = cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_2); - table->initialState.levels[0].mclk.vMPLL_SS = + table->initialState.level.mclk.vMPLL_SS = cpu_to_be32(si_pi->clock_registers.mpll_ss1); - table->initialState.levels[0].mclk.vMPLL_SS2 = + table->initialState.level.mclk.vMPLL_SS2 = cpu_to_be32(si_pi->clock_registers.mpll_ss2); - table->initialState.levels[0].mclk.mclk_value = + table->initialState.level.mclk.mclk_value = cpu_to_be32(initial_state->performance_levels[0].mclk); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_2); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_3); - table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 = + table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_4); - table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM = + table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM = cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum); - table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2 = + table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2 = cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum_2); - table->initialState.levels[0].sclk.sclk_value = + table->initialState.level.sclk.sclk_value = cpu_to_be32(initial_state->performance_levels[0].sclk); - table->initialState.levels[0].arbRefreshState = + table->initialState.level.arbRefreshState = SISLANDS_INITIAL_STATE_ARB_INDEX; - table->initialState.levels[0].ACIndex = 0; + table->initialState.level.ACIndex = 0; ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table, initial_state->performance_levels[0].vddc, - &table->initialState.levels[0].vddc); + &table->initialState.level.vddc); if (!ret) { u16 std_vddc; ret = si_get_std_voltage_value(rdev, - &table->initialState.levels[0].vddc, + &table->initialState.level.vddc, &std_vddc); if (!ret) si_populate_std_voltage_value(rdev, std_vddc, - table->initialState.levels[0].vddc.index, - &table->initialState.levels[0].std_vddc); + table->initialState.level.vddc.index, + &table->initialState.level.std_vddc); } if (eg_pi->vddci_control) si_populate_voltage_value(rdev, &eg_pi->vddci_voltage_table, initial_state->performance_levels[0].vddci, - &table->initialState.levels[0].vddci); + &table->initialState.level.vddci); if (si_pi->vddc_phase_shed_control) si_populate_phase_shedding_value(rdev, @@ -4421,43 +4424,43 @@ static int si_populate_smc_initial_state(struct radeon_device *rdev, initial_state->performance_levels[0].vddc, initial_state->performance_levels[0].sclk, initial_state->performance_levels[0].mclk, - &table->initialState.levels[0].vddc); + &table->initialState.level.vddc); - si_populate_initial_mvdd_value(rdev, &table->initialState.levels[0].mvdd); + si_populate_initial_mvdd_value(rdev, &table->initialState.level.mvdd); reg = CG_R(0xffff) | CG_L(0); - table->initialState.levels[0].aT = cpu_to_be32(reg); + table->initialState.level.aT = cpu_to_be32(reg); - table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp); + table->initialState.level.bSP = cpu_to_be32(pi->dsp); - table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen; + table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen; if (pi->mem_gddr5) { - table->initialState.levels[0].strobeMode = + table->initialState.level.strobeMode = si_get_strobe_mode_settings(rdev, initial_state->performance_levels[0].mclk); if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold) - table->initialState.levels[0].mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG; + table->initialState.level.mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG; else - table->initialState.levels[0].mcFlags = 0; + table->initialState.level.mcFlags = 0; } table->initialState.levelCount = 1; table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC; - table->initialState.levels[0].dpm2.MaxPS = 0; - table->initialState.levels[0].dpm2.NearTDPDec = 0; - table->initialState.levels[0].dpm2.AboveSafeInc = 0; - table->initialState.levels[0].dpm2.BelowSafeInc = 0; - table->initialState.levels[0].dpm2.PwrEfficiencyRatio = 0; + table->initialState.level.dpm2.MaxPS = 0; + table->initialState.level.dpm2.NearTDPDec = 0; + table->initialState.level.dpm2.AboveSafeInc = 0; + table->initialState.level.dpm2.BelowSafeInc = 0; + table->initialState.level.dpm2.PwrEfficiencyRatio = 0; reg = MIN_POWER_MASK | MAX_POWER_MASK; - table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg); + table->initialState.level.SQPowerThrottle = cpu_to_be32(reg); reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; - table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg); + table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; } @@ -4488,18 +4491,18 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev, if (pi->acpi_vddc) { ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table, - pi->acpi_vddc, &table->ACPIState.levels[0].vddc); + pi->acpi_vddc, &table->ACPIState.level.vddc); if (!ret) { u16 std_vddc; ret = si_get_std_voltage_value(rdev, - &table->ACPIState.levels[0].vddc, &std_vddc); + &table->ACPIState.level.vddc, &std_vddc); if (!ret) si_populate_std_voltage_value(rdev, std_vddc, - table->ACPIState.levels[0].vddc.index, - &table->ACPIState.levels[0].std_vddc); + table->ACPIState.level.vddc.index, + &table->ACPIState.level.std_vddc); } - table->ACPIState.levels[0].gen2PCIE = si_pi->acpi_pcie_gen; + table->ACPIState.level.gen2PCIE = si_pi->acpi_pcie_gen; if (si_pi->vddc_phase_shed_control) { si_populate_phase_shedding_value(rdev, @@ -4507,23 +4510,23 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev, pi->acpi_vddc, 0, 0, - &table->ACPIState.levels[0].vddc); + &table->ACPIState.level.vddc); } } else { ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table, - pi->min_vddc_in_table, &table->ACPIState.levels[0].vddc); + pi->min_vddc_in_table, &table->ACPIState.level.vddc); if (!ret) { u16 std_vddc; ret = si_get_std_voltage_value(rdev, - &table->ACPIState.levels[0].vddc, &std_vddc); + &table->ACPIState.level.vddc, &std_vddc); if (!ret) si_populate_std_voltage_value(rdev, std_vddc, - table->ACPIState.levels[0].vddc.index, - &table->ACPIState.levels[0].std_vddc); + table->ACPIState.level.vddc.index, + &table->ACPIState.level.std_vddc); } - table->ACPIState.levels[0].gen2PCIE = (u8)r600_get_pcie_gen_support(rdev, + table->ACPIState.level.gen2PCIE = (u8)r600_get_pcie_gen_support(rdev, si_pi->sys_pcie_mask, si_pi->boot_pcie_gen, RADEON_PCIE_GEN1); @@ -4534,14 +4537,14 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev, pi->min_vddc_in_table, 0, 0, - &table->ACPIState.levels[0].vddc); + &table->ACPIState.level.vddc); } if (pi->acpi_vddc) { if (eg_pi->acpi_vddci) si_populate_voltage_value(rdev, &eg_pi->vddci_voltage_table, eg_pi->acpi_vddci, - &table->ACPIState.levels[0].vddci); + &table->ACPIState.level.vddci); } mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET; @@ -4552,59 +4555,59 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev, spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; spll_func_cntl_2 |= SCLK_MUX_SEL(4); - table->ACPIState.levels[0].mclk.vDLL_CNTL = + table->ACPIState.level.mclk.vDLL_CNTL = cpu_to_be32(dll_cntl); - table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL = + table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(mclk_pwrmgt_cntl); - table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = + table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl); - table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL = + table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(mpll_dq_func_cntl); - table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL = + table->ACPIState.level.mclk.vMPLL_FUNC_CNTL = cpu_to_be32(mpll_func_cntl); - table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_1 = + table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_1 = cpu_to_be32(mpll_func_cntl_1); - table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_2 = + table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_2 = cpu_to_be32(mpll_func_cntl_2); - table->ACPIState.levels[0].mclk.vMPLL_SS = + table->ACPIState.level.mclk.vMPLL_SS = cpu_to_be32(si_pi->clock_registers.mpll_ss1); - table->ACPIState.levels[0].mclk.vMPLL_SS2 = + table->ACPIState.level.mclk.vMPLL_SS2 = cpu_to_be32(si_pi->clock_registers.mpll_ss2); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL = + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(spll_func_cntl); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 = + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(spll_func_cntl_2); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 = + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(spll_func_cntl_3); - table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 = + table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(spll_func_cntl_4); - table->ACPIState.levels[0].mclk.mclk_value = 0; - table->ACPIState.levels[0].sclk.sclk_value = 0; + table->ACPIState.level.mclk.mclk_value = 0; + table->ACPIState.level.sclk.sclk_value = 0; - si_populate_mvdd_value(rdev, 0, &table->ACPIState.levels[0].mvdd); + si_populate_mvdd_value(rdev, 0, &table->ACPIState.level.mvdd); if (eg_pi->dynamic_ac_timing) - table->ACPIState.levels[0].ACIndex = 0; + table->ACPIState.level.ACIndex = 0; - table->ACPIState.levels[0].dpm2.MaxPS = 0; - table->ACPIState.levels[0].dpm2.NearTDPDec = 0; - table->ACPIState.levels[0].dpm2.AboveSafeInc = 0; - table->ACPIState.levels[0].dpm2.BelowSafeInc = 0; - table->ACPIState.levels[0].dpm2.PwrEfficiencyRatio = 0; + table->ACPIState.level.dpm2.MaxPS = 0; + table->ACPIState.level.dpm2.NearTDPDec = 0; + table->ACPIState.level.dpm2.AboveSafeInc = 0; + table->ACPIState.level.dpm2.BelowSafeInc = 0; + table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0; reg = MIN_POWER_MASK | MAX_POWER_MASK; - table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg); + table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg); reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; - table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg); + table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; } static int si_populate_ulv_state(struct radeon_device *rdev, - SISLANDS_SMC_SWSTATE *state) + struct SISLANDS_SMC_SWSTATE_SINGLE *state) { struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); struct si_power_info *si_pi = si_get_pi(rdev); @@ -4613,19 +4616,19 @@ static int si_populate_ulv_state(struct radeon_device *rdev, int ret; ret = si_convert_power_level_to_smc(rdev, &ulv->pl, - &state->levels[0]); + &state->level); if (!ret) { if (eg_pi->sclk_deep_sleep) { if (sclk_in_sr <= SCLK_MIN_DEEPSLEEP_FREQ) - state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS; + state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS; else - state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE; + state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE; } if (ulv->one_pcie_lane_in_ulv) state->flags |= PPSMC_SWSTATE_FLAG_PCIE_X1; - state->levels[0].arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX); - state->levels[0].ACIndex = 1; - state->levels[0].std_vddc = state->levels[0].vddc; + state->level.arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX); + state->level.ACIndex = 1; + state->level.std_vddc = state->level.vddc; state->levelCount = 1; state->flags |= PPSMC_SWSTATE_FLAG_DC; @@ -4725,7 +4728,9 @@ static int si_init_smc_table(struct radeon_device *rdev) if (ret) return ret; - table->driverState = table->initialState; + table->driverState.flags = table->initialState.flags; + table->driverState.levelCount = table->initialState.levelCount; + table->driverState.levels[0] = table->initialState.level; ret = si_do_program_memory_timing_parameters(rdev, radeon_boot_state, SISLANDS_INITIAL_STATE_ARB_INDEX); @@ -5275,8 +5280,8 @@ static int si_upload_ulv_state(struct radeon_device *rdev) if (ulv->supported && ulv->pl.vddc) { u32 address = si_pi->state_table_start + offsetof(SISLANDS_SMC_STATETABLE, ULVState); - SISLANDS_SMC_SWSTATE *smc_state = &si_pi->smc_statetable.ULVState; - u32 state_size = sizeof(SISLANDS_SMC_SWSTATE); + struct SISLANDS_SMC_SWSTATE_SINGLE *smc_state = &si_pi->smc_statetable.ULVState; + u32 state_size = sizeof(struct SISLANDS_SMC_SWSTATE_SINGLE); memset(smc_state, 0, state_size); diff --git a/drivers/gpu/drm/radeon/sislands_smc.h b/drivers/gpu/drm/radeon/sislands_smc.h index fbd6589bdab9..4ea1cb2e45a3 100644 --- a/drivers/gpu/drm/radeon/sislands_smc.h +++ b/drivers/gpu/drm/radeon/sislands_smc.h @@ -191,6 +191,14 @@ struct SISLANDS_SMC_SWSTATE typedef struct SISLANDS_SMC_SWSTATE SISLANDS_SMC_SWSTATE; +struct SISLANDS_SMC_SWSTATE_SINGLE { + uint8_t flags; + uint8_t levelCount; + uint8_t padding2; + uint8_t padding3; + SISLANDS_SMC_HW_PERFORMANCE_LEVEL level; +}; + #define SISLANDS_SMC_VOLTAGEMASK_VDDC 0 #define SISLANDS_SMC_VOLTAGEMASK_MVDD 1 #define SISLANDS_SMC_VOLTAGEMASK_VDDCI 2 @@ -208,19 +216,19 @@ typedef struct SISLANDS_SMC_VOLTAGEMASKTABLE SISLANDS_SMC_VOLTAGEMASKTABLE; struct SISLANDS_SMC_STATETABLE { - uint8_t thermalProtectType; - uint8_t systemFlags; - uint8_t maxVDDCIndexInPPTable; - uint8_t extraFlags; - uint32_t lowSMIO[SISLANDS_MAX_NO_VREG_STEPS]; - SISLANDS_SMC_VOLTAGEMASKTABLE voltageMaskTable; - SISLANDS_SMC_VOLTAGEMASKTABLE phaseMaskTable; - PP_SIslands_DPM2Parameters dpm2Params; - SISLANDS_SMC_SWSTATE initialState; - SISLANDS_SMC_SWSTATE ACPIState; - SISLANDS_SMC_SWSTATE ULVState; - SISLANDS_SMC_SWSTATE driverState; - SISLANDS_SMC_HW_PERFORMANCE_LEVEL dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1]; + uint8_t thermalProtectType; + uint8_t systemFlags; + uint8_t maxVDDCIndexInPPTable; + uint8_t extraFlags; + uint32_t lowSMIO[SISLANDS_MAX_NO_VREG_STEPS]; + SISLANDS_SMC_VOLTAGEMASKTABLE voltageMaskTable; + SISLANDS_SMC_VOLTAGEMASKTABLE phaseMaskTable; + PP_SIslands_DPM2Parameters dpm2Params; + struct SISLANDS_SMC_SWSTATE_SINGLE initialState; + struct SISLANDS_SMC_SWSTATE_SINGLE ACPIState; + struct SISLANDS_SMC_SWSTATE_SINGLE ULVState; + SISLANDS_SMC_SWSTATE driverState; + SISLANDS_SMC_HW_PERFORMANCE_LEVEL dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE]; }; typedef struct SISLANDS_SMC_STATETABLE SISLANDS_SMC_STATETABLE; diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c index bd5b8eb58b18..090529d0d5dc 100644 --- a/drivers/gpu/drm/vc4/vc4_vec.c +++ b/drivers/gpu/drm/vc4/vc4_vec.c @@ -197,12 +197,6 @@ struct vc4_vec_connector { struct drm_encoder *encoder; }; -static inline struct vc4_vec_connector * -to_vc4_vec_connector(struct drm_connector *connector) -{ - return container_of(connector, struct vc4_vec_connector, base); -} - enum vc4_vec_tv_mode_id { VC4_VEC_TV_MODE_NTSC, VC4_VEC_TV_MODE_NTSC_J, diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c index 5677263bcf0d..483cd757abd3 100644 --- a/drivers/hwmon/adm9240.c +++ b/drivers/hwmon/adm9240.c @@ -485,7 +485,7 @@ static int adm9240_in_write(struct device *dev, u32 attr, int channel, long val) reg = ADM9240_REG_IN_MIN(channel); break; case hwmon_in_max: - reg = ADM9240_REG_IN(channel); + reg = ADM9240_REG_IN_MAX(channel); break; default: return -EOPNOTSUPP; diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c index 3a5807e4a2ef..02298b86b57b 100644 --- a/drivers/hwmon/corsair-psu.c +++ b/drivers/hwmon/corsair-psu.c @@ -355,7 +355,7 @@ static umode_t corsairpsu_hwmon_power_is_visible(const struct corsairpsu_data *p return 0444; default: return 0; - }; + } } static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv, u32 attr, @@ -376,7 +376,7 @@ static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv break; default: break; - }; + } return res; } diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 4382105bf142..2a4bed0ab226 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -900,11 +900,15 @@ static int ltc2992_parse_dt(struct ltc2992_state *st) fwnode_for_each_available_child_node(fwnode, child) { ret = fwnode_property_read_u32(child, "reg", &addr); - if (ret < 0) + if (ret < 0) { + fwnode_handle_put(child); return ret; + } - if (addr > 1) + if (addr > 1) { + fwnode_handle_put(child); return -EINVAL; + } ret = fwnode_property_read_u32(child, "shunt-resistor-micro-ohms", &val); if (!ret) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index f1ac153d0b56..967532afb1c0 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -217,9 +217,9 @@ int occ_update_response(struct occ *occ) return rc; /* limit the maximum rate of polling the OCC */ - if (time_after(jiffies, occ->last_update + OCC_UPDATE_FREQUENCY)) { + if (time_after(jiffies, occ->next_update)) { rc = occ_poll(occ); - occ->last_update = jiffies; + occ->next_update = jiffies + OCC_UPDATE_FREQUENCY; } else { rc = occ->last_error; } @@ -1165,6 +1165,7 @@ int occ_setup(struct occ *occ, const char *name) return rc; } + occ->next_update = jiffies + OCC_UPDATE_FREQUENCY; occ_parse_poll_response(occ); rc = occ_setup_sensor_attrs(occ); diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h index 67e6968b8978..e6df719770e8 100644 --- a/drivers/hwmon/occ/common.h +++ b/drivers/hwmon/occ/common.h @@ -99,7 +99,7 @@ struct occ { u8 poll_cmd_data; /* to perform OCC poll command */ int (*send_cmd)(struct occ *occ, u8 *cmd); - unsigned long last_update; + unsigned long next_update; struct mutex lock; /* lock OCC access */ struct device *hwmon; diff --git a/drivers/hwmon/pmbus/fsp-3y.c b/drivers/hwmon/pmbus/fsp-3y.c index b177987286ae..e24842475254 100644 --- a/drivers/hwmon/pmbus/fsp-3y.c +++ b/drivers/hwmon/pmbus/fsp-3y.c @@ -57,7 +57,7 @@ static int page_log_to_page_real(int page_log, enum chips chip) case YH5151E_PAGE_12V_LOG: return YH5151E_PAGE_12V_REAL; case YH5151E_PAGE_5V_LOG: - return YH5151E_PAGE_5V_LOG; + return YH5151E_PAGE_5V_REAL; case YH5151E_PAGE_3V3_LOG: return YH5151E_PAGE_3V3_REAL; } @@ -103,8 +103,18 @@ static int set_page(struct i2c_client *client, int page_log) static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg) { + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + struct fsp3y_data *data = to_fsp3y_data(info); int rv; + /* + * YH5151-E outputs vout in linear11. The conversion is done when + * reading. Here, we have to inject pmbus_core with the correct + * exponent (it is -6). + */ + if (data->chip == yh5151e && reg == PMBUS_VOUT_MODE) + return 0x1A; + rv = set_page(client, page); if (rv < 0) return rv; @@ -114,6 +124,8 @@ static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg) static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase, int reg) { + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + struct fsp3y_data *data = to_fsp3y_data(info); int rv; /* @@ -144,7 +156,18 @@ static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase, if (rv < 0) return rv; - return i2c_smbus_read_word_data(client, reg); + rv = i2c_smbus_read_word_data(client, reg); + if (rv < 0) + return rv; + + /* + * YH-5151E is non-compliant and outputs output voltages in linear11 + * instead of linear16. + */ + if (data->chip == yh5151e && reg == PMBUS_READ_VOUT) + rv = sign_extend32(rv, 10) & 0xffff; + + return rv; } static struct pmbus_driver_info fsp3y_info[] = { diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index cceda3cecbcf..8b1723635cce 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -229,7 +229,6 @@ config DMARD10 config HID_SENSOR_ACCEL_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Accelerometers 3D" diff --git a/drivers/iio/common/hid-sensors/Kconfig b/drivers/iio/common/hid-sensors/Kconfig index 24d492567336..2a3dd3b907be 100644 --- a/drivers/iio/common/hid-sensors/Kconfig +++ b/drivers/iio/common/hid-sensors/Kconfig @@ -19,6 +19,7 @@ config HID_SENSOR_IIO_TRIGGER tristate "Common module (trigger) for all HID Sensor IIO drivers" depends on HID_SENSOR_HUB && HID_SENSOR_IIO_COMMON && IIO_BUFFER select IIO_TRIGGER + select IIO_TRIGGERED_BUFFER help Say yes here to build trigger support for HID sensors. Triggers will be send if all requested attributes were read. diff --git a/drivers/iio/gyro/Kconfig b/drivers/iio/gyro/Kconfig index 5824f2edf975..20b5ac7ab66a 100644 --- a/drivers/iio/gyro/Kconfig +++ b/drivers/iio/gyro/Kconfig @@ -111,7 +111,6 @@ config FXAS21002C_SPI config HID_SENSOR_GYRO_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Gyroscope 3D" diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index ac90be03332a..f17a93519535 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -272,7 +272,16 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_OFFSET: switch (chan->type) { case IIO_TEMP: - /* The temperature scaling is (x+23000)/280 Celsius */ + /* + * The temperature scaling is (x+23000)/280 Celsius + * for the "best fit straight line" temperature range + * of -30C..85C. The 23000 includes room temperature + * offset of +35C, 280 is the precision scale and x is + * the 16-bit signed integer reported by hardware. + * + * Temperature value itself represents temperature of + * the sensor die. + */ *val = 23000; return IIO_VAL_INT; default: @@ -329,7 +338,7 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev, goto out_read_raw_unlock; } - *val = be16_to_cpu(raw_val); + *val = (s16)be16_to_cpu(raw_val); ret = IIO_VAL_INT; goto out_read_raw_unlock; diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig index 6549fcf6db69..2de5494e7c22 100644 --- a/drivers/iio/humidity/Kconfig +++ b/drivers/iio/humidity/Kconfig @@ -52,7 +52,6 @@ config HID_SENSOR_HUMIDITY tristate "HID Environmental humidity sensor" depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER help diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index d92c58a94fe4..59efb36db2c7 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1778,7 +1778,6 @@ static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!indio_dev->info) goto out_unlock; - ret = -EINVAL; list_for_each_entry(h, &iio_dev_opaque->ioctl_handlers, entry) { ret = h->ioctl(indio_dev, filp, cmd, arg); if (ret != IIO_IOCTL_UNHANDLED) @@ -1786,7 +1785,7 @@ static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } if (ret == IIO_IOCTL_UNHANDLED) - ret = -EINVAL; + ret = -ENODEV; out_unlock: mutex_unlock(&indio_dev->info_exist_lock); @@ -1926,9 +1925,6 @@ EXPORT_SYMBOL(__iio_device_register); **/ void iio_device_unregister(struct iio_dev *indio_dev) { - struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); - struct iio_ioctl_handler *h, *t; - cdev_device_del(&indio_dev->chrdev, &indio_dev->dev); mutex_lock(&indio_dev->info_exist_lock); @@ -1939,9 +1935,6 @@ void iio_device_unregister(struct iio_dev *indio_dev) indio_dev->info = NULL; - list_for_each_entry_safe(h, t, &iio_dev_opaque->ioctl_handlers, entry) - list_del(&h->entry); - iio_device_wakeup_eventset(indio_dev); iio_buffer_wakeup_poll(indio_dev); diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 33ad4dd0b5c7..917f9becf9c7 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -256,7 +256,6 @@ config ISL29125 config HID_SENSOR_ALS depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID ALS" @@ -270,7 +269,6 @@ config HID_SENSOR_ALS config HID_SENSOR_PROX depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID PROX" diff --git a/drivers/iio/light/gp2ap002.c b/drivers/iio/light/gp2ap002.c index d048ae257c51..f960be7d4001 100644 --- a/drivers/iio/light/gp2ap002.c +++ b/drivers/iio/light/gp2ap002.c @@ -582,7 +582,7 @@ static int gp2ap002_probe(struct i2c_client *client, "gp2ap002", indio_dev); if (ret) { dev_err(dev, "unable to request IRQ\n"); - goto out_disable_vio; + goto out_put_pm; } gp2ap002->irq = client->irq; @@ -612,8 +612,9 @@ static int gp2ap002_probe(struct i2c_client *client, return 0; -out_disable_pm: +out_put_pm: pm_runtime_put_noidle(dev); +out_disable_pm: pm_runtime_disable(dev); out_disable_vio: regulator_disable(gp2ap002->vio); diff --git a/drivers/iio/light/tsl2583.c b/drivers/iio/light/tsl2583.c index 0f787bfc88fc..c9d8f07a6fcd 100644 --- a/drivers/iio/light/tsl2583.c +++ b/drivers/iio/light/tsl2583.c @@ -341,6 +341,14 @@ static int tsl2583_als_calibrate(struct iio_dev *indio_dev) return lux_val; } + /* Avoid division by zero of lux_value later on */ + if (lux_val == 0) { + dev_err(&chip->client->dev, + "%s: lux_val of 0 will produce out of range trim_value\n", + __func__); + return -ENODATA; + } + gain_trim_val = (unsigned int)(((chip->als_settings.als_cal_target) * chip->als_settings.als_gain_trim) / lux_val); if ((gain_trim_val < 250) || (gain_trim_val > 4000)) { diff --git a/drivers/iio/magnetometer/Kconfig b/drivers/iio/magnetometer/Kconfig index 5d4ffd66032e..74ad5701c6c2 100644 --- a/drivers/iio/magnetometer/Kconfig +++ b/drivers/iio/magnetometer/Kconfig @@ -95,7 +95,6 @@ config MAG3110 config HID_SENSOR_MAGNETOMETER_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Magenetometer 3D" diff --git a/drivers/iio/orientation/Kconfig b/drivers/iio/orientation/Kconfig index a505583cc2fd..396cbbb867f4 100644 --- a/drivers/iio/orientation/Kconfig +++ b/drivers/iio/orientation/Kconfig @@ -9,7 +9,6 @@ menu "Inclinometer sensors" config HID_SENSOR_INCLINOMETER_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Inclinometer 3D" @@ -20,7 +19,6 @@ config HID_SENSOR_INCLINOMETER_3D config HID_SENSOR_DEVICE_ROTATION depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Device Rotation" diff --git a/drivers/iio/pressure/Kconfig b/drivers/iio/pressure/Kconfig index 689b978db4f9..fc0d3cfca418 100644 --- a/drivers/iio/pressure/Kconfig +++ b/drivers/iio/pressure/Kconfig @@ -79,7 +79,6 @@ config DPS310 config HID_SENSOR_PRESS depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID PRESS" diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c index c685f10b5ae4..cc206bfa09c7 100644 --- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c +++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c @@ -160,6 +160,7 @@ static int lidar_get_measurement(struct lidar_data *data, u16 *reg) ret = lidar_write_control(data, LIDAR_REG_CONTROL_ACQUIRE); if (ret < 0) { dev_err(&client->dev, "cannot send start measurement command"); + pm_runtime_put_noidle(&client->dev); return ret; } diff --git a/drivers/iio/temperature/Kconfig b/drivers/iio/temperature/Kconfig index f1f2a1499c9e..4df60082c1fa 100644 --- a/drivers/iio/temperature/Kconfig +++ b/drivers/iio/temperature/Kconfig @@ -45,7 +45,6 @@ config HID_SENSOR_TEMP tristate "HID Environmental temperature sensor" depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER help diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 522c9b229f80..762125f2905f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2901,7 +2901,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->hmmaxd = le16_to_cpu(id->hmmaxd); } - ret = nvme_mpath_init(ctrl, id); + ret = nvme_mpath_init_identify(ctrl, id); if (ret < 0) goto out_free; @@ -4364,6 +4364,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, min(default_ps_max_latency_us, (unsigned long)S32_MAX)); nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device)); + nvme_mpath_init_ctrl(ctrl); return 0; out_free_name: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 0551796517e6..f81871c7128a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -781,9 +781,18 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) put_disk(head->disk); } -int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) +void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl) { - int error; + mutex_init(&ctrl->ana_lock); + timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0); + INIT_WORK(&ctrl->ana_work, nvme_ana_work); +} + +int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) +{ + size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT; + size_t ana_log_size; + int error = 0; /* check if multipath is enabled and we have the capability */ if (!multipath || !ctrl->subsys || @@ -795,37 +804,31 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) ctrl->nanagrpid = le32_to_cpu(id->nanagrpid); ctrl->anagrpmax = le32_to_cpu(id->anagrpmax); - mutex_init(&ctrl->ana_lock); - timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0); - ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) + - ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc); - ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32); - - if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) { + ana_log_size = sizeof(struct nvme_ana_rsp_hdr) + + ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) + + ctrl->max_namespaces * sizeof(__le32); + if (ana_log_size > max_transfer_size) { dev_err(ctrl->device, - "ANA log page size (%zd) larger than MDTS (%d).\n", - ctrl->ana_log_size, - ctrl->max_hw_sectors << SECTOR_SHIFT); + "ANA log page size (%zd) larger than MDTS (%zd).\n", + ana_log_size, max_transfer_size); dev_err(ctrl->device, "disabling ANA support.\n"); - return 0; + goto out_uninit; } - - INIT_WORK(&ctrl->ana_work, nvme_ana_work); - kfree(ctrl->ana_log_buf); - ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL); - if (!ctrl->ana_log_buf) { - error = -ENOMEM; - goto out; + if (ana_log_size > ctrl->ana_log_size) { + nvme_mpath_stop(ctrl); + kfree(ctrl->ana_log_buf); + ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL); + if (!ctrl->ana_log_buf) + return -ENOMEM; } - + ctrl->ana_log_size = ana_log_size; error = nvme_read_ana_log(ctrl); if (error) - goto out_free_ana_log_buf; + goto out_uninit; return 0; -out_free_ana_log_buf: - kfree(ctrl->ana_log_buf); - ctrl->ana_log_buf = NULL; -out: + +out_uninit: + nvme_mpath_uninit(ctrl); return error; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 05f31a2c64bb..0015860ec12b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -712,7 +712,8 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); void nvme_mpath_remove_disk(struct nvme_ns_head *head); -int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); +int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); +void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); @@ -780,7 +781,10 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) static inline void nvme_trace_bio_complete(struct request *req) { } -static inline int nvme_mpath_init(struct nvme_ctrl *ctrl, +static inline void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl) +{ +} +static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index e7a367cf6d36..dcd49a72f2f3 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -975,10 +975,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) case nvme_admin_keep_alive: req->execute = nvmet_execute_keep_alive; return 0; + default: + return nvmet_report_invalid_opcode(req); } - - pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode, - req->sq->qid); - req->error_loc = offsetof(struct nvme_common_command, opcode); - return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 4845d12e374a..fc3645fc2c24 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -379,7 +379,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) req->execute = nvmet_execute_disc_identify; return 0; default: - pr_err("unhandled cmd %d\n", cmd->common.opcode); + pr_debug("unhandled cmd %d\n", cmd->common.opcode); req->error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 1420a8e3e0b1..7d0f3523fdab 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -94,7 +94,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) req->execute = nvmet_execute_prop_get; break; default: - pr_err("received unknown capsule type 0x%x\n", + pr_debug("received unknown capsule type 0x%x\n", cmd->fabrics.fctype); req->error_loc = offsetof(struct nvmf_common_command, fctype); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; @@ -284,13 +284,13 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req) struct nvme_command *cmd = req->cmd; if (!nvme_is_fabrics(cmd)) { - pr_err("invalid command 0x%x on unconnected queue.\n", + pr_debug("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); req->error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } if (cmd->fabrics.fctype != nvme_fabrics_type_connect) { - pr_err("invalid capsule type 0x%x on unconnected queue.\n", + pr_debug("invalid capsule type 0x%x on unconnected queue.\n", cmd->fabrics.fctype); req->error_loc = offsetof(struct nvmf_common_command, fctype); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 9a8b3726a37c..429263ca9b97 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -258,7 +258,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba); - if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { + if (nvmet_use_inline_bvec(req)) { bio = &req->b.inline_bio; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); } else { diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 715d4376c997..7fdbdc496597 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -49,9 +49,11 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) ns->file = filp_open(ns->device_path, flags, 0); if (IS_ERR(ns->file)) { - pr_err("failed to open file %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->file)); - return PTR_ERR(ns->file); + ret = PTR_ERR(ns->file); + pr_err("failed to open file %s: (%d)\n", + ns->device_path, ret); + ns->file = NULL; + return ret; } ret = nvmet_file_ns_revalidate(ns); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 5566ed403576..d69a409515d6 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -616,4 +616,10 @@ static inline sector_t nvmet_lba_to_sect(struct nvmet_ns *ns, __le64 lba) return le64_to_cpu(lba) << (ns->blksize_shift - SECTOR_SHIFT); } +static inline bool nvmet_use_inline_bvec(struct nvmet_req *req) +{ + return req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN && + req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC; +} + #endif /* _NVMET_H */ diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 2798944899b7..39b1473f7204 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -194,7 +194,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) if (req->sg_cnt > BIO_MAX_VECS) return -EINVAL; - if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { + if (nvmet_use_inline_bvec(req)) { bio = &req->p.inline_bio; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); } else { diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 6c1f3ab7649c..7d607f435e36 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -700,7 +700,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe); - struct nvmet_rdma_queue *queue = cq->cq_context; + struct nvmet_rdma_queue *queue = wc->qp->qp_context; nvmet_rdma_release_rsp(rsp); @@ -786,7 +786,7 @@ static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe); - struct nvmet_rdma_queue *queue = cq->cq_context; + struct nvmet_rdma_queue *queue = wc->qp->qp_context; struct rdma_cm_id *cm_id = rsp->queue->cm_id; u16 status; diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index c1dac6eec59f..a6d731e959a2 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -527,6 +527,9 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct security_priv *psecuritypriv = &(padapter->securitypriv); struct sta_priv *pstapriv = &padapter->stapriv; + char *grpkey = padapter->securitypriv.dot118021XGrpKey[param->u.crypt.idx].skey; + char *txkey = padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey; + char *rxkey = padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey; param->u.crypt.err = 0; param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0'; @@ -609,7 +612,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -622,12 +625,12 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); - memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); + memcpy(rxkey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; @@ -636,7 +639,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { @@ -713,7 +716,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -725,12 +728,12 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); - memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); + memcpy(rxkey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; @@ -739,7 +742,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index e98e5388d5c7..5088c3731b6d 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -2963,6 +2963,9 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct security_priv *psecuritypriv = &(padapter->securitypriv); struct sta_priv *pstapriv = &padapter->stapriv; + char *txkey = padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey; + char *rxkey = padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey; + char *grpkey = psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey; param->u.crypt.err = 0; param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0'; @@ -3064,7 +3067,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, if (!psta && check_fwstate(pmlmepriv, WIFI_AP_STATE)) { /* group key */ if (param->u.crypt.set_tx == 1) { if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -3073,11 +3076,11 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, } else if (strcmp(param->u.crypt.alg, "TKIP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; @@ -3086,7 +3089,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, else if (strcmp(param->u.crypt.alg, "CCMP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_; } @@ -3142,7 +3145,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, } else { /* group key??? */ if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -3150,19 +3153,19 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, } else if (strcmp(param->u.crypt.alg, "TKIP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); - memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); + memcpy(rxkey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; } else if (strcmp(param->u.crypt.alg, "CCMP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_; } diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 01645e87b3d5..fa1548d4f94b 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1171,7 +1171,7 @@ static inline int resize_screen(struct vc_data *vc, int width, int height, /* Resizes the resolution of the display adapater */ int err = 0; - if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_resize) + if (vc->vc_sw->con_resize) err = vc->vc_sw->con_resize(vc, width, height, user); return err; diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 89aeaf3c1bca..0e0cd9e9e589 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -671,21 +671,58 @@ static int vt_resizex(struct vc_data *vc, struct vt_consize __user *cs) if (copy_from_user(&v, cs, sizeof(struct vt_consize))) return -EFAULT; - if (v.v_vlin) - pr_info_once("\"struct vt_consize\"->v_vlin is ignored. Please report if you need this.\n"); - if (v.v_clin) - pr_info_once("\"struct vt_consize\"->v_clin is ignored. Please report if you need this.\n"); + /* FIXME: Should check the copies properly */ + if (!v.v_vlin) + v.v_vlin = vc->vc_scan_lines; + + if (v.v_clin) { + int rows = v.v_vlin / v.v_clin; + if (v.v_rows != rows) { + if (v.v_rows) /* Parameters don't add up */ + return -EINVAL; + v.v_rows = rows; + } + } + + if (v.v_vcol && v.v_ccol) { + int cols = v.v_vcol / v.v_ccol; + if (v.v_cols != cols) { + if (v.v_cols) + return -EINVAL; + v.v_cols = cols; + } + } + + if (v.v_clin > 32) + return -EINVAL; - console_lock(); for (i = 0; i < MAX_NR_CONSOLES; i++) { - vc = vc_cons[i].d; + struct vc_data *vcp; - if (vc) { - vc->vc_resize_user = 1; - vc_resize(vc, v.v_cols, v.v_rows); + if (!vc_cons[i].d) + continue; + console_lock(); + vcp = vc_cons[i].d; + if (vcp) { + int ret; + int save_scan_lines = vcp->vc_scan_lines; + int save_cell_height = vcp->vc_cell_height; + + if (v.v_vlin) + vcp->vc_scan_lines = v.v_vlin; + if (v.v_clin) + vcp->vc_cell_height = v.v_clin; + vcp->vc_resize_user = 1; + ret = vc_resize(vcp, v.v_cols, v.v_rows); + if (ret) { + vcp->vc_scan_lines = save_scan_lines; + vcp->vc_cell_height = save_cell_height; + console_unlock(); + return ret; + } } + console_unlock(); } - console_unlock(); return 0; } diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 508b1c3f8b73..d1e4a7379beb 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -321,12 +321,23 @@ exit: } -static void kill_urbs(struct wdm_device *desc) +static void poison_urbs(struct wdm_device *desc) { /* the order here is essential */ - usb_kill_urb(desc->command); - usb_kill_urb(desc->validity); - usb_kill_urb(desc->response); + usb_poison_urb(desc->command); + usb_poison_urb(desc->validity); + usb_poison_urb(desc->response); +} + +static void unpoison_urbs(struct wdm_device *desc) +{ + /* + * the order here is not essential + * it is symmetrical just to be nice + */ + usb_unpoison_urb(desc->response); + usb_unpoison_urb(desc->validity); + usb_unpoison_urb(desc->command); } static void free_urbs(struct wdm_device *desc) @@ -741,11 +752,12 @@ static int wdm_release(struct inode *inode, struct file *file) if (!desc->count) { if (!test_bit(WDM_DISCONNECTING, &desc->flags)) { dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n"); - kill_urbs(desc); + poison_urbs(desc); spin_lock_irq(&desc->iuspin); desc->resp_count = 0; spin_unlock_irq(&desc->iuspin); desc->manage_power(desc->intf, 0); + unpoison_urbs(desc); } else { /* must avoid dev_printk here as desc->intf is invalid */ pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__); @@ -1037,9 +1049,9 @@ static void wdm_disconnect(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); - kill_urbs(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); @@ -1080,9 +1092,10 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message) set_bit(WDM_SUSPENDING, &desc->flags); spin_unlock_irq(&desc->iuspin); /* callback submits work - order is essential */ - kill_urbs(desc); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); + unpoison_urbs(desc); } if (!PMSG_IS_AUTO(message)) { mutex_unlock(&desc->wlock); @@ -1140,7 +1153,7 @@ static int wdm_pre_reset(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); - kill_urbs(desc); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); return 0; @@ -1151,6 +1164,7 @@ static int wdm_post_reset(struct usb_interface *intf) struct wdm_device *desc = wdm_find_device(intf); int rv; + unpoison_urbs(desc); clear_bit(WDM_OVERFLOW, &desc->flags); clear_bit(WDM_RESETTING, &desc->flags); rv = recover_from_urb_loss(desc); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b2bc4b7c4289..fc7d6cdacf16 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3642,9 +3642,6 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) * sequence. */ status = hub_port_status(hub, port1, &portstatus, &portchange); - - /* TRSMRCY = 10 msec */ - msleep(10); } SuspendCleared: @@ -3659,6 +3656,9 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_SUSPEND); } + + /* TRSMRCY = 10 msec */ + msleep(10); } if (udev->persist_enabled) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index da5ac4a4595b..ab6b815e0089 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -113,6 +113,7 @@ struct dwc2_hsotg_req; * @debugfs: File entry for debugfs file for this endpoint. * @dir_in: Set to true if this endpoint is of the IN direction, which * means that it is sending data to the Host. + * @map_dir: Set to the value of dir_in when the DMA buffer is mapped. * @index: The index for the endpoint registers. * @mc: Multi Count - number of transactions per microframe * @interval: Interval for periodic endpoints, in frames or microframes. @@ -162,6 +163,7 @@ struct dwc2_hsotg_ep { unsigned short fifo_index; unsigned char dir_in; + unsigned char map_dir; unsigned char index; unsigned char mc; u16 interval; diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index e6bb1bdb2760..184964174dc0 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -422,7 +422,7 @@ static void dwc2_hsotg_unmap_dma(struct dwc2_hsotg *hsotg, { struct usb_request *req = &hs_req->req; - usb_gadget_unmap_request(&hsotg->gadget, req, hs_ep->dir_in); + usb_gadget_unmap_request(&hsotg->gadget, req, hs_ep->map_dir); } /* @@ -1242,6 +1242,7 @@ static int dwc2_hsotg_map_dma(struct dwc2_hsotg *hsotg, { int ret; + hs_ep->map_dir = hs_ep->dir_in; ret = usb_gadget_map_request(&hsotg->gadget, req, hs_ep->dir_in); if (ret) goto dma_error; diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 3024785d84cb..520a0beef77c 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -776,7 +776,3 @@ static struct platform_driver dwc2_platform_driver = { }; module_platform_driver(dwc2_platform_driver); - -MODULE_DESCRIPTION("DESIGNWARE HS OTG Platform Glue"); -MODULE_AUTHOR("Matthijs Kooijman <matthijs@stdin.nl>"); -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index b1e875c58f20..c5d5760cdf53 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -57,7 +57,7 @@ #define DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE 3 #define DWC3_DEVICE_EVENT_WAKEUP 4 #define DWC3_DEVICE_EVENT_HIBER_REQ 5 -#define DWC3_DEVICE_EVENT_EOPF 6 +#define DWC3_DEVICE_EVENT_SUSPEND 6 #define DWC3_DEVICE_EVENT_SOF 7 #define DWC3_DEVICE_EVENT_ERRATIC_ERROR 9 #define DWC3_DEVICE_EVENT_CMD_CMPL 10 @@ -460,7 +460,7 @@ #define DWC3_DEVTEN_CMDCMPLTEN BIT(10) #define DWC3_DEVTEN_ERRTICERREN BIT(9) #define DWC3_DEVTEN_SOFEN BIT(7) -#define DWC3_DEVTEN_EOPFEN BIT(6) +#define DWC3_DEVTEN_U3L2L1SUSPEN BIT(6) #define DWC3_DEVTEN_HIBERNATIONREQEVTEN BIT(5) #define DWC3_DEVTEN_WKUPEVTEN BIT(4) #define DWC3_DEVTEN_ULSTCNGEN BIT(3) @@ -850,6 +850,7 @@ struct dwc3_trb { * @hwparams6: GHWPARAMS6 * @hwparams7: GHWPARAMS7 * @hwparams8: GHWPARAMS8 + * @hwparams9: GHWPARAMS9 */ struct dwc3_hwparams { u32 hwparams0; @@ -1374,7 +1375,7 @@ struct dwc3_event_depevt { * 3 - ULStChng * 4 - WkUpEvt * 5 - Reserved - * 6 - EOPF + * 6 - Suspend (EOPF on revisions 2.10a and prior) * 7 - SOF * 8 - Reserved * 9 - ErrticErr diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index db231de46bb3..d0ac89c5b317 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -221,8 +221,8 @@ static inline const char *dwc3_gadget_event_string(char *str, size_t size, snprintf(str, size, "WakeUp [%s]", dwc3_gadget_link_string(state)); break; - case DWC3_DEVICE_EVENT_EOPF: - snprintf(str, size, "End-Of-Frame [%s]", + case DWC3_DEVICE_EVENT_SUSPEND: + snprintf(str, size, "Suspend [%s]", dwc3_gadget_link_string(state)); break; case DWC3_DEVICE_EVENT_SOF: @@ -353,8 +353,8 @@ static inline const char *dwc3_gadget_event_type_string(u8 event) return "Wake-Up"; case DWC3_DEVICE_EVENT_HIBER_REQ: return "Hibernation"; - case DWC3_DEVICE_EVENT_EOPF: - return "End of Periodic Frame"; + case DWC3_DEVICE_EVENT_SUSPEND: + return "Suspend"; case DWC3_DEVICE_EVENT_SOF: return "Start of Frame"; case DWC3_DEVICE_EVENT_ERRATIC_ERROR: diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c index b13cfab89d53..756faa46d33a 100644 --- a/drivers/usb/dwc3/dwc3-imx8mp.c +++ b/drivers/usb/dwc3/dwc3-imx8mp.c @@ -165,8 +165,9 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev) if (err < 0) goto disable_rpm; - dwc3_np = of_get_child_by_name(node, "dwc3"); + dwc3_np = of_get_compatible_child(node, "snps,dwc3"); if (!dwc3_np) { + err = -ENODEV; dev_err(dev, "failed to find dwc3 core child\n"); goto disable_rpm; } diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 3db17806e92e..e196673f5c64 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -437,8 +437,13 @@ static int dwc3_omap_extcon_register(struct dwc3_omap *omap) if (extcon_get_state(edev, EXTCON_USB) == true) dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID); + else + dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF); + if (extcon_get_state(edev, EXTCON_USB_HOST) == true) dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND); + else + dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT); omap->edev = edev; } diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index e7b932dcbf82..1e51460938b8 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -123,6 +123,7 @@ static const struct property_entry dwc3_pci_mrfld_properties[] = { PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"), PROPERTY_ENTRY_BOOL("snps,dis_u3_susphy_quirk"), PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"), + PROPERTY_ENTRY_BOOL("snps,usb2-gadget-lpm-disable"), PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"), {} }; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index dd80e5ca8c78..49ca5da5e279 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1684,7 +1684,9 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) } } - return __dwc3_gadget_kick_transfer(dep); + __dwc3_gadget_kick_transfer(dep); + + return 0; } static int dwc3_gadget_ep_queue(struct usb_ep *ep, struct usb_request *request, @@ -2323,6 +2325,10 @@ static void dwc3_gadget_enable_irq(struct dwc3 *dwc) if (DWC3_VER_IS_PRIOR(DWC3, 250A)) reg |= DWC3_DEVTEN_ULSTCNGEN; + /* On 2.30a and above this bit enables U3/L2-L1 Suspend Events */ + if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) + reg |= DWC3_DEVTEN_U3L2L1SUSPEN; + dwc3_writel(dwc->regs, DWC3_DEVTEN, reg); } @@ -3740,7 +3746,7 @@ static void dwc3_gadget_interrupt(struct dwc3 *dwc, case DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE: dwc3_gadget_linksts_change_interrupt(dwc, event->event_info); break; - case DWC3_DEVICE_EVENT_EOPF: + case DWC3_DEVICE_EVENT_SUSPEND: /* It changed to be suspend event for version 2.30a and above */ if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) { /* @@ -4058,8 +4064,9 @@ err0: void dwc3_gadget_exit(struct dwc3 *dwc) { - usb_del_gadget_udc(dwc->gadget); + usb_del_gadget(dwc->gadget); dwc3_gadget_free_endpoints(dwc); + usb_put_gadget(dwc->gadget); dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce, dwc->bounce_addr); kfree(dwc->setup_buf); diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c index 6cac642520fc..9c2eda0918e1 100644 --- a/drivers/usb/host/fotg210-hcd.c +++ b/drivers/usb/host/fotg210-hcd.c @@ -5568,7 +5568,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev) struct usb_hcd *hcd; struct resource *res; int irq; - int retval = -ENODEV; + int retval; struct fotg210_hcd *fotg210; if (usb_disabled()) @@ -5588,7 +5588,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev) hcd = usb_create_hcd(&fotg210_fotg210_hc_driver, dev, dev_name(dev)); if (!hcd) { - dev_err(dev, "failed to create hcd with err %d\n", retval); + dev_err(dev, "failed to create hcd\n"); retval = -ENOMEM; goto fail_create_hcd; } diff --git a/drivers/usb/host/xhci-ext-caps.h b/drivers/usb/host/xhci-ext-caps.h index fa59b242cd51..e8af0a125f84 100644 --- a/drivers/usb/host/xhci-ext-caps.h +++ b/drivers/usb/host/xhci-ext-caps.h @@ -7,8 +7,9 @@ * Author: Sarah Sharp * Some code borrowed from the Linux EHCI driver. */ -/* Up to 16 ms to halt an HC */ -#define XHCI_MAX_HALT_USEC (16*1000) + +/* HC should halt within 16 ms, but use 32 ms as some hosts take longer */ +#define XHCI_MAX_HALT_USEC (32 * 1000) /* HC not running - set to 1 when run/stop bit is cleared. */ #define XHCI_STS_HALT (1<<0) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 5bbccc9a0179..7bc18cf8042c 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -57,6 +57,7 @@ #define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138 +#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba @@ -166,8 +167,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) (pdev->device == 0x15e0 || pdev->device == 0x15e1)) xhci->quirks |= XHCI_SNPS_BROKEN_SUSPEND; - if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x15e5) + if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x15e5) { xhci->quirks |= XHCI_DISABLE_SPARSE; + xhci->quirks |= XHCI_RESET_ON_RESUME; + } if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; @@ -243,7 +246,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 05c38dd3ee36..a8e4189277da 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -862,7 +862,7 @@ done: return ret; } -static void xhci_handle_halted_endpoint(struct xhci_hcd *xhci, +static int xhci_handle_halted_endpoint(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, unsigned int stream_id, struct xhci_td *td, enum xhci_ep_reset_type reset_type) @@ -875,7 +875,7 @@ static void xhci_handle_halted_endpoint(struct xhci_hcd *xhci, * Device will be reset soon to recover the link so don't do anything */ if (ep->vdev->flags & VDEV_PORT_ERROR) - return; + return -ENODEV; /* add td to cancelled list and let reset ep handler take care of it */ if (reset_type == EP_HARD_RESET) { @@ -888,16 +888,18 @@ static void xhci_handle_halted_endpoint(struct xhci_hcd *xhci, if (ep->ep_state & EP_HALTED) { xhci_dbg(xhci, "Reset ep command already pending\n"); - return; + return 0; } err = xhci_reset_halted_ep(xhci, slot_id, ep->ep_index, reset_type); if (err) - return; + return err; ep->ep_state |= EP_HALTED; xhci_ring_cmd_db(xhci); + + return 0; } /* @@ -1014,6 +1016,7 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, struct xhci_td *td = NULL; enum xhci_ep_reset_type reset_type; struct xhci_command *command; + int err; if (unlikely(TRB_TO_SUSPEND_PORT(le32_to_cpu(trb->generic.field[3])))) { if (!xhci->devs[slot_id]) @@ -1058,7 +1061,10 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, td->status = -EPROTO; } /* reset ep, reset handler cleans up cancelled tds */ - xhci_handle_halted_endpoint(xhci, ep, 0, td, reset_type); + err = xhci_handle_halted_endpoint(xhci, ep, 0, td, + reset_type); + if (err) + break; xhci_stop_watchdog_timer_in_irq(xhci, ep); return; case EP_STATE_RUNNING: diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index ca9385d22f68..27283654ca08 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1514,7 +1514,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, * we need to issue an evaluate context command and wait on it. */ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id, - unsigned int ep_index, struct urb *urb) + unsigned int ep_index, struct urb *urb, gfp_t mem_flags) { struct xhci_container_ctx *out_ctx; struct xhci_input_control_ctx *ctrl_ctx; @@ -1545,7 +1545,7 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id, * changes max packet sizes. */ - command = xhci_alloc_command(xhci, true, GFP_KERNEL); + command = xhci_alloc_command(xhci, true, mem_flags); if (!command) return -ENOMEM; @@ -1639,7 +1639,7 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag */ if (urb->dev->speed == USB_SPEED_FULL) { ret = xhci_check_maxpacket(xhci, slot_id, - ep_index, urb); + ep_index, urb, mem_flags); if (ret < 0) { xhci_urb_free_priv(urb_priv); urb->hcpriv = NULL; diff --git a/drivers/usb/musb/mediatek.c b/drivers/usb/musb/mediatek.c index eebeadd26946..6b92d037d8fc 100644 --- a/drivers/usb/musb/mediatek.c +++ b/drivers/usb/musb/mediatek.c @@ -518,8 +518,8 @@ static int mtk_musb_probe(struct platform_device *pdev) glue->xceiv = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2); if (IS_ERR(glue->xceiv)) { - dev_err(dev, "fail to getting usb-phy %d\n", ret); ret = PTR_ERR(glue->xceiv); + dev_err(dev, "fail to getting usb-phy %d\n", ret); goto err_unregister_usb_phy; } diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index c4fdc00a3bc8..64133e586c64 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -259,6 +259,7 @@ enum frs_typec_current { #define ALTMODE_DISCOVERY_MAX (SVID_DISCOVERY_MAX * MODE_DISCOVERY_MAX) #define GET_SINK_CAP_RETRY_MS 100 +#define SEND_DISCOVER_RETRY_MS 100 struct pd_mode_data { int svid_index; /* current SVID index */ @@ -366,6 +367,8 @@ struct tcpm_port { struct kthread_work vdm_state_machine; struct hrtimer enable_frs_timer; struct kthread_work enable_frs; + struct hrtimer send_discover_timer; + struct kthread_work send_discover_work; bool state_machine_running; bool vdm_sm_running; @@ -1178,6 +1181,16 @@ static void mod_enable_frs_delayed_work(struct tcpm_port *port, unsigned int del } } +static void mod_send_discover_delayed_work(struct tcpm_port *port, unsigned int delay_ms) +{ + if (delay_ms) { + hrtimer_start(&port->send_discover_timer, ms_to_ktime(delay_ms), HRTIMER_MODE_REL); + } else { + hrtimer_cancel(&port->send_discover_timer); + kthread_queue_work(port->wq, &port->send_discover_work); + } +} + static void tcpm_set_state(struct tcpm_port *port, enum tcpm_state state, unsigned int delay_ms) { @@ -1855,6 +1868,9 @@ static void vdm_run_state_machine(struct tcpm_port *port) res = tcpm_ams_start(port, DISCOVER_IDENTITY); if (res == 0) port->send_discover = false; + else if (res == -EAGAIN) + mod_send_discover_delayed_work(port, + SEND_DISCOVER_RETRY_MS); break; case CMD_DISCOVER_SVID: res = tcpm_ams_start(port, DISCOVER_SVIDS); @@ -1880,7 +1896,7 @@ static void vdm_run_state_machine(struct tcpm_port *port) } if (res < 0) { - port->vdm_sm_running = false; + port->vdm_state = VDM_STATE_ERR_BUSY; return; } } @@ -1896,6 +1912,7 @@ static void vdm_run_state_machine(struct tcpm_port *port) port->vdo_data[0] = port->vdo_retry; port->vdo_count = 1; port->vdm_state = VDM_STATE_READY; + tcpm_ams_finish(port); break; case VDM_STATE_BUSY: port->vdm_state = VDM_STATE_ERR_TMOUT; @@ -1961,7 +1978,7 @@ static void vdm_state_machine_work(struct kthread_work *work) port->vdm_state != VDM_STATE_BUSY && port->vdm_state != VDM_STATE_SEND_MESSAGE); - if (port->vdm_state == VDM_STATE_ERR_TMOUT) + if (port->vdm_state < VDM_STATE_READY) port->vdm_sm_running = false; mutex_unlock(&port->lock); @@ -2390,7 +2407,7 @@ static void tcpm_pd_data_request(struct tcpm_port *port, port->nr_sink_caps = cnt; port->sink_cap_done = true; if (port->ams == GET_SINK_CAPABILITIES) - tcpm_pd_handle_state(port, ready_state(port), NONE_AMS, 0); + tcpm_set_state(port, ready_state(port), 0); /* Unexpected Sink Capabilities */ else tcpm_pd_handle_msg(port, @@ -2552,6 +2569,16 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port, port->sink_cap_done = true; tcpm_set_state(port, ready_state(port), 0); break; + case SRC_READY: + case SNK_READY: + if (port->vdm_state > VDM_STATE_READY) { + port->vdm_state = VDM_STATE_DONE; + if (tcpm_vdm_ams(port)) + tcpm_ams_finish(port); + mod_vdm_delayed_work(port, 0); + break; + } + fallthrough; default: tcpm_pd_handle_state(port, port->pwr_role == TYPEC_SOURCE ? @@ -3682,14 +3709,6 @@ static inline enum tcpm_state unattached_state(struct tcpm_port *port) return SNK_UNATTACHED; } -static void tcpm_check_send_discover(struct tcpm_port *port) -{ - if ((port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20) && - port->send_discover && port->pd_capable) - tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0); - port->send_discover = false; -} - static void tcpm_swap_complete(struct tcpm_port *port, int result) { if (port->swap_pending) { @@ -3926,7 +3945,18 @@ static void run_state_machine(struct tcpm_port *port) break; } - tcpm_check_send_discover(port); + /* + * 6.4.4.3.1 Discover Identity + * "The Discover Identity Command Shall only be sent to SOP when there is an + * Explicit Contract." + * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using + * port->explicit_contract to decide whether to send the command. + */ + if (port->explicit_contract) + mod_send_discover_delayed_work(port, 0); + else + port->send_discover = false; + /* * 6.3.5 * Sending ping messages is not necessary if @@ -4055,7 +4085,7 @@ static void run_state_machine(struct tcpm_port *port) if (port->vbus_present) { u32 current_lim = tcpm_get_current_limit(port); - if (port->slow_charger_loop || (current_lim > PD_P_SNK_STDBY_MW / 5)) + if (port->slow_charger_loop && (current_lim > PD_P_SNK_STDBY_MW / 5)) current_lim = PD_P_SNK_STDBY_MW / 5; tcpm_set_current_limit(port, current_lim, 5000); tcpm_set_charge(port, true); @@ -4194,7 +4224,18 @@ static void run_state_machine(struct tcpm_port *port) break; } - tcpm_check_send_discover(port); + /* + * 6.4.4.3.1 Discover Identity + * "The Discover Identity Command Shall only be sent to SOP when there is an + * Explicit Contract." + * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using + * port->explicit_contract. + */ + if (port->explicit_contract) + mod_send_discover_delayed_work(port, 0); + else + port->send_discover = false; + power_supply_changed(port->psy); break; @@ -5288,6 +5329,29 @@ unlock: mutex_unlock(&port->lock); } +static void tcpm_send_discover_work(struct kthread_work *work) +{ + struct tcpm_port *port = container_of(work, struct tcpm_port, send_discover_work); + + mutex_lock(&port->lock); + /* No need to send DISCOVER_IDENTITY anymore */ + if (!port->send_discover) + goto unlock; + + /* Retry if the port is not idle */ + if ((port->state != SRC_READY && port->state != SNK_READY) || port->vdm_sm_running) { + mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS); + goto unlock; + } + + /* Only send the Message if the port is host for PD rev2.0 */ + if (port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20) + tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0); + +unlock: + mutex_unlock(&port->lock); +} + static int tcpm_dr_set(struct typec_port *p, enum typec_data_role data) { struct tcpm_port *port = typec_get_drvdata(p); @@ -5754,6 +5818,15 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, if (!fwnode) return -EINVAL; + /* + * This fwnode has a "compatible" property, but is never populated as a + * struct device. Instead we simply parse it to read the properties. + * This it breaks fw_devlink=on. To maintain backward compatibility + * with existing DT files, we work around this by deleting any + * fwnode_links to/from this fwnode. + */ + fw_devlink_purge_absent_suppliers(fwnode); + /* USB data support is optional */ ret = fwnode_property_read_string(fwnode, "data-role", &cap_str); if (ret == 0) { @@ -6093,6 +6166,14 @@ static enum hrtimer_restart enable_frs_timer_handler(struct hrtimer *timer) return HRTIMER_NORESTART; } +static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer) +{ + struct tcpm_port *port = container_of(timer, struct tcpm_port, send_discover_timer); + + kthread_queue_work(port->wq, &port->send_discover_work); + return HRTIMER_NORESTART; +} + struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) { struct tcpm_port *port; @@ -6123,12 +6204,15 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) kthread_init_work(&port->vdm_state_machine, vdm_state_machine_work); kthread_init_work(&port->event_work, tcpm_pd_event_handler); kthread_init_work(&port->enable_frs, tcpm_enable_frs_work); + kthread_init_work(&port->send_discover_work, tcpm_send_discover_work); hrtimer_init(&port->state_machine_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); port->state_machine_timer.function = state_machine_timer_handler; hrtimer_init(&port->vdm_state_machine_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); port->vdm_state_machine_timer.function = vdm_state_machine_timer_handler; hrtimer_init(&port->enable_frs_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); port->enable_frs_timer.function = enable_frs_timer_handler; + hrtimer_init(&port->send_discover_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + port->send_discover_timer.function = send_discover_timer_handler; spin_lock_init(&port->pd_event_lock); diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 282c3c825c13..1d8b7df59ff4 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -495,7 +495,8 @@ static void ucsi_unregister_altmodes(struct ucsi_connector *con, u8 recipient) } } -static void ucsi_get_pdos(struct ucsi_connector *con, int is_partner) +static int ucsi_get_pdos(struct ucsi_connector *con, int is_partner, + u32 *pdos, int offset, int num_pdos) { struct ucsi *ucsi = con->ucsi; u64 command; @@ -503,17 +504,39 @@ static void ucsi_get_pdos(struct ucsi_connector *con, int is_partner) command = UCSI_COMMAND(UCSI_GET_PDOS) | UCSI_CONNECTOR_NUMBER(con->num); command |= UCSI_GET_PDOS_PARTNER_PDO(is_partner); - command |= UCSI_GET_PDOS_NUM_PDOS(UCSI_MAX_PDOS - 1); + command |= UCSI_GET_PDOS_PDO_OFFSET(offset); + command |= UCSI_GET_PDOS_NUM_PDOS(num_pdos - 1); command |= UCSI_GET_PDOS_SRC_PDOS; - ret = ucsi_send_command(ucsi, command, con->src_pdos, - sizeof(con->src_pdos)); - if (ret < 0) { + ret = ucsi_send_command(ucsi, command, pdos + offset, + num_pdos * sizeof(u32)); + if (ret < 0) dev_err(ucsi->dev, "UCSI_GET_PDOS failed (%d)\n", ret); + if (ret == 0 && offset == 0) + dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n"); + + return ret; +} + +static void ucsi_get_src_pdos(struct ucsi_connector *con, int is_partner) +{ + int ret; + + /* UCSI max payload means only getting at most 4 PDOs at a time */ + ret = ucsi_get_pdos(con, 1, con->src_pdos, 0, UCSI_MAX_PDOS); + if (ret < 0) return; - } + con->num_pdos = ret / sizeof(u32); /* number of bytes to 32-bit PDOs */ - if (ret == 0) - dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n"); + if (con->num_pdos < UCSI_MAX_PDOS) + return; + + /* get the remaining PDOs, if any */ + ret = ucsi_get_pdos(con, 1, con->src_pdos, UCSI_MAX_PDOS, + PDO_MAX_OBJECTS - UCSI_MAX_PDOS); + if (ret < 0) + return; + + con->num_pdos += ret / sizeof(u32); } static void ucsi_pwr_opmode_change(struct ucsi_connector *con) @@ -522,7 +545,7 @@ static void ucsi_pwr_opmode_change(struct ucsi_connector *con) case UCSI_CONSTAT_PWR_OPMODE_PD: con->rdo = con->status.request_data_obj; typec_set_pwr_opmode(con->port, TYPEC_PWR_MODE_PD); - ucsi_get_pdos(con, 1); + ucsi_get_src_pdos(con, 1); break; case UCSI_CONSTAT_PWR_OPMODE_TYPEC1_5: con->rdo = 0; @@ -999,6 +1022,7 @@ static const struct typec_operations ucsi_ops = { .pr_set = ucsi_pr_swap }; +/* Caller must call fwnode_handle_put() after use */ static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con) { struct fwnode_handle *fwnode; @@ -1033,7 +1057,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) command |= UCSI_CONNECTOR_NUMBER(con->num); ret = ucsi_send_command(ucsi, command, &con->cap, sizeof(con->cap)); if (ret < 0) - goto out; + goto out_unlock; if (con->cap.op_mode & UCSI_CONCAP_OPMODE_DRP) cap->data = TYPEC_PORT_DRD; @@ -1151,6 +1175,8 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) trace_ucsi_register_port(con->num, &con->status); out: + fwnode_handle_put(cap->fwnode); +out_unlock: mutex_unlock(&con->lock); return ret; } diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 3920e20a9e9e..cee666790907 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -8,6 +8,7 @@ #include <linux/power_supply.h> #include <linux/types.h> #include <linux/usb/typec.h> +#include <linux/usb/pd.h> #include <linux/usb/role.h> /* -------------------------------------------------------------------------- */ @@ -134,7 +135,9 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num); /* GET_PDOS command bits */ #define UCSI_GET_PDOS_PARTNER_PDO(_r_) ((u64)(_r_) << 23) +#define UCSI_GET_PDOS_PDO_OFFSET(_r_) ((u64)(_r_) << 24) #define UCSI_GET_PDOS_NUM_PDOS(_r_) ((u64)(_r_) << 32) +#define UCSI_MAX_PDOS (4) #define UCSI_GET_PDOS_SRC_PDOS ((u64)1 << 34) /* -------------------------------------------------------------------------- */ @@ -302,7 +305,6 @@ struct ucsi { #define UCSI_MAX_SVID 5 #define UCSI_MAX_ALTMODES (UCSI_MAX_SVID * 6) -#define UCSI_MAX_PDOS (4) #define UCSI_TYPEC_VSAFE5V 5000 #define UCSI_TYPEC_1_5_CURRENT 1500 @@ -330,7 +332,7 @@ struct ucsi_connector { struct power_supply *psy; struct power_supply_desc psy_desc; u32 rdo; - u32 src_pdos[UCSI_MAX_PDOS]; + u32 src_pdos[PDO_MAX_OBJECTS]; int num_pdos; struct usb_role_switch *usb_role_sw; diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 39258f9d36a0..ef9c57ce0906 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -380,7 +380,7 @@ static void vgacon_init(struct vc_data *c, int init) vc_resize(c, vga_video_num_columns, vga_video_num_lines); c->vc_scan_lines = vga_scan_lines; - c->vc_font.height = vga_video_font_height; + c->vc_font.height = c->vc_cell_height = vga_video_font_height; c->vc_complement_mask = 0x7700; if (vga_512_chars) c->vc_hi_font_mask = 0x0800; @@ -515,32 +515,32 @@ static void vgacon_cursor(struct vc_data *c, int mode) switch (CUR_SIZE(c->vc_cursor_type)) { case CUR_UNDERLINE: vgacon_set_cursor_size(c->state.x, - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height - + (c->vc_cell_height < 10 ? 2 : 3), - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_TWO_THIRDS: vgacon_set_cursor_size(c->state.x, - c->vc_font.height / 3, - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height / 3, + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_LOWER_THIRD: vgacon_set_cursor_size(c->state.x, - (c->vc_font.height * 2) / 3, - c->vc_font.height - - (c->vc_font.height < + (c->vc_cell_height * 2) / 3, + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_LOWER_HALF: vgacon_set_cursor_size(c->state.x, - c->vc_font.height / 2, - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height / 2, + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_NONE: @@ -551,7 +551,7 @@ static void vgacon_cursor(struct vc_data *c, int mode) break; default: vgacon_set_cursor_size(c->state.x, 1, - c->vc_font.height); + c->vc_cell_height); break; } break; @@ -562,13 +562,13 @@ static int vgacon_doresize(struct vc_data *c, unsigned int width, unsigned int height) { unsigned long flags; - unsigned int scanlines = height * c->vc_font.height; + unsigned int scanlines = height * c->vc_cell_height; u8 scanlines_lo = 0, r7 = 0, vsync_end = 0, mode, max_scan; raw_spin_lock_irqsave(&vga_lock, flags); vgacon_xres = width * VGA_FONTWIDTH; - vgacon_yres = height * c->vc_font.height; + vgacon_yres = height * c->vc_cell_height; if (vga_video_type >= VIDEO_TYPE_VGAC) { outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg); max_scan = inb_p(vga_video_port_val); @@ -623,9 +623,9 @@ static int vgacon_doresize(struct vc_data *c, static int vgacon_switch(struct vc_data *c) { int x = c->vc_cols * VGA_FONTWIDTH; - int y = c->vc_rows * c->vc_font.height; + int y = c->vc_rows * c->vc_cell_height; int rows = screen_info.orig_video_lines * vga_default_font_height/ - c->vc_font.height; + c->vc_cell_height; /* * We need to save screen size here as it's the only way * we can spot the screen has been resized and we need to @@ -1038,7 +1038,7 @@ static int vgacon_adjust_height(struct vc_data *vc, unsigned fontheight) cursor_size_lastto = 0; c->vc_sw->con_cursor(c, CM_DRAW); } - c->vc_font.height = fontheight; + c->vc_font.height = c->vc_cell_height = fontheight; vc_resize(c, 0, rows); /* Adjust console size */ } } @@ -1086,12 +1086,20 @@ static int vgacon_resize(struct vc_data *c, unsigned int width, if ((width << 1) * height > vga_vram_size) return -EINVAL; + if (user) { + /* + * Ho ho! Someone (svgatextmode, eh?) may have reprogrammed + * the video mode! Set the new defaults then and go away. + */ + screen_info.orig_video_cols = width; + screen_info.orig_video_lines = height; + vga_default_font_height = c->vc_cell_height; + return 0; + } if (width % 2 || width > screen_info.orig_video_cols || height > (screen_info.orig_video_lines * vga_default_font_height)/ - c->vc_font.height) - /* let svgatextmode tinker with video timings and - return success */ - return (user) ? 0 : -EINVAL; + c->vc_cell_height) + return -EINVAL; if (con_is_visible(c) && !vga_is_gfx) /* who knows */ vgacon_doresize(c, width, height); diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 3406067985b1..22bb3892f6bd 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2019,7 +2019,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width, return -EINVAL; pr_debug("resize now %ix%i\n", var.xres, var.yres); - if (con_is_visible(vc)) { + if (con_is_visible(vc) && vc->vc_mode == KD_TEXT) { var.activate = FB_ACTIVATE_NOW | FB_ACTIVATE_FORCE; fb_set_var(info, &var); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index f01d58c7a042..a3e7be96527d 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -1017,8 +1017,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) err = mmu_interval_notifier_insert_locked( &map->notifier, vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, &gntdev_mmu_ops); - if (err) + if (err) { + map->vma = NULL; goto out_unlock_put; + } } mutex_unlock(&priv->lock); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 4c89afc0df62..24d11861ac7d 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -164,6 +164,11 @@ int __ref xen_swiotlb_init(void) int rc = -ENOMEM; char *start; + if (io_tlb_default_mem != NULL) { + pr_warn("swiotlb buffer already initialized\n"); + return -EEXIST; + } + retry: m_ret = XEN_SWIOTLB_ENOMEM; order = get_order(bytes); diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c index e64e6befc63b..87e6b7db892f 100644 --- a/drivers/xen/unpopulated-alloc.c +++ b/drivers/xen/unpopulated-alloc.c @@ -39,8 +39,10 @@ static int fill_list(unsigned int nr_pages) } pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL); - if (!pgmap) + if (!pgmap) { + ret = -ENOMEM; goto err_pgmap; + } pgmap->type = MEMORY_DEVICE_GENERIC; pgmap->range = (struct range) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f83fd3cbf243..9fb76829a281 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3127,7 +3127,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 new_size, u32 min_type); -int btrfs_start_delalloc_snapshot(struct btrfs_root *root); +int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context); int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, bool in_reclaim_context); int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7a28314189b4..f1d15b68994a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1340,12 +1340,16 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, stripe = bbio->stripes; for (i = 0; i < bbio->num_stripes; i++, stripe++) { u64 bytes; + struct btrfs_device *device = stripe->dev; - if (!stripe->dev->bdev) { + if (!device->bdev) { ASSERT(btrfs_test_opt(fs_info, DEGRADED)); continue; } + if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) + continue; + ret = do_discard_extent(stripe, &bytes); if (!ret) { discarded_bytes += bytes; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 864c08d08a35..3b10d98b4ebb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2067,6 +2067,30 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) return ret; } +static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) +{ + struct btrfs_inode *inode = BTRFS_I(ctx->inode); + struct btrfs_fs_info *fs_info = inode->root->fs_info; + + if (btrfs_inode_in_log(inode, fs_info->generation) && + list_empty(&ctx->ordered_extents)) + return true; + + /* + * If we are doing a fast fsync we can not bail out if the inode's + * last_trans is <= then the last committed transaction, because we only + * update the last_trans of the inode during ordered extent completion, + * and for a fast fsync we don't wait for that, we only wait for the + * writeback to complete. + */ + if (inode->last_trans <= fs_info->last_trans_committed && + (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) || + list_empty(&ctx->ordered_extents))) + return true; + + return false; +} + /* * fsync call for both files and directories. This logs the inode into * the tree log instead of forcing full commits whenever possible. @@ -2185,17 +2209,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); - /* - * If we are doing a fast fsync we can not bail out if the inode's - * last_trans is <= then the last committed transaction, because we only - * update the last_trans of the inode during ordered extent completion, - * and for a fast fsync we don't wait for that, we only wait for the - * writeback to complete. - */ smp_mb(); - if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || - (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed && - (full_sync || list_empty(&ctx.ordered_extents)))) { + if (skip_inode_logging(&ctx)) { /* * We've had everything committed since the last time we were * modified so clear this flag in case it was set for whatever diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e54466fc101f..4806295116d8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3949,7 +3949,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info, { struct btrfs_block_group *block_group; struct rb_node *node; - int ret; + int ret = 0; btrfs_info(fs_info, "cleaning free space cache v1"); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4af336008b12..eb6fddf40841 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9678,7 +9678,7 @@ out: return ret; } -int btrfs_start_delalloc_snapshot(struct btrfs_root *root) +int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context) { struct writeback_control wbc = { .nr_to_write = LONG_MAX, @@ -9691,7 +9691,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root) if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) return -EROFS; - return start_delalloc_inodes(root, &wbc, true, false); + return start_delalloc_inodes(root, &wbc, true, in_reclaim_context); } int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ee1dbabb5d3c..5dc2fd843ae3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -259,6 +259,8 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns, if (!fa->flags_valid) { /* 1 item for the inode */ trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); goto update_flags; } @@ -907,7 +909,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent, */ btrfs_drew_read_lock(&root->snapshot_lock); - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) goto out; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 07b0b4218791..6c413bb451a3 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -984,7 +984,7 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre, if (pre) ret = clone_ordered_extent(ordered, 0, pre); - if (post) + if (ret == 0 && post) ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes, post); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 2319c923c9e6..3ded812f522c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3545,11 +3545,15 @@ static int try_flush_qgroup(struct btrfs_root *root) struct btrfs_trans_handle *trans; int ret; - /* Can't hold an open transaction or we run the risk of deadlocking */ - ASSERT(current->journal_info == NULL || - current->journal_info == BTRFS_SEND_TRANS_STUB); - if (WARN_ON(current->journal_info && - current->journal_info != BTRFS_SEND_TRANS_STUB)) + /* + * Can't hold an open transaction or we run the risk of deadlocking, + * and can't either be under the context of a send operation (where + * current->journal_info is set to BTRFS_SEND_TRANS_STUB), as that + * would result in a crash when starting a transaction and does not + * make sense either (send is a read-only operation). + */ + ASSERT(current->journal_info == NULL); + if (WARN_ON(current->journal_info)) return 0; /* @@ -3562,7 +3566,7 @@ static int try_flush_qgroup(struct btrfs_root *root) return 0; } - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, true); if (ret < 0) goto out; btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 55741adf9071..bd69db72acc5 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -7170,7 +7170,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx) int i; if (root) { - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) return ret; btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); @@ -7178,7 +7178,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx) for (i = 0; i < sctx->clone_roots_cnt; i++) { root = sctx->clone_roots[i].root; - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) return ret; btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f67721d82e5d..95a600034d61 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6061,7 +6061,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, * (since logging them is pointless, a link count of 0 means they * will never be accessible). */ - if (btrfs_inode_in_log(inode, trans->transid) || + if ((btrfs_inode_in_log(inode, trans->transid) && + list_empty(&ctx->ordered_extents)) || inode->vfs_inode.i_nlink == 0) { ret = BTRFS_NO_LOG_SYNC; goto end_no_trans; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 70b23a0d03b1..304ce64c70a4 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1126,6 +1126,11 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } + if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { + ret = -EIO; + goto out; + } + switch (zone.cond) { case BLK_ZONE_COND_OFFLINE: case BLK_ZONE_COND_READONLY: @@ -144,6 +144,16 @@ struct wait_exceptional_entry_queue { struct exceptional_entry_key key; }; +/** + * enum dax_wake_mode: waitqueue wakeup behaviour + * @WAKE_ALL: wake all waiters in the waitqueue + * @WAKE_NEXT: wake only the first waiter in the waitqueue + */ +enum dax_wake_mode { + WAKE_ALL, + WAKE_NEXT, +}; + static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas, void *entry, struct exceptional_entry_key *key) { @@ -182,7 +192,8 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, * The important information it's conveying is whether the entry at * this index used to be a PMD entry. */ -static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all) +static void dax_wake_entry(struct xa_state *xas, void *entry, + enum dax_wake_mode mode) { struct exceptional_entry_key key; wait_queue_head_t *wq; @@ -196,7 +207,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all) * must be in the waitqueue and the following check will see them. */ if (waitqueue_active(wq)) - __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); + __wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, &key); } /* @@ -264,11 +275,11 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry) finish_wait(wq, &ewait.wait); } -static void put_unlocked_entry(struct xa_state *xas, void *entry) +static void put_unlocked_entry(struct xa_state *xas, void *entry, + enum dax_wake_mode mode) { - /* If we were the only waiter woken, wake the next one */ if (entry && !dax_is_conflict(entry)) - dax_wake_entry(xas, entry, false); + dax_wake_entry(xas, entry, mode); } /* @@ -286,7 +297,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry) old = xas_store(xas, entry); xas_unlock_irq(xas); BUG_ON(!dax_is_locked(old)); - dax_wake_entry(xas, entry, false); + dax_wake_entry(xas, entry, WAKE_NEXT); } /* @@ -524,7 +535,7 @@ retry: dax_disassociate_entry(entry, mapping, false); xas_store(xas, NULL); /* undo the PMD join */ - dax_wake_entry(xas, entry, true); + dax_wake_entry(xas, entry, WAKE_ALL); mapping->nrpages -= PG_PMD_NR; entry = NULL; xas_set(xas, index); @@ -622,7 +633,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, entry = get_unlocked_entry(&xas, 0); if (entry) page = dax_busy_page(entry); - put_unlocked_entry(&xas, entry); + put_unlocked_entry(&xas, entry, WAKE_NEXT); if (page) break; if (++scanned % XA_CHECK_SCHED) @@ -664,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping, mapping->nrpages -= 1UL << dax_entry_order(entry); ret = 1; out: - put_unlocked_entry(&xas, entry); + put_unlocked_entry(&xas, entry, WAKE_ALL); xas_unlock_irq(&xas); return ret; } @@ -937,13 +948,13 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, xas_lock_irq(xas); xas_store(xas, entry); xas_clear_mark(xas, PAGECACHE_TAG_DIRTY); - dax_wake_entry(xas, entry, false); + dax_wake_entry(xas, entry, WAKE_NEXT); trace_dax_writeback_one(mapping->host, index, count); return ret; put_unlocked: - put_unlocked_entry(xas, entry); + put_unlocked_entry(xas, entry, WAKE_NEXT); return ret; } @@ -1684,7 +1695,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) /* Did we race with someone splitting entry or so? */ if (!entry || dax_is_conflict(entry) || (order == 0 && !dax_is_pte_entry(entry))) { - put_unlocked_entry(&xas, entry); + put_unlocked_entry(&xas, entry, WAKE_NEXT); xas_unlock_irq(&xas); trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, VM_FAULT_NOPAGE); diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index e62d813756f2..efaf32596b97 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -450,14 +450,31 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, lcn = m->lcn + 1; if (m->compressedlcs) goto out; - if (lcn == initial_lcn) - goto err_bonus_cblkcnt; err = z_erofs_load_cluster_from_disk(m, lcn); if (err) return err; + /* + * If the 1st NONHEAD lcluster has already been handled initially w/o + * valid compressedlcs, which means at least it mustn't be CBLKCNT, or + * an internal implemenatation error is detected. + * + * The following code can also handle it properly anyway, but let's + * BUG_ON in the debugging mode only for developers to notice that. + */ + DBG_BUGON(lcn == initial_lcn && + m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD); + switch (m->type) { + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + /* + * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type + * rather than CBLKCNT, it's a 1 lcluster-sized pcluster. + */ + m->compressedlcs = 1; + break; case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: if (m->delta[0] != 1) goto err_bonus_cblkcnt; diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 53b13787eb2c..925a5ca3744a 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -117,19 +117,6 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len) f2fs_drop_rpages(cc, len, true); } -static void f2fs_put_rpages_mapping(struct address_space *mapping, - pgoff_t start, int len) -{ - int i; - - for (i = 0; i < len; i++) { - struct page *page = find_get_page(mapping, start + i); - - put_page(page); - put_page(page); - } -} - static void f2fs_put_rpages_wbc(struct compress_ctx *cc, struct writeback_control *wbc, bool redirty, int unlock) { @@ -158,13 +145,14 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc) return cc->rpages ? 0 : -ENOMEM; } -void f2fs_destroy_compress_ctx(struct compress_ctx *cc) +void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) { page_array_free(cc->inode, cc->rpages, cc->cluster_size); cc->rpages = NULL; cc->nr_rpages = 0; cc->nr_cpages = 0; - cc->cluster_idx = NULL_CLUSTER; + if (!reuse) + cc->cluster_idx = NULL_CLUSTER; } void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page) @@ -1036,7 +1024,7 @@ retry: } if (PageUptodate(page)) - unlock_page(page); + f2fs_put_page(page, 1); else f2fs_compress_ctx_add_page(cc, page); } @@ -1046,33 +1034,35 @@ retry: ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, &last_block_in_bio, false, true); - f2fs_destroy_compress_ctx(cc); + f2fs_put_rpages(cc); + f2fs_destroy_compress_ctx(cc, true); if (ret) - goto release_pages; + goto out; if (bio) f2fs_submit_bio(sbi, bio, DATA); ret = f2fs_init_compress_ctx(cc); if (ret) - goto release_pages; + goto out; } for (i = 0; i < cc->cluster_size; i++) { f2fs_bug_on(sbi, cc->rpages[i]); page = find_lock_page(mapping, start_idx + i); - f2fs_bug_on(sbi, !page); + if (!page) { + /* page can be truncated */ + goto release_and_retry; + } f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_compress_ctx_add_page(cc, page); - f2fs_put_page(page, 0); if (!PageUptodate(page)) { +release_and_retry: + f2fs_put_rpages(cc); f2fs_unlock_rpages(cc, i + 1); - f2fs_put_rpages_mapping(mapping, start_idx, - cc->cluster_size); - f2fs_destroy_compress_ctx(cc); + f2fs_destroy_compress_ctx(cc, true); goto retry; } } @@ -1103,10 +1093,10 @@ retry: } unlock_pages: + f2fs_put_rpages(cc); f2fs_unlock_rpages(cc, i); -release_pages: - f2fs_put_rpages_mapping(mapping, start_idx, i); - f2fs_destroy_compress_ctx(cc); + f2fs_destroy_compress_ctx(cc, true); +out: return ret; } @@ -1141,7 +1131,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, set_cluster_dirty(&cc); f2fs_put_rpages_wbc(&cc, NULL, false, 1); - f2fs_destroy_compress_ctx(&cc); + f2fs_destroy_compress_ctx(&cc, false); return first_index; } @@ -1361,7 +1351,7 @@ unlock_continue: f2fs_put_rpages(cc); page_array_free(cc->inode, cc->cpages, cc->nr_cpages); cc->cpages = NULL; - f2fs_destroy_compress_ctx(cc); + f2fs_destroy_compress_ctx(cc, false); return 0; out_destroy_crypt: @@ -1372,7 +1362,8 @@ out_destroy_crypt: for (i = 0; i < cc->nr_cpages; i++) { if (!cc->cpages[i]) continue; - f2fs_put_page(cc->cpages[i], 1); + f2fs_compress_free_page(cc->cpages[i]); + cc->cpages[i] = NULL; } out_put_cic: kmem_cache_free(cic_entry_slab, cic); @@ -1522,7 +1513,7 @@ write: err = f2fs_write_raw_pages(cc, submitted, wbc, io_type); f2fs_put_rpages_wbc(cc, wbc, false, 0); destroy_out: - f2fs_destroy_compress_ctx(cc); + f2fs_destroy_compress_ctx(cc, false); return err; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 96f1a354f89f..009a09fb9d88 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2287,7 +2287,7 @@ static int f2fs_mpage_readpages(struct inode *inode, max_nr_pages, &last_block_in_bio, rac != NULL, false); - f2fs_destroy_compress_ctx(&cc); + f2fs_destroy_compress_ctx(&cc, false); if (ret) goto set_error_page; } @@ -2332,7 +2332,7 @@ next_page: max_nr_pages, &last_block_in_bio, rac != NULL, false); - f2fs_destroy_compress_ctx(&cc); + f2fs_destroy_compress_ctx(&cc, false); } } #endif @@ -3033,7 +3033,7 @@ next: } } if (f2fs_compressed_file(inode)) - f2fs_destroy_compress_ctx(&cc); + f2fs_destroy_compress_ctx(&cc, false); #endif if (retry) { index = 0; @@ -3801,6 +3801,7 @@ static int f2fs_is_file_aligned(struct inode *inode) block_t pblock; unsigned long nr_pblocks; unsigned int blocks_per_sec = BLKS_PER_SEC(sbi); + unsigned int not_aligned = 0; int ret = 0; cur_lblock = 0; @@ -3833,13 +3834,20 @@ static int f2fs_is_file_aligned(struct inode *inode) if ((pblock - main_blkaddr) & (blocks_per_sec - 1) || nr_pblocks & (blocks_per_sec - 1)) { - f2fs_err(sbi, "Swapfile does not align to section"); - ret = -EINVAL; - goto out; + if (f2fs_is_pinned_file(inode)) { + f2fs_err(sbi, "Swapfile does not align to section"); + ret = -EINVAL; + goto out; + } + not_aligned++; } cur_lblock += nr_pblocks; } + if (not_aligned) + f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n" + "\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()", + not_aligned); out: return ret; } @@ -3858,6 +3866,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, int nr_extents = 0; unsigned long nr_pblocks; unsigned int blocks_per_sec = BLKS_PER_SEC(sbi); + unsigned int not_aligned = 0; int ret = 0; /* @@ -3887,7 +3896,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, /* hole */ if (!(map.m_flags & F2FS_MAP_FLAGS)) { f2fs_err(sbi, "Swapfile has holes\n"); - ret = -ENOENT; + ret = -EINVAL; goto out; } @@ -3896,9 +3905,12 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) || nr_pblocks & (blocks_per_sec - 1)) { - f2fs_err(sbi, "Swapfile does not align to section"); - ret = -EINVAL; - goto out; + if (f2fs_is_pinned_file(inode)) { + f2fs_err(sbi, "Swapfile does not align to section"); + ret = -EINVAL; + goto out; + } + not_aligned++; } if (cur_lblock + nr_pblocks >= sis->max) @@ -3927,6 +3939,11 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, sis->max = cur_lblock; sis->pages = cur_lblock - 1; sis->highest_bit = cur_lblock - 1; + + if (not_aligned) + f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n" + "\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()", + not_aligned); out: return ret; } @@ -4035,7 +4052,7 @@ out: return ret; bad_bmap: f2fs_err(sbi, "Swapfile has holes\n"); - return -ENOENT; + return -EINVAL; } static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 044878866ca3..c83d90125ebd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3956,7 +3956,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed); void f2fs_put_page_dic(struct page *page); int f2fs_init_compress_ctx(struct compress_ctx *cc); -void f2fs_destroy_compress_ctx(struct compress_ctx *cc); +void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); void f2fs_init_compress_info(struct f2fs_sb_info *sbi); int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi); void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 44a4650aea7b..ceb575f99048 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1817,7 +1817,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) struct f2fs_inode_info *fi = F2FS_I(inode); u32 masked_flags = fi->i_flags & mask; - f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask)); + /* mask can be shrunk by flags_valid selector */ + iflags &= mask; /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c605415840b5..51dc79fad4fe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3574,12 +3574,12 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) return err; drop_bio: - if (fio->bio) { + if (fio->bio && *(fio->bio)) { struct bio *bio = *(fio->bio); bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - fio->bio = NULL; + *(fio->bio) = NULL; } return err; } diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index a930ddd15681..7054a542689f 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -598,13 +598,15 @@ void hfsplus_file_truncate(struct inode *inode) res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); if (res) break; - hfs_brec_remove(&fd); - mutex_unlock(&fd.tree->tree_lock); start = hip->cached_start; + if (blk_cnt <= start) + hfs_brec_remove(&fd); + mutex_unlock(&fd.tree->tree_lock); hfsplus_free_extents(sb, hip->cached_extents, alloc_cnt - start, alloc_cnt - blk_cnt); hfsplus_dump_extent(hip->cached_extents); + mutex_lock(&fd.tree->tree_lock); if (blk_cnt > start) { hip->extent_state |= HFSPLUS_EXT_DIRTY; break; @@ -612,7 +614,6 @@ void hfsplus_file_truncate(struct inode *inode) alloc_cnt = start; hip->cached_start = hip->cached_blocks = 0; hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); - mutex_lock(&fd.tree->tree_lock); } hfs_find_exit(&fd); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a2a42335e8fd..9d9e0097c1d3 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -131,6 +131,7 @@ static void huge_pagevec_release(struct pagevec *pvec) static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); @@ -146,6 +147,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; + ret = seal_check_future_write(info->seals, vma); + if (ret) + return ret; + /* * page based offset in vm_pgoff could be sufficiently large to * overflow a loff_t when converted to byte offset. This can diff --git a/fs/io_uring.c b/fs/io_uring.c index f46acbbeed57..e481ac8a757a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -100,6 +100,8 @@ #define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ IORING_REGISTER_LAST + IORING_OP_LAST) +#define IORING_MAX_REG_BUFFERS (1U << 14) + #define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ IOSQE_IO_HARDLINK | IOSQE_ASYNC | \ IOSQE_BUFFER_SELECT) @@ -4035,7 +4037,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req, #if defined(CONFIG_EPOLL) if (sqe->ioprio || sqe->buf_index) return -EINVAL; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; req->epoll.epfd = READ_ONCE(sqe->fd); @@ -4150,7 +4152,7 @@ static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags) static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->ioprio || sqe->buf_index) return -EINVAL; @@ -5827,8 +5829,6 @@ done: static int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL)) - return -EINVAL; if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; if (sqe->ioprio || sqe->rw_flags) @@ -6354,19 +6354,20 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) * We don't expect the list to be empty, that will only happen if we * race with the completion of the linked work. */ - if (prev && req_ref_inc_not_zero(prev)) + if (prev) { io_remove_next_linked(prev); - else - prev = NULL; + if (!req_ref_inc_not_zero(prev)) + prev = NULL; + } spin_unlock_irqrestore(&ctx->completion_lock, flags); if (prev) { io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME); io_put_req_deferred(prev, 1); + io_put_req_deferred(req, 1); } else { io_req_complete_post(req, -ETIME, 0); } - io_put_req_deferred(req, 1); return HRTIMER_NORESTART; } @@ -8390,7 +8391,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, if (ctx->user_bufs) return -EBUSY; - if (!nr_args || nr_args > UIO_MAXIOV) + if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS) return -EINVAL; ret = io_rsrc_node_switch_start(ctx); if (ret) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index f2cd2034a87b..9023717c5188 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -394,7 +394,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) { struct inode *inode = rac->mapping->host; loff_t pos = readahead_pos(rac); - loff_t length = readahead_length(rac); + size_t length = readahead_length(rac); struct iomap_readpage_ctx ctx = { .rac = rac, }; @@ -402,7 +402,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) trace_iomap_readahead(inode, readahead_count(rac)); while (length > 0) { - loff_t ret = iomap_apply(inode, pos, length, 0, ops, + ssize_t ret = iomap_apply(inode, pos, length, 0, ops, &ctx, iomap_readahead_actor); if (ret <= 0) { WARN_ON_ONCE(ret == 0); diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 7b1128398976..89d492916dea 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -211,11 +211,11 @@ failure: * If the skip factor is limited in this way then the file will use multiple * slots. */ -static inline int calculate_skip(int blocks) +static inline int calculate_skip(u64 blocks) { - int skip = blocks / ((SQUASHFS_META_ENTRIES + 1) + u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1) * SQUASHFS_META_INDEXES); - return min(SQUASHFS_CACHED_BLKS - 1, skip + 1); + return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1255823b2bc0..f69c75bd6d27 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -676,11 +676,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -static inline bool blk_account_rq(struct request *rq) -{ - return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); -} - #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 153734816b49..d5b9c8d40c18 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -101,6 +101,7 @@ struct vc_data { unsigned int vc_rows; unsigned int vc_size_row; /* Bytes per row */ unsigned int vc_scan_lines; /* # of scan lines */ + unsigned int vc_cell_height; /* CRTC character cell height */ unsigned long vc_origin; /* [!] Start of real screen */ unsigned long vc_scr_end; /* [!] End of real screen */ unsigned long vc_visible_origin; /* [!] Top of visible window */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 1fe8e105b83b..dcb2f9022c1d 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -34,7 +34,7 @@ struct elevator_mq_ops { void (*depth_updated)(struct blk_mq_hw_ctx *); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); - bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int); + bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int); int (*request_merge)(struct request_queue *q, struct request **, struct bio *); void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index ed4e67a7ff1c..59828516ebaf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -187,5 +187,6 @@ extern u32 fw_devlink_get_flags(void); extern bool fw_devlink_is_strict(void); int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); void fwnode_links_purge(struct fwnode_handle *fwnode); +void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode); #endif diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 01f251b6e36c..89b69e645ac7 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -141,7 +141,6 @@ static inline void __iomem *devm_nvdimm_ioremap(struct device *dev, struct nvdimm_bus; struct module; -struct device; struct nd_blk_region; struct nd_blk_region_desc { int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); diff --git a/include/linux/mm.h b/include/linux/mm.h index 322ec61d0da7..c274f75efcf9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3216,5 +3216,37 @@ void mem_dump_obj(void *object); static inline void mem_dump_obj(void *object) {} #endif +/** + * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it + * @seals: the seals to check + * @vma: the vma to operate on + * + * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on + * the vma flags. Return 0 if check pass, or <0 for errors. + */ +static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) +{ + if (seals & F_SEAL_FUTURE_WRITE) { + /* + * New PROT_WRITE and MAP_SHARED mmaps are not allowed when + * "future write" seal active. + */ + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) + return -EPERM; + + /* + * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as + * MAP_SHARED and read-only, take care to not allow mprotect to + * revert protections on such mappings. Do this only for shared + * mappings. For private mappings, don't need to mask + * VM_MAYWRITE as we still want them to be COW-writable. + */ + if (vma->vm_flags & VM_SHARED) + vma->vm_flags &= ~(VM_MAYWRITE); + } + + return 0; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6613b26a8894..5aacc1c10a45 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -97,10 +97,10 @@ struct page { }; struct { /* page_pool used by netstack */ /** - * @dma_addr: might require a 64-bit value even on + * @dma_addr: might require a 64-bit value on * 32-bit architectures. */ - dma_addr_t dma_addr; + unsigned long dma_addr[2]; }; struct { /* slab, slob and slub */ union { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a4bd41128bf3..e89df447fae3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -997,9 +997,9 @@ static inline loff_t readahead_pos(struct readahead_control *rac) * readahead_length - The number of bytes in this readahead request. * @rac: The readahead request. */ -static inline loff_t readahead_length(struct readahead_control *rac) +static inline size_t readahead_length(struct readahead_control *rac) { - return (loff_t)rac->_nr_pages * PAGE_SIZE; + return rac->_nr_pages * PAGE_SIZE; } /** @@ -1024,7 +1024,7 @@ static inline unsigned int readahead_count(struct readahead_control *rac) * readahead_batch_length - The number of bytes in the current batch. * @rac: The readahead request. */ -static inline loff_t readahead_batch_length(struct readahead_control *rac) +static inline size_t readahead_batch_length(struct readahead_control *rac) { return rac->_batch_count * PAGE_SIZE; } diff --git a/include/linux/pm.h b/include/linux/pm.h index c9657408fee1..1d8209c09686 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -601,6 +601,7 @@ struct dev_pm_info { unsigned int idle_notification:1; unsigned int request_pending:1; unsigned int deferred_resume:1; + unsigned int needs_force_resume:1; unsigned int runtime_auto:1; bool ignore_children:1; unsigned int no_callbacks:1; diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index fd80fab663a9..bebc911161b6 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -38,7 +38,7 @@ void *__builtin_alloca(size_t size); u32 offset = raw_cpu_read(kstack_offset); \ u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ /* Keep allocation even after "ptr" loses scope. */ \ - asm volatile("" : "=o"(*ptr) :: "memory"); \ + asm volatile("" :: "r"(ptr) : "memory"); \ } \ } while (0) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 6d517a37c18b..b4b6de909c93 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { - return page->dma_addr; + dma_addr_t ret = page->dma_addr[0]; + if (sizeof(dma_addr_t) > sizeof(unsigned long)) + ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; + return ret; +} + +static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +{ + page->dma_addr[0] = addr; + if (sizeof(dma_addr_t) > sizeof(unsigned long)) + page->dma_addr[1] = upper_32_bits(addr); } static inline bool is_page_pool_compiled_in(void) diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index f44eb0a04afd..4c32e97dcdf0 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -185,7 +185,7 @@ struct fsxattr { #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) /* - * A jump here: 130-131 are reserved for zoned block devices + * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9febe1412f7a..330835f1005b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1086,6 +1086,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_ENFORCE_CPUID 199 #define KVM_CAP_SREGS2 200 #define KVM_CAP_EXIT_HYPERCALL 201 +#define KVM_CAP_PPC_RPT_INVALIDATE 202 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/xen/arm/swiotlb-xen.h b/include/xen/arm/swiotlb-xen.h index 2994fe6031a0..33336ab58afc 100644 --- a/include/xen/arm/swiotlb-xen.h +++ b/include/xen/arm/swiotlb-xen.h @@ -2,6 +2,19 @@ #ifndef _ASM_ARM_SWIOTLB_XEN_H #define _ASM_ARM_SWIOTLB_XEN_H -extern int xen_swiotlb_detect(void); +#include <xen/features.h> +#include <xen/xen.h> + +static inline int xen_swiotlb_detect(void) +{ + if (!xen_domain()) + return 0; + if (xen_feature(XENFEAT_direct_mapped)) + return 1; + /* legacy case */ + if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain()) + return 1; + return 0; +} #endif /* _ASM_ARM_SWIOTLB_XEN_H */ diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 76f09456ec4b..2997ca600d18 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -170,6 +170,21 @@ void __ptrace_unlink(struct task_struct *child) spin_unlock(&child->sighand->siglock); } +static bool looks_like_a_spurious_pid(struct task_struct *task) +{ + if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP)) + return false; + + if (task_pid_vnr(task) == task->ptrace_message) + return false; + /* + * The tracee changed its pid but the PTRACE_EVENT_EXEC event + * was not wait()'ed, most probably debugger targets the old + * leader which was destroyed in de_thread(). + */ + return true; +} + /* Ensure that nothing can wake it up, even SIGKILL */ static bool ptrace_freeze_traced(struct task_struct *task) { @@ -180,7 +195,8 @@ static bool ptrace_freeze_traced(struct task_struct *task) return ret; spin_lock_irq(&task->sighand->siglock); - if (task_is_traced(task) && !__fatal_signal_pending(task)) { + if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && + !__fatal_signal_pending(task)) { task->state = __TASK_TRACED; ret = true; } diff --git a/kernel/resource.c b/kernel/resource.c index 028a5ab18818..ca9f5198a01f 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1805,7 +1805,7 @@ static struct resource *__request_free_mem_region(struct device *dev, REGION_DISJOINT) continue; - if (!__request_region_locked(res, &iomem_resource, addr, size, + if (__request_region_locked(res, &iomem_resource, addr, size, name, 0)) break; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 20aa234ffe04..3248e24a90b0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6217,7 +6217,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool } if (has_idle_core) - set_idle_cores(this, false); + set_idle_cores(target, false); if (sched_feat(SIS_PROP) && !has_idle_core) { time = cpu_clock(this) - time; diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index bea9d08b1698..5897828b9d7e 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -92,7 +92,7 @@ static int alarmtimer_rtc_add_device(struct device *dev, if (rtcdev) return -EBUSY; - if (!rtc->ops->set_alarm) + if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) return -1; if (!device_may_wakeup(rtc->dev.parent)) return -1; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 560e4c8d3825..a21ef9cd2aae 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3704,6 +3704,9 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, goto print; while (*p) { + bool star = false; + int len = 0; + j = 0; /* We only care about %s and variants */ @@ -3725,13 +3728,17 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, /* Need to test cases like %08.*s */ for (j = 1; p[i+j]; j++) { if (isdigit(p[i+j]) || - p[i+j] == '*' || p[i+j] == '.') continue; + if (p[i+j] == '*') { + star = true; + continue; + } break; } if (p[i+j] == 's') break; + star = false; } j = 0; } @@ -3744,6 +3751,9 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, iter->fmt[i] = '\0'; trace_seq_vprintf(&iter->seq, iter->fmt, ap); + if (star) + len = va_arg(ap, int); + /* The ap now points to the string data of the %s */ str = va_arg(ap, const char *); @@ -3762,8 +3772,18 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, int ret; /* Try to safely read the string */ - ret = strncpy_from_kernel_nofault(iter->fmt, str, - iter->fmt_size); + if (star) { + if (len + 1 > iter->fmt_size) + len = iter->fmt_size - 1; + if (len < 0) + len = 0; + ret = copy_from_kernel_nofault(iter->fmt, str, len); + iter->fmt[len] = 0; + star = false; + } else { + ret = strncpy_from_kernel_nofault(iter->fmt, str, + iter->fmt_size); + } if (ret < 0) trace_seq_printf(&iter->seq, "(0x%px)", str); else @@ -3775,7 +3795,10 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, strncpy(iter->fmt, p + i, j + 1); iter->fmt[j+1] = '\0'; } - trace_seq_printf(&iter->seq, iter->fmt, str); + if (star) + trace_seq_printf(&iter->seq, iter->fmt, len, str); + else + trace_seq_printf(&iter->seq, iter->fmt, str); p += i + j + 1; } diff --git a/lib/test_kasan.c b/lib/test_kasan.c index dc05cfc2d12f..cacbbbdef768 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -654,8 +654,20 @@ static char global_array[10]; static void kasan_global_oob(struct kunit *test) { - volatile int i = 3; - char *p = &global_array[ARRAY_SIZE(global_array) + i]; + /* + * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS + * from failing here and panicing the kernel, access the array via a + * volatile pointer, which will prevent the compiler from being able to + * determine the array bounds. + * + * This access uses a volatile pointer to char (char *volatile) rather + * than the more conventional pointer to volatile char (volatile char *) + * because we want to prevent the compiler from making inferences about + * the pointer itself (i.e. its array bounds), not the data that it + * refers to. + */ + char *volatile array = global_array; + char *p = &array[ARRAY_SIZE(global_array) + 3]; /* Only generic mode instruments globals. */ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); @@ -703,8 +715,9 @@ static void ksize_uaf(struct kunit *test) static void kasan_stack_oob(struct kunit *test) { char stack_array[10]; - volatile int i = OOB_TAG_OFF; - char *p = &stack_array[ARRAY_SIZE(stack_array) + i]; + /* See comment in kasan_global_oob. */ + char *volatile array = stack_array; + char *p = &array[ARRAY_SIZE(stack_array) + OOB_TAG_OFF]; KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK); @@ -715,7 +728,9 @@ static void kasan_alloca_oob_left(struct kunit *test) { volatile int i = 10; char alloca_array[i]; - char *p = alloca_array - 1; + /* See comment in kasan_global_oob. */ + char *volatile array = alloca_array; + char *p = array - 1; /* Only generic mode instruments dynamic allocas. */ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); @@ -728,7 +743,9 @@ static void kasan_alloca_oob_right(struct kunit *test) { volatile int i = 10; char alloca_array[i]; - char *p = alloca_array + i; + /* See comment in kasan_global_oob. */ + char *volatile array = alloca_array; + char *p = array + i; /* Only generic mode instruments dynamic allocas. */ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3db405dea3dc..95918f410c0f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4056,6 +4056,7 @@ again: * See Documentation/vm/mmu_notifier.rst */ huge_ptep_set_wrprotect(src, addr, src_pte); + entry = huge_pte_wrprotect(entry); } page_dup_rmap(ptepage, true); diff --git a/mm/ioremap.c b/mm/ioremap.c index d1dcc7e744ac..8ee0136f8cb0 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -16,16 +16,16 @@ #include "pgalloc-track.h" #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT; +static unsigned int __ro_after_init iomap_max_page_shift = BITS_PER_LONG - 1; static int __init set_nohugeiomap(char *str) { - iomap_max_page_shift = P4D_SHIFT; + iomap_max_page_shift = PAGE_SHIFT; return 0; } early_param("nohugeiomap", set_nohugeiomap); #else /* CONFIG_HAVE_ARCH_HUGE_VMAP */ -static const bool iomap_max_page_shift = PAGE_SHIFT; +static const unsigned int iomap_max_page_shift = PAGE_SHIFT; #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ int ioremap_page_range(unsigned long addr, @@ -776,11 +776,12 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item) struct page *page; stable_node = rmap_item->head; - page = get_ksm_page(stable_node, GET_KSM_PAGE_NOLOCK); + page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK); if (!page) goto out; hlist_del(&rmap_item->hlist); + unlock_page(page); put_page(page); if (!hlist_empty(&stable_node->hlist)) diff --git a/mm/shmem.c b/mm/shmem.c index a08cedefbfaa..5d46611cba8d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2258,25 +2258,11 @@ out_nomem: static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { struct shmem_inode_info *info = SHMEM_I(file_inode(file)); + int ret; - if (info->seals & F_SEAL_FUTURE_WRITE) { - /* - * New PROT_WRITE and MAP_SHARED mmaps are not allowed when - * "future write" seal active. - */ - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) - return -EPERM; - - /* - * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as - * MAP_SHARED and read-only, take care to not allow mprotect to - * revert protections on such mappings. Do this only for shared - * mappings. For private mappings, don't need to mask - * VM_MAYWRITE as we still want them to be COW-writable. - */ - if (vma->vm_flags & VM_SHARED) - vma->vm_flags &= ~(VM_MAYWRITE); - } + ret = seal_check_future_write(info->seals, vma); + if (ret) + return ret; /* arm64 - allow memory tagging on RAM-based files */ vma->vm_flags |= VM_MTE_ALLOWED; @@ -2375,8 +2361,18 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, pgoff_t offset, max_off; ret = -ENOMEM; - if (!shmem_inode_acct_block(inode, 1)) + if (!shmem_inode_acct_block(inode, 1)) { + /* + * We may have got a page, returned -ENOENT triggering a retry, + * and now we find ourselves with -ENOMEM. Release the page, to + * avoid a BUG_ON in our caller. + */ + if (unlikely(*pagep)) { + put_page(*pagep); + *pagep = NULL; + } goto out; + } if (!*pagep) { page = shmem_alloc_page(gfp, info, pgoff); diff --git a/mm/slab_common.c b/mm/slab_common.c index f8833d3e5d47..a4a571428c51 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -318,6 +318,16 @@ kmem_cache_create_usercopy(const char *name, const char *cache_name; int err; +#ifdef CONFIG_SLUB_DEBUG + /* + * If no slub_debug was enabled globally, the static key is not yet + * enabled by setup_slub_debug(). Enable it if the cache is being + * created with any of the debugging flags passed explicitly. + */ + if (flags & SLAB_DEBUG_FLAGS) + static_branch_enable(&slub_debug_enabled); +#endif + mutex_lock(&slab_mutex); err = kmem_cache_sanity_check(name, size); diff --git a/mm/slub.c b/mm/slub.c index feda53ae62ba..438fa8d4c970 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3828,15 +3828,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) { -#ifdef CONFIG_SLUB_DEBUG - /* - * If no slub_debug was enabled globally, the static key is not yet - * enabled by setup_slub_debug(). Enable it if the cache is being - * created with any of the debugging flags passed explicitly. - */ - if (flags & SLAB_DEBUG_FLAGS) - static_branch_enable(&slub_debug_enabled); -#endif s->flags = kmem_cache_flags(s->size, flags, s->name); #ifdef CONFIG_SLAB_FREELIST_HARDENED s->random = get_random_long(); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 9ec1aa9640ad..3c4c4c7a0402 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool, struct page *page, unsigned int dma_sync_size) { + dma_addr_t dma_addr = page_pool_get_dma_addr(page); + dma_sync_size = min(dma_sync_size, pool->p.max_len); - dma_sync_single_range_for_device(pool->p.dev, page->dma_addr, + dma_sync_single_range_for_device(pool->p.dev, dma_addr, pool->p.offset, dma_sync_size, pool->p.dma_dir); } @@ -195,7 +197,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) if (dma_mapping_error(pool->p.dev, dma)) return false; - page->dma_addr = dma; + page_pool_set_dma_addr(page, dma); if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) page_pool_dma_sync_for_device(pool, page, pool->p.max_len); @@ -331,13 +333,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page) */ goto skip_dma_unmap; - dma = page->dma_addr; + dma = page_pool_get_dma_addr(page); - /* When page is unmapped, it cannot be returned our pool */ + /* When page is unmapped, it cannot be returned to our pool */ dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC); - page->dma_addr = 0; + page_pool_set_dma_addr(page, 0); skip_dma_unmap: /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. diff --git a/security/keys/trusted-keys/trusted_tpm1.c b/security/keys/trusted-keys/trusted_tpm1.c index 469394550801..aa108bea6739 100644 --- a/security/keys/trusted-keys/trusted_tpm1.c +++ b/security/keys/trusted-keys/trusted_tpm1.c @@ -493,10 +493,12 @@ static int tpm_seal(struct tpm_buf *tb, uint16_t keytype, ret = tpm_get_random(chip, td->nonceodd, TPM_NONCE_SIZE); if (ret < 0) - return ret; + goto out; - if (ret != TPM_NONCE_SIZE) - return -EIO; + if (ret != TPM_NONCE_SIZE) { + ret = -EIO; + goto out; + } ordinal = htonl(TPM_ORD_SEAL); datsize = htonl(datalen); diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c index 617fabd4d913..0165da386289 100644 --- a/security/keys/trusted-keys/trusted_tpm2.c +++ b/security/keys/trusted-keys/trusted_tpm2.c @@ -336,9 +336,9 @@ out: rc = -EPERM; } if (blob_len < 0) - return blob_len; - - payload->blob_len = blob_len; + rc = blob_len; + else + payload->blob_len = blob_len; tpm_put_ops(chip); return rc; diff --git a/tools/arch/powerpc/include/uapi/asm/errno.h b/tools/arch/powerpc/include/uapi/asm/errno.h index cc79856896a1..4ba87de32be0 100644 --- a/tools/arch/powerpc/include/uapi/asm/errno.h +++ b/tools/arch/powerpc/include/uapi/asm/errno.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_ERRNO_H #define _ASM_POWERPC_ERRNO_H +#undef EDEADLOCK #include <asm-generic/errno.h> #undef EDEADLOCK diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index cc96e26d69f7..ac37830ae941 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -84,7 +84,7 @@ /* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ @@ -236,6 +236,8 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -290,6 +292,8 @@ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -336,6 +340,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -354,6 +359,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ @@ -374,6 +380,7 @@ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 45029354e0a8..211ba3375ee9 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -185,6 +185,9 @@ #define MSR_PEBS_DATA_CFG 0x000003f2 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define PERF_CAP_METRICS_IDX 15 +#define PERF_CAP_PT_IDX 16 + #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 #define MSR_IA32_RTIT_CTL 0x00000570 @@ -265,6 +268,7 @@ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) #define DEBUGCTLMSR_BTINT (1UL << 8) @@ -533,9 +537,9 @@ /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index b8e650a985e3..946d761adbd3 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -27,6 +27,7 @@ #define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 +#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE 0x08000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 1e299ac73c86..1cc9da6e29c7 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -4,7 +4,7 @@ #include <linux/linkage.h> #include <asm/errno.h> #include <asm/cpufeatures.h> -#include <asm/alternative-asm.h> +#include <asm/alternative.h> #include <asm/export.h> .pushsection .noinstr.text, "ax" diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 0bfd26e4ca9e..9827ae267f96 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,7 +3,7 @@ #include <linux/linkage.h> #include <asm/cpufeatures.h> -#include <asm/alternative-asm.h> +#include <asm/alternative.h> #include <asm/export.h> /* diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative.h index b54bd860dff6..b54bd860dff6 100644 --- a/tools/include/asm/alternative-asm.h +++ b/tools/include/asm/alternative.h diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index ce58cff99b66..6de5a7fc066b 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -863,9 +863,18 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) +#define __NR_quotactl_path 443 +__SYSCALL(__NR_quotactl_path, sys_quotactl_path) + +#define __NR_landlock_create_ruleset 444 +__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) +#define __NR_landlock_add_rule 445 +__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) +#define __NR_landlock_restrict_self 446 +__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #undef __NR_syscalls -#define __NR_syscalls 443 +#define __NR_syscalls 447 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 0827037c5484..67b94bc3c885 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -625,30 +625,147 @@ struct drm_gem_open { __u64 size; }; +/** + * DRM_CAP_DUMB_BUFFER + * + * If set to 1, the driver supports creating dumb buffers via the + * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. + */ #define DRM_CAP_DUMB_BUFFER 0x1 +/** + * DRM_CAP_VBLANK_HIGH_CRTC + * + * If set to 1, the kernel supports specifying a CRTC index in the high bits of + * &drm_wait_vblank_request.type. + * + * Starting kernel version 2.6.39, this capability is always set to 1. + */ #define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +/** + * DRM_CAP_DUMB_PREFERRED_DEPTH + * + * The preferred bit depth for dumb buffers. + * + * The bit depth is the number of bits used to indicate the color of a single + * pixel excluding any padding. This is different from the number of bits per + * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per + * pixel. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ #define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +/** + * DRM_CAP_DUMB_PREFER_SHADOW + * + * If set to 1, the driver prefers userspace to render to a shadow buffer + * instead of directly rendering to a dumb buffer. For best speed, userspace + * should do streaming ordered memory copies into the dumb buffer and never + * read from it. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ #define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +/** + * DRM_CAP_PRIME + * + * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT + * and &DRM_PRIME_CAP_EXPORT. + * + * PRIME buffers are exposed as dma-buf file descriptors. See + * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing". + */ #define DRM_CAP_PRIME 0x5 +/** + * DRM_PRIME_CAP_IMPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME + * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + */ #define DRM_PRIME_CAP_IMPORT 0x1 +/** + * DRM_PRIME_CAP_EXPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME + * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + */ #define DRM_PRIME_CAP_EXPORT 0x2 +/** + * DRM_CAP_TIMESTAMP_MONOTONIC + * + * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in + * struct drm_event_vblank. If set to 1, the kernel will report timestamps with + * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these + * clocks. + * + * Starting from kernel version 2.6.39, the default value for this capability + * is 1. Starting kernel version 4.15, this capability is always set to 1. + */ #define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +/** + * DRM_CAP_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC. + */ #define DRM_CAP_ASYNC_PAGE_FLIP 0x7 -/* - * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight - * combination for the hardware cursor. The intention is that a hardware - * agnostic userspace can query a cursor plane size to use. +/** + * DRM_CAP_CURSOR_WIDTH + * + * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid + * width x height combination for the hardware cursor. The intention is that a + * hardware agnostic userspace can query a cursor plane size to use. * * Note that the cross-driver contract is to merely return a valid size; * drivers are free to attach another meaning on top, eg. i915 returns the * maximum plane size. */ #define DRM_CAP_CURSOR_WIDTH 0x8 +/** + * DRM_CAP_CURSOR_HEIGHT + * + * See &DRM_CAP_CURSOR_WIDTH. + */ #define DRM_CAP_CURSOR_HEIGHT 0x9 +/** + * DRM_CAP_ADDFB2_MODIFIERS + * + * If set to 1, the driver supports supplying modifiers in the + * &DRM_IOCTL_MODE_ADDFB2 ioctl. + */ #define DRM_CAP_ADDFB2_MODIFIERS 0x10 +/** + * DRM_CAP_PAGE_FLIP_TARGET + * + * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and + * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in + * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP + * ioctl. + */ #define DRM_CAP_PAGE_FLIP_TARGET 0x11 +/** + * DRM_CAP_CRTC_IN_VBLANK_EVENT + * + * If set to 1, the kernel supports reporting the CRTC ID in + * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and + * &DRM_EVENT_FLIP_COMPLETE events. + * + * Starting kernel version 4.12, this capability is always set to 1. + */ #define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +/** + * DRM_CAP_SYNCOBJ + * + * If set to 1, the driver supports sync objects. See + * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + */ #define DRM_CAP_SYNCOBJ 0x13 +/** + * DRM_CAP_SYNCOBJ_TIMELINE + * + * If set to 1, the driver supports timeline operations on sync objects. See + * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 /* DRM_IOCTL_GET_CAP ioctl argument type */ diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 1987e2ea79a3..ddc47bbf48b6 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -943,6 +943,7 @@ struct drm_i915_gem_exec_object { __u64 offset; }; +/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */ struct drm_i915_gem_execbuffer { /** * List of buffers to be validated with their relocations to be diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 26e6d94d64ed..79d9c44d1ad7 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1079,6 +1079,10 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DIRTY_LOG_RING 192 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_SET_GUEST_DEBUG2 195 +#define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 +#define KVM_CAP_PTP_KVM 198 #ifdef KVM_CAP_IRQ_ROUTING @@ -1672,6 +1676,8 @@ enum sev_cmd_id { KVM_SEV_CERT_EXPORT, /* Attestation report */ KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, KVM_SEV_NR_MAX, }; @@ -1730,6 +1736,45 @@ struct kvm_sev_attestation_report { __u32 len; }; +struct kvm_sev_send_start { + __u32 policy; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 14332f4cf816..bf8143505c49 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -127,6 +127,7 @@ enum perf_sw_ids { PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_DUMMY = 9, PERF_COUNT_SW_BPF_OUTPUT = 10, + PERF_COUNT_SW_CGROUP_SWITCHES = 11, PERF_COUNT_SW_MAX, /* non-ABI */ }; @@ -326,6 +327,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -404,7 +406,10 @@ struct perf_event_attr { cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ build_id : 1, /* use build id in mmap2 events */ - __reserved_1 : 29; + inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ + remove_on_exec : 1, /* event is removed from task on exec */ + sigtrap : 1, /* send synchronous SIGTRAP on event */ + __reserved_1 : 26; union { __u32 wakeup_events; /* wakeup every n events */ @@ -456,6 +461,12 @@ struct perf_event_attr { __u16 __reserved_2; __u32 aux_sample_size; __u32 __reserved_3; + + /* + * User provided data if sigtrap=1, passed back to user via + * siginfo_t::si_perf, e.g. to permit user to identify the event. + */ + __u64 sig_data; }; /* @@ -1171,10 +1182,15 @@ enum perf_callchain_context { /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ + +/* CoreSight PMU AUX buffer formats */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 667f1aed091c..18a9f59dc067 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -255,4 +255,8 @@ struct prctl_mm_map { # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 +/* Set/get enabled arm64 pointer authentication keys */ +#define PR_PAC_SET_ENABLED_KEYS 60 +#define PR_PAC_GET_ENABLED_KEYS 61 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index cedf3ede7545..24295d39713b 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -19,6 +19,7 @@ #include <objtool/elf.h> #include <objtool/arch.h> #include <objtool/warn.h> +#include <objtool/endianness.h> #include <arch/elf.h> static int is_x86_64(const struct elf *elf) @@ -725,7 +726,7 @@ static int elf_add_alternative(struct elf *elf, return -1; } - alt->cpuid = cpuid; + alt->cpuid = bswap_if_needed(cpuid); alt->instrlen = orig_len; alt->replacementlen = repl_len; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index d08f5f3670f8..743c2e9d0f56 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -762,6 +762,7 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) data->d_buf = &sym->sym; data->d_size = sizeof(sym->sym); data->d_align = 1; + data->d_type = ELF_T_SYM; sym->idx = symtab->len / sizeof(sym->sym); diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 0d6619064a83..406a9519145e 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -540,6 +540,7 @@ ifndef NO_LIBELF ifdef LIBBPF_DYNAMIC ifeq ($(feature-libbpf), 1) EXTLIBS += -lbpf + $(call detected,CONFIG_LIBBPF_DYNAMIC) else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c index 2303256b7d05..73d18e0ed6f6 100644 --- a/tools/perf/arch/arm64/util/kvm-stat.c +++ b/tools/perf/arch/arm64/util/kvm-stat.c @@ -71,7 +71,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = { .name = "vmexit", .ops = &exit_events, }, - { NULL }, + { NULL, NULL }, }; const char * const kvm_skip_events[] = { diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 91649690b52f..9974f5f8e49b 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -356,3 +356,8 @@ 439 n64 faccessat2 sys_faccessat2 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 +442 n64 mount_setattr sys_mount_setattr +443 n64 quotactl_path sys_quotactl_path +444 n64 landlock_create_ruleset sys_landlock_create_ruleset +445 n64 landlock_add_rule sys_landlock_add_rule +446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 0b2480cf3e47..2e68fbb57cc6 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -522,3 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr +443 common quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 3abef2144dac..7e4a2aba366d 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -445,3 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr +443 common quotactl_path sys_quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 7bf01cbe582f..ecd551b08d05 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,6 +364,10 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr +443 common quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index ed4f0bd72e5a..7422b0ea8790 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1123,8 +1123,10 @@ static int process_one_file(const char *fpath, const struct stat *sb, mapfile = strdup(fpath); return 0; } - - pr_info("%s: Ignoring file %s\n", prog, fpath); + if (is_json_file(bname)) + pr_debug("%s: ArchStd json is preprocessed %s\n", prog, fpath); + else + pr_info("%s: Ignoring file %s\n", prog, fpath); return 0; } diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 645009c08b3c..4a7b8deef3fd 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=120 +size=128 config=0 sample_period=* sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index b0f42c34882e..408164456530 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=120 +size=128 config=0 sample_period=0 sample_type=65536 diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy index eba723cc0d38..86a15dd359d9 100644 --- a/tools/perf/tests/attr/system-wide-dummy +++ b/tools/perf/tests/attr/system-wide-dummy @@ -7,7 +7,7 @@ cpu=* pid=-1 flags=8 type=1 -size=120 +size=128 config=9 sample_period=4000 sample_type=455 diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8c0d9f368ebc..b64bdc1a7026 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -145,7 +145,14 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o perf-$(CONFIG_LIBELF) += probe-event.o +ifdef CONFIG_LIBBPF_DYNAMIC + hashmap := 1 +endif ifndef CONFIG_LIBBPF + hashmap := 1 +endif + +ifdef hashmap perf-y += hashmap.o endif diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index f99852d54b14..43e5b563dee8 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -157,9 +157,15 @@ static int get_max_rate(unsigned int *rate) static int record_opts__config_freq(struct record_opts *opts) { bool user_freq = opts->user_freq != UINT_MAX; + bool user_interval = opts->user_interval != ULLONG_MAX; unsigned int max_rate; - if (opts->user_interval != ULLONG_MAX) + if (user_interval && user_freq) { + pr_err("cannot set frequency and period at the same time\n"); + return -1; + } + + if (user_interval) opts->default_interval = opts->user_interval; if (user_freq) opts->freq = opts->user_freq; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a12cf4f0e97a..106b3d60881a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -904,7 +904,7 @@ static void perf_event__cpu_map_swap(union perf_event *event, struct perf_record_record_cpu_map *mask; unsigned i; - data->type = bswap_64(data->type); + data->type = bswap_16(data->type); switch (data->type) { case PERF_CPU_MAP__CPUS: @@ -937,7 +937,7 @@ static void perf_event__stat_config_swap(union perf_event *event, { u64 size; - size = event->stat_config.nr * sizeof(event->stat_config.data[0]); + size = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]); size += 1; /* nr item itself */ mem_bswap_64(&event->stat_config.nr, size); } diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index c62d372d426f..ed563bdd88f3 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -62,7 +62,7 @@ struct nfit_test_resource *get_nfit_res(resource_size_t resource) } EXPORT_SYMBOL(get_nfit_res); -void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, +static void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, void __iomem *(*fallback_fn)(resource_size_t, unsigned long)) { struct nfit_test_resource *nfit_res = get_nfit_res(offset); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 9b185bf82da8..54f367cbadae 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1871,9 +1871,16 @@ static void smart_init(struct nfit_test *t) } } +static size_t sizeof_spa(struct acpi_nfit_system_address *spa) +{ + /* until spa location cookie support is added... */ + return sizeof(*spa) - 8; +} + static int nfit_test0_alloc(struct nfit_test *t) { - size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA + struct acpi_nfit_system_address *spa = NULL; + size_t nfit_size = sizeof_spa(spa) * NUM_SPA + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + sizeof(struct acpi_nfit_control_region) * NUM_DCR + offsetof(struct acpi_nfit_control_region, @@ -1937,7 +1944,8 @@ static int nfit_test0_alloc(struct nfit_test *t) static int nfit_test1_alloc(struct nfit_test *t) { - size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2 + struct acpi_nfit_system_address *spa = NULL; + size_t nfit_size = sizeof_spa(spa) * 2 + sizeof(struct acpi_nfit_memory_map) * 2 + offsetof(struct acpi_nfit_control_region, window_size) * 2; int i; @@ -2000,7 +2008,7 @@ static void nfit_test0_setup(struct nfit_test *t) */ spa = nfit_buf; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; @@ -2014,7 +2022,7 @@ static void nfit_test0_setup(struct nfit_test *t) */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 1+1; spa->address = t->spa_set_dma[1]; @@ -2024,7 +2032,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa2 (dcr0) dimm0 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 2+1; spa->address = t->dcr_dma[0]; @@ -2034,7 +2042,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa3 (dcr1) dimm1 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 3+1; spa->address = t->dcr_dma[1]; @@ -2044,7 +2052,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa4 (dcr2) dimm2 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 4+1; spa->address = t->dcr_dma[2]; @@ -2054,7 +2062,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa5 (dcr3) dimm3 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 5+1; spa->address = t->dcr_dma[3]; @@ -2064,7 +2072,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa6 (bdw for dcr0) dimm0 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 6+1; spa->address = t->dimm_dma[0]; @@ -2074,7 +2082,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa7 (bdw for dcr1) dimm1 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 7+1; spa->address = t->dimm_dma[1]; @@ -2084,7 +2092,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa8 (bdw for dcr2) dimm2 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 8+1; spa->address = t->dimm_dma[2]; @@ -2094,7 +2102,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa9 (bdw for dcr3) dimm3 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 9+1; spa->address = t->dimm_dma[3]; @@ -2581,7 +2589,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa10 (dcr4) dimm4 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 10+1; spa->address = t->dcr_dma[4]; @@ -2595,7 +2603,7 @@ static void nfit_test0_setup(struct nfit_test *t) */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 11+1; spa->address = t->spa_set_dma[2]; @@ -2605,7 +2613,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa12 (bdw for dcr4) dimm4 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 12+1; spa->address = t->dimm_dma[4]; @@ -2739,7 +2747,7 @@ static void nfit_test1_setup(struct nfit_test *t) /* spa0 (flat range with no bdw aliasing) */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; @@ -2749,7 +2757,7 @@ static void nfit_test1_setup(struct nfit_test *t) /* virtual cd region */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16); spa->range_index = 0; spa->address = t->spa_set_dma[1]; diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c index 656b04976ccc..67b77ab83c20 100644 --- a/tools/testing/selftests/arm64/bti/test.c +++ b/tools/testing/selftests/arm64/bti/test.c @@ -6,6 +6,7 @@ #include "system.h" +#include <stddef.h> #include <linux/errno.h> #include <linux/auxvec.h> #include <linux/signal.h> |