Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/Kconfig | 60
-rw-r--r--  arch/powerpc/configs/40x/klondike_defconfig | 2
-rw-r--r--  arch/powerpc/configs/44x/fsp2_defconfig | 2
-rw-r--r--  arch/powerpc/configs/44x/sam440ep_defconfig | 2
-rw-r--r--  arch/powerpc/configs/52xx/tqm5200_defconfig | 2
-rw-r--r--  arch/powerpc/configs/85xx/stx_gp3_defconfig | 2
-rw-r--r--  arch/powerpc/configs/cell_defconfig | 2
-rw-r--r--  arch/powerpc/configs/ep8248e_defconfig | 2
-rw-r--r--  arch/powerpc/configs/mgcoge_defconfig | 4
-rw-r--r--  arch/powerpc/configs/microwatt_defconfig | 2
-rw-r--r--  arch/powerpc/configs/pasemi_defconfig | 2
-rw-r--r--  arch/powerpc/configs/pmac32_defconfig | 2
-rw-r--r--  arch/powerpc/configs/powernv_defconfig | 2
-rw-r--r--  arch/powerpc/configs/ppc64_defconfig | 2
-rw-r--r--  arch/powerpc/configs/ppc64e_defconfig | 2
-rw-r--r--  arch/powerpc/configs/ppc6xx_defconfig | 2
-rw-r--r--  arch/powerpc/configs/ps3_defconfig | 4
-rw-r--r--  arch/powerpc/crypto/.gitignore | 3
-rw-r--r--  arch/powerpc/crypto/Kconfig | 26
-rw-r--r--  arch/powerpc/crypto/Makefile | 4
-rw-r--r--  arch/powerpc/crypto/chacha-p10-glue.c | 221
-rw-r--r--  arch/powerpc/crypto/chacha-p10le-8x.S | 842
-rw-r--r--  arch/powerpc/crypto/poly1305-p10-glue.c | 186
-rw-r--r--  arch/powerpc/crypto/poly1305-p10le_64.S | 1075
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgtable.h | 5
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash.h | 9
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 161
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix.h | 49
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush-radix.h | 2
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush.h | 9
-rw-r--r--  arch/powerpc/include/asm/book3s/pgtable.h | 11
-rw-r--r--  arch/powerpc/include/asm/bug.h | 69
-rw-r--r--  arch/powerpc/include/asm/cacheflush.h | 14
-rw-r--r--  arch/powerpc/include/asm/elf.h | 6
-rw-r--r--  arch/powerpc/include/asm/io.h | 17
-rw-r--r--  arch/powerpc/include/asm/irq.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 10
-rw-r--r--  arch/powerpc/include/asm/nohash/pgtable.h | 16
-rw-r--r--  arch/powerpc/include/asm/pgalloc.h | 4
-rw-r--r--  arch/powerpc/include/asm/pgtable.h | 39
-rw-r--r--  arch/powerpc/include/asm/processor.h | 3
-rw-r--r--  arch/powerpc/include/asm/thread_info.h | 6
-rw-r--r--  arch/powerpc/include/asm/word-at-a-time.h | 2
-rw-r--r--  arch/powerpc/kernel/dma-mask.c | 1
-rw-r--r--  arch/powerpc/kernel/head_64.S | 3
-rw-r--r--  arch/powerpc/kernel/ima_arch.c | 8
-rw-r--r--  arch/powerpc/kernel/rtas_flash.c | 6
-rw-r--r--  arch/powerpc/kernel/stacktrace.c | 4
-rw-r--r--  arch/powerpc/kernel/syscalls/syscall.tbl | 1
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_mprofile.S | 9
-rw-r--r--  arch/powerpc/kernel/traps.c | 9
-rw-r--r--  arch/powerpc/kernel/watchdog.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv_uvmem.c | 1
-rw-r--r--  arch/powerpc/mm/book3s64/hash_pgtable.c | 2
-rw-r--r--  arch/powerpc/mm/book3s64/hash_utils.c | 11
-rw-r--r--  arch/powerpc/mm/book3s64/mmu_context.c | 10
-rw-r--r--  arch/powerpc/mm/book3s64/pgtable.c | 110
-rw-r--r--  arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 1
-rw-r--r--  arch/powerpc/mm/book3s64/radix_pgtable.c | 574
-rw-r--r--  arch/powerpc/mm/book3s64/radix_tlb.c | 11
-rw-r--r--  arch/powerpc/mm/book3s64/subpage_prot.c | 1
-rw-r--r--  arch/powerpc/mm/cacheflush.c | 41
-rw-r--r--  arch/powerpc/mm/fault.c | 7
-rw-r--r--  arch/powerpc/mm/init_64.c | 40
-rw-r--r--  arch/powerpc/mm/ioremap.c | 26
-rw-r--r--  arch/powerpc/mm/ioremap_32.c | 19
-rw-r--r--  arch/powerpc/mm/ioremap_64.c | 12
-rw-r--r--  arch/powerpc/mm/kasan/Makefile | 1
-rw-r--r--  arch/powerpc/mm/nohash/e500_hugetlbpage.c | 3
-rw-r--r--  arch/powerpc/mm/pgtable-frag.c | 73
-rw-r--r--  arch/powerpc/mm/pgtable.c | 61
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_lpbfifo.c | 6
-rw-r--r--  arch/powerpc/platforms/85xx/smp.c | 2
-rw-r--r--  arch/powerpc/platforms/8xx/adder875.c | 1
-rw-r--r--  arch/powerpc/platforms/8xx/mpc885ads_setup.c | 1
-rw-r--r--  arch/powerpc/platforms/8xx/tqm8xx_setup.c | 1
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 1
-rw-r--r--  arch/powerpc/platforms/cell/spufs/inode.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/time.c | 6
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/vas.c | 9
-rw-r--r--  arch/powerpc/sysdev/fsl_soc.c | 3
-rw-r--r--  arch/powerpc/xmon/xmon.c | 2
83 files changed, 3533 insertions, 439 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0b1172cbeccb..21edd664689e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -157,6 +157,7 @@ config PPC
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
+ select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if PPC_RADIX_MMU
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
@@ -174,6 +175,7 @@ config PPC
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if PPC_RADIX_MMU
select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if PPC_BOOK3S_32 || PPC_8xx
select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF
@@ -193,6 +195,7 @@ config PPC
select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC
select GENERIC_EARLY_IOREMAP
select GENERIC_GETTIMEOFDAY
+ select GENERIC_IOREMAP
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
select GENERIC_PCI_IOMAP if PCI
@@ -589,41 +592,21 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
default "y" if PPC_POWERNV
select ARCH_SUPPORTS_MEMORY_FAILURE
-config KEXEC
- bool "kexec system call"
- depends on PPC_BOOK3S || PPC_E500 || (44x && !SMP)
- select KEXEC_CORE
- help
- kexec is a system call that implements the ability to shutdown your
- current kernel, and to start another kernel. It is like a reboot
- but it is independent of the system firmware. And like a reboot
- you can start any kernel with it, not just Linux.
-
- The name comes from the similarity to the exec system call.
-
- It is an ongoing process to be certain the hardware in a machine
- is properly shutdown, so do not be surprised if this code does not
- initially work for you. As of this writing the exact hardware
- interface is strongly in flux, so no good recommendation can be
- made.
-
-config KEXEC_FILE
- bool "kexec file based system call"
- select KEXEC_CORE
- select HAVE_IMA_KEXEC if IMA
- select KEXEC_ELF
- depends on PPC64
- depends on CRYPTO=y
- depends on CRYPTO_SHA256=y
- help
- This is a new version of the kexec system call. This call is
- file based and takes in file descriptors as system call arguments
- for kernel and initramfs as opposed to a list of segments as is the
- case for the older kexec call.
+config ARCH_SUPPORTS_KEXEC
+ def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP)
+
+config ARCH_SUPPORTS_KEXEC_FILE
+ def_bool PPC64 && CRYPTO=y && CRYPTO_SHA256=y
-config ARCH_HAS_KEXEC_PURGATORY
+config ARCH_SUPPORTS_KEXEC_PURGATORY
def_bool KEXEC_FILE
+config ARCH_SELECTS_KEXEC_FILE
+ def_bool y
+ depends on KEXEC_FILE
+ select KEXEC_ELF
+ select HAVE_IMA_KEXEC if IMA
+
config PPC64_BIG_ENDIAN_ELF_ABI_V2
# Option is available to BFD, but LLD does not support ELFv1 so this is
# always true there.
@@ -683,14 +666,13 @@ config RELOCATABLE_TEST
loaded at, which tends to be non-zero and therefore test the
relocation code.
-config CRASH_DUMP
- bool "Build a dump capture kernel"
- depends on PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP)
+config ARCH_SUPPORTS_CRASH_DUMP
+ def_bool PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP)
+
+config ARCH_SELECTS_CRASH_DUMP
+ def_bool y
+ depends on CRASH_DUMP
select RELOCATABLE if PPC64 || 44x || PPC_85xx
- help
- Build a kernel suitable for use as a dump capture kernel.
- The same kernel binary can be used as production kernel and dump
- capture kernel.
config FA_DUMP
bool "Firmware-assisted dump"
diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig
index acafbb8f6808..a974d1e945cc 100644
--- a/arch/powerpc/configs/40x/klondike_defconfig
+++ b/arch/powerpc/configs/40x/klondike_defconfig
@@ -4,7 +4,7 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_SYSFS_DEPRECATED=y
CONFIG_SYSFS_DEPRECATED_V2=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_APM8018X=y
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
index 3fdfbb29b854..5492537f4c6c 100644
--- a/arch/powerpc/configs/44x/fsp2_defconfig
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -15,7 +15,7 @@ CONFIG_BLK_DEV_INITRD=y
# CONFIG_RD_LZ4 is not set
CONFIG_KALLSYMS_ALL=y
CONFIG_BPF_SYSCALL=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
index ef09786d49b9..51499ee6366b 100644
--- a/arch/powerpc/configs/44x/sam440ep_defconfig
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -79,7 +79,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_REISERFS_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig
index e6735b945327..688f703d8e22 100644
--- a/arch/powerpc/configs/52xx/tqm5200_defconfig
+++ b/arch/powerpc/configs/52xx/tqm5200_defconfig
@@ -3,7 +3,7 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_KALLSYMS is not set
# CONFIG_EPOLL is not set
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig
index ecbcc853307d..e7080497048d 100644
--- a/arch/powerpc/configs/85xx/stx_gp3_defconfig
+++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig
@@ -50,7 +50,7 @@ CONFIG_DRM=m
CONFIG_SOUND=m
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=m
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 06391cc2af3a..53f43a34e1a9 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -172,7 +172,7 @@ CONFIG_EDAC_CELL=y
CONFIG_UIO=m
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig
index b9dfa3a0713b..0d8d3f41f194 100644
--- a/arch/powerpc/configs/ep8248e_defconfig
+++ b/arch/powerpc/configs/ep8248e_defconfig
@@ -47,7 +47,7 @@ CONFIG_SERIAL_CPM_CONSOLE=y
# CONFIG_USB_SUPPORT is not set
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig
index ce220ceeb91c..f65001e7877f 100644
--- a/arch/powerpc/configs/mgcoge_defconfig
+++ b/arch/powerpc/configs/mgcoge_defconfig
@@ -9,7 +9,7 @@ CONFIG_BLK_DEV_INITRD=y
# CONFIG_RD_GZIP is not set
CONFIG_KALLSYMS_ALL=y
# CONFIG_PCSPKR_PLATFORM is not set
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_82xx=y
@@ -60,7 +60,7 @@ CONFIG_USB_FSL_USB2=y
CONFIG_USB_G_SERIAL=y
CONFIG_UIO=y
CONFIG_EXT2_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
index 795a127908e7..a64fb1ef8c75 100644
--- a/arch/powerpc/configs/microwatt_defconfig
+++ b/arch/powerpc/configs/microwatt_defconfig
@@ -8,7 +8,7 @@ CONFIG_CGROUPS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 96aa5355911f..61993944db40 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -143,7 +143,7 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=y
CONFIG_MSDOS_FS=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 019163c2571e..05ed58576464 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -254,7 +254,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index e02ab94a09bf..ee84ade7a033 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -270,7 +270,7 @@ CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
CONFIG_FANOTIFY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
CONFIG_ISO9660_FS=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 268fa361a06d..c0f4bbc2c975 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -327,7 +327,7 @@ CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
CONFIG_FS_DAX=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
CONFIG_ISO9660_FS=y
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index 776c32964e12..624c371ffcc3 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -185,7 +185,7 @@ CONFIG_JFS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_POSIX_ACL=y
CONFIG_FS_DAX=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index f21170b8fa11..1034aeabdd6c 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -969,7 +969,7 @@ CONFIG_XFS_POSIX_ACL=y
CONFIG_GFS2_FS=m
CONFIG_FS_DAX=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index ea3ee0719bb2..2b175ddf82f0 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -3,7 +3,7 @@ CONFIG_POSIX_MQUEUE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
# CONFIG_PERF_EVENTS is not set
CONFIG_PROFILING=y
CONFIG_PPC64=y
@@ -129,7 +129,7 @@ CONFIG_EXT2_FS=m
CONFIG_EXT4_FS=y
CONFIG_QUOTA=y
CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
diff --git a/arch/powerpc/crypto/.gitignore b/arch/powerpc/crypto/.gitignore
new file mode 100644
index 000000000000..e1094f08f713
--- /dev/null
+++ b/arch/powerpc/crypto/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+aesp10-ppc.S
+ghashp10-ppc.S
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index ad1872518992..f25024afdda5 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -111,4 +111,30 @@ config CRYPTO_AES_GCM_P10
Support for cryptographic acceleration instructions on Power10 or
later CPU. This module supports stitched acceleration for AES/GCM.
+config CRYPTO_CHACHA20_P10
+	tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (P10 or later)"
+ depends on PPC64 && CPU_LITTLE_ENDIAN
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ help
+ Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
+ stream cipher algorithms
+
+ Architecture: PowerPC64
+ - Power10 or later
+ - Little-endian
+
+config CRYPTO_POLY1305_P10
+ tristate "Hash functions: Poly1305 (P10 or later)"
+ depends on PPC64 && CPU_LITTLE_ENDIAN
+ select CRYPTO_HASH
+ select CRYPTO_LIB_POLY1305_GENERIC
+ help
+ Poly1305 authenticator algorithm (RFC7539)
+
+ Architecture: PowerPC64
+ - Power10 or later
+ - Little-endian
+
endmenu
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 7b4f516abec1..ebdac1b9eb9a 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -14,6 +14,8 @@ obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o
obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
+obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o
+obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o
aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
md5-ppc-y := md5-asm.o md5-glue.o
@@ -23,6 +25,8 @@ sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
+chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o
+poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@
diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c
new file mode 100644
index 000000000000..74fb86b0d209
--- /dev/null
+++ b/arch/powerpc/crypto/chacha-p10-glue.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC P10 (ppc64le) accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright 2023- IBM Corp. All rights reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/sizes.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+asmlinkage void chacha_p10le_8x(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
+
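+/*
+ * The kernel's VSX unit may only be used with preemption disabled, so
+ * vsx_begin()/vsx_end() bracket every call into the assembly routine.
+ */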
+static void vsx_begin(void)
+{
+ preempt_disable();
+ enable_kernel_vsx();
+}
+
+static void vsx_end(void)
+{
+ disable_kernel_vsx();
+ preempt_enable();
+}
+
+static void chacha_p10_do_8x(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
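+	/*
+	 * Feed the largest multiple of 256 bytes to the VSX assembly,
+	 * advance the block counter for what it consumed, and let the
+	 * generic implementation handle any remaining tail.
+	 */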
+ unsigned int l = bytes & ~0x0FF;
+
+ if (l > 0) {
+ chacha_p10le_8x(state, dst, src, l, nrounds);
+ bytes -= l;
+ src += l;
+ dst += l;
+ state[12] += l / CHACHA_BLOCK_SIZE;
+ }
+
+ if (bytes > 0)
+ chacha_crypt_generic(state, dst, src, bytes, nrounds);
+}
+
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ hchacha_block_generic(state, stream, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE ||
+ !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
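+	/*
+	 * Limit each VSX section to SZ_4K so preemption is not kept
+	 * disabled for too long on large requests.
+	 */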
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ vsx_begin();
+ chacha_p10_do_8x(state, dst, src, todo, nrounds);
+ vsx_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static int chacha_p10_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ chacha_init_generic(state, ctx->key, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = rounddown(nbytes, walk.stride);
+
+ if (!crypto_simd_usable()) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ } else {
+ vsx_begin();
+ chacha_p10_do_8x(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
+ vsx_end();
+ }
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int chacha_p10(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_p10_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_p10(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ chacha_init_generic(state, ctx->key, req->iv);
+ hchacha_block_arch(state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_p10_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-p10",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_p10,
+ .decrypt = chacha_p10,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-p10",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_p10,
+ .decrypt = xchacha_p10,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-p10",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_p10,
+ .decrypt = xchacha_p10,
+ }
+};
+
+static int __init chacha_p10_init(void)
+{
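+	/*
+	 * module_cpu_feature_match(PPC_MODULE_FEATURE_P10, ...) below ensures
+	 * this init only runs on Power10 or later, so the static key can be
+	 * enabled unconditionally.
+	 */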
+ static_branch_enable(&have_p10);
+
+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_p10_exit(void)
+{
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init);
+module_exit(chacha_p10_exit);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-p10");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-p10");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-p10");
diff --git a/arch/powerpc/crypto/chacha-p10le-8x.S b/arch/powerpc/crypto/chacha-p10le-8x.S
new file mode 100644
index 000000000000..17bedb66b822
--- /dev/null
+++ b/arch/powerpc/crypto/chacha-p10le-8x.S
@@ -0,0 +1,842 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#
+# Accelerated chacha20 implementation for ppc64le.
+#
+# Copyright 2023- IBM Corp. All rights reserved
+#
+#===================================================================================
+# Written by Danny Tsen <dtsen@us.ibm.com>
+#
+# chacha_p10le_8x(u32 *state, byte *dst, const byte *src,
+# size_t len, int nrounds);
+#
+# do rounds, 8 quarter rounds
+# 1. a += b; d ^= a; d <<<= 16;
+# 2. c += d; b ^= c; b <<<= 12;
+# 3. a += b; d ^= a; d <<<= 8;
+# 4. c += d; b ^= c; b <<<= 7
+#
+# row1 = (row1 + row2), row4 = row1 xor row4, row4 rotate each word by 16
+# row3 = (row3 + row4), row2 = row3 xor row2, row2 rotate each word by 12
+# row1 = (row1 + row2), row4 = row1 xor row4, row4 rotate each word by 8
+# row3 = (row3 + row4), row2 = row3 xor row2, row2 rotate each word by 7
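+#
+# For reference, one scalar C quarter round would look like this
+# (illustrative sketch only, not part of this file):
+#
+#	static inline void chacha_qr(u32 *a, u32 *b, u32 *c, u32 *d)
+#	{
+#		*a += *b; *d ^= *a; *d = rol32(*d, 16);
+#		*c += *d; *b ^= *c; *b = rol32(*b, 12);
+#		*a += *b; *d ^= *a; *d = rol32(*d, 8);
+#		*c += *d; *b ^= *c; *b = rol32(*b, 7);
+#	}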
+#
+# 4 blocks (a b c d)
+#
+# a0 b0 c0 d0
+# a1 b1 c1 d1
+# ...
+# a4 b4 c4 d4
+# ...
+# a8 b8 c8 d8
+# ...
+# a12 b12 c12 d12
+# a13 ...
+# a14 ...
+# a15 b15 c15 d15
+#
+# Column round (v0, v4, v8, v12, v1, v5, v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
+# Diagonal round (v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14)
+#
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+#include <linux/linkage.h>
+
+.machine "any"
+.text
+
+.macro SAVE_GPR GPR OFFSET FRAME
+ std \GPR,\OFFSET(\FRAME)
+.endm
+
+.macro SAVE_VRS VRS OFFSET FRAME
+ li 16, \OFFSET
+ stvx \VRS, 16, \FRAME
+.endm
+
+.macro SAVE_VSX VSX OFFSET FRAME
+ li 16, \OFFSET
+ stxvx \VSX, 16, \FRAME
+.endm
+
+.macro RESTORE_GPR GPR OFFSET FRAME
+ ld \GPR,\OFFSET(\FRAME)
+.endm
+
+.macro RESTORE_VRS VRS OFFSET FRAME
+ li 16, \OFFSET
+ lvx \VRS, 16, \FRAME
+.endm
+
+.macro RESTORE_VSX VSX OFFSET FRAME
+ li 16, \OFFSET
+ lxvx \VSX, 16, \FRAME
+.endm
+
+.macro SAVE_REGS
+ mflr 0
+ std 0, 16(1)
+ stdu 1,-752(1)
+
+ SAVE_GPR 14, 112, 1
+ SAVE_GPR 15, 120, 1
+ SAVE_GPR 16, 128, 1
+ SAVE_GPR 17, 136, 1
+ SAVE_GPR 18, 144, 1
+ SAVE_GPR 19, 152, 1
+ SAVE_GPR 20, 160, 1
+ SAVE_GPR 21, 168, 1
+ SAVE_GPR 22, 176, 1
+ SAVE_GPR 23, 184, 1
+ SAVE_GPR 24, 192, 1
+ SAVE_GPR 25, 200, 1
+ SAVE_GPR 26, 208, 1
+ SAVE_GPR 27, 216, 1
+ SAVE_GPR 28, 224, 1
+ SAVE_GPR 29, 232, 1
+ SAVE_GPR 30, 240, 1
+ SAVE_GPR 31, 248, 1
+
+ addi 9, 1, 256
+ SAVE_VRS 20, 0, 9
+ SAVE_VRS 21, 16, 9
+ SAVE_VRS 22, 32, 9
+ SAVE_VRS 23, 48, 9
+ SAVE_VRS 24, 64, 9
+ SAVE_VRS 25, 80, 9
+ SAVE_VRS 26, 96, 9
+ SAVE_VRS 27, 112, 9
+ SAVE_VRS 28, 128, 9
+ SAVE_VRS 29, 144, 9
+ SAVE_VRS 30, 160, 9
+ SAVE_VRS 31, 176, 9
+
+ SAVE_VSX 14, 192, 9
+ SAVE_VSX 15, 208, 9
+ SAVE_VSX 16, 224, 9
+ SAVE_VSX 17, 240, 9
+ SAVE_VSX 18, 256, 9
+ SAVE_VSX 19, 272, 9
+ SAVE_VSX 20, 288, 9
+ SAVE_VSX 21, 304, 9
+ SAVE_VSX 22, 320, 9
+ SAVE_VSX 23, 336, 9
+ SAVE_VSX 24, 352, 9
+ SAVE_VSX 25, 368, 9
+ SAVE_VSX 26, 384, 9
+ SAVE_VSX 27, 400, 9
+ SAVE_VSX 28, 416, 9
+ SAVE_VSX 29, 432, 9
+ SAVE_VSX 30, 448, 9
+ SAVE_VSX 31, 464, 9
+.endm # SAVE_REGS
+
+.macro RESTORE_REGS
+ addi 9, 1, 256
+ RESTORE_VRS 20, 0, 9
+ RESTORE_VRS 21, 16, 9
+ RESTORE_VRS 22, 32, 9
+ RESTORE_VRS 23, 48, 9
+ RESTORE_VRS 24, 64, 9
+ RESTORE_VRS 25, 80, 9
+ RESTORE_VRS 26, 96, 9
+ RESTORE_VRS 27, 112, 9
+ RESTORE_VRS 28, 128, 9
+ RESTORE_VRS 29, 144, 9
+ RESTORE_VRS 30, 160, 9
+ RESTORE_VRS 31, 176, 9
+
+ RESTORE_VSX 14, 192, 9
+ RESTORE_VSX 15, 208, 9
+ RESTORE_VSX 16, 224, 9
+ RESTORE_VSX 17, 240, 9
+ RESTORE_VSX 18, 256, 9
+ RESTORE_VSX 19, 272, 9
+ RESTORE_VSX 20, 288, 9
+ RESTORE_VSX 21, 304, 9
+ RESTORE_VSX 22, 320, 9
+ RESTORE_VSX 23, 336, 9
+ RESTORE_VSX 24, 352, 9
+ RESTORE_VSX 25, 368, 9
+ RESTORE_VSX 26, 384, 9
+ RESTORE_VSX 27, 400, 9
+ RESTORE_VSX 28, 416, 9
+ RESTORE_VSX 29, 432, 9
+ RESTORE_VSX 30, 448, 9
+ RESTORE_VSX 31, 464, 9
+
+ RESTORE_GPR 14, 112, 1
+ RESTORE_GPR 15, 120, 1
+ RESTORE_GPR 16, 128, 1
+ RESTORE_GPR 17, 136, 1
+ RESTORE_GPR 18, 144, 1
+ RESTORE_GPR 19, 152, 1
+ RESTORE_GPR 20, 160, 1
+ RESTORE_GPR 21, 168, 1
+ RESTORE_GPR 22, 176, 1
+ RESTORE_GPR 23, 184, 1
+ RESTORE_GPR 24, 192, 1
+ RESTORE_GPR 25, 200, 1
+ RESTORE_GPR 26, 208, 1
+ RESTORE_GPR 27, 216, 1
+ RESTORE_GPR 28, 224, 1
+ RESTORE_GPR 29, 232, 1
+ RESTORE_GPR 30, 240, 1
+ RESTORE_GPR 31, 248, 1
+
+ addi 1, 1, 752
+ ld 0, 16(1)
+ mtlr 0
+.endm # RESTORE_REGS
+
+.macro QT_loop_8x
+ # QR(v0, v4, v8, v12, v1, v5, v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
+ xxlor 0, 32+25, 32+25
+ xxlor 32+25, 20, 20
+ vadduwm 0, 0, 4
+ vadduwm 1, 1, 5
+ vadduwm 2, 2, 6
+ vadduwm 3, 3, 7
+ vadduwm 16, 16, 20
+ vadduwm 17, 17, 21
+ vadduwm 18, 18, 22
+ vadduwm 19, 19, 23
+
+ vpermxor 12, 12, 0, 25
+ vpermxor 13, 13, 1, 25
+ vpermxor 14, 14, 2, 25
+ vpermxor 15, 15, 3, 25
+ vpermxor 28, 28, 16, 25
+ vpermxor 29, 29, 17, 25
+ vpermxor 30, 30, 18, 25
+ vpermxor 31, 31, 19, 25
+ xxlor 32+25, 0, 0
+ vadduwm 8, 8, 12
+ vadduwm 9, 9, 13
+ vadduwm 10, 10, 14
+ vadduwm 11, 11, 15
+ vadduwm 24, 24, 28
+ vadduwm 25, 25, 29
+ vadduwm 26, 26, 30
+ vadduwm 27, 27, 31
+ vxor 4, 4, 8
+ vxor 5, 5, 9
+ vxor 6, 6, 10
+ vxor 7, 7, 11
+ vxor 20, 20, 24
+ vxor 21, 21, 25
+ vxor 22, 22, 26
+ vxor 23, 23, 27
+
+ xxlor 0, 32+25, 32+25
+ xxlor 32+25, 21, 21
+ vrlw 4, 4, 25 #
+ vrlw 5, 5, 25
+ vrlw 6, 6, 25
+ vrlw 7, 7, 25
+ vrlw 20, 20, 25 #
+ vrlw 21, 21, 25
+ vrlw 22, 22, 25
+ vrlw 23, 23, 25
+ xxlor 32+25, 0, 0
+ vadduwm 0, 0, 4
+ vadduwm 1, 1, 5
+ vadduwm 2, 2, 6
+ vadduwm 3, 3, 7
+ vadduwm 16, 16, 20
+ vadduwm 17, 17, 21
+ vadduwm 18, 18, 22
+ vadduwm 19, 19, 23
+
+ xxlor 0, 32+25, 32+25
+ xxlor 32+25, 22, 22
+ vpermxor 12, 12, 0, 25
+ vpermxor 13, 13, 1, 25
+ vpermxor 14, 14, 2, 25
+ vpermxor 15, 15, 3, 25
+ vpermxor 28, 28, 16, 25
+ vpermxor 29, 29, 17, 25
+ vpermxor 30, 30, 18, 25
+ vpermxor 31, 31, 19, 25
+ xxlor 32+25, 0, 0
+ vadduwm 8, 8, 12
+ vadduwm 9, 9, 13
+ vadduwm 10, 10, 14
+ vadduwm 11, 11, 15
+ vadduwm 24, 24, 28
+ vadduwm 25, 25, 29
+ vadduwm 26, 26, 30
+ vadduwm 27, 27, 31
+ xxlor 0, 32+28, 32+28
+ xxlor 32+28, 23, 23
+ vxor 4, 4, 8
+ vxor 5, 5, 9
+ vxor 6, 6, 10
+ vxor 7, 7, 11
+ vxor 20, 20, 24
+ vxor 21, 21, 25
+ vxor 22, 22, 26
+ vxor 23, 23, 27
+ vrlw 4, 4, 28 #
+ vrlw 5, 5, 28
+ vrlw 6, 6, 28
+ vrlw 7, 7, 28
+ vrlw 20, 20, 28 #
+ vrlw 21, 21, 28
+ vrlw 22, 22, 28
+ vrlw 23, 23, 28
+ xxlor 32+28, 0, 0
+
+ # QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14)
+ xxlor 0, 32+25, 32+25
+ xxlor 32+25, 20, 20
+ vadduwm 0, 0, 5
+ vadduwm 1, 1, 6
+ vadduwm 2, 2, 7
+ vadduwm 3, 3, 4
+ vadduwm 16, 16, 21
+ vadduwm 17, 17, 22
+ vadduwm 18, 18, 23
+ vadduwm 19, 19, 20
+
+ vpermxor 15, 15, 0, 25
+ vpermxor 12, 12, 1, 25
+ vpermxor 13, 13, 2, 25
+ vpermxor 14, 14, 3, 25
+ vpermxor 31, 31, 16, 25
+ vpermxor 28, 28, 17, 25
+ vpermxor 29, 29, 18, 25
+ vpermxor 30, 30, 19, 25
+
+ xxlor 32+25, 0, 0
+ vadduwm 10, 10, 15
+ vadduwm 11, 11, 12
+ vadduwm 8, 8, 13
+ vadduwm 9, 9, 14
+ vadduwm 26, 26, 31
+ vadduwm 27, 27, 28
+ vadduwm 24, 24, 29
+ vadduwm 25, 25, 30
+ vxor 5, 5, 10
+ vxor 6, 6, 11
+ vxor 7, 7, 8
+ vxor 4, 4, 9
+ vxor 21, 21, 26
+ vxor 22, 22, 27
+ vxor 23, 23, 24
+ vxor 20, 20, 25
+
+ xxlor 0, 32+25, 32+25
+ xxlor 32+25, 21, 21
+ vrlw 5, 5, 25
+ vrlw 6, 6, 25
+ vrlw 7, 7, 25
+ vrlw 4, 4, 25
+ vrlw 21, 21, 25
+ vrlw 22, 22, 25
+ vrlw 23, 23, 25
+ vrlw 20, 20, 25
+ xxlor 32+25, 0, 0
+
+ vadduwm 0, 0, 5
+ vadduwm 1, 1, 6
+ vadduwm 2, 2, 7
+ vadduwm 3, 3, 4
+ vadduwm 16, 16, 21
+ vadduwm 17, 17, 22
+ vadduwm 18, 18, 23
+ vadduwm 19, 19, 20
+
+ xxlor 0, 32+25, 32+25
+ xxlor 32+25, 22, 22
+ vpermxor 15, 15, 0, 25
+ vpermxor 12, 12, 1, 25
+ vpermxor 13, 13, 2, 25
+ vpermxor 14, 14, 3, 25
+ vpermxor 31, 31, 16, 25
+ vpermxor 28, 28, 17, 25
+ vpermxor 29, 29, 18, 25
+ vpermxor 30, 30, 19, 25
+ xxlor 32+25, 0, 0
+
+ vadduwm 10, 10, 15
+ vadduwm 11, 11, 12
+ vadduwm 8, 8, 13
+ vadduwm 9, 9, 14
+ vadduwm 26, 26, 31
+ vadduwm 27, 27, 28
+ vadduwm 24, 24, 29
+ vadduwm 25, 25, 30
+
+ xxlor 0, 32+28, 32+28
+ xxlor 32+28, 23, 23
+ vxor 5, 5, 10
+ vxor 6, 6, 11
+ vxor 7, 7, 8
+ vxor 4, 4, 9
+ vxor 21, 21, 26
+ vxor 22, 22, 27
+ vxor 23, 23, 24
+ vxor 20, 20, 25
+ vrlw 5, 5, 28
+ vrlw 6, 6, 28
+ vrlw 7, 7, 28
+ vrlw 4, 4, 28
+ vrlw 21, 21, 28
+ vrlw 22, 22, 28
+ vrlw 23, 23, 28
+ vrlw 20, 20, 28
+ xxlor 32+28, 0, 0
+.endm
+
+.macro QT_loop_4x
+ # QR(v0, v4, v8, v12, v1, v5, v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
+ vadduwm 0, 0, 4
+ vadduwm 1, 1, 5
+ vadduwm 2, 2, 6
+ vadduwm 3, 3, 7
+ vpermxor 12, 12, 0, 20
+ vpermxor 13, 13, 1, 20
+ vpermxor 14, 14, 2, 20
+ vpermxor 15, 15, 3, 20
+ vadduwm 8, 8, 12
+ vadduwm 9, 9, 13
+ vadduwm 10, 10, 14
+ vadduwm 11, 11, 15
+ vxor 4, 4, 8
+ vxor 5, 5, 9
+ vxor 6, 6, 10
+ vxor 7, 7, 11
+ vrlw 4, 4, 21
+ vrlw 5, 5, 21
+ vrlw 6, 6, 21
+ vrlw 7, 7, 21
+ vadduwm 0, 0, 4
+ vadduwm 1, 1, 5
+ vadduwm 2, 2, 6
+ vadduwm 3, 3, 7
+ vpermxor 12, 12, 0, 22
+ vpermxor 13, 13, 1, 22
+ vpermxor 14, 14, 2, 22
+ vpermxor 15, 15, 3, 22
+ vadduwm 8, 8, 12
+ vadduwm 9, 9, 13
+ vadduwm 10, 10, 14
+ vadduwm 11, 11, 15
+ vxor 4, 4, 8
+ vxor 5, 5, 9
+ vxor 6, 6, 10
+ vxor 7, 7, 11
+ vrlw 4, 4, 23
+ vrlw 5, 5, 23
+ vrlw 6, 6, 23
+ vrlw 7, 7, 23
+
+ # QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14)
+ vadduwm 0, 0, 5
+ vadduwm 1, 1, 6
+ vadduwm 2, 2, 7
+ vadduwm 3, 3, 4
+ vpermxor 15, 15, 0, 20
+ vpermxor 12, 12, 1, 20
+ vpermxor 13, 13, 2, 20
+ vpermxor 14, 14, 3, 20
+ vadduwm 10, 10, 15
+ vadduwm 11, 11, 12
+ vadduwm 8, 8, 13
+ vadduwm 9, 9, 14
+ vxor 5, 5, 10
+ vxor 6, 6, 11
+ vxor 7, 7, 8
+ vxor 4, 4, 9
+ vrlw 5, 5, 21
+ vrlw 6, 6, 21
+ vrlw 7, 7, 21
+ vrlw 4, 4, 21
+ vadduwm 0, 0, 5
+ vadduwm 1, 1, 6
+ vadduwm 2, 2, 7
+ vadduwm 3, 3, 4
+ vpermxor 15, 15, 0, 22
+ vpermxor 12, 12, 1, 22
+ vpermxor 13, 13, 2, 22
+ vpermxor 14, 14, 3, 22
+ vadduwm 10, 10, 15
+ vadduwm 11, 11, 12
+ vadduwm 8, 8, 13
+ vadduwm 9, 9, 14
+ vxor 5, 5, 10
+ vxor 6, 6, 11
+ vxor 7, 7, 8
+ vxor 4, 4, 9
+ vrlw 5, 5, 23
+ vrlw 6, 6, 23
+ vrlw 7, 7, 23
+ vrlw 4, 4, 23
+.endm
+
+# Transpose
+.macro TP_4x a0 a1 a2 a3
+ xxmrghw 10, 32+\a0, 32+\a1 # a0, a1, b0, b1
+ xxmrghw 11, 32+\a2, 32+\a3 # a2, a3, b2, b3
+ xxmrglw 12, 32+\a0, 32+\a1 # c0, c1, d0, d1
+ xxmrglw 13, 32+\a2, 32+\a3 # c2, c3, d2, d3
+ xxpermdi 32+\a0, 10, 11, 0 # a0, a1, a2, a3
+ xxpermdi 32+\a1, 10, 11, 3 # b0, b1, b2, b3
+ xxpermdi 32+\a2, 12, 13, 0 # c0, c1, c2, c3
+ xxpermdi 32+\a3, 12, 13, 3 # d0, d1, d2, d3
+.endm
+
+# key stream = working state + state
+.macro Add_state S
+ vadduwm \S+0, \S+0, 16-\S
+ vadduwm \S+4, \S+4, 17-\S
+ vadduwm \S+8, \S+8, 18-\S
+ vadduwm \S+12, \S+12, 19-\S
+
+ vadduwm \S+1, \S+1, 16-\S
+ vadduwm \S+5, \S+5, 17-\S
+ vadduwm \S+9, \S+9, 18-\S
+ vadduwm \S+13, \S+13, 19-\S
+
+ vadduwm \S+2, \S+2, 16-\S
+ vadduwm \S+6, \S+6, 17-\S
+ vadduwm \S+10, \S+10, 18-\S
+ vadduwm \S+14, \S+14, 19-\S
+
+ vadduwm \S+3, \S+3, 16-\S
+ vadduwm \S+7, \S+7, 17-\S
+ vadduwm \S+11, \S+11, 18-\S
+ vadduwm \S+15, \S+15, 19-\S
+.endm
+
+#
+# write 256 bytes
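+# (load 256 bytes of input from r5 + offset r14, XOR them with the key
+# stream held in 16 VSX registers, and store the result to r4 + offset r14)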
+#
+.macro Write_256 S
+ add 9, 14, 5
+ add 16, 14, 4
+ lxvw4x 0, 0, 9
+ lxvw4x 1, 17, 9
+ lxvw4x 2, 18, 9
+ lxvw4x 3, 19, 9
+ lxvw4x 4, 20, 9
+ lxvw4x 5, 21, 9
+ lxvw4x 6, 22, 9
+ lxvw4x 7, 23, 9
+ lxvw4x 8, 24, 9
+ lxvw4x 9, 25, 9
+ lxvw4x 10, 26, 9
+ lxvw4x 11, 27, 9
+ lxvw4x 12, 28, 9
+ lxvw4x 13, 29, 9
+ lxvw4x 14, 30, 9
+ lxvw4x 15, 31, 9
+
+ xxlxor \S+32, \S+32, 0
+ xxlxor \S+36, \S+36, 1
+ xxlxor \S+40, \S+40, 2
+ xxlxor \S+44, \S+44, 3
+ xxlxor \S+33, \S+33, 4
+ xxlxor \S+37, \S+37, 5
+ xxlxor \S+41, \S+41, 6
+ xxlxor \S+45, \S+45, 7
+ xxlxor \S+34, \S+34, 8
+ xxlxor \S+38, \S+38, 9
+ xxlxor \S+42, \S+42, 10
+ xxlxor \S+46, \S+46, 11
+ xxlxor \S+35, \S+35, 12
+ xxlxor \S+39, \S+39, 13
+ xxlxor \S+43, \S+43, 14
+ xxlxor \S+47, \S+47, 15
+
+ stxvw4x \S+32, 0, 16
+ stxvw4x \S+36, 17, 16
+ stxvw4x \S+40, 18, 16
+ stxvw4x \S+44, 19, 16
+
+ stxvw4x \S+33, 20, 16
+ stxvw4x \S+37, 21, 16
+ stxvw4x \S+41, 22, 16
+ stxvw4x \S+45, 23, 16
+
+ stxvw4x \S+34, 24, 16
+ stxvw4x \S+38, 25, 16
+ stxvw4x \S+42, 26, 16
+ stxvw4x \S+46, 27, 16
+
+ stxvw4x \S+35, 28, 16
+ stxvw4x \S+39, 29, 16
+ stxvw4x \S+43, 30, 16
+ stxvw4x \S+47, 31, 16
+
+.endm
+
+#
+# chacha_p10le_8x(u32 *state, byte *dst, const byte *src, size_t len, int nrounds);
+#
+SYM_FUNC_START(chacha_p10le_8x)
+.align 5
+ cmpdi 6, 0
+ ble Out_no_chacha
+
+ SAVE_REGS
+
+ # r17 - r31 mainly for Write_256 macro.
+ li 17, 16
+ li 18, 32
+ li 19, 48
+ li 20, 64
+ li 21, 80
+ li 22, 96
+ li 23, 112
+ li 24, 128
+ li 25, 144
+ li 26, 160
+ li 27, 176
+ li 28, 192
+ li 29, 208
+ li 30, 224
+ li 31, 240
+
+ mr 15, 6 # len
+ li 14, 0 # offset to inp and outp
+
+ lxvw4x 48, 0, 3 # vr16, constants
+ lxvw4x 49, 17, 3 # vr17, key 1
+ lxvw4x 50, 18, 3 # vr18, key 2
+ lxvw4x 51, 19, 3 # vr19, counter, nonce
+
+ # create (0, 1, 2, 3) counters
+ vspltisw 0, 0
+ vspltisw 1, 1
+ vspltisw 2, 2
+ vspltisw 3, 3
+ vmrghw 4, 0, 1
+ vmrglw 5, 2, 3
+ vsldoi 30, 4, 5, 8 # vr30 counter, 4 (0, 1, 2, 3)
+
+ vspltisw 21, 12
+ vspltisw 23, 7
+
+ addis 11, 2, permx@toc@ha
+ addi 11, 11, permx@toc@l
+ lxvw4x 32+20, 0, 11
+ lxvw4x 32+22, 17, 11
+
+ sradi 8, 7, 1
+
+ mtctr 8
+
+ # save constants to vsx
+ xxlor 16, 48, 48
+ xxlor 17, 49, 49
+ xxlor 18, 50, 50
+ xxlor 19, 51, 51
+
+ vspltisw 25, 4
+ vspltisw 26, 8
+
+ xxlor 25, 32+26, 32+26
+ xxlor 24, 32+25, 32+25
+
+ vadduwm 31, 30, 25 # counter = (0, 1, 2, 3) + (4, 4, 4, 4)
+ xxlor 30, 32+30, 32+30
+ xxlor 31, 32+31, 32+31
+
+ xxlor 20, 32+20, 32+20
+ xxlor 21, 32+21, 32+21
+ xxlor 22, 32+22, 32+22
+ xxlor 23, 32+23, 32+23
+
+ cmpdi 6, 512
+ blt Loop_last
+
+Loop_8x:
+ xxspltw 32+0, 16, 0
+ xxspltw 32+1, 16, 1
+ xxspltw 32+2, 16, 2
+ xxspltw 32+3, 16, 3
+
+ xxspltw 32+4, 17, 0
+ xxspltw 32+5, 17, 1
+ xxspltw 32+6, 17, 2
+ xxspltw 32+7, 17, 3
+ xxspltw 32+8, 18, 0
+ xxspltw 32+9, 18, 1
+ xxspltw 32+10, 18, 2
+ xxspltw 32+11, 18, 3
+ xxspltw 32+12, 19, 0
+ xxspltw 32+13, 19, 1
+ xxspltw 32+14, 19, 2
+ xxspltw 32+15, 19, 3
+ vadduwm 12, 12, 30 # increase counter
+
+ xxspltw 32+16, 16, 0
+ xxspltw 32+17, 16, 1
+ xxspltw 32+18, 16, 2
+ xxspltw 32+19, 16, 3
+
+ xxspltw 32+20, 17, 0
+ xxspltw 32+21, 17, 1
+ xxspltw 32+22, 17, 2
+ xxspltw 32+23, 17, 3
+ xxspltw 32+24, 18, 0
+ xxspltw 32+25, 18, 1
+ xxspltw 32+26, 18, 2
+ xxspltw 32+27, 18, 3
+ xxspltw 32+28, 19, 0
+ xxspltw 32+29, 19, 1
+ vadduwm 28, 28, 31 # increase counter
+ xxspltw 32+30, 19, 2
+ xxspltw 32+31, 19, 3
+
+.align 5
+quarter_loop_8x:
+ QT_loop_8x
+
+ bdnz quarter_loop_8x
+
+ xxlor 0, 32+30, 32+30
+ xxlor 32+30, 30, 30
+ vadduwm 12, 12, 30
+ xxlor 32+30, 0, 0
+ TP_4x 0, 1, 2, 3
+ TP_4x 4, 5, 6, 7
+ TP_4x 8, 9, 10, 11
+ TP_4x 12, 13, 14, 15
+
+ xxlor 0, 48, 48
+ xxlor 1, 49, 49
+ xxlor 2, 50, 50
+ xxlor 3, 51, 51
+ xxlor 48, 16, 16
+ xxlor 49, 17, 17
+ xxlor 50, 18, 18
+ xxlor 51, 19, 19
+ Add_state 0
+ xxlor 48, 0, 0
+ xxlor 49, 1, 1
+ xxlor 50, 2, 2
+ xxlor 51, 3, 3
+ Write_256 0
+ addi 14, 14, 256 # offset +=256
+ addi 15, 15, -256 # len -=256
+
+ xxlor 5, 32+31, 32+31
+ xxlor 32+31, 31, 31
+ vadduwm 28, 28, 31
+ xxlor 32+31, 5, 5
+ TP_4x 16+0, 16+1, 16+2, 16+3
+ TP_4x 16+4, 16+5, 16+6, 16+7
+ TP_4x 16+8, 16+9, 16+10, 16+11
+ TP_4x 16+12, 16+13, 16+14, 16+15
+
+ xxlor 32, 16, 16
+ xxlor 33, 17, 17
+ xxlor 34, 18, 18
+ xxlor 35, 19, 19
+ Add_state 16
+ Write_256 16
+ addi 14, 14, 256 # offset +=256
+	addi 15, 15, -256	# len -= 256
+
+ xxlor 32+24, 24, 24
+ xxlor 32+25, 25, 25
+ xxlor 32+30, 30, 30
+ vadduwm 30, 30, 25
+ vadduwm 31, 30, 24
+ xxlor 30, 32+30, 32+30
+ xxlor 31, 32+31, 32+31
+
+ cmpdi 15, 0
+ beq Out_loop
+
+ cmpdi 15, 512
+ blt Loop_last
+
+ mtctr 8
+ b Loop_8x
+
+Loop_last:
+ lxvw4x 48, 0, 3 # vr16, constants
+ lxvw4x 49, 17, 3 # vr17, key 1
+ lxvw4x 50, 18, 3 # vr18, key 2
+ lxvw4x 51, 19, 3 # vr19, counter, nonce
+
+ vspltisw 21, 12
+ vspltisw 23, 7
+ addis 11, 2, permx@toc@ha
+ addi 11, 11, permx@toc@l
+ lxvw4x 32+20, 0, 11
+ lxvw4x 32+22, 17, 11
+
+ sradi 8, 7, 1
+ mtctr 8
+
+Loop_4x:
+ vspltw 0, 16, 0
+ vspltw 1, 16, 1
+ vspltw 2, 16, 2
+ vspltw 3, 16, 3
+
+ vspltw 4, 17, 0
+ vspltw 5, 17, 1
+ vspltw 6, 17, 2
+ vspltw 7, 17, 3
+ vspltw 8, 18, 0
+ vspltw 9, 18, 1
+ vspltw 10, 18, 2
+ vspltw 11, 18, 3
+ vspltw 12, 19, 0
+ vadduwm 12, 12, 30 # increase counter
+ vspltw 13, 19, 1
+ vspltw 14, 19, 2
+ vspltw 15, 19, 3
+
+.align 5
+quarter_loop:
+ QT_loop_4x
+
+ bdnz quarter_loop
+
+ vadduwm 12, 12, 30
+ TP_4x 0, 1, 2, 3
+ TP_4x 4, 5, 6, 7
+ TP_4x 8, 9, 10, 11
+ TP_4x 12, 13, 14, 15
+
+ Add_state 0
+ Write_256 0
+ addi 14, 14, 256 # offset += 256
+	addi 15, 15, -256	# len -= 256
+
+ # Update state counter
+ vspltisw 25, 4
+ vadduwm 30, 30, 25
+
+ cmpdi 15, 0
+ beq Out_loop
+ cmpdi 15, 256
+ blt Out_loop
+
+ mtctr 8
+ b Loop_4x
+
+Out_loop:
+ RESTORE_REGS
+ blr
+
+Out_no_chacha:
+ li 3, 0
+ blr
+SYM_FUNC_END(chacha_p10le_8x)
+
+SYM_DATA_START_LOCAL(PERMX)
+.align 5
+permx:
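+# vpermxor control words: the first quadword implements the 16-bit rotate of
+# each 32-bit word in the quarter round, the second the 8-bit rotate.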
+.long 0x22330011, 0x66774455, 0xaabb8899, 0xeeffccdd
+.long 0x11223300, 0x55667744, 0x99aabb88, 0xddeeffcc
+SYM_DATA_END(PERMX)
diff --git a/arch/powerpc/crypto/poly1305-p10-glue.c b/arch/powerpc/crypto/poly1305-p10-glue.c
new file mode 100644
index 000000000000..95dd708573ee
--- /dev/null
+++ b/arch/powerpc/crypto/poly1305-p10-glue.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Poly1305 authenticator algorithm, RFC7539.
+ *
+ * Copyright 2023- IBM Corp. All rights reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/jump_label.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <asm/unaligned.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
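+/*
+ * Assembly helpers: poly1305_p10le_4blocks() absorbs multiples of four
+ * 16-byte blocks using VSX, poly1305_64s() absorbs a single block with
+ * scalar 64-bit math, and poly1305_emit_64() produces the final tag.
+ */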
+asmlinkage void poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen);
+asmlinkage void poly1305_64s(void *h, const u8 *m, u32 mlen, int highbit);
+asmlinkage void poly1305_emit_64(void *h, void *s, u8 *dst);
+
+static void vsx_begin(void)
+{
+ preempt_disable();
+ enable_kernel_vsx();
+}
+
+static void vsx_end(void)
+{
+ disable_kernel_vsx();
+ preempt_enable();
+}
+
+static int crypto_poly1305_p10_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ poly1305_core_init(&dctx->h);
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
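+/*
+ * For the poly1305 shash the first 32 bytes of data carry the key: 16 bytes
+ * of r followed by 16 bytes of s.  Consume them here before any message
+ * data is hashed.
+ */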
+static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
+ const u8 *inp, unsigned int len)
+{
+ unsigned int acc = 0;
+
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
+ struct poly1305_core_key *key = &dctx->core_r;
+
+ key->key.r64[0] = get_unaligned_le64(&inp[0]);
+ key->key.r64[1] = get_unaligned_le64(&inp[8]);
+ inp += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ acc += POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(&inp[0]);
+ dctx->s[1] = get_unaligned_le32(&inp[4]);
+ dctx->s[2] = get_unaligned_le32(&inp[8]);
+ dctx->s[3] = get_unaligned_le32(&inp[12]);
+ acc += POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ }
+ return acc;
+}
+
+static int crypto_poly1305_p10_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+ unsigned int bytes, used;
+
+ if (unlikely(dctx->buflen)) {
+ bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ srclen -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE))) {
+ vsx_begin();
+ poly1305_64s(&dctx->h, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1);
+ vsx_end();
+ }
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+ bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
+ used = crypto_poly1305_setdctxkey(dctx, src, bytes);
+ if (likely(used)) {
+ srclen -= used;
+ src += used;
+ }
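+		/*
+		 * The VSX path needs at least four blocks (64 bytes);
+		 * anything less is handled one block at a time by the
+		 * scalar routine below.
+		 */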
+ if (crypto_simd_usable() && (srclen >= POLY1305_BLOCK_SIZE*4)) {
+ vsx_begin();
+ poly1305_p10le_4blocks(&dctx->h, src, srclen);
+ vsx_end();
+ src += srclen - (srclen % (POLY1305_BLOCK_SIZE * 4));
+ srclen %= POLY1305_BLOCK_SIZE * 4;
+ }
+ while (srclen >= POLY1305_BLOCK_SIZE) {
+ vsx_begin();
+ poly1305_64s(&dctx->h, src, POLY1305_BLOCK_SIZE, 1);
+ vsx_end();
+ srclen -= POLY1305_BLOCK_SIZE;
+ src += POLY1305_BLOCK_SIZE;
+ }
+ }
+
+ if (unlikely(srclen)) {
+ dctx->buflen = srclen;
+ memcpy(dctx->buf, src, srclen);
+ }
+
+ return 0;
+}
+
+static int crypto_poly1305_p10_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ if ((dctx->buflen)) {
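+		/*
+		 * Pad the trailing partial block: append the 0x01 byte, zero
+		 * the rest and process it with highbit = 0, since the padding
+		 * already supplies the high bit.
+		 */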
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ vsx_begin();
+ poly1305_64s(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ vsx_end();
+ dctx->buflen = 0;
+ }
+
+ poly1305_emit_64(&dctx->h, &dctx->s, dst);
+ return 0;
+}
+
+static struct shash_alg poly1305_alg = {
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .init = crypto_poly1305_p10_init,
+ .update = crypto_poly1305_p10_update,
+ .final = crypto_poly1305_p10_final,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+ .base = {
+ .cra_name = "poly1305",
+ .cra_driver_name = "poly1305-p10",
+ .cra_priority = 300,
+ .cra_blocksize = POLY1305_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int __init poly1305_p10_init(void)
+{
+ return crypto_register_shash(&poly1305_alg);
+}
+
+static void __exit poly1305_p10_exit(void)
+{
+ crypto_unregister_shash(&poly1305_alg);
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_P10, poly1305_p10_init);
+module_exit(poly1305_p10_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_DESCRIPTION("Optimized Poly1305 for P10");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-p10");
diff --git a/arch/powerpc/crypto/poly1305-p10le_64.S b/arch/powerpc/crypto/poly1305-p10le_64.S
new file mode 100644
index 000000000000..a3c1987f1ecd
--- /dev/null
+++ b/arch/powerpc/crypto/poly1305-p10le_64.S
@@ -0,0 +1,1075 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#
+# Accelerated poly1305 implementation for ppc64le.
+#
+# Copyright 2023- IBM Corp. All rights reserved
+#
+#===================================================================================
+# Written by Danny Tsen <dtsen@us.ibm.com>
+#
+# Poly1305 - this version mainly uses vector/VSX/scalar instructions
+# - 26-bit limbs
+# - handles multiple 64-byte blocks
+#
+# Block size 16 bytes
+# key = (r, s)
+# clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFFFFFF
+# p = 2^130 - 5
+# a += m
+# a = (a * r) % p
+# a += s
+#
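+# In C-like pseudocode, each 16-byte block updates the accumulator as
+# (illustrative sketch only):
+#
+#	a = ((a + m + (1 << 128)) * r) % ((1 << 130) - 5);
+#
+# and the final tag is (a + s) mod 2^128.
+#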
+# Improve performance by breaking the polynomial down into a sum of products:
+# h4 = m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r
+#
+# 07/22/21 - this revision is based on the above sum of products.  Setup r^4, r^3, r^2, r and s3, s2, s1, s0
+# to 9 vectors for multiplications.
+#
+# setup r^4, r^3, r^2, r vectors
+# vs [r^1, r^3, r^2, r^4]
+# vs0 = [r0,.....]
+# vs1 = [r1,.....]
+# vs2 = [r2,.....]
+# vs3 = [r3,.....]
+# vs4 = [r4,.....]
+# vs5 = [r4*5,...]
+# vs6 = [r3*5,...]
+# vs7 = [r2*5,...]
+# vs8 = [r1*5,...]
+#
+# Each word in a vector consists of a member of "r/s" in [a * r/s].
+#
+# r0, r4*5, r3*5, r2*5, r1*5;
+# r1, r0, r4*5, r3*5, r2*5;
+# r2, r1, r0, r4*5, r3*5;
+# r3, r2, r1, r0, r4*5;
+# r4, r3, r2, r1, r0 ;
+#
+#
+# poly1305_p10le_4blocks( uint8_t *k, uint8_t *m, uint32_t mlen)
+#  k = 32 bytes key
+#  r3 = k (r, s)
+#  r4 = m
+#  r5 = mlen
+#
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+#include <linux/linkage.h>
+
+.machine "any"
+
+.text
+
+.macro SAVE_GPR GPR OFFSET FRAME
+ std \GPR,\OFFSET(\FRAME)
+.endm
+
+.macro SAVE_VRS VRS OFFSET FRAME
+ li 16, \OFFSET
+ stvx \VRS, 16, \FRAME
+.endm
+
+.macro SAVE_VSX VSX OFFSET FRAME
+ li 16, \OFFSET
+ stxvx \VSX, 16, \FRAME
+.endm
+
+.macro RESTORE_GPR GPR OFFSET FRAME
+ ld \GPR,\OFFSET(\FRAME)
+.endm
+
+.macro RESTORE_VRS VRS OFFSET FRAME
+ li 16, \OFFSET
+ lvx \VRS, 16, \FRAME
+.endm
+
+.macro RESTORE_VSX VSX OFFSET FRAME
+ li 16, \OFFSET
+ lxvx \VSX, 16, \FRAME
+.endm
+
+.macro SAVE_REGS
+ mflr 0
+ std 0, 16(1)
+ stdu 1,-752(1)
+
+ SAVE_GPR 14, 112, 1
+ SAVE_GPR 15, 120, 1
+ SAVE_GPR 16, 128, 1
+ SAVE_GPR 17, 136, 1
+ SAVE_GPR 18, 144, 1
+ SAVE_GPR 19, 152, 1
+ SAVE_GPR 20, 160, 1
+ SAVE_GPR 21, 168, 1
+ SAVE_GPR 22, 176, 1
+ SAVE_GPR 23, 184, 1
+ SAVE_GPR 24, 192, 1
+ SAVE_GPR 25, 200, 1
+ SAVE_GPR 26, 208, 1
+ SAVE_GPR 27, 216, 1
+ SAVE_GPR 28, 224, 1
+ SAVE_GPR 29, 232, 1
+ SAVE_GPR 30, 240, 1
+ SAVE_GPR 31, 248, 1
+
+ addi 9, 1, 256
+ SAVE_VRS 20, 0, 9
+ SAVE_VRS 21, 16, 9
+ SAVE_VRS 22, 32, 9
+ SAVE_VRS 23, 48, 9
+ SAVE_VRS 24, 64, 9
+ SAVE_VRS 25, 80, 9
+ SAVE_VRS 26, 96, 9
+ SAVE_VRS 27, 112, 9
+ SAVE_VRS 28, 128, 9
+ SAVE_VRS 29, 144, 9
+ SAVE_VRS 30, 160, 9
+ SAVE_VRS 31, 176, 9
+
+ SAVE_VSX 14, 192, 9
+ SAVE_VSX 15, 208, 9
+ SAVE_VSX 16, 224, 9
+ SAVE_VSX 17, 240, 9
+ SAVE_VSX 18, 256, 9
+ SAVE_VSX 19, 272, 9
+ SAVE_VSX 20, 288, 9
+ SAVE_VSX 21, 304, 9
+ SAVE_VSX 22, 320, 9
+ SAVE_VSX 23, 336, 9
+ SAVE_VSX 24, 352, 9
+ SAVE_VSX 25, 368, 9
+ SAVE_VSX 26, 384, 9
+ SAVE_VSX 27, 400, 9
+ SAVE_VSX 28, 416, 9
+ SAVE_VSX 29, 432, 9
+ SAVE_VSX 30, 448, 9
+ SAVE_VSX 31, 464, 9
+.endm # SAVE_REGS
+
+.macro RESTORE_REGS
+ addi 9, 1, 256
+ RESTORE_VRS 20, 0, 9
+ RESTORE_VRS 21, 16, 9
+ RESTORE_VRS 22, 32, 9
+ RESTORE_VRS 23, 48, 9
+ RESTORE_VRS 24, 64, 9
+ RESTORE_VRS 25, 80, 9
+ RESTORE_VRS 26, 96, 9
+ RESTORE_VRS 27, 112, 9
+ RESTORE_VRS 28, 128, 9
+ RESTORE_VRS 29, 144, 9
+ RESTORE_VRS 30, 160, 9
+ RESTORE_VRS 31, 176, 9
+
+ RESTORE_VSX 14, 192, 9
+ RESTORE_VSX 15, 208, 9
+ RESTORE_VSX 16, 224, 9
+ RESTORE_VSX 17, 240, 9
+ RESTORE_VSX 18, 256, 9
+ RESTORE_VSX 19, 272, 9
+ RESTORE_VSX 20, 288, 9
+ RESTORE_VSX 21, 304, 9
+ RESTORE_VSX 22, 320, 9
+ RESTORE_VSX 23, 336, 9
+ RESTORE_VSX 24, 352, 9
+ RESTORE_VSX 25, 368, 9
+ RESTORE_VSX 26, 384, 9
+ RESTORE_VSX 27, 400, 9
+ RESTORE_VSX 28, 416, 9
+ RESTORE_VSX 29, 432, 9
+ RESTORE_VSX 30, 448, 9
+ RESTORE_VSX 31, 464, 9
+
+ RESTORE_GPR 14, 112, 1
+ RESTORE_GPR 15, 120, 1
+ RESTORE_GPR 16, 128, 1
+ RESTORE_GPR 17, 136, 1
+ RESTORE_GPR 18, 144, 1
+ RESTORE_GPR 19, 152, 1
+ RESTORE_GPR 20, 160, 1
+ RESTORE_GPR 21, 168, 1
+ RESTORE_GPR 22, 176, 1
+ RESTORE_GPR 23, 184, 1
+ RESTORE_GPR 24, 192, 1
+ RESTORE_GPR 25, 200, 1
+ RESTORE_GPR 26, 208, 1
+ RESTORE_GPR 27, 216, 1
+ RESTORE_GPR 28, 224, 1
+ RESTORE_GPR 29, 232, 1
+ RESTORE_GPR 30, 240, 1
+ RESTORE_GPR 31, 248, 1
+
+ addi 1, 1, 752
+ ld 0, 16(1)
+ mtlr 0
+.endm # RESTORE_REGS
+
+#
+# p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
+# p[1] = a0*r1 + a1*r0 + a2*r4*5 + a3*r3*5 + a4*r2*5;
+# p[2] = a0*r2 + a1*r1 + a2*r0 + a3*r4*5 + a4*r3*5;
+# p[3] = a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*r4*5;
+# p[4] = a0*r4 + a1*r3 + a2*r2 + a3*r1 + a4*r0 ;
+#
+# [r^2, r^3, r^1, r^4]
+# [m3, m2, m4, m1]
+#
+# multiply odd and even words
+.macro mul_odd
+ vmulouw 14, 4, 26
+ vmulouw 10, 5, 3
+ vmulouw 11, 6, 2
+ vmulouw 12, 7, 1
+ vmulouw 13, 8, 0
+ vmulouw 15, 4, 27
+ vaddudm 14, 14, 10
+ vaddudm 14, 14, 11
+ vmulouw 10, 5, 26
+ vmulouw 11, 6, 3
+ vaddudm 14, 14, 12
+ vaddudm 14, 14, 13 # x0
+ vaddudm 15, 15, 10
+ vaddudm 15, 15, 11
+ vmulouw 12, 7, 2
+ vmulouw 13, 8, 1
+ vaddudm 15, 15, 12
+ vaddudm 15, 15, 13 # x1
+ vmulouw 16, 4, 28
+ vmulouw 10, 5, 27
+ vmulouw 11, 6, 26
+ vaddudm 16, 16, 10
+ vaddudm 16, 16, 11
+ vmulouw 12, 7, 3
+ vmulouw 13, 8, 2
+ vaddudm 16, 16, 12
+ vaddudm 16, 16, 13 # x2
+ vmulouw 17, 4, 29
+ vmulouw 10, 5, 28
+ vmulouw 11, 6, 27
+ vaddudm 17, 17, 10
+ vaddudm 17, 17, 11
+ vmulouw 12, 7, 26
+ vmulouw 13, 8, 3
+ vaddudm 17, 17, 12
+ vaddudm 17, 17, 13 # x3
+ vmulouw 18, 4, 30
+ vmulouw 10, 5, 29
+ vmulouw 11, 6, 28
+ vaddudm 18, 18, 10
+ vaddudm 18, 18, 11
+ vmulouw 12, 7, 27
+ vmulouw 13, 8, 26
+ vaddudm 18, 18, 12
+ vaddudm 18, 18, 13 # x4
+.endm
+
+.macro mul_even
+ vmuleuw 9, 4, 26
+ vmuleuw 10, 5, 3
+ vmuleuw 11, 6, 2
+ vmuleuw 12, 7, 1
+ vmuleuw 13, 8, 0
+ vaddudm 14, 14, 9
+ vaddudm 14, 14, 10
+ vaddudm 14, 14, 11
+ vaddudm 14, 14, 12
+ vaddudm 14, 14, 13 # x0
+
+ vmuleuw 9, 4, 27
+ vmuleuw 10, 5, 26
+ vmuleuw 11, 6, 3
+ vmuleuw 12, 7, 2
+ vmuleuw 13, 8, 1
+ vaddudm 15, 15, 9
+ vaddudm 15, 15, 10
+ vaddudm 15, 15, 11
+ vaddudm 15, 15, 12
+ vaddudm 15, 15, 13 # x1
+
+ vmuleuw 9, 4, 28
+ vmuleuw 10, 5, 27
+ vmuleuw 11, 6, 26
+ vmuleuw 12, 7, 3
+ vmuleuw 13, 8, 2
+ vaddudm 16, 16, 9
+ vaddudm 16, 16, 10
+ vaddudm 16, 16, 11
+ vaddudm 16, 16, 12
+ vaddudm 16, 16, 13 # x2
+
+ vmuleuw 9, 4, 29
+ vmuleuw 10, 5, 28
+ vmuleuw 11, 6, 27
+ vmuleuw 12, 7, 26
+ vmuleuw 13, 8, 3
+ vaddudm 17, 17, 9
+ vaddudm 17, 17, 10
+ vaddudm 17, 17, 11
+ vaddudm 17, 17, 12
+ vaddudm 17, 17, 13 # x3
+
+ vmuleuw 9, 4, 30
+ vmuleuw 10, 5, 29
+ vmuleuw 11, 6, 28
+ vmuleuw 12, 7, 27
+ vmuleuw 13, 8, 26
+ vaddudm 18, 18, 9
+ vaddudm 18, 18, 10
+ vaddudm 18, 18, 11
+ vaddudm 18, 18, 12
+ vaddudm 18, 18, 13 # x4
+.endm
+
+#
+# poly1305_setup_r
+#
+# setup r^4, r^3, r^2, r vectors
+# [r, r^3, r^2, r^4]
+# vs0 = [r0,...]
+# vs1 = [r1,...]
+# vs2 = [r2,...]
+# vs3 = [r3,...]
+# vs4 = [r4,...]
+# vs5 = [r4*5,...]
+# vs6 = [r3*5,...]
+# vs7 = [r2*5,...]
+# vs8 = [r1*5,...]
+#
+# r0, r4*5, r3*5, r2*5, r1*5;
+# r1, r0, r4*5, r3*5, r2*5;
+# r2, r1, r0, r4*5, r3*5;
+# r3, r2, r1, r0, r4*5;
+# r4, r3, r2, r1, r0 ;
+#
+.macro poly1305_setup_r
+
+ # save r
+ xxlor 26, 58, 58
+ xxlor 27, 59, 59
+ xxlor 28, 60, 60
+ xxlor 29, 61, 61
+ xxlor 30, 62, 62
+
+ xxlxor 31, 31, 31
+
+# [r, r^3, r^2, r^4]
+ # compute r^2
+ vmr 4, 26
+ vmr 5, 27
+ vmr 6, 28
+ vmr 7, 29
+ vmr 8, 30
+ bl do_mul # r^2 r^1
+ xxpermdi 58, 58, 36, 0x3 # r0
+ xxpermdi 59, 59, 37, 0x3 # r1
+ xxpermdi 60, 60, 38, 0x3 # r2
+ xxpermdi 61, 61, 39, 0x3 # r3
+ xxpermdi 62, 62, 40, 0x3 # r4
+ xxpermdi 36, 36, 36, 0x3
+ xxpermdi 37, 37, 37, 0x3
+ xxpermdi 38, 38, 38, 0x3
+ xxpermdi 39, 39, 39, 0x3
+ xxpermdi 40, 40, 40, 0x3
+ vspltisb 13, 2
+ vsld 9, 27, 13
+ vsld 10, 28, 13
+ vsld 11, 29, 13
+ vsld 12, 30, 13
+ vaddudm 0, 9, 27
+ vaddudm 1, 10, 28
+ vaddudm 2, 11, 29
+ vaddudm 3, 12, 30
+
+ bl do_mul # r^4 r^3
+ vmrgow 26, 26, 4
+ vmrgow 27, 27, 5
+ vmrgow 28, 28, 6
+ vmrgow 29, 29, 7
+ vmrgow 30, 30, 8
+ vspltisb 13, 2
+ vsld 9, 27, 13
+ vsld 10, 28, 13
+ vsld 11, 29, 13
+ vsld 12, 30, 13
+ vaddudm 0, 9, 27
+ vaddudm 1, 10, 28
+ vaddudm 2, 11, 29
+ vaddudm 3, 12, 30
+
+ # r^2 r^4
+ xxlor 0, 58, 58
+ xxlor 1, 59, 59
+ xxlor 2, 60, 60
+ xxlor 3, 61, 61
+ xxlor 4, 62, 62
+ xxlor 5, 32, 32
+ xxlor 6, 33, 33
+ xxlor 7, 34, 34
+ xxlor 8, 35, 35
+
+ vspltw 9, 26, 3
+ vspltw 10, 26, 2
+ vmrgow 26, 10, 9
+ vspltw 9, 27, 3
+ vspltw 10, 27, 2
+ vmrgow 27, 10, 9
+ vspltw 9, 28, 3
+ vspltw 10, 28, 2
+ vmrgow 28, 10, 9
+ vspltw 9, 29, 3
+ vspltw 10, 29, 2
+ vmrgow 29, 10, 9
+ vspltw 9, 30, 3
+ vspltw 10, 30, 2
+ vmrgow 30, 10, 9
+
+ vsld 9, 27, 13
+ vsld 10, 28, 13
+ vsld 11, 29, 13
+ vsld 12, 30, 13
+ vaddudm 0, 9, 27
+ vaddudm 1, 10, 28
+ vaddudm 2, 11, 29
+ vaddudm 3, 12, 30
+.endm
+
+SYM_FUNC_START_LOCAL(do_mul)
+ mul_odd
+
+ # do reduction ( h %= p )
+ # carry reduction
+ vspltisb 9, 2
+ vsrd 10, 14, 31
+ vsrd 11, 17, 31
+ vand 7, 17, 25
+ vand 4, 14, 25
+ vaddudm 18, 18, 11
+ vsrd 12, 18, 31
+ vaddudm 15, 15, 10
+
+ vsrd 11, 15, 31
+ vand 8, 18, 25
+ vand 5, 15, 25
+ vaddudm 4, 4, 12
+ vsld 10, 12, 9
+ vaddudm 6, 16, 11
+
+ vsrd 13, 6, 31
+ vand 6, 6, 25
+ vaddudm 4, 4, 10
+ vsrd 10, 4, 31
+ vaddudm 7, 7, 13
+
+ vsrd 11, 7, 31
+ vand 7, 7, 25
+ vand 4, 4, 25
+ vaddudm 5, 5, 10
+ vaddudm 8, 8, 11
+ blr
+SYM_FUNC_END(do_mul)
+
+#
+# init key
+#
+.macro do_poly1305_init
+ addis 10, 2, rmask@toc@ha
+ addi 10, 10, rmask@toc@l
+
+ ld 11, 0(10)
+ ld 12, 8(10)
+
+ li 14, 16
+ li 15, 32
+ addis 10, 2, cnum@toc@ha
+ addi 10, 10, cnum@toc@l
+ lvx 25, 0, 10 # v25 - mask
+ lvx 31, 14, 10 # v31 = 1a
+ lvx 19, 15, 10 # v19 = 1 << 24
+ lxv 24, 48(10) # vs24
+ lxv 25, 64(10) # vs25
+
+ # initialize
+ # load key from r3 to vectors
+ ld 9, 24(3)
+ ld 10, 32(3)
+ and. 9, 9, 11
+ and. 10, 10, 12
+
+ # break 26 bits
+ extrdi 14, 9, 26, 38
+ extrdi 15, 9, 26, 12
+ extrdi 16, 9, 12, 0
+ mtvsrdd 58, 0, 14
+ insrdi 16, 10, 14, 38
+ mtvsrdd 59, 0, 15
+ extrdi 17, 10, 26, 24
+ mtvsrdd 60, 0, 16
+ extrdi 18, 10, 24, 0
+ mtvsrdd 61, 0, 17
+ mtvsrdd 62, 0, 18
+
+ # r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5
+ li 9, 5
+ mtvsrdd 36, 0, 9
+ vmulouw 0, 27, 4 # v0 = rr0
+ vmulouw 1, 28, 4 # v1 = rr1
+ vmulouw 2, 29, 4 # v2 = rr2
+ vmulouw 3, 30, 4 # v3 = rr3
+.endm
+
+#
+# poly1305_p10le_4blocks( uint8_t *k, uint8_t *m, uint32_t mlen)
+# k = 32 bytes key
+# r3 = k (r, s)
+# r4 = m
+# r5 = mlen
+#
+SYM_FUNC_START(poly1305_p10le_4blocks)
+.align 5
+ cmpdi 5, 64
+ blt Out_no_poly1305
+
+ SAVE_REGS
+
+ do_poly1305_init
+
+ li 21, 0 # counter to message
+
+ poly1305_setup_r
+
+ # load previous H state
+ # break/convert r6 to 26 bits
+ ld 9, 0(3)
+ ld 10, 8(3)
+ ld 19, 16(3)
+ sldi 19, 19, 24
+ mtvsrdd 41, 0, 19
+ extrdi 14, 9, 26, 38
+ extrdi 15, 9, 26, 12
+ extrdi 16, 9, 12, 0
+ mtvsrdd 36, 0, 14
+ insrdi 16, 10, 14, 38
+ mtvsrdd 37, 0, 15
+ extrdi 17, 10, 26, 24
+ mtvsrdd 38, 0, 16
+ extrdi 18, 10, 24, 0
+ mtvsrdd 39, 0, 17
+ mtvsrdd 40, 0, 18
+ vor 8, 8, 9
+
+ # input m1 m2
+ add 20, 4, 21
+ xxlor 49, 24, 24
+ xxlor 50, 25, 25
+ lxvw4x 43, 0, 20
+ addi 17, 20, 16
+ lxvw4x 44, 0, 17
+ vperm 14, 11, 12, 17
+ vperm 15, 11, 12, 18
+ vand 9, 14, 25 # a0
+ vsrd 10, 14, 31 # >> 26
+ vsrd 11, 10, 31 # 12 bits left
+ vand 10, 10, 25 # a1
+ vspltisb 13, 12
+ vand 16, 15, 25
+ vsld 12, 16, 13
+ vor 11, 11, 12
+ vand 11, 11, 25 # a2
+ vspltisb 13, 14
+ vsrd 12, 15, 13 # >> 14
+ vsrd 13, 12, 31 # >> 26, a4
+ vand 12, 12, 25 # a3
+
+ vaddudm 20, 4, 9
+ vaddudm 21, 5, 10
+ vaddudm 22, 6, 11
+ vaddudm 23, 7, 12
+ vaddudm 24, 8, 13
+
+ # m3 m4
+ addi 17, 17, 16
+ lxvw4x 43, 0, 17
+ addi 17, 17, 16
+ lxvw4x 44, 0, 17
+ vperm 14, 11, 12, 17
+ vperm 15, 11, 12, 18
+ vand 9, 14, 25 # a0
+ vsrd 10, 14, 31 # >> 26
+ vsrd 11, 10, 31 # 12 bits left
+ vand 10, 10, 25 # a1
+ vspltisb 13, 12
+ vand 16, 15, 25
+ vsld 12, 16, 13
+ vspltisb 13, 14
+ vor 11, 11, 12
+ vand 11, 11, 25 # a2
+ vsrd 12, 15, 13 # >> 14
+ vsrd 13, 12, 31 # >> 26, a4
+ vand 12, 12, 25 # a3
+
+ # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
+ vmrgow 4, 9, 20
+ vmrgow 5, 10, 21
+ vmrgow 6, 11, 22
+ vmrgow 7, 12, 23
+ vmrgow 8, 13, 24
+ vaddudm 8, 8, 19
+
+ addi 5, 5, -64 # len -= 64
+ addi 21, 21, 64 # offset += 64
+
+ li 9, 64
+ divdu 31, 5, 9
+
+ cmpdi 31, 0
+ ble Skip_block_loop
+
+ mtctr 31
+
+# h4 = m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r
+# Rewrite the polynomial sum of products as follows,
+# h1 = (h0 + m1) * r^2, h2 = (h0 + m2) * r^2
+# h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> h3 = (h0 + m1) * r^4 + m3 * r^2, h4 = (h0 + m2) * r^4 + m4 * r^2
+# .... Repeat
+# h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
+# h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
+#
+loop_4blocks:
+
+ # Multiply odd words and even words
+ mul_odd
+ mul_even
+ # carry reduction
+ vspltisb 9, 2
+ vsrd 10, 14, 31
+ vsrd 11, 17, 31
+ vand 7, 17, 25
+ vand 4, 14, 25
+ vaddudm 18, 18, 11
+ vsrd 12, 18, 31
+ vaddudm 15, 15, 10
+
+ vsrd 11, 15, 31
+ vand 8, 18, 25
+ vand 5, 15, 25
+ vaddudm 4, 4, 12
+ vsld 10, 12, 9
+ vaddudm 6, 16, 11
+
+ vsrd 13, 6, 31
+ vand 6, 6, 25
+ vaddudm 4, 4, 10
+ vsrd 10, 4, 31
+ vaddudm 7, 7, 13
+
+ vsrd 11, 7, 31
+ vand 7, 7, 25
+ vand 4, 4, 25
+ vaddudm 5, 5, 10
+ vaddudm 8, 8, 11
+
+ # input m1 m2 m3 m4
+ add 20, 4, 21
+ xxlor 49, 24, 24
+ xxlor 50, 25, 25
+ lxvw4x 43, 0, 20
+ addi 17, 20, 16
+ lxvw4x 44, 0, 17
+ vperm 14, 11, 12, 17
+ vperm 15, 11, 12, 18
+ addi 17, 17, 16
+ lxvw4x 43, 0, 17
+ addi 17, 17, 16
+ lxvw4x 44, 0, 17
+ vperm 17, 11, 12, 17
+ vperm 18, 11, 12, 18
+
+ vand 20, 14, 25 # a0
+ vand 9, 17, 25 # a0
+ vsrd 21, 14, 31 # >> 26
+ vsrd 22, 21, 31 # 12 bits left
+ vsrd 10, 17, 31 # >> 26
+ vsrd 11, 10, 31 # 12 bits left
+
+ vand 21, 21, 25 # a1
+ vand 10, 10, 25 # a1
+
+ vspltisb 13, 12
+ vand 16, 15, 25
+ vsld 23, 16, 13
+ vor 22, 22, 23
+ vand 22, 22, 25 # a2
+ vand 16, 18, 25
+ vsld 12, 16, 13
+ vor 11, 11, 12
+ vand 11, 11, 25 # a2
+ vspltisb 13, 14
+ vsrd 23, 15, 13 # >> 14
+ vsrd 24, 23, 31 # >> 26, a4
+ vand 23, 23, 25 # a3
+ vsrd 12, 18, 13 # >> 14
+ vsrd 13, 12, 31 # >> 26, a4
+ vand 12, 12, 25 # a3
+
+ vaddudm 4, 4, 20
+ vaddudm 5, 5, 21
+ vaddudm 6, 6, 22
+ vaddudm 7, 7, 23
+ vaddudm 8, 8, 24
+
+ # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
+ vmrgow 4, 9, 4
+ vmrgow 5, 10, 5
+ vmrgow 6, 11, 6
+ vmrgow 7, 12, 7
+ vmrgow 8, 13, 8
+ vaddudm 8, 8, 19
+
+ addi 5, 5, -64 # len -= 64
+ addi 21, 21, 64 # offset += 64
+
+ bdnz loop_4blocks
+
+Skip_block_loop:
+ xxlor 58, 0, 0
+ xxlor 59, 1, 1
+ xxlor 60, 2, 2
+ xxlor 61, 3, 3
+ xxlor 62, 4, 4
+ xxlor 32, 5, 5
+ xxlor 33, 6, 6
+ xxlor 34, 7, 7
+ xxlor 35, 8, 8
+
+ # Multiply odd words and even words
+ mul_odd
+ mul_even
+
+ # Sum the products.
+ xxpermdi 41, 31, 46, 0
+ xxpermdi 42, 31, 47, 0
+ vaddudm 4, 14, 9
+ xxpermdi 36, 31, 36, 3
+ vaddudm 5, 15, 10
+ xxpermdi 37, 31, 37, 3
+ xxpermdi 43, 31, 48, 0
+ vaddudm 6, 16, 11
+ xxpermdi 38, 31, 38, 3
+ xxpermdi 44, 31, 49, 0
+ vaddudm 7, 17, 12
+ xxpermdi 39, 31, 39, 3
+ xxpermdi 45, 31, 50, 0
+ vaddudm 8, 18, 13
+ xxpermdi 40, 31, 40, 3
+
+ # carry reduction
+ vspltisb 9, 2
+ vsrd 10, 4, 31
+ vsrd 11, 7, 31
+ vand 7, 7, 25
+ vand 4, 4, 25
+ vaddudm 8, 8, 11
+ vsrd 12, 8, 31
+ vaddudm 5, 5, 10
+
+ vsrd 11, 5, 31
+ vand 8, 8, 25
+ vand 5, 5, 25
+ vaddudm 4, 4, 12
+ vsld 10, 12, 9
+ vaddudm 6, 6, 11
+
+ vsrd 13, 6, 31
+ vand 6, 6, 25
+ vaddudm 4, 4, 10
+ vsrd 10, 4, 31
+ vaddudm 7, 7, 13
+
+ vsrd 11, 7, 31
+ vand 7, 7, 25
+ vand 4, 4, 25
+ vaddudm 5, 5, 10
+ vsrd 10, 5, 31
+ vand 5, 5, 25
+ vaddudm 6, 6, 10
+ vaddudm 8, 8, 11
+
+ b do_final_update
+
+do_final_update:
+ # combine 26 bit limbs
+ # v4, v5, v6, v7 and v8 are 26 bit vectors
+ vsld 5, 5, 31
+ vor 20, 4, 5
+ vspltisb 11, 12
+ vsrd 12, 6, 11
+ vsld 6, 6, 31
+ vsld 6, 6, 31
+ vor 20, 20, 6
+ vspltisb 11, 14
+ vsld 7, 7, 11
+ vor 21, 7, 12
+ mfvsrld 16, 40 # save last 2 bytes
+ vsld 8, 8, 11
+ vsld 8, 8, 31
+ vor 21, 21, 8
+ mfvsrld 17, 52
+ mfvsrld 19, 53
+ srdi 16, 16, 24
+
+ std 17, 0(3)
+ std 19, 8(3)
+ stw 16, 16(3)
+
+Out_loop:
+ li 3, 0
+
+ RESTORE_REGS
+
+ blr
+
+Out_no_poly1305:
+ li 3, 0
+ blr
+SYM_FUNC_END(poly1305_p10le_4blocks)
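
Each 16-byte message block consumed above is split into the same radix-2^26 limbs, and the implicit 2^128 bit of a full block is added as 1 << 24 in the top limb (128 = 4*26 + 24), which is what the vaddudm 8, 8, 19 against the 0x01000000 constant does. A scalar sketch (helper name illustrative, little-endian host assumed as in the P10 code):

	#include <stdint.h>
	#include <string.h>

	/* split one 16-byte little-endian block into five 26-bit limbs and set the 2^128 bit */
	static void poly1305_block_to_limbs(const uint8_t m[16], uint32_t a[5])
	{
		uint64_t lo, hi;

		memcpy(&lo, m, 8);		/* little-endian load of the low 8 bytes */
		memcpy(&hi, m + 8, 8);		/* little-endian load of the high 8 bytes */

		a[0] = lo & 0x3ffffff;
		a[1] = (lo >> 26) & 0x3ffffff;
		a[2] = ((lo >> 52) | (hi << 12)) & 0x3ffffff;
		a[3] = (hi >> 14) & 0x3ffffff;
		a[4] = (hi >> 40) | (1u << 24);	/* the "1" appended to every full block */
	}
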
+
+#
+# =======================================================================
+# The following functions implement poly1305 using 64 x 64 bit multiplication.
+#
+SYM_FUNC_START_LOCAL(Poly1305_init_64)
+ # mask 0x0FFFFFFC0FFFFFFC
+ # mask 0x0FFFFFFC0FFFFFFF
+ addis 10, 2, rmask@toc@ha
+ addi 10, 10, rmask@toc@l
+ ld 11, 0(10)
+ ld 12, 8(10)
+
+ # initialize
+ # load key from r3
+ ld 9, 24(3)
+ ld 10, 32(3)
+ and. 9, 9, 11 # clamp mask r0
+ and. 10, 10, 12 # clamp mask r1
+
+ srdi 21, 10, 2
+ add 19, 21, 10 # s1: r19 = r1 + (r1 >> 2) = (r1 >> 2) * 5
+
+ # setup r and s
+ li 25, 0
+ mtvsrdd 32+0, 9, 19 # r0, s1
+ mtvsrdd 32+1, 10, 9 # r1, r0
+ mtvsrdd 32+2, 19, 25 # s1
+ mtvsrdd 32+3, 9, 25 # r0
+
+ blr
+SYM_FUNC_END(Poly1305_init_64)
+
+# Poly1305_mult
+# v6 = (h0, h1), v8 = h2
+# v0 = (r0, s1), v1 = (r1, r0), v2 = s1, v3 = r0
+#
+# Output: v7, v10, v11
+#
+SYM_FUNC_START_LOCAL(Poly1305_mult)
+ #
+ # d0 = h0 * r0 + h1 * s1
+ vmsumudm 7, 6, 0, 9 # h0 * r0, h1 * s1
+
+ # d1 = h0 * r1 + h1 * r0 + h2 * s1
+ vmsumudm 11, 6, 1, 9 # h0 * r1, h1 * r0
+ vmsumudm 10, 8, 2, 11 # d1 += h2 * s1
+
+ # d2 = h2 * r0
+ vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
+ blr
+SYM_FUNC_END(Poly1305_mult)
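
The vmsumudm sequence above computes the same double-word products as this C sketch (using the compiler's unsigned __int128 extension; names are illustrative):

	#include <stdint.h>

	typedef unsigned __int128 u128;

	/* one multiply step on an (h0, h1, h2) accumulator, h2 holding the bits above 2^128 */
	static void poly1305_mult_64(uint64_t h0, uint64_t h1, uint64_t h2,
				     uint64_t r0, uint64_t r1,
				     u128 *d0, u128 *d1, uint64_t *d2)
	{
		uint64_t s1 = r1 + (r1 >> 2);	/* 5 * (r1 >> 2); works because r1's low 2 bits are clamped to 0 */

		*d0 = (u128)h0 * r0 + (u128)h1 * s1;
		*d1 = (u128)h0 * r1 + (u128)h1 * r0 + (u128)h2 * s1;
		*d2 = h2 * r0;			/* h2 is only a few bits wide, so this fits in 64 bits */
	}
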
+
+#
+# carry reduction
+# h %= p
+#
+# Input: v7, v10, v11
+# Output: r27, r28, r29
+#
+SYM_FUNC_START_LOCAL(Carry_reduction)
+ mfvsrld 27, 32+7
+ mfvsrld 28, 32+10
+ mfvsrld 29, 32+11
+ mfvsrd 20, 32+7 # h0.h
+ mfvsrd 21, 32+10 # h1.h
+
+ addc 28, 28, 20
+ adde 29, 29, 21
+ srdi 22, 29, 0x2
+ sldi 23, 22, 0x2
+ add 23, 23, 22 # (h2 & 3) * 5
+ addc 27, 27, 23 # h0
+ addze 28, 28 # h1
+ andi. 29, 29, 0x3 # h2
+ blr
+SYM_FUNC_END(Carry_reduction)
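
A scalar sketch of the same carry fold, continuing from the d0/d1/d2 values produced by the multiply step (again using unsigned __int128; illustrative names):

	#include <stdint.h>

	typedef unsigned __int128 u128;

	/* fold 128-bit partial sums back into (h0, h1, h2), with h2 reduced to its low 2 bits */
	static void poly1305_reduce_64(u128 d0, u128 d1, uint64_t d2,
				       uint64_t *h0, uint64_t *h1, uint64_t *h2)
	{
		uint64_t five;

		*h0 = (uint64_t)d0;
		d1 += (uint64_t)(d0 >> 64);		/* carry from d0 into d1 */
		*h1 = (uint64_t)d1;
		*h2 = d2 + (uint64_t)(d1 >> 64);	/* carry from d1 into d2 */

		five = (*h2 >> 2) * 5;			/* bits at 2^130 and above wrap around as *5 */
		*h2 &= 3;
		*h0 += five;
		if (*h0 < five) {			/* propagate the (rare) carry upward */
			(*h1)++;
			if (*h1 == 0)
				(*h2)++;
		}
	}
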
+
+#
+# poly1305 multiplication
+# h *= r, h %= p
+# d0 = h0 * r0 + h1 * s1
+# d1 = h0 * r1 + h1 * r0 + h2 * s1
+# d2 = h2 * r0
+#
+#
+# unsigned int poly1305_64s(unsigned char *state, const unsigned char *src, size_t len, int highbit)
+# - no highbit if final leftover block (highbit = 0)
+#
+SYM_FUNC_START(poly1305_64s)
+ cmpdi 5, 0
+ ble Out_no_poly1305_64
+
+ mflr 0
+ std 0, 16(1)
+ stdu 1,-400(1)
+
+ SAVE_GPR 14, 112, 1
+ SAVE_GPR 15, 120, 1
+ SAVE_GPR 16, 128, 1
+ SAVE_GPR 17, 136, 1
+ SAVE_GPR 18, 144, 1
+ SAVE_GPR 19, 152, 1
+ SAVE_GPR 20, 160, 1
+ SAVE_GPR 21, 168, 1
+ SAVE_GPR 22, 176, 1
+ SAVE_GPR 23, 184, 1
+ SAVE_GPR 24, 192, 1
+ SAVE_GPR 25, 200, 1
+ SAVE_GPR 26, 208, 1
+ SAVE_GPR 27, 216, 1
+ SAVE_GPR 28, 224, 1
+ SAVE_GPR 29, 232, 1
+ SAVE_GPR 30, 240, 1
+ SAVE_GPR 31, 248, 1
+
+ # Init poly1305
+ bl Poly1305_init_64
+
+ li 25, 0 # offset to inp and outp
+
+ add 11, 25, 4
+
+ # load h
+ # h0, h1, h2?
+ ld 27, 0(3)
+ ld 28, 8(3)
+ lwz 29, 16(3)
+
+ li 30, 16
+ divdu 31, 5, 30
+
+ mtctr 31
+
+ mr 24, 6 # highbit
+
+Loop_block_64:
+ vxor 9, 9, 9
+
+ ld 20, 0(11)
+ ld 21, 8(11)
+ addi 11, 11, 16
+
+ addc 27, 27, 20
+ adde 28, 28, 21
+ adde 29, 29, 24
+
+ li 22, 0
+ mtvsrdd 32+6, 27, 28 # h0, h1
+ mtvsrdd 32+8, 29, 22 # h2
+
+ bl Poly1305_mult
+
+ bl Carry_reduction
+
+ bdnz Loop_block_64
+
+ std 27, 0(3)
+ std 28, 8(3)
+ stw 29, 16(3)
+
+ li 3, 0
+
+ RESTORE_GPR 14, 112, 1
+ RESTORE_GPR 15, 120, 1
+ RESTORE_GPR 16, 128, 1
+ RESTORE_GPR 17, 136, 1
+ RESTORE_GPR 18, 144, 1
+ RESTORE_GPR 19, 152, 1
+ RESTORE_GPR 20, 160, 1
+ RESTORE_GPR 21, 168, 1
+ RESTORE_GPR 22, 176, 1
+ RESTORE_GPR 23, 184, 1
+ RESTORE_GPR 24, 192, 1
+ RESTORE_GPR 25, 200, 1
+ RESTORE_GPR 26, 208, 1
+ RESTORE_GPR 27, 216, 1
+ RESTORE_GPR 28, 224, 1
+ RESTORE_GPR 29, 232, 1
+ RESTORE_GPR 30, 240, 1
+ RESTORE_GPR 31, 248, 1
+
+ addi 1, 1, 400
+ ld 0, 16(1)
+ mtlr 0
+
+ blr
+
+Out_no_poly1305_64:
+ li 3, 0
+ blr
+SYM_FUNC_END(poly1305_64s)
+
+#
+# Input: r3 = h, r4 = s, r5 = mac
+# mac = h + s
+#
+SYM_FUNC_START(poly1305_emit_64)
+ ld 10, 0(3)
+ ld 11, 8(3)
+ ld 12, 16(3)
+
+ # compare modulus
+ # h + 5 + (-p)
+ mr 6, 10
+ mr 7, 11
+ mr 8, 12
+ addic. 6, 6, 5
+ addze 7, 7
+ addze 8, 8
+ srdi 9, 8, 2 # overflow?
+ cmpdi 9, 0
+ beq Skip_h64
+ mr 10, 6
+ mr 11, 7
+ mr 12, 8
+
+Skip_h64:
+ ld 6, 0(4)
+ ld 7, 8(4)
+ addc 10, 10, 6
+ adde 11, 11, 7
+ addze 12, 12
+
+ std 10, 0(5)
+ std 11, 8(5)
+ blr
+SYM_FUNC_END(poly1305_emit_64)
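
poly1305_emit_64 performs the standard final step: take h or h + 5 - 2^130 depending on whether h >= p, then add s modulo 2^128. A C sketch of the same logic (little-endian host assumed, as in the assembly; names illustrative):

	#include <stdint.h>
	#include <string.h>

	typedef unsigned __int128 u128;

	/* mac = (h mod p) + s mod 2^128, for h = h0 + h1*2^64 + h2*2^128 already carry-reduced */
	static void poly1305_emit_sketch(const uint64_t h[3], const uint64_t s[2], uint8_t mac[16])
	{
		uint64_t h0 = h[0], h1 = h[1], h2 = h[2];
		uint64_t g0, g1, g2, m0, m1;
		u128 t;

		/* g = h + 5; if that carries past 2^130, then h >= p and g holds the reduced value */
		t = (u128)h0 + 5;			g0 = (uint64_t)t;
		t = (u128)h1 + (uint64_t)(t >> 64);	g1 = (uint64_t)t;
		g2 = h2 + (uint64_t)(t >> 64);
		if (g2 >> 2) {
			h0 = g0;
			h1 = g1;
		}

		/* tag = low 128 bits of h + s */
		t = (u128)h0 + s[0];				m0 = (uint64_t)t;
		t = (u128)h1 + s[1] + (uint64_t)(t >> 64);	m1 = (uint64_t)t;

		memcpy(mac, &m0, 8);
		memcpy(mac + 8, &m1, 8);
	}
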
+
+SYM_DATA_START_LOCAL(RMASK)
+.align 5
+rmask:
+.byte 0xff, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f
+cnum:
+.long 0x03ffffff, 0x00000000, 0x03ffffff, 0x00000000
+.long 0x1a, 0x00, 0x1a, 0x00
+.long 0x01000000, 0x01000000, 0x01000000, 0x01000000
+.long 0x00010203, 0x04050607, 0x10111213, 0x14151617
+.long 0x08090a0b, 0x0c0d0e0f, 0x18191a1b, 0x1c1d1e1f
+SYM_DATA_END(RMASK)
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 7bf1fe7297c6..5f12b9382909 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -462,11 +462,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
pgprot_val(pgprot));
}
-static inline unsigned long pte_pfn(pte_t pte)
-{
- return pte_val(pte) >> PTE_RPN_SHIFT;
-}
-
/* Generic modifiers for PTE bits */
static inline pte_t pte_wrprotect(pte_t pte)
{
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index d4a19e6547ac..6e70ae511631 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -138,7 +138,16 @@ static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
}
#define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS)
+
+/*
+ * pud comparison that will work with both pte and page table pointer.
+ */
+static inline int hash__pud_same(pud_t pud_a, pud_t pud_b)
+{
+ return (((pud_raw(pud_a) ^ pud_raw(pud_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
+}
#define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS)
+
static inline int hash__p4d_bad(p4d_t p4d)
{
return (p4d_val(p4d) == 0);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 4acc9690f599..8269b231c533 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -104,6 +104,7 @@
* and every thing below PAGE_SHIFT;
*/
#define PTE_RPN_MASK (((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
+#define PTE_RPN_SHIFT PAGE_SHIFT
/*
* set of bits not changed in pmd_modify. Even though we have hash specific bits
* in here, on radix we expect them to be zero.
@@ -569,11 +570,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
}
-static inline unsigned long pte_pfn(pte_t pte)
-{
- return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
-}
-
/* Generic modifiers for PTE bits */
static inline pte_t pte_wrprotect(pte_t pte)
{
@@ -921,8 +917,29 @@ static inline pud_t pte_pud(pte_t pte)
{
return __pud_raw(pte_raw(pte));
}
+
+static inline pte_t *pudp_ptep(pud_t *pud)
+{
+ return (pte_t *)pud;
+}
+
+#define pud_pfn(pud) pte_pfn(pud_pte(pud))
+#define pud_dirty(pud) pte_dirty(pud_pte(pud))
+#define pud_young(pud) pte_young(pud_pte(pud))
+#define pud_mkold(pud) pte_pud(pte_mkold(pud_pte(pud)))
+#define pud_wrprotect(pud) pte_pud(pte_wrprotect(pud_pte(pud)))
+#define pud_mkdirty(pud) pte_pud(pte_mkdirty(pud_pte(pud)))
+#define pud_mkclean(pud) pte_pud(pte_mkclean(pud_pte(pud)))
+#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
+#define pud_mkwrite(pud) pte_pud(pte_mkwrite(pud_pte(pud)))
#define pud_write(pud) pte_write(pud_pte(pud))
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#define pud_soft_dirty(pmd) pte_soft_dirty(pud_pte(pud))
+#define pud_mksoft_dirty(pmd) pte_pud(pte_mksoft_dirty(pud_pte(pud)))
+#define pud_clear_soft_dirty(pmd) pte_pud(pte_clear_soft_dirty(pud_pte(pud)))
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
static inline int pud_bad(pud_t pud)
{
if (radix_enabled())
@@ -1115,15 +1132,24 @@ static inline bool pmd_access_permitted(pmd_t pmd, bool write)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
+extern void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud);
+
static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd)
{
}
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud)
+{
+}
+
extern int hash__has_transparent_hugepage(void);
static inline int has_transparent_hugepage(void)
{
@@ -1133,6 +1159,14 @@ static inline int has_transparent_hugepage(void)
}
#define has_transparent_hugepage has_transparent_hugepage
+static inline int has_transparent_pud_hugepage(void)
+{
+ if (radix_enabled())
+ return radix__has_transparent_pud_hugepage();
+ return 0;
+}
+#define has_transparent_pud_hugepage has_transparent_pud_hugepage
+
static inline unsigned long
pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
unsigned long clr, unsigned long set)
@@ -1142,6 +1176,16 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
}
+static inline unsigned long
+pud_hugepage_update(struct mm_struct *mm, unsigned long addr, pud_t *pudp,
+ unsigned long clr, unsigned long set)
+{
+ if (radix_enabled())
+ return radix__pud_hugepage_update(mm, addr, pudp, clr, set);
+ BUG();
+ return pud_val(*pudp);
+}
+
/*
* returns true for pmd migration entries, THP, devmap, hugetlb
* But compile time dependent on THP config
@@ -1151,6 +1195,11 @@ static inline int pmd_large(pmd_t pmd)
return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
}
+static inline int pud_large(pud_t pud)
+{
+ return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
+}
+
/*
* For radix we should always find H_PAGE_HASHPTE zero. Hence
* the below will work for radix too
@@ -1166,6 +1215,17 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
return ((old & _PAGE_ACCESSED) != 0);
}
+static inline int __pudp_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ unsigned long old;
+
+ if ((pud_raw(*pudp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
+ return 0;
+ old = pud_hugepage_update(mm, addr, pudp, _PAGE_ACCESSED, 0);
+ return ((old & _PAGE_ACCESSED) != 0);
+}
+
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
@@ -1174,6 +1234,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
}
+#define __HAVE_ARCH_PUDP_SET_WRPROTECT
+static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp)
+{
+ if (pud_write(*pudp))
+ pud_hugepage_update(mm, addr, pudp, _PAGE_WRITE, 0);
+}
+
/*
* Only returns true for a THP. False for pmd migration entry.
* We also need to return true when we come across a pte that
@@ -1195,6 +1263,17 @@ static inline int pmd_trans_huge(pmd_t pmd)
return hash__pmd_trans_huge(pmd);
}
+static inline int pud_trans_huge(pud_t pud)
+{
+ if (!pud_present(pud))
+ return false;
+
+ if (radix_enabled())
+ return radix__pud_trans_huge(pud);
+ return 0;
+}
+
+
#define __HAVE_ARCH_PMD_SAME
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
@@ -1203,6 +1282,15 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
return hash__pmd_same(pmd_a, pmd_b);
}
+#define pud_same pud_same
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+ if (radix_enabled())
+ return radix__pud_same(pud_a, pud_b);
+ return hash__pud_same(pud_a, pud_b);
+}
+
+
static inline pmd_t __pmd_mkhuge(pmd_t pmd)
{
if (radix_enabled())
@@ -1210,6 +1298,14 @@ static inline pmd_t __pmd_mkhuge(pmd_t pmd)
return hash__pmd_mkhuge(pmd);
}
+static inline pud_t __pud_mkhuge(pud_t pud)
+{
+ if (radix_enabled())
+ return radix__pud_mkhuge(pud);
+ BUG();
+ return pud;
+}
+
/*
* pfn_pmd return a pmd_t that can be used as pmd pte entry.
*/
@@ -1225,14 +1321,34 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
return pmd;
}
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+#ifdef CONFIG_DEBUG_VM
+ if (radix_enabled())
+ WARN_ON((pud_raw(pud) & cpu_to_be64(_PAGE_PTE)) == 0);
+ else
+ WARN_ON(1);
+#endif
+ return pud;
+}
+
+
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty);
+#define __HAVE_ARCH_PUDP_SET_ACCESS_FLAGS
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty);
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
+#define __HAVE_ARCH_PUDP_TEST_AND_CLEAR_YOUNG
+extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp);
+
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
@@ -1243,6 +1359,16 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
}
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ if (radix_enabled())
+ return radix__pudp_huge_get_and_clear(mm, addr, pudp);
+ BUG();
+ return *pudp;
+}
+
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
@@ -1257,6 +1383,11 @@ pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmdp, int full);
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
+pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
+ unsigned long addr,
+ pud_t *pudp, int full);
+
#define __HAVE_ARCH_PGTABLE_DEPOSIT
static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
pmd_t *pmdp, pgtable_t pgtable)
@@ -1305,6 +1436,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
return hash__pmd_mkdevmap(pmd);
}
+static inline pud_t pud_mkdevmap(pud_t pud)
+{
+ if (radix_enabled())
+ return radix__pud_mkdevmap(pud);
+ BUG();
+ return pud;
+}
+
static inline int pmd_devmap(pmd_t pmd)
{
return pte_devmap(pmd_pte(pmd));
@@ -1312,7 +1451,7 @@ static inline int pmd_devmap(pmd_t pmd)
static inline int pud_devmap(pud_t pud)
{
- return 0;
+ return pte_devmap(pud_pte(pud));
}
static inline int pgd_devmap(pgd_t pgd)
@@ -1321,16 +1460,6 @@ static inline int pgd_devmap(pgd_t pgd)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline int pud_pfn(pud_t pud)
-{
- /*
- * Currently all calls to pud_pfn() are gated around a pud_devmap()
- * check so this should never be used. If it grows another user we
- * want to know about it.
- */
- BUILD_BUG();
- return 0;
-}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 686001eda936..357e23a403d3 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -250,6 +250,10 @@ static inline int radix__pud_bad(pud_t pud)
return !!(pud_val(pud) & RADIX_PUD_BAD_BITS);
}
+static inline int radix__pud_same(pud_t pud_a, pud_t pud_b)
+{
+ return ((pud_raw(pud_a) ^ pud_raw(pud_b)) == 0);
+}
static inline int radix__p4d_bad(p4d_t p4d)
{
@@ -268,9 +272,22 @@ static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
return __pmd(pmd_val(pmd) | _PAGE_PTE);
}
+static inline int radix__pud_trans_huge(pud_t pud)
+{
+ return (pud_val(pud) & (_PAGE_PTE | _PAGE_DEVMAP)) == _PAGE_PTE;
+}
+
+static inline pud_t radix__pud_mkhuge(pud_t pud)
+{
+ return __pud(pud_val(pud) | _PAGE_PTE);
+}
+
extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, unsigned long clr,
unsigned long set);
+extern unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, unsigned long clr,
+ unsigned long set);
extern pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
@@ -278,6 +295,9 @@ extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp);
+pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp);
+
static inline int radix__has_transparent_hugepage(void)
{
/* For radix 2M at PMD level means thp */
@@ -285,6 +305,14 @@ static inline int radix__has_transparent_hugepage(void)
return 1;
return 0;
}
+
+static inline int radix__has_transparent_pud_hugepage(void)
+{
+ /* For radix 1G at PUD level means pud hugepage support */
+ if (mmu_psize_defs[MMU_PAGE_1G].shift == PUD_SHIFT)
+ return 1;
+ return 0;
+}
#endif
static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd)
@@ -292,9 +320,20 @@ static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd)
return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP));
}
+static inline pud_t radix__pud_mkdevmap(pud_t pud)
+{
+ return __pud(pud_val(pud) | (_PAGE_PTE | _PAGE_DEVMAP));
+}
+
+struct vmem_altmap;
+struct dev_pagemap;
extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys);
+int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap);
+void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap);
extern void radix__vmemmap_remove_mapping(unsigned long start,
unsigned long page_size);
@@ -325,5 +364,15 @@ int radix__remove_section_mapping(unsigned long start, unsigned long end);
void radix__kernel_map_pages(struct page *page, int numpages, int enable);
+#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
+#define vmemmap_can_optimize vmemmap_can_optimize
+bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap);
+#endif
+
+#define vmemmap_populate_compound_pages vmemmap_populate_compound_pages
+int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
+ unsigned long start,
+ unsigned long end, int node,
+ struct dev_pagemap *pgmap);
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 77797a2a82eb..a38542259fab 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -68,6 +68,8 @@ void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long end, int psize);
extern void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
+extern void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 0d0c1447ecf0..1950c1b825b4 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -5,6 +5,7 @@
#define MMU_NO_CONTEXT ~0UL
#include <linux/mm_types.h>
+#include <linux/mmu_notifier.h>
#include <asm/book3s/64/tlbflush-hash.h>
#include <asm/book3s/64/tlbflush-radix.h>
@@ -50,6 +51,14 @@ static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
radix__flush_pmd_tlb_range(vma, start, end);
}
+#define __HAVE_ARCH_FLUSH_PUD_TLB_RANGE
+static inline void flush_pud_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (radix_enabled())
+ radix__flush_pud_tlb_range(vma, start, end);
+}
+
#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
unsigned long start,
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index d18b748ea3ae..3b7bd36a2321 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -9,13 +9,6 @@
#endif
#ifndef __ASSEMBLY__
-/* Insert a PTE, top-level function is out of line. It uses an inline
- * low level function in the respective pgtable-* files
- */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
- pte_t pte);
-
-
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep, pte_t entry, int dirty);
@@ -36,7 +29,9 @@ void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t
* corresponding HPTE into the hash table ahead of time, instead of
* waiting for the inevitable extra hash-table miss exception.
*/
-static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+ struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep, unsigned int nr)
{
if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE))
return;
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index ef42adb44aa3..00c6b0b4ede4 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -4,14 +4,13 @@
#ifdef __KERNEL__
#include <asm/asm-compat.h>
-#include <asm/extable.h>
#ifdef CONFIG_BUG
#ifdef __ASSEMBLY__
#include <asm/asm-offsets.h>
#ifdef CONFIG_DEBUG_BUGVERBOSE
-.macro __EMIT_BUG_ENTRY addr,file,line,flags
+.macro EMIT_BUG_ENTRY addr,file,line,flags
.section __bug_table,"aw"
5001: .4byte \addr - .
.4byte 5002f - .
@@ -23,7 +22,7 @@
.previous
.endm
#else
-.macro __EMIT_BUG_ENTRY addr,file,line,flags
+.macro EMIT_BUG_ENTRY addr,file,line,flags
.section __bug_table,"aw"
5001: .4byte \addr - .
.short \flags
@@ -32,18 +31,6 @@
.endm
#endif /* verbose */
-.macro EMIT_WARN_ENTRY addr,file,line,flags
- EX_TABLE(\addr,\addr+4)
- __EMIT_BUG_ENTRY \addr,\file,\line,\flags
-.endm
-
-.macro EMIT_BUG_ENTRY addr,file,line,flags
- .if \flags & 1 /* BUGFLAG_WARNING */
- .err /* Use EMIT_WARN_ENTRY for warnings */
- .endif
- __EMIT_BUG_ENTRY \addr,\file,\line,\flags
-.endm
-
#else /* !__ASSEMBLY__ */
/* _EMIT_BUG_ENTRY expects args %0,%1,%2,%3 to be FILE, LINE, flags and
sizeof(struct bug_entry), respectively */
@@ -73,16 +60,6 @@
"i" (sizeof(struct bug_entry)), \
##__VA_ARGS__)
-#define WARN_ENTRY(insn, flags, label, ...) \
- asm_volatile_goto( \
- "1: " insn "\n" \
- EX_TABLE(1b, %l[label]) \
- _EMIT_BUG_ENTRY \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (flags), \
- "i" (sizeof(struct bug_entry)), \
- ##__VA_ARGS__ : : label)
-
/*
* BUG_ON() and WARN_ON() do their best to cooperate with compile-time
* optimisations. However depending on the complexity of the condition
@@ -95,16 +72,7 @@
} while (0)
#define HAVE_ARCH_BUG
-#define __WARN_FLAGS(flags) do { \
- __label__ __label_warn_on; \
- \
- WARN_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags), __label_warn_on); \
- barrier_before_unreachable(); \
- __builtin_unreachable(); \
- \
-__label_warn_on: \
- break; \
-} while (0)
+#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags))
#ifdef CONFIG_PPC64
#define BUG_ON(x) do { \
@@ -117,25 +85,15 @@ __label_warn_on: \
} while (0)
#define WARN_ON(x) ({ \
- bool __ret_warn_on = false; \
- do { \
- if (__builtin_constant_p((x))) { \
- if (!(x)) \
- break; \
+ int __ret_warn_on = !!(x); \
+ if (__builtin_constant_p(__ret_warn_on)) { \
+ if (__ret_warn_on) \
__WARN(); \
- __ret_warn_on = true; \
- } else { \
- __label__ __label_warn_on; \
- \
- WARN_ENTRY(PPC_TLNEI " %4, 0", \
- BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \
- __label_warn_on, \
- "r" ((__force long)(x))); \
- break; \
-__label_warn_on: \
- __ret_warn_on = true; \
- } \
- } while (0); \
+ } else { \
+ BUG_ENTRY(PPC_TLNEI " %4, 0", \
+ BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \
+ "r" (__ret_warn_on)); \
+ } \
unlikely(__ret_warn_on); \
})
@@ -148,14 +106,13 @@ __label_warn_on: \
#ifdef __ASSEMBLY__
.macro EMIT_BUG_ENTRY addr,file,line,flags
.endm
-.macro EMIT_WARN_ENTRY addr,file,line,flags
-.endm
#else /* !__ASSEMBLY__ */
#define _EMIT_BUG_ENTRY
-#define _EMIT_WARN_ENTRY
#endif
#endif /* CONFIG_BUG */
+#define EMIT_WARN_ENTRY EMIT_BUG_ENTRY
+
#include <asm-generic/bug.h>
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 7564dd4fd12b..ef7d2de33b89 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -35,13 +35,19 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
* It just marks the page as not i-cache clean. We do the i-cache
* flush later when the page is given to a user process, if necessary.
*/
-static inline void flush_dcache_page(struct page *page)
+static inline void flush_dcache_folio(struct folio *folio)
{
if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
return;
/* avoid an atomic op if possible */
- if (test_bit(PG_dcache_clean, &page->flags))
- clear_bit(PG_dcache_clean, &page->flags);
+ if (test_bit(PG_dcache_clean, &folio->flags))
+ clear_bit(PG_dcache_clean, &folio->flags);
+}
+#define flush_dcache_folio flush_dcache_folio
+
+static inline void flush_dcache_page(struct page *page)
+{
+ flush_dcache_folio(page_folio(page));
}
void flush_icache_range(unsigned long start, unsigned long stop);
@@ -51,7 +57,7 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len);
#define flush_icache_user_page flush_icache_user_page
-void flush_dcache_icache_page(struct page *page);
+void flush_dcache_icache_folio(struct folio *folio);
/**
* flush_dcache_range(): Write any modified data cache blocks out to memory and
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index a26ca097d032..79f1c480b5eb 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -12,14 +12,8 @@
/*
* This is used to ensure we don't load something for the wrong architecture.
- * 64le only supports ELFv2 64-bit binaries (64be supports v1 and v2).
*/
-#if defined(CONFIG_PPC64) && defined(CONFIG_CPU_LITTLE_ENDIAN)
-#define elf_check_arch(x) (((x)->e_machine == ELF_ARCH) && \
- (((x)->e_flags & 0x3) == 0x2))
-#else
#define elf_check_arch(x) ((x)->e_machine == ELF_ARCH)
-#endif
#define compat_elf_check_arch(x) ((x)->e_machine == EM_PPC)
#define CORE_DUMP_USE_REGSET
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index f1e657c9bbe8..0732b743e099 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -3,11 +3,6 @@
#define _ASM_POWERPC_IO_H
#ifdef __KERNEL__
-#define ARCH_HAS_IOREMAP_WC
-#ifdef CONFIG_PPC32
-#define ARCH_HAS_IOREMAP_WT
-#endif
-
/*
*/
@@ -732,9 +727,7 @@ static inline void name at \
#define writel_relaxed(v, addr) writel(v, addr)
#define writeq_relaxed(v, addr) writeq(v, addr)
-#ifdef CONFIG_GENERIC_IOMAP
-#include <asm-generic/iomap.h>
-#else
+#ifndef CONFIG_GENERIC_IOMAP
/*
* Here comes the implementation of the IOMAP interfaces.
*/
@@ -896,8 +889,8 @@ static inline void iosync(void)
*
*/
extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
-extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
- unsigned long flags);
+#define ioremap ioremap
+#define ioremap_prot ioremap_prot
extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
#define ioremap_wc ioremap_wc
@@ -911,14 +904,12 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
#define ioremap_cache(addr, size) \
ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL))
-extern void iounmap(volatile void __iomem *addr);
+#define iounmap iounmap
void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size);
int early_ioremap_range(unsigned long ea, phys_addr_t pa,
unsigned long size, pgprot_t prot);
-void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
- pgprot_t prot, void *caller);
extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
pgprot_t prot, void *caller);
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index f257cacb49a9..ba1a5974e714 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -55,7 +55,7 @@ int irq_choose_cpu(const struct cpumask *mask);
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
- bool exclude_self);
+ int exclude_cpu);
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
#endif
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index d16d80ad2ae4..b4da8514af43 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -894,7 +894,7 @@ void kvmppc_init_lpid(unsigned long nr_lpids);
static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn)
{
- struct page *page;
+ struct folio *folio;
/*
* We can only access pages that the kernel maps
* as memory. Bail out for unmapped ones.
@@ -903,10 +903,10 @@ static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn)
return;
/* Clear i-cache for new pages */
- page = pfn_to_page(pfn);
- if (!test_bit(PG_dcache_clean, &page->flags)) {
- flush_dcache_icache_page(page);
- set_bit(PG_dcache_clean, &page->flags);
+ folio = page_folio(pfn_to_page(pfn));
+ if (!test_bit(PG_dcache_clean, &folio->flags)) {
+ flush_dcache_icache_folio(folio);
+ set_bit(PG_dcache_clean, &folio->flags);
}
}
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index a6caaaab6f92..56ea48276356 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -101,8 +101,6 @@ static inline bool pte_access_permitted(pte_t pte, bool write)
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
pgprot_val(pgprot)); }
-static inline unsigned long pte_pfn(pte_t pte) {
- return pte_val(pte) >> PTE_RPN_SHIFT; }
/* Generic modifiers for PTE bits */
static inline pte_t pte_exprotect(pte_t pte)
@@ -166,12 +164,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return __pte(pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE);
}
-/* Insert a PTE, top-level function is out of line. It uses an inline
- * low level function in the respective pgtable-* files
- */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
- pte_t pte);
-
/* This low level function performs the actual PTE insertion
* Setting the PTE depends on the MMU type and other factors. It's
* an horrible mess that I'm not going to try to clean up now but
@@ -282,10 +274,12 @@ static inline int pud_huge(pud_t pud)
* for the page which has just been mapped in.
*/
#if defined(CONFIG_PPC_E500) && defined(CONFIG_HUGETLB_PAGE)
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep, unsigned int nr);
#else
-static inline
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {}
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+ struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep, unsigned int nr) {}
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 3360cad78ace..3a971e2a8c73 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -45,6 +45,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
pte_fragment_free((unsigned long *)ptepage, 0);
}
+/* arch use pte_free_defer() implementation in arch/powerpc/mm/pgtable-frag.c */
+#define pte_free_defer pte_free_defer
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
/*
* Functions that deal with pagetables that could be at any level of
* the table need to be passed an "index_size" so they know how to
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 6a88bfdaa69b..b2e9bc4a52c1 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -41,6 +41,12 @@ struct mm_struct;
#ifndef __ASSEMBLY__
+void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t pte, unsigned int nr);
+#define set_ptes set_ptes
+#define update_mmu_cache(vma, addr, ptep) \
+ update_mmu_cache_range(NULL, vma, addr, ptep, 1)
+
#ifndef MAX_PTRS_PER_PGD
#define MAX_PTRS_PER_PGD PTRS_PER_PGD
#endif
@@ -48,6 +54,12 @@ struct mm_struct;
/* Keep these as a macros to avoid include dependency mess */
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+ return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
+}
+
/*
* Select all bits except the pfn
*/
@@ -158,13 +170,30 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
}
#ifdef CONFIG_PPC64
-#define is_ioremap_addr is_ioremap_addr
-static inline bool is_ioremap_addr(const void *x)
+int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size);
+bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+ unsigned long page_size);
+/*
+ * mm/memory_hotplug.c:mhp_supports_memmap_on_memory goes into the details
+ * of some of the restrictions. We don't check for PMD_SIZE because our
+ * vmemmap allocation code can fall back correctly. The pageblock
+ * alignment requirement is met using altmap->reserve blocks.
+ */
+#define arch_supports_memmap_on_memory arch_supports_memmap_on_memory
+static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
{
- unsigned long addr = (unsigned long)x;
-
- return addr >= IOREMAP_BASE && addr < IOREMAP_END;
+ if (!radix_enabled())
+ return false;
+ /*
+ * With a 4K page size and a 2M PMD_SIZE, memory block sizes of
+ * 128MB and up are naturally PMD aligned, so require the vmemmap
+ * size to be aligned to PMD_SIZE.
+ */
+ if (IS_ENABLED(CONFIG_PPC_4K_PAGES))
+ return IS_ALIGNED(vmemmap_size, PMD_SIZE);
+ return true;
}
+
#endif /* CONFIG_PPC64 */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 8a6754ffdc7e..a6c7069bec5d 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -393,7 +393,6 @@ int validate_sp_size(unsigned long sp, struct task_struct *p,
*/
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
static inline void prefetch(const void *x)
{
@@ -411,8 +410,6 @@ static inline void prefetchw(const void *x)
__asm__ __volatile__ ("dcbtst 0,%0" : : "r" (x));
}
-#define spin_lock_prefetch(x) prefetchw(x)
-
/* asm stubs */
extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index bc5d39a835fe..bf5dde1a4114 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -183,13 +183,9 @@ static inline bool test_thread_local_flags(unsigned int flags)
#define clear_tsk_compat_task(tsk) do { } while (0)
#endif
-#ifdef CONFIG_PPC64
-#ifdef CONFIG_CPU_BIG_ENDIAN
+#if defined(CONFIG_PPC64)
#define is_elf2_task() (test_thread_flag(TIF_ELF2ABI))
#else
-#define is_elf2_task() (1)
-#endif
-#else
#define is_elf2_task() (0)
#endif
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index 46c31fb8748d..30a12d208687 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask)
return leading_zero_bits >> 3;
}
-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
{
unsigned long rhs = val | c->low_bits;
*data = rhs;
diff --git a/arch/powerpc/kernel/dma-mask.c b/arch/powerpc/kernel/dma-mask.c
index ffbbbc432612..5b07ca7b73aa 100644
--- a/arch/powerpc/kernel/dma-mask.c
+++ b/arch/powerpc/kernel/dma-mask.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index f132d8704263..6440b1bb332a 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -375,8 +375,7 @@ _GLOBAL(generic_secondary_smp_init)
beq 20f
/* start the specified thread */
- LOAD_REG_ADDR(r5, fsl_secondary_thread_init)
- ld r4, 0(r5)
+ LOAD_REG_ADDR(r5, DOTSYM(fsl_secondary_thread_init))
bl book3e_start_thread
/* stop the current thread */
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
index 957abd592075..b7029beed847 100644
--- a/arch/powerpc/kernel/ima_arch.c
+++ b/arch/powerpc/kernel/ima_arch.c
@@ -23,9 +23,9 @@ bool arch_ima_get_secureboot(void)
* is not enabled.
*/
static const char *const secure_rules[] = {
- "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+ "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig",
#ifndef CONFIG_MODULE_SIG
- "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+ "appraise func=MODULE_CHECK appraise_type=imasig|modsig",
#endif
NULL
};
@@ -49,9 +49,9 @@ static const char *const trusted_rules[] = {
static const char *const secure_and_trusted_rules[] = {
"measure func=KEXEC_KERNEL_CHECK template=ima-modsig",
"measure func=MODULE_CHECK template=ima-modsig",
- "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+ "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig",
#ifndef CONFIG_MODULE_SIG
- "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+ "appraise func=MODULE_CHECK appraise_type=imasig|modsig",
#endif
NULL
};
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 4caf5e3079eb..359577ec1680 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -709,9 +709,9 @@ static int __init rtas_flash_init(void)
if (!rtas_validate_flash_data.buf)
return -ENOMEM;
- flash_block_cache = kmem_cache_create("rtas_flash_cache",
- RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0,
- NULL);
+ flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache",
+ RTAS_BLK_SIZE, RTAS_BLK_SIZE,
+ 0, 0, RTAS_BLK_SIZE, NULL);
if (!flash_block_cache) {
printk(KERN_ERR "%s: failed to create block cache\n",
__func__);
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 5de8597eaab8..b15f15dcacb5 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -221,8 +221,8 @@ static void raise_backtrace_ipi(cpumask_t *mask)
}
}
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
{
- nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
+ nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace_ipi);
}
#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 8c0b08b7a80e..20e50586e8a2 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -538,3 +538,4 @@
449 common futex_waitv sys_futex_waitv
450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S
index ffb1db386849..1f7d86de1538 100644
--- a/arch/powerpc/kernel/trace/ftrace_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S
@@ -33,6 +33,9 @@
* and then arrange for the ftrace function to be called.
*/
.macro ftrace_regs_entry allregs
+ /* Create a minimal stack frame for representing B */
+ PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1)
+
/* Create our stack frame + pt_regs */
PPC_STLU r1,-SWITCH_FRAME_SIZE(r1)
@@ -42,7 +45,7 @@
#ifdef CONFIG_PPC64
/* Save the original return address in A's stack frame */
- std r0, LRSAVE+SWITCH_FRAME_SIZE(r1)
+ std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1)
/* Ok to continue? */
lbz r3, PACA_FTRACE_ENABLED(r13)
cmpdi r3, 0
@@ -77,6 +80,8 @@
mflr r7
/* Save it as pt_regs->nip */
PPC_STL r7, _NIP(r1)
+ /* Also save it in B's stackframe header for proper unwind */
+ PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1)
/* Save the read LR in pt_regs->link */
PPC_STL r0, _LINK(r1)
@@ -142,7 +147,7 @@
#endif
/* Pop our stack frame */
- addi r1, r1, SWITCH_FRAME_SIZE
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
#ifdef CONFIG_LIVEPATCH_64
/* Based on the cmpd above, if the NIP was altered handle livepatch */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index e59ec6d32d37..7ef147e2a20d 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1508,13 +1508,8 @@ static void do_program_check(struct pt_regs *regs)
if (!(regs->msr & MSR_PR) && /* not user-mode */
report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
- const struct exception_table_entry *entry;
-
- entry = search_exception_tables(bugaddr);
- if (entry) {
- regs_set_return_ip(regs, extable_fixup(entry) + regs->nip - bugaddr);
- return;
- }
+ regs_add_return_ip(regs, 4);
+ return;
}
if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && user_mode(regs)) {
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index edb2dd1f53eb..8c464a5d8246 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -245,7 +245,7 @@ static void watchdog_smp_panic(int cpu)
__cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
}
} else {
- trigger_allbutself_cpu_backtrace();
+ trigger_allbutcpu_cpu_backtrace(cpu);
cpumask_clear(&wd_smp_cpus_ipi);
}
@@ -416,7 +416,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
if (sysctl_hardlockup_all_cpu_backtrace)
- trigger_allbutself_cpu_backtrace();
+ trigger_allbutcpu_cpu_backtrace(cpu);
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 709ebd578394..e2d6f9327f77 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -410,6 +410,7 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm,
ret = H_STATE;
break;
}
+ vma_start_write(vma);
/* Copy vm_flags to avoid partial modifications in ksm_madvise */
vm_flags = vma->vm_flags;
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 51f48984abca..988948d69bc1 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -214,7 +214,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr
old = be64_to_cpu(old_be);
- trace_hugepage_update(addr, old, clr, set);
+ trace_hugepage_update_pmd(addr, old, clr, set);
if (old & H_PAGE_HASHPTE)
hpte_do_hugepage_flush(mm, addr, pmdp, old);
return old;
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index fedffe3ae136..ad2afa08e62e 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1307,18 +1307,19 @@ void hash__early_init_mmu_secondary(void)
*/
unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
{
- struct page *page;
+ struct folio *folio;
if (!pfn_valid(pte_pfn(pte)))
return pp;
- page = pte_page(pte);
+ folio = page_folio(pte_page(pte));
/* page is dirty */
- if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) {
+ if (!test_bit(PG_dcache_clean, &folio->flags) &&
+ !folio_test_reserved(folio)) {
if (trap == INTERRUPT_INST_STORAGE) {
- flush_dcache_icache_page(page);
- set_bit(PG_dcache_clean, &page->flags);
+ flush_dcache_icache_folio(folio);
+ set_bit(PG_dcache_clean, &folio->flags);
} else
pp |= HPTE_R_N;
}
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index c766e4c26e42..1715b07c630c 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -246,15 +246,15 @@ static void destroy_contexts(mm_context_t *ctx)
static void pmd_frag_destroy(void *pmd_frag)
{
int count;
- struct page *page;
+ struct ptdesc *ptdesc;
- page = virt_to_page(pmd_frag);
+ ptdesc = virt_to_ptdesc(pmd_frag);
/* drop all the pending references */
count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
- pgtable_pmd_page_dtor(page);
- __free_page(page);
+ if (atomic_sub_and_test(PMD_FRAG_NR - count, &ptdesc->pt_frag_refcount)) {
+ pagetable_pmd_dtor(ptdesc);
+ pagetable_free(ptdesc);
}
}
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 85c84e89e3ea..1498ccd08367 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -64,11 +64,39 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
return changed;
}
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp, pud_t entry, int dirty)
+{
+ int changed;
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!pud_devmap(*pudp));
+ assert_spin_locked(pud_lockptr(vma->vm_mm, pudp));
+#endif
+ changed = !pud_same(*(pudp), entry);
+ if (changed) {
+ /*
+ * We can use MMU_PAGE_1G here, because only the radix
+ * path looks at the psize.
+ */
+ __ptep_set_access_flags(vma, pudp_ptep(pudp),
+ pud_pte(entry), address, MMU_PAGE_1G);
+ }
+ return changed;
+}
+
+
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
}
+
+int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp)
+{
+ return __pudp_test_and_clear_young(vma->vm_mm, address, pudp);
+}
+
/*
* set a new huge pmd. We should not be called for updating
* an existing pmd entry. That should go via pmd_hugepage_update.
@@ -90,6 +118,23 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
+void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+#ifdef CONFIG_DEBUG_VM
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+
+ WARN_ON(pte_hw_valid(pud_pte(*pudp)));
+ assert_spin_locked(pud_lockptr(mm, pudp));
+ WARN_ON(!(pud_large(pud)));
+#endif
+ trace_hugepage_set_pud(addr, pud_val(pud));
+ return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud));
+}
+
static void do_serialize(void *arg)
{
/* We've taken the IPI, so try to trim the mask while here */
@@ -147,11 +192,35 @@ pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
return pmd;
}
+pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp, int full)
+{
+ pud_t pud;
+
+ VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
+ VM_BUG_ON((pud_present(*pudp) && !pud_devmap(*pudp)) ||
+ !pud_present(*pudp));
+ pud = pudp_huge_get_and_clear(vma->vm_mm, addr, pudp);
+ /*
+ * If it is not a fullmm flush, then we can possibly end up converting
+ * this PUD pte entry to a regular level 0 PTE by a parallel page fault.
+ * Make sure we flush the tlb in this case.
+ */
+ if (!full)
+ flush_pud_tlb_range(vma, addr, addr + HPAGE_PUD_SIZE);
+ return pud;
+}
+
static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
{
return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
}
+static pud_t pud_set_protbits(pud_t pud, pgprot_t pgprot)
+{
+ return __pud(pud_val(pud) | pgprot_val(pgprot));
+}
+
/*
* At some point we should be able to get rid of
* pmd_mkhuge() and mk_huge_pmd() when we update all the
@@ -166,6 +235,15 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
return __pmd_mkhuge(pmd_set_protbits(__pmd(pmdv), pgprot));
}
+pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot)
+{
+ unsigned long pudv;
+
+ pudv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
+
+ return __pud_mkhuge(pud_set_protbits(__pud(pudv), pgprot));
+}
+
pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
{
return pfn_pmd(page_to_pfn(page), pgprot);
@@ -306,22 +384,22 @@ static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
{
void *ret = NULL;
- struct page *page;
+ struct ptdesc *ptdesc;
gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
if (mm == &init_mm)
gfp &= ~__GFP_ACCOUNT;
- page = alloc_page(gfp);
- if (!page)
+ ptdesc = pagetable_alloc(gfp, 0);
+ if (!ptdesc)
return NULL;
- if (!pgtable_pmd_page_ctor(page)) {
- __free_pages(page, 0);
+ if (!pagetable_pmd_ctor(ptdesc)) {
+ pagetable_free(ptdesc);
return NULL;
}
- atomic_set(&page->pt_frag_refcount, 1);
+ atomic_set(&ptdesc->pt_frag_refcount, 1);
- ret = page_address(page);
+ ret = ptdesc_address(ptdesc);
/*
* if we support only one fragment just return the
* allocated page.
@@ -331,12 +409,12 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
spin_lock(&mm->page_table_lock);
/*
- * If we find pgtable_page set, we return
+ * If we find ptdesc_page set, we return
* the allocated page with single fragment
* count.
*/
if (likely(!mm->context.pmd_frag)) {
- atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
+ atomic_set(&ptdesc->pt_frag_refcount, PMD_FRAG_NR);
mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
@@ -357,15 +435,15 @@ pmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr)
void pmd_fragment_free(unsigned long *pmd)
{
- struct page *page = virt_to_page(pmd);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
- if (PageReserved(page))
- return free_reserved_page(page);
+ if (pagetable_is_reserved(ptdesc))
+ return free_reserved_ptdesc(ptdesc);
- BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
- if (atomic_dec_and_test(&page->pt_frag_refcount)) {
- pgtable_pmd_page_dtor(page);
- __free_page(page);
+ BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
+ pagetable_pmd_dtor(ptdesc);
+ pagetable_free(ptdesc);
}
}
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index 5e3195568525..17075c78d4bc 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -39,6 +39,7 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long st
radix__flush_tlb_pwc_range_psize(vma->vm_mm, start, end, psize);
else
radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}
void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index e7ea492ac510..96679018e7fb 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -601,17 +601,6 @@ void __init radix__early_init_mmu(void)
#else
mmu_virtual_psize = MMU_PAGE_4K;
#endif
-
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- /* vmemmap mapping */
- if (mmu_psize_defs[MMU_PAGE_2M].shift) {
- /*
- * map vmemmap using 2M if available
- */
- mmu_vmemmap_psize = MMU_PAGE_2M;
- } else
- mmu_vmemmap_psize = mmu_virtual_psize;
-#endif
#endif
/*
* initialize page table size
@@ -744,8 +733,58 @@ static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
p4d_clear(p4d);
}
-static void remove_pte_table(pte_t *pte_start, unsigned long addr,
- unsigned long end, bool direct)
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
+{
+ unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);
+
+ return !vmemmap_populated(start, PMD_SIZE);
+}
+
+static bool __meminit vmemmap_page_is_unused(unsigned long addr, unsigned long end)
+{
+ unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+
+ return !vmemmap_populated(start, PAGE_SIZE);
+
+}
+#endif
+
+static void __meminit free_vmemmap_pages(struct page *page,
+ struct vmem_altmap *altmap,
+ int order)
+{
+ unsigned int nr_pages = 1 << order;
+
+ if (altmap) {
+ unsigned long alt_start, alt_end;
+ unsigned long base_pfn = page_to_pfn(page);
+
+ /*
+ * With 2M vmemmap mapping we can have things set up
+ * such that even though an altmap is specified we never
+ * use the altmap for this range.
+ */
+ alt_start = altmap->base_pfn;
+ alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
+
+ if (base_pfn >= alt_start && base_pfn < alt_end) {
+ vmem_altmap_free(altmap, nr_pages);
+ return;
+ }
+ }
+
+ if (PageReserved(page)) {
+ /* allocated from memblock */
+ while (nr_pages--)
+ free_reserved_page(page++);
+ } else
+ free_pages((unsigned long)page_address(page), order);
+}
+
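Worked restatement of the altmap check above (illustrative sketch, not a helper in this patch): a vmemmap backing page is treated as altmap-backed when its pfn falls inside the device-provided window.

	static bool example_pfn_in_altmap(unsigned long pfn, struct vmem_altmap *altmap)
	{
		unsigned long alt_start = altmap->base_pfn;
		unsigned long alt_end = altmap->base_pfn + altmap->reserve + altmap->free;

		/* pages in [alt_start, alt_end) were carved out of device memory */
		return pfn >= alt_start && pfn < alt_end;
	}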
+static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr,
+ unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long next, pages = 0;
pte_t *pte;
@@ -759,24 +798,26 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
if (!pte_present(*pte))
continue;
- if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
- /*
- * The vmemmap_free() and remove_section_mapping()
- * codepaths call us with aligned addresses.
- */
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
+ if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+ if (!direct)
+ free_vmemmap_pages(pte_page(*pte), altmap, 0);
+ pte_clear(&init_mm, addr, pte);
+ pages++;
}
-
- pte_clear(&init_mm, addr, pte);
- pages++;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ else if (!direct && vmemmap_page_is_unused(addr, next)) {
+ free_vmemmap_pages(pte_page(*pte), altmap, 0);
+ pte_clear(&init_mm, addr, pte);
+ }
+#endif
}
if (direct)
update_page_count(mmu_virtual_psize, -pages);
}
static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
- unsigned long end, bool direct)
+ unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long next, pages = 0;
pte_t *pte_base;
@@ -790,18 +831,24 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
continue;
if (pmd_is_leaf(*pmd)) {
- if (!IS_ALIGNED(addr, PMD_SIZE) ||
- !IS_ALIGNED(next, PMD_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE)) {
+ if (!direct)
+ free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
+ pages++;
}
- pte_clear(&init_mm, addr, (pte_t *)pmd);
- pages++;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ else if (!direct && vmemmap_pmd_is_unused(addr, next)) {
+ free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
+ }
+#endif
continue;
}
pte_base = (pte_t *)pmd_page_vaddr(*pmd);
- remove_pte_table(pte_base, addr, next, direct);
+ remove_pte_table(pte_base, addr, next, direct, altmap);
free_pte_table(pte_base, pmd);
}
if (direct)
@@ -809,7 +856,8 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
}
static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
- unsigned long end, bool direct)
+ unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long next, pages = 0;
pmd_t *pmd_base;
@@ -834,15 +882,16 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
}
pmd_base = pud_pgtable(*pud);
- remove_pmd_table(pmd_base, addr, next, direct);
+ remove_pmd_table(pmd_base, addr, next, direct, altmap);
free_pmd_table(pmd_base, pud);
}
if (direct)
update_page_count(MMU_PAGE_1G, -pages);
}
-static void __meminit remove_pagetable(unsigned long start, unsigned long end,
- bool direct)
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long addr, next;
pud_t *pud_base;
@@ -871,7 +920,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end,
}
pud_base = p4d_pgtable(*p4d);
- remove_pud_table(pud_base, addr, next, direct);
+ remove_pud_table(pud_base, addr, next, direct, altmap);
free_pud_table(pud_base, p4d);
}
@@ -894,7 +943,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
- remove_pagetable(start, end, true);
+ remove_pagetable(start, end, true, NULL);
return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
@@ -926,10 +975,429 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
return 0;
}
+
+bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
+{
+ if (radix_enabled())
+ return __vmemmap_can_optimize(altmap, pgmap);
+
+ return false;
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
+ unsigned long addr, unsigned long next)
+{
+ int large = pmd_large(*pmdp);
+
+ if (large)
+ vmemmap_verify(pmdp_ptep(pmdp), node, addr, next);
+
+ return large;
+}
+
+void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
+ unsigned long addr, unsigned long next)
+{
+ pte_t entry;
+ pte_t *ptep = pmdp_ptep(pmdp);
+
+ VM_BUG_ON(!IS_ALIGNED(addr, PMD_SIZE));
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte_at(&init_mm, addr, ptep, entry);
+ asm volatile("ptesync": : :"memory");
+
+ vmemmap_verify(ptep, node, addr, next);
+}
+
+static pte_t * __meminit radix__vmemmap_pte_populate(pmd_t *pmdp, unsigned long addr,
+ int node,
+ struct vmem_altmap *altmap,
+ struct page *reuse)
+{
+ pte_t *pte = pte_offset_kernel(pmdp, addr);
+
+ if (pte_none(*pte)) {
+ pte_t entry;
+ void *p;
+
+ if (!reuse) {
+ /*
+ * make sure we don't create altmap mappings
+ * covering things outside the device.
+ */
+ if (altmap && altmap_cross_boundary(altmap, addr, PAGE_SIZE))
+ altmap = NULL;
+
+ p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
+ if (!p && altmap)
+ p = vmemmap_alloc_block_buf(PAGE_SIZE, node, NULL);
+ if (!p)
+ return NULL;
+ pr_debug("PAGE_SIZE vmemmap mapping\n");
+ } else {
+ /*
+ * When a PTE/PMD entry is freed from the init_mm
+ * there's a free_pages() call to this page allocated
+ * above. Thus this get_page() is paired with the
+ * put_page_testzero() on the freeing path.
+ * This can only be called by certain ZONE_DEVICE paths,
+ * and through vmemmap_populate_compound_pages() when
+ * slab is available.
+ */
+ get_page(reuse);
+ p = page_to_virt(reuse);
+ pr_debug("Tail page reuse vmemmap mapping\n");
+ }
+
+ VM_BUG_ON(!PAGE_ALIGNED(addr));
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte_at(&init_mm, addr, pte, entry);
+ asm volatile("ptesync": : :"memory");
+ }
+ return pte;
+}
+
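A minimal sketch of the reference pairing described in the comment above (assumed flow for illustration, not code from this patch): with an elevated refcount, __free_pages() acts as a put, and only the final call actually releases the page.

	static void example_reuse_refcounting(struct page *reuse)
	{
		get_page(reuse);		/* a second PTE now maps this page */

		__free_pages(reuse, 0);		/* first unmap: refcount 2 -> 1 */
		__free_pages(reuse, 0);		/* last unmap: refcount 1 -> 0, page freed */
	}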
+static inline pud_t *vmemmap_pud_alloc(p4d_t *p4dp, int node,
+ unsigned long address)
+{
+ pud_t *pud;
+
+ /* All early vmemmap mappings are done at PAGE_SIZE to keep things simple */
+ if (unlikely(p4d_none(*p4dp))) {
+ if (unlikely(!slab_is_available())) {
+ pud = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+ p4d_populate(&init_mm, p4dp, pud);
+ /* go to the pud_offset */
+ } else
+ return pud_alloc(&init_mm, p4dp, address);
+ }
+ return pud_offset(p4dp, address);
+}
+
+static inline pmd_t *vmemmap_pmd_alloc(pud_t *pudp, int node,
+ unsigned long address)
+{
+ pmd_t *pmd;
+
+ /* All early vmemmap mappings are done at PAGE_SIZE to keep things simple */
+ if (unlikely(pud_none(*pudp))) {
+ if (unlikely(!slab_is_available())) {
+ pmd = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+ pud_populate(&init_mm, pudp, pmd);
+ } else
+ return pmd_alloc(&init_mm, pudp, address);
+ }
+ return pmd_offset(pudp, address);
+}
+
+static inline pte_t *vmemmap_pte_alloc(pmd_t *pmdp, int node,
+ unsigned long address)
+{
+ pte_t *pte;
+
+ /* All early vmemmap mappings are done at PAGE_SIZE to keep things simple */
+ if (unlikely(pmd_none(*pmdp))) {
+ if (unlikely(!slab_is_available())) {
+ pte = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+ pmd_populate(&init_mm, pmdp, pte);
+ } else
+ return pte_alloc_kernel(pmdp, address);
+ }
+ return pte_offset_kernel(pmdp, address);
+}
+
+
+
+int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+ unsigned long addr;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ for (addr = start; addr < end; addr = next) {
+ next = pmd_addr_end(addr, end);
+
+ pgd = pgd_offset_k(addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = vmemmap_pud_alloc(p4d, node, addr);
+ if (!pud)
+ return -ENOMEM;
+ pmd = vmemmap_pmd_alloc(pud, node, addr);
+ if (!pmd)
+ return -ENOMEM;
+
+ if (pmd_none(READ_ONCE(*pmd))) {
+ void *p;
+
+ /*
+ * Keep it simple by checking addr for PMD_SIZE alignment
+ * and verifying the device boundary condition.
+ * For us to use a pmd mapping, both addr and pfn should
+ * be aligned. We skip if addr is not aligned and for
+ * pfn we hope we have extra area in the altmap that
+ * can help to find an aligned block. This can result
+ * in altmap block allocation failures, in which case
+ * we fall back to RAM for the vmemmap allocation.
+ */
+ if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) ||
+ altmap_cross_boundary(altmap, addr, PMD_SIZE))) {
+ /*
+ * make sure we don't create altmap mappings
+ * covering things outside the device.
+ */
+ goto base_mapping;
+ }
+
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+ if (p) {
+ vmemmap_set_pmd(pmd, p, node, addr, next);
+ pr_debug("PMD_SIZE vmemmap mapping\n");
+ continue;
+ } else if (altmap) {
+ /*
+ * A vmemmap block allocation can fail due to
+ * alignment requirements: by trying to align
+ * things aggressively we can run out of altmap
+ * space. Fall back to a base mapping on failure.
+ */
+ goto base_mapping;
+ }
+ } else if (vmemmap_check_pmd(pmd, node, addr, next)) {
+ /*
+ * If a huge mapping exists due to an early call to
+ * vmemmap_populate(), try to use that.
+ */
+ continue;
+ }
+base_mapping:
+ /*
+ * Not able to allocate higher-order memory to back the memmap,
+ * or we found a pointer to a pte page. Allocate a base-page-size
+ * vmemmap mapping instead.
+ */
+ pte = vmemmap_pte_alloc(pmd, node, addr);
+ if (!pte)
+ return -ENOMEM;
+
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, altmap, NULL);
+ if (!pte)
+ return -ENOMEM;
+
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+ next = addr + PAGE_SIZE;
+ }
+ return 0;
+}
+
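The altmap policy in the comments above, condensed into a single predicate (an illustrative sketch; no such helper exists in this patch):

	static bool example_must_use_base_mapping(unsigned long addr,
						  struct vmem_altmap *altmap)
	{
		/*
		 * With an altmap, a 2M mapping needs an aligned addr and must
		 * not spill outside the device-provided area.
		 */
		return altmap && (!IS_ALIGNED(addr, PMD_SIZE) ||
				  altmap_cross_boundary(altmap, addr, PMD_SIZE));
	}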
+static pte_t * __meminit radix__vmemmap_populate_address(unsigned long addr, int node,
+ struct vmem_altmap *altmap,
+ struct page *reuse)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = vmemmap_pud_alloc(p4d, node, addr);
+ if (!pud)
+ return NULL;
+ pmd = vmemmap_pmd_alloc(pud, node, addr);
+ if (!pmd)
+ return NULL;
+ if (pmd_leaf(*pmd))
+ /*
+ * The second page is mapped as a hugepage due to a nearby request.
+ * Force our mapping to page size without deduplication
+ */
+ return NULL;
+ pte = vmemmap_pte_alloc(pmd, node, addr);
+ if (!pte)
+ return NULL;
+ radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ return pte;
+}
+
+static pte_t * __meminit vmemmap_compound_tail_page(unsigned long addr,
+ unsigned long pfn_offset, int node)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long map_addr;
+
+ /* the second vmemmap page which we use for duplication */
+ map_addr = addr - pfn_offset * sizeof(struct page) + PAGE_SIZE;
+ pgd = pgd_offset_k(map_addr);
+ p4d = p4d_offset(pgd, map_addr);
+ pud = vmemmap_pud_alloc(p4d, node, map_addr);
+ if (!pud)
+ return NULL;
+ pmd = vmemmap_pmd_alloc(pud, node, map_addr);
+ if (!pmd)
+ return NULL;
+ if (pmd_leaf(*pmd))
+ /*
+ * The second page is mapped as a hugepage due to a nearby request.
+ * Force our mapping to page size without deduplication
+ */
+ return NULL;
+ pte = vmemmap_pte_alloc(pmd, node, map_addr);
+ if (!pte)
+ return NULL;
+ /*
+ * Check if a mapping to the left already exists
+ */
+ if (pte_none(*pte)) {
+ /*
+ * Populate the head page vmemmap page.
+ * It can fall in a different pmd, hence use
+ * radix__vmemmap_populate_address().
+ */
+ pte = radix__vmemmap_populate_address(map_addr - PAGE_SIZE, node, NULL, NULL);
+ if (!pte)
+ return NULL;
+ /*
+ * Populate the tail pages' vmemmap page.
+ */
+ pte = radix__vmemmap_pte_populate(pmd, map_addr, node, NULL, NULL);
+ if (!pte)
+ return NULL;
+ vmemmap_verify(pte, node, map_addr, map_addr + PAGE_SIZE);
+ return pte;
+ }
+ return pte;
+}
+
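The map_addr arithmetic above, restated as a standalone helper for clarity (illustrative only, not part of the patch):

	static unsigned long example_tail_reuse_vmemmap_addr(unsigned long addr,
							     unsigned long pfn_offset)
	{
		/* vmemmap address of the compound head's struct page ... */
		unsigned long head_addr = addr - pfn_offset * sizeof(struct page);

		/* ... plus one page: the vmemmap page shared by all later tail pages */
		return head_addr + PAGE_SIZE;
	}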
+int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
+ unsigned long start,
+ unsigned long end, int node,
+ struct dev_pagemap *pgmap)
+{
+ /*
+ * We want to map things with base page size mappings so that
+ * we can save space in the vmemmap. We could have a huge mapping
+ * covering both edges.
+ */
+ unsigned long addr;
+ unsigned long addr_pfn = start_pfn;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ for (addr = start; addr < end; addr = next) {
+
+ pgd = pgd_offset_k(addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = vmemmap_pud_alloc(p4d, node, addr);
+ if (!pud)
+ return -ENOMEM;
+ pmd = vmemmap_pmd_alloc(pud, node, addr);
+ if (!pmd)
+ return -ENOMEM;
+
+ if (pmd_leaf(READ_ONCE(*pmd))) {
+ /* existing huge mapping. Skip the range */
+ addr_pfn += (PMD_SIZE >> PAGE_SHIFT);
+ next = pmd_addr_end(addr, end);
+ continue;
+ }
+ pte = vmemmap_pte_alloc(pmd, node, addr);
+ if (!pte)
+ return -ENOMEM;
+ if (!pte_none(*pte)) {
+ /*
+ * This could be because we already have a compound
+ * page whose VMEMMAP_RESERVE_NR pages were mapped and
+ * this request falls within those pages.
+ */
+ addr_pfn += 1;
+ next = addr + PAGE_SIZE;
+ continue;
+ } else {
+ unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
+ unsigned long pfn_offset = addr_pfn - ALIGN_DOWN(addr_pfn, nr_pages);
+ pte_t *tail_page_pte;
+
+ /*
+ * If the address is aligned to the huge page size it is the
+ * head mapping.
+ */
+ if (pfn_offset == 0) {
+ /* Populate the head page vmemmap page */
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+ if (!pte)
+ return -ENOMEM;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ /*
+ * Populate the tail pages' vmemmap page.
+ * It can fall in a different pmd, hence use
+ * radix__vmemmap_populate_address().
+ */
+ pte = radix__vmemmap_populate_address(addr + PAGE_SIZE, node, NULL, NULL);
+ if (!pte)
+ return -ENOMEM;
+
+ addr_pfn += 2;
+ next = addr + 2 * PAGE_SIZE;
+ continue;
+ }
+ /*
+ * Get the details of the 2nd mapping,
+ * creating it if it doesn't exist.
+ */
+ tail_page_pte = vmemmap_compound_tail_page(addr, pfn_offset, node);
+ if (!tail_page_pte) {
+
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+ if (!pte)
+ return -ENOMEM;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ addr_pfn += 1;
+ next = addr + PAGE_SIZE;
+ continue;
+ }
+
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, pte_page(*tail_page_pte));
+ if (!pte)
+ return -ENOMEM;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ addr_pfn += 1;
+ next = addr + PAGE_SIZE;
+ continue;
+ }
+ }
+ return 0;
+}
+
+
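The resulting saving per compound device page, sketched under the assumption that the generic VMEMMAP_RESERVE_NR == 2 head/tail optimisation applies (illustrative only):

	/*
	 * Sketch: vmemmap pages 0 and 1 get fresh backing pages (head plus
	 * first tail struct pages); every later vmemmap page of the compound
	 * page aliases the backing page of vmemmap page 1.
	 */
	static unsigned long example_backed_vmemmap_pages(unsigned long nr_pages)
	{
		unsigned long total = nr_pages * sizeof(struct page) / PAGE_SIZE;

		return min(total, 2UL);
	}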
#ifdef CONFIG_MEMORY_HOTPLUG
void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
- remove_pagetable(start, start + page_size, false);
+ remove_pagetable(start, start + page_size, true, NULL);
+}
+
+void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+ remove_pagetable(start, end, false, altmap);
}
#endif
#endif
@@ -962,7 +1430,24 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add
#endif
old = radix__pte_update(mm, addr, pmdp_ptep(pmdp), clr, set, 1);
- trace_hugepage_update(addr, old, clr, set);
+ trace_hugepage_update_pmd(addr, old, clr, set);
+
+ return old;
+}
+
+unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, unsigned long clr,
+ unsigned long set)
+{
+ unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!pud_devmap(*pudp));
+ assert_spin_locked(pud_lockptr(mm, pudp));
+#endif
+
+ old = radix__pte_update(mm, addr, pudp_ptep(pudp), clr, set, 1);
+ trace_hugepage_update_pud(addr, old, clr, set);
return old;
}
@@ -1043,6 +1528,17 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
return old_pmd;
}
+pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ pud_t old_pud;
+ unsigned long old;
+
+ old = radix__pud_hugepage_update(mm, addr, pudp, ~0UL, 0);
+ old_pud = __pud(old);
+ return old_pud;
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 0bd4866d9824..3020a8b38572 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -987,6 +987,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
}
}
preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
@@ -1020,6 +1021,7 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
}
preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
void radix__flush_all_mm(struct mm_struct *mm)
@@ -1228,6 +1230,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
}
out:
preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@ -1392,6 +1395,7 @@ static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
}
out:
preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
@@ -1461,6 +1465,13 @@ void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
+void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_1G);
+}
+EXPORT_SYMBOL(radix__flush_pud_tlb_range);
+
void radix__flush_tlb_all(void)
{
unsigned long rb,prs,r,rs;
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 0dc85556dec5..ec98e526167e 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -145,6 +145,7 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
static const struct mm_walk_ops subpage_walk_ops = {
.pmd_entry = subpage_walk_pmd_entry,
+ .walk_lock = PGWALK_WRLOCK_VERIFY,
};
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
index 0e9b4879c0f9..15189592da09 100644
--- a/arch/powerpc/mm/cacheflush.c
+++ b/arch/powerpc/mm/cacheflush.c
@@ -148,44 +148,31 @@ static void __flush_dcache_icache(void *p)
invalidate_icache_range(addr, addr + PAGE_SIZE);
}
-static void flush_dcache_icache_hugepage(struct page *page)
+void flush_dcache_icache_folio(struct folio *folio)
{
- int i;
- int nr = compound_nr(page);
+ unsigned int i, nr = folio_nr_pages(folio);
- if (!PageHighMem(page)) {
+ if (flush_coherent_icache())
+ return;
+
+ if (!folio_test_highmem(folio)) {
+ void *addr = folio_address(folio);
for (i = 0; i < nr; i++)
- __flush_dcache_icache(lowmem_page_address(page + i));
- } else {
+ __flush_dcache_icache(addr + i * PAGE_SIZE);
+ } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
for (i = 0; i < nr; i++) {
- void *start = kmap_local_page(page + i);
+ void *start = kmap_local_folio(folio, i * PAGE_SIZE);
__flush_dcache_icache(start);
kunmap_local(start);
}
- }
-}
-
-void flush_dcache_icache_page(struct page *page)
-{
- if (flush_coherent_icache())
- return;
-
- if (PageCompound(page))
- return flush_dcache_icache_hugepage(page);
-
- if (!PageHighMem(page)) {
- __flush_dcache_icache(lowmem_page_address(page));
- } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
- void *start = kmap_local_page(page);
-
- __flush_dcache_icache(start);
- kunmap_local(start);
} else {
- flush_dcache_icache_phys(page_to_phys(page));
+ unsigned long pfn = folio_pfn(folio);
+ for (i = 0; i < nr; i++)
+ flush_dcache_icache_phys((pfn + i) * PAGE_SIZE);
}
}
-EXPORT_SYMBOL(flush_dcache_icache_page);
+EXPORT_SYMBOL(flush_dcache_icache_folio);
void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
{
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 5bfdf6ecfa96..b1723094d464 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -469,7 +469,6 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
if (is_exec)
flags |= FAULT_FLAG_INSTRUCTION;
-#ifdef CONFIG_PER_VMA_LOCK
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
@@ -489,7 +488,8 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
}
fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
- vma_end_read(vma);
+ if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+ vma_end_read(vma);
if (!(fault & VM_FAULT_RETRY)) {
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
@@ -501,7 +501,6 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
return user_mode(regs) ? 0 : SIGBUS;
lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
@@ -551,9 +550,7 @@ retry:
mmap_read_unlock(current->mm);
-#ifdef CONFIG_PER_VMA_LOCK
done:
-#endif
if (unlikely(fault & VM_FAULT_ERROR))
return mm_fault_error(regs, address, fault);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fe1b83020e0d..e0208cb12058 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -92,7 +92,7 @@ static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_ad
* a page table lookup here because with the hash translation we don't keep
* vmemmap details in linux page table.
*/
-static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
+int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
{
struct page *start;
unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
@@ -183,8 +183,8 @@ static __meminit int vmemmap_list_populate(unsigned long phys,
return 0;
}
-static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
- unsigned long page_size)
+bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+ unsigned long page_size)
{
unsigned long nr_pfn = page_size / sizeof(struct page);
unsigned long start_pfn = page_to_pfn((struct page *)start);
@@ -198,8 +198,8 @@ static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long star
return false;
}
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
- struct vmem_altmap *altmap)
+static int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
bool altmap_alloc;
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
@@ -272,6 +272,18 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return 0;
}
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (radix_enabled())
+ return radix__vmemmap_populate(start, end, node, altmap);
+#endif
+
+ return __vmemmap_populate(start, end, node, altmap);
+}
+
#ifdef CONFIG_MEMORY_HOTPLUG
static unsigned long vmemmap_list_free(unsigned long start)
{
@@ -303,8 +315,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
return vmem_back->phys;
}
-void __ref vmemmap_free(unsigned long start, unsigned long end,
- struct vmem_altmap *altmap)
+static void __ref __vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
unsigned long page_order = get_order(page_size);
@@ -314,8 +326,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
start = ALIGN_DOWN(start, page_size);
if (altmap) {
alt_start = altmap->base_pfn;
- alt_end = altmap->base_pfn + altmap->reserve +
- altmap->free + altmap->alloc + altmap->align;
+ alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
}
pr_debug("vmemmap_free %lx...%lx\n", start, end);
@@ -362,6 +373,17 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
vmemmap_remove_mapping(start, page_size);
}
}
+
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (radix_enabled())
+ return radix__vmemmap_free(start, end, altmap);
+#endif
+ return __vmemmap_free(start, end, altmap);
+}
+
#endif
void register_page_bootmem_memmap(unsigned long section_nr,
struct page *start_page, unsigned long size)
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
index 4f12504fb405..705e8e8ffde4 100644
--- a/arch/powerpc/mm/ioremap.c
+++ b/arch/powerpc/mm/ioremap.c
@@ -41,7 +41,7 @@ void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
return __ioremap_caller(addr, size, prot, caller);
}
-void __iomem *ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
+void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long flags)
{
pte_t pte = __pte(flags);
void *caller = __builtin_return_address(0);
@@ -74,27 +74,3 @@ int early_ioremap_range(unsigned long ea, phys_addr_t pa,
return 0;
}
-
-void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
- pgprot_t prot, void *caller)
-{
- struct vm_struct *area;
- int ret;
- unsigned long va;
-
- area = __get_vm_area_caller(size, VM_IOREMAP, IOREMAP_START, IOREMAP_END, caller);
- if (area == NULL)
- return NULL;
-
- area->phys_addr = pa;
- va = (unsigned long)area->addr;
-
- ret = ioremap_page_range(va, va + size, pa, prot);
- if (!ret)
- return (void __iomem *)area->addr + offset;
-
- vunmap_range(va, va + size);
- free_vm_area(area);
-
- return NULL;
-}
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
index 9d13143b8be4..ca5bc6be3e6f 100644
--- a/arch/powerpc/mm/ioremap_32.c
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -22,6 +22,13 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
int err;
/*
+ * If the address lies within the first 16 MB, assume it's in ISA
+ * memory space
+ */
+ if (addr < SZ_16M)
+ addr += _ISA_MEM_BASE;
+
+ /*
* Choose an address to map it to.
* Once the vmalloc system is running, we use it.
* Before then, we use space going down from IOREMAP_TOP
@@ -31,13 +38,6 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
offset = addr & ~PAGE_MASK;
size = PAGE_ALIGN(addr + size) - p;
- /*
- * If the address lies within the first 16 MB, assume it's in ISA
- * memory space
- */
- if (p < 16 * 1024 * 1024)
- p += _ISA_MEM_BASE;
-
#ifndef CONFIG_CRASH_DUMP
/*
* Don't allow anybody to remap normal RAM that we're using.
@@ -63,7 +63,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
return (void __iomem *)v + offset;
if (slab_is_available())
- return do_ioremap(p, offset, size, prot, caller);
+ return generic_ioremap_prot(addr, size, prot);
/*
* Should check if it is a candidate for a BAT mapping
@@ -87,7 +87,6 @@ void iounmap(volatile void __iomem *addr)
if (v_block_mapped((unsigned long)addr))
return;
- if (addr > high_memory && (unsigned long)addr < ioremap_bot)
- vunmap((void *)(PAGE_MASK & (unsigned long)addr));
+ generic_iounmap(addr);
}
EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
index 3acece00b33e..d24e5f166723 100644
--- a/arch/powerpc/mm/ioremap_64.c
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -29,7 +29,7 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
return NULL;
if (slab_is_available())
- return do_ioremap(paligned, offset, size, prot, caller);
+ return generic_ioremap_prot(addr, size, prot);
pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
@@ -49,17 +49,9 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
*/
void iounmap(volatile void __iomem *token)
{
- void *addr;
-
if (!slab_is_available())
return;
- addr = (void *)((unsigned long __force)PCI_FIX_ADDR(token) & PAGE_MASK);
-
- if ((unsigned long)addr < ioremap_bot) {
- pr_warn("Attempt to iounmap early bolted mapping at 0x%p\n", addr);
- return;
- }
- vunmap(addr);
+ generic_iounmap(PCI_FIX_ADDR(token));
}
EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
index 699eeffd9f55..f9522fd70b2f 100644
--- a/arch/powerpc/mm/kasan/Makefile
+++ b/arch/powerpc/mm/kasan/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
obj-$(CONFIG_PPC32) += init_32.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
diff --git a/arch/powerpc/mm/nohash/e500_hugetlbpage.c b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
index 58c8d9849cb1..6b30e40d4590 100644
--- a/arch/powerpc/mm/nohash/e500_hugetlbpage.c
+++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
@@ -178,7 +178,8 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
*
* This must always be called with the pte lock held.
*/
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep, unsigned int nr)
{
if (is_vm_hugetlb_page(vma))
book3e_hugetlb_preload(vma, address, *ptep);
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index 20652daa1d7e..8c31802f97e8 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -18,15 +18,15 @@
void pte_frag_destroy(void *pte_frag)
{
int count;
- struct page *page;
+ struct ptdesc *ptdesc;
- page = virt_to_page(pte_frag);
+ ptdesc = virt_to_ptdesc(pte_frag);
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
- pgtable_pte_page_dtor(page);
- __free_page(page);
+ if (atomic_sub_and_test(PTE_FRAG_NR - count, &ptdesc->pt_frag_refcount)) {
+ pagetable_pte_dtor(ptdesc);
+ pagetable_free(ptdesc);
}
}
@@ -55,25 +55,25 @@ static pte_t *get_pte_from_cache(struct mm_struct *mm)
static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
{
void *ret = NULL;
- struct page *page;
+ struct ptdesc *ptdesc;
if (!kernel) {
- page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
- if (!page)
+ ptdesc = pagetable_alloc(PGALLOC_GFP | __GFP_ACCOUNT, 0);
+ if (!ptdesc)
return NULL;
- if (!pgtable_pte_page_ctor(page)) {
- __free_page(page);
+ if (!pagetable_pte_ctor(ptdesc)) {
+ pagetable_free(ptdesc);
return NULL;
}
} else {
- page = alloc_page(PGALLOC_GFP);
- if (!page)
+ ptdesc = pagetable_alloc(PGALLOC_GFP, 0);
+ if (!ptdesc)
return NULL;
}
- atomic_set(&page->pt_frag_refcount, 1);
+ atomic_set(&ptdesc->pt_frag_refcount, 1);
- ret = page_address(page);
+ ret = ptdesc_address(ptdesc);
/*
* if we support only one fragment just return the
* allocated page.
@@ -82,12 +82,12 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
return ret;
spin_lock(&mm->page_table_lock);
/*
- * If we find pgtable_page set, we return
+ * If we find ptdesc_page set, we return
* the allocated page with single fragment
* count.
*/
if (likely(!pte_frag_get(&mm->context))) {
- atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
+ atomic_set(&ptdesc->pt_frag_refcount, PTE_FRAG_NR);
pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE);
}
spin_unlock(&mm->page_table_lock);
@@ -106,17 +106,40 @@ pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
return __alloc_for_ptecache(mm, kernel);
}
-void pte_fragment_free(unsigned long *table, int kernel)
+static void pte_free_now(struct rcu_head *head)
{
- struct page *page = virt_to_page(table);
+ struct ptdesc *ptdesc;
- if (PageReserved(page))
- return free_reserved_page(page);
+ ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
+ pagetable_pte_dtor(ptdesc);
+ pagetable_free(ptdesc);
+}
- BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
- if (atomic_dec_and_test(&page->pt_frag_refcount)) {
- if (!kernel)
- pgtable_pte_page_dtor(page);
- __free_page(page);
+void pte_fragment_free(unsigned long *table, int kernel)
+{
+ struct ptdesc *ptdesc = virt_to_ptdesc(table);
+
+ if (pagetable_is_reserved(ptdesc))
+ return free_reserved_ptdesc(ptdesc);
+
+ BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
+ if (kernel)
+ pagetable_free(ptdesc);
+ else if (folio_test_clear_active(ptdesc_folio(ptdesc)))
+ call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
+ else
+ pte_free_now(&ptdesc->pt_rcu_head);
}
}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
+{
+ struct page *page;
+
+ page = virt_to_page(pgtable);
+ SetPageActive(page);
+ pte_fragment_free((unsigned long *)pgtable, 0);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
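An illustrative caller of pte_free_defer(), modelled on how mm/khugepaged.c retracts a page table (a sketch under that assumption, not code from this patch):

	static void example_retract_page_table(struct vm_area_struct *vma,
					       unsigned long addr, pmd_t *pmd)
	{
		pmd_t pmdval = pmdp_collapse_flush(vma, addr, pmd);

		/*
		 * Lockless walkers may still hold a pointer to the old PTE
		 * table, so its actual free is deferred until after an RCU
		 * grace period.
		 */
		pte_free_defer(vma->vm_mm, pmd_pgtable(pmdval));
	}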
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index cb2dcdb18f8e..3f86fd217690 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -58,7 +58,7 @@ static inline int pte_looks_normal(pte_t pte)
return 0;
}
-static struct page *maybe_pte_to_page(pte_t pte)
+static struct folio *maybe_pte_to_folio(pte_t pte)
{
unsigned long pfn = pte_pfn(pte);
struct page *page;
@@ -68,7 +68,7 @@ static struct page *maybe_pte_to_page(pte_t pte)
page = pfn_to_page(pfn);
if (PageReserved(page))
return NULL;
- return page;
+ return page_folio(page);
}
#ifdef CONFIG_PPC_BOOK3S
@@ -84,12 +84,12 @@ static pte_t set_pte_filter_hash(pte_t pte)
pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
cpu_has_feature(CPU_FTR_NOEXECUTE))) {
- struct page *pg = maybe_pte_to_page(pte);
- if (!pg)
+ struct folio *folio = maybe_pte_to_folio(pte);
+ if (!folio)
return pte;
- if (!test_bit(PG_dcache_clean, &pg->flags)) {
- flush_dcache_icache_page(pg);
- set_bit(PG_dcache_clean, &pg->flags);
+ if (!test_bit(PG_dcache_clean, &folio->flags)) {
+ flush_dcache_icache_folio(folio);
+ set_bit(PG_dcache_clean, &folio->flags);
}
}
return pte;
@@ -107,7 +107,7 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; }
*/
static inline pte_t set_pte_filter(pte_t pte)
{
- struct page *pg;
+ struct folio *folio;
if (radix_enabled())
return pte;
@@ -120,18 +120,18 @@ static inline pte_t set_pte_filter(pte_t pte)
return pte;
/* If you set _PAGE_EXEC on weird pages you're on your own */
- pg = maybe_pte_to_page(pte);
- if (unlikely(!pg))
+ folio = maybe_pte_to_folio(pte);
+ if (unlikely(!folio))
return pte;
/* If the page clean, we move on */
- if (test_bit(PG_dcache_clean, &pg->flags))
+ if (test_bit(PG_dcache_clean, &folio->flags))
return pte;
/* If it's an exec fault, we flush the cache and make it clean */
if (is_exec_fault()) {
- flush_dcache_icache_page(pg);
- set_bit(PG_dcache_clean, &pg->flags);
+ flush_dcache_icache_folio(folio);
+ set_bit(PG_dcache_clean, &folio->flags);
return pte;
}
@@ -142,7 +142,7 @@ static inline pte_t set_pte_filter(pte_t pte)
static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
int dirty)
{
- struct page *pg;
+ struct folio *folio;
if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
return pte;
@@ -168,17 +168,17 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
#endif /* CONFIG_DEBUG_VM */
/* If you set _PAGE_EXEC on weird pages you're on your own */
- pg = maybe_pte_to_page(pte);
- if (unlikely(!pg))
+ folio = maybe_pte_to_folio(pte);
+ if (unlikely(!folio))
goto bail;
/* If the page is already clean, we move on */
- if (test_bit(PG_dcache_clean, &pg->flags))
+ if (test_bit(PG_dcache_clean, &folio->flags))
goto bail;
/* Clean the page and set PG_dcache_clean */
- flush_dcache_icache_page(pg);
- set_bit(PG_dcache_clean, &pg->flags);
+ flush_dcache_icache_folio(folio);
+ set_bit(PG_dcache_clean, &folio->flags);
bail:
return pte_mkexec(pte);
@@ -187,8 +187,8 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
/*
* set_pte stores a linux PTE into the linux page table.
*/
-void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
- pte_t pte)
+void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t pte, unsigned int nr)
{
/*
* Make sure hardware valid bit is not set. We don't do
@@ -203,7 +203,16 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
pte = set_pte_filter(pte);
/* Perform the setting of the PTE */
- __set_pte_at(mm, addr, ptep, pte, 0);
+ arch_enter_lazy_mmu_mode();
+ for (;;) {
+ __set_pte_at(mm, addr, ptep, pte, 0);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte = __pte(pte_val(pte) + (1UL << PTE_RPN_SHIFT));
+ addr += PAGE_SIZE;
+ }
+ arch_leave_lazy_mmu_mode();
}
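For reference, a minimal sketch of the caller-side contract (not from this patch): a single set_ptes() call maps nr consecutive pages, with the loop above advancing the encoded pfn by one page per iteration.

	/* Sketch: map an entire folio contiguously starting at addr. */
	static void example_map_folio(struct mm_struct *mm, unsigned long addr,
				      pte_t *ptep, struct folio *folio, pgprot_t prot)
	{
		pte_t pte = mk_pte(&folio->page, prot);

		set_ptes(mm, addr, ptep, pte, folio_nr_pages(folio));
	}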
void unmap_kernel_page(unsigned long va)
@@ -311,6 +320,8 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
+ pte_t *pte;
+ spinlock_t *ptl;
if (mm == &init_mm)
return;
@@ -329,8 +340,10 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
*/
if (pmd_none(*pmd))
return;
- BUG_ON(!pmd_present(*pmd));
- assert_spin_locked(pte_lockptr(mm, pmd));
+ pte = pte_offset_map_nolock(mm, pmd, addr, &ptl);
+ BUG_ON(!pte);
+ assert_spin_locked(ptl);
+ pte_unmap(pte);
}
#endif /* CONFIG_DEBUG_VM */
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
index 1bfb29574caa..c1e981649bd9 100644
--- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -477,7 +477,7 @@ static int mpc512x_lpbfifo_probe(struct platform_device *pdev)
return ret;
}
-static int mpc512x_lpbfifo_remove(struct platform_device *pdev)
+static void mpc512x_lpbfifo_remove(struct platform_device *pdev)
{
unsigned long flags;
struct dma_device *dma_dev = lpbfifo.chan->device;
@@ -494,8 +494,6 @@ static int mpc512x_lpbfifo_remove(struct platform_device *pdev)
free_irq(lpbfifo.irq, &pdev->dev);
irq_dispose_mapping(lpbfifo.irq);
dma_release_channel(lpbfifo.chan);
-
- return 0;
}
static const struct of_device_id mpc512x_lpbfifo_match[] = {
@@ -506,7 +504,7 @@ MODULE_DEVICE_TABLE(of, mpc512x_lpbfifo_match);
static struct platform_driver mpc512x_lpbfifo_driver = {
.probe = mpc512x_lpbfifo_probe,
- .remove = mpc512x_lpbfifo_remove,
+ .remove_new = mpc512x_lpbfifo_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = mpc512x_lpbfifo_match,
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 9c43cf32f4c9..40aa58206888 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -180,7 +180,7 @@ static void wake_hw_thread(void *info)
unsigned long inia;
int cpu = *(const int *)info;
- inia = *(unsigned long *)fsl_secondary_thread_init;
+ inia = ppc_function_entry(fsl_secondary_thread_init);
book3e_start_thread(cpu_thread_in_core(cpu), inia);
}
#endif
diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c
index 7e83eb6746f4..f6bd232f8323 100644
--- a/arch/powerpc/platforms/8xx/adder875.c
+++ b/arch/powerpc/platforms/8xx/adder875.c
@@ -7,7 +7,6 @@
*/
#include <linux/init.h>
-#include <linux/fs_enet_pd.h>
#include <linux/of_platform.h>
#include <asm/time.h>
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
index 2fc7cacbcd96..c7c4f082b838 100644
--- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -21,7 +21,6 @@
#include <linux/device.h>
#include <linux/delay.h>
-#include <linux/fs_enet_pd.h>
#include <linux/fs_uart_pd.h>
#include <linux/fsl_devices.h>
#include <linux/mii.h>
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
index 7d8eb50bb9cd..6e56be852b2c 100644
--- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -24,7 +24,6 @@
#include <linux/device.h>
#include <linux/delay.h>
-#include <linux/fs_enet_pd.h>
#include <linux/fs_uart_pd.h>
#include <linux/fsl_devices.h>
#include <linux/mii.h>
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 45fd975ef521..340b86ef7284 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -94,6 +94,7 @@ config PPC_BOOK3S_64
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_SPLIT_PMD_PTLOCK
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index ea807aa0c31a..38c5be34c895 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode)
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
out:
return inode;
}
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 4c5790aff1b5..8633891b7aa5 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -26,8 +26,8 @@
#include <linux/rtc.h>
#include <linux/of_address.h>
+#include <asm/early_ioremap.h>
#include <asm/sections.h>
-#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/time.h>
#include <asm/nvram.h>
@@ -182,7 +182,7 @@ static int __init via_calibrate_decr(void)
return 0;
}
of_node_put(vias);
- via = ioremap(rsrc.start, resource_size(&rsrc));
+ via = early_ioremap(rsrc.start, resource_size(&rsrc));
if (via == NULL) {
printk(KERN_ERR "Failed to map VIA for timer calibration !\n");
return 0;
@@ -207,7 +207,7 @@ static int __init via_calibrate_decr(void)
ppc_tb_freq = (dstart - dend) * 100 / 6;
- iounmap(via);
+ early_iounmap((void *)via, resource_size(&rsrc));
return 1;
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 9c62c2c3b3d0..4f3d6a2f9065 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -637,7 +637,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
nid = first_online_node;
/* Add the memory */
- rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_NONE);
+ rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY);
if (rc) {
invalidate_lmb_associativity_index(lmb);
return rc;
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 9a44a98ba342..3fbc2a6aa319 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -744,6 +744,12 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
}
task_ref = &win->vas_win.task_ref;
+ /*
+ * VAS mmap (coproc_mmap()) and its fault handler
+ * (vas_mmap_fault()) are called with the mmap lock held.
+ * So take mmap_mutex only after mmap_lock here to avoid deadlock.
+ */
+ mmap_write_lock(task_ref->mm);
mutex_lock(&task_ref->mmap_mutex);
vma = task_ref->vma;
/*
@@ -752,7 +758,6 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
*/
win->vas_win.status |= flag;
- mmap_write_lock(task_ref->mm);
/*
* vma is set in the original mapping. But this mapping
* is done with mmap() after the window is opened with ioctl.
@@ -762,8 +767,8 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
if (vma)
zap_vma_pages(vma);
- mmap_write_unlock(task_ref->mm);
mutex_unlock(&task_ref->mmap_mutex);
+ mmap_write_unlock(task_ref->mm);
/*
* Close VAS window in the hypervisor, but do not
* free vas_window struct since it may be reused
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 68709743450e..c11771542bec 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -23,7 +23,6 @@
#include <linux/phy.h>
#include <linux/spi/spi.h>
#include <linux/fsl_devices.h>
-#include <linux/fs_enet_pd.h>
#include <linux/fs_uart_pd.h>
#include <linux/reboot.h>
@@ -37,8 +36,6 @@
#include <asm/cpm2.h>
#include <asm/fsl_hcalls.h> /* For the Freescale hypervisor */
-extern void init_fcc_ioports(struct fs_platform_info*);
-extern void init_fec_ioports(struct fs_platform_info*);
extern void init_smc_ioports(struct fs_uart_platform_info*);
static phys_addr_t immrbase = -1;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index fae747cc57d2..ee17270d35d0 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1084,7 +1084,7 @@ cmds(struct pt_regs *excp)
memzcan();
break;
case 'i':
- show_mem(0, NULL);
+ show_mem();
break;
default:
termch = cmd;