summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/include/asm/gmap.h2
-rw-r--r--arch/s390/include/asm/gmap_helpers.h15
-rw-r--r--arch/s390/include/asm/nospec-branch.h4
-rw-r--r--arch/s390/include/asm/pci_dma.h3
-rw-r--r--arch/s390/include/asm/pgalloc.h2
-rw-r--r--arch/s390/include/asm/pgtable.h11
-rw-r--r--arch/s390/include/asm/syscall.h21
-rw-r--r--arch/s390/include/asm/tlb.h5
-rw-r--r--arch/s390/include/asm/uv.h1
-rw-r--r--arch/s390/kernel/uv.c97
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/diag.c30
-rw-r--r--arch/s390/kvm/gaccess.c3
-rw-r--r--arch/s390/kvm/gmap-vsie.c1
-rw-r--r--arch/s390/kvm/gmap.c121
-rw-r--r--arch/s390/kvm/gmap.h39
-rw-r--r--arch/s390/kvm/intercept.c9
-rw-r--r--arch/s390/kvm/kvm-s390.c10
-rw-r--r--arch/s390/kvm/kvm-s390.h42
-rw-r--r--arch/s390/kvm/priv.c6
-rw-r--r--arch/s390/kvm/pv.c61
-rw-r--r--arch/s390/kvm/vsie.c19
-rw-r--r--arch/s390/lib/crypto/Makefile3
-rw-r--r--arch/s390/mm/Makefile2
-rw-r--r--arch/s390/mm/dump_pagetables.c46
-rw-r--r--arch/s390/mm/fault.c1
-rw-r--r--arch/s390/mm/gmap.c185
-rw-r--r--arch/s390/mm/gmap_helpers.c221
-rw-r--r--arch/s390/mm/init.c1
-rw-r--r--arch/s390/mm/pgalloc.c4
-rw-r--r--arch/s390/mm/pgtable.c1
-rw-r--r--arch/s390/net/bpf_jit_comp.c138
32 files changed, 620 insertions, 486 deletions
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 9f2814d0e1e9..66c5808fd011 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -110,7 +110,6 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
void __gmap_zap(struct gmap *, unsigned long gaddr);
void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
@@ -134,7 +133,6 @@ int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
-int s390_disable_cow_sharing(void);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h
new file mode 100644
index 000000000000..5356446a61c4
--- /dev/null
+++ b/arch/s390/include/asm/gmap_helpers.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Helper functions for KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2025
+ */
+
+#ifndef _ASM_S390_GMAP_HELPERS_H
+#define _ASM_S390_GMAP_HELPERS_H
+
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+int gmap_helper_disable_cow_sharing(void);
+
+#endif /* _ASM_S390_GMAP_HELPERS_H */
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index 192835a3e24d..c7c96282f011 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -26,8 +26,6 @@ static inline bool nospec_uses_trampoline(void)
return __is_defined(CC_USING_EXPOLINE) && !nospec_disable;
}
-#ifdef CONFIG_EXPOLINE_EXTERN
-
void __s390_indirect_jump_r1(void);
void __s390_indirect_jump_r2(void);
void __s390_indirect_jump_r3(void);
@@ -44,8 +42,6 @@ void __s390_indirect_jump_r13(void);
void __s390_indirect_jump_r14(void);
void __s390_indirect_jump_r15(void);
-#endif
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 42d7cc4262ca..d12e17201661 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -25,6 +25,7 @@ enum zpci_ioat_dtype {
#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5)
#define ZPCI_TABLE_SIZE_RT (1UL << 42)
+#define ZPCI_TABLE_SIZE_RS (1UL << 53)
#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
@@ -55,6 +56,8 @@ enum zpci_ioat_dtype {
#define ZPCI_PT_BITS 8
#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT)
#define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RS_SHIFT (ZPCI_RT_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RF_SHIFT (ZPCI_RS_SHIFT + ZPCI_TABLE_BITS)
#define ZPCI_RTE_FLAG_MASK 0x3fffUL
#define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK)
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 005497ffebda..5345398df653 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -97,7 +97,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
if (!table)
return NULL;
crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
- if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) {
+ if (!pagetable_pmd_ctor(mm, virt_to_ptdesc(table))) {
crst_table_free(mm, table);
return NULL;
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f8a6b54986ec..1c661ac62ce8 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1448,16 +1448,6 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
return pte_mkyoung(__pte);
}
-static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
-{
- unsigned long physpage = page_to_phys(page);
- pte_t __pte = mk_pte_phys(physpage, pgprot);
-
- if (pte_write(__pte) && PageDirty(page))
- __pte = pte_mkdirty(__pte);
- return __pte;
-}
-
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
@@ -1879,7 +1869,6 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
#define pmdp_collapse_flush pmdp_collapse_flush
#define pfn_pmd(pfn, pgprot) mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot))
-#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
static inline int pmd_trans_huge(pmd_t pmd)
{
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 0213ec800b57..bd4cb00ccd5e 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -24,6 +24,18 @@ static inline long syscall_get_nr(struct task_struct *task,
(regs->int_code & 0xffff) : -1;
}
+static inline void syscall_set_nr(struct task_struct *task,
+ struct pt_regs *regs,
+ int nr)
+{
+ /*
+ * Unlike syscall_get_nr(), syscall_set_nr() can be called only when
+ * the target task is stopped for tracing on entering syscall, so
+ * there is no need to have the same check syscall_get_nr() has.
+ */
+ regs->int_code = (regs->int_code & ~0xffff) | (nr & 0xffff);
+}
+
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
@@ -76,6 +88,15 @@ static inline void syscall_get_arguments(struct task_struct *task,
args[0] = regs->orig_gpr2 & mask;
}
+static inline void syscall_set_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ const unsigned long *args)
+{
+ regs->orig_gpr2 = args[0];
+ for (int n = 1; n < 6; n++)
+ regs->gprs[2 + n] = args[n];
+}
+
static inline int syscall_get_arch(struct task_struct *task)
{
#ifdef CONFIG_COMPAT
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index f20601995bb0..1e50f6f1ad9d 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -36,11 +36,12 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
#include <asm/tlbflush.h>
#include <asm-generic/tlb.h>
+#include <asm/gmap.h>
/*
* Release the page cache reference for a pte removed by
* tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
- * has already been freed, so just do free_page_and_swap_cache.
+ * has already been freed, so just do free_folio_and_swap_cache.
*
* s390 doesn't delay rmap removal.
*/
@@ -49,7 +50,7 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
{
VM_WARN_ON_ONCE(delay_rmap);
- free_page_and_swap_cache(page);
+ free_folio_and_swap_cache(page_folio(page));
return false;
}
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index b008402ec9aa..8018549a1ad2 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -16,7 +16,6 @@
#include <linux/bug.h>
#include <linux/sched.h>
#include <asm/page.h>
-#include <asm/gmap.h>
#include <asm/asm.h>
#define UVC_CC_OK 0
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 4ab0b6b4866e..b99478e84da4 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -15,6 +15,7 @@
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/pagewalk.h>
+#include <linux/backing-dev.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/uv.h>
@@ -135,7 +136,7 @@ int uv_destroy_folio(struct folio *folio)
{
int rc;
- /* See gmap_make_secure(): large folios cannot be secure */
+ /* Large folios cannot be secure */
if (unlikely(folio_test_large(folio)))
return 0;
@@ -184,7 +185,7 @@ int uv_convert_from_secure_folio(struct folio *folio)
{
int rc;
- /* See gmap_make_secure(): large folios cannot be secure */
+ /* Large folios cannot be secure */
if (unlikely(folio_test_large(folio)))
return 0;
@@ -324,32 +325,87 @@ static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct u
}
/**
- * s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split.
+ * s390_wiggle_split_folio() - try to drain extra references to a folio and
+ * split the folio if it is large.
* @mm: the mm containing the folio to work on
* @folio: the folio
- * @split: whether to split a large folio
*
* Context: Must be called while holding an extra reference to the folio;
* the mm lock should not be held.
- * Return: 0 if the folio was split successfully;
- * -EAGAIN if the folio was not split successfully but another attempt
- * can be made, or if @split was set to false;
- * -EINVAL in case of other errors. See split_folio().
+ * Return: 0 if the operation was successful;
+ * -EAGAIN if splitting the large folio was not successful,
+ * but another attempt can be made;
+ * -EINVAL in case of other folio splitting errors. See split_folio().
*/
-static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
+static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
{
- int rc;
+ int rc, tried_splits;
lockdep_assert_not_held(&mm->mmap_lock);
folio_wait_writeback(folio);
lru_add_drain_all();
- if (split) {
+
+ if (!folio_test_large(folio))
+ return 0;
+
+ for (tried_splits = 0; tried_splits < 2; tried_splits++) {
+ struct address_space *mapping;
+ loff_t lstart, lend;
+ struct inode *inode;
+
folio_lock(folio);
rc = split_folio(folio);
+ if (rc != -EBUSY) {
+ folio_unlock(folio);
+ return rc;
+ }
+
+ /*
+ * Splitting with -EBUSY can fail for various reasons, but we
+ * have to handle one case explicitly for now: some mappings
+ * don't allow for splitting dirty folios; writeback will
+ * mark them clean again, including marking all page table
+ * entries mapping the folio read-only, to catch future write
+ * attempts.
+ *
+ * While the system should be writing back dirty folios in the
+ * background, we obtained this folio by looking up a writable
+ * page table entry. On these problematic mappings, writable
+ * page table entries imply dirty folios, preventing the
+ * split in the first place.
+ *
+ * To prevent a livelock when trigger writeback manually and
+ * letting the caller look up the folio again in the page
+ * table (turning it dirty), immediately try to split again.
+ *
+ * This is only a problem for some mappings (e.g., XFS);
+ * mappings that do not support writeback (e.g., shmem) do not
+ * apply.
+ */
+ if (!folio_test_dirty(folio) || folio_test_anon(folio) ||
+ !folio->mapping || !mapping_can_writeback(folio->mapping)) {
+ folio_unlock(folio);
+ break;
+ }
+
+ /*
+ * Ideally, we'd only trigger writeback on this exact folio. But
+ * there is no easy way to do that, so we'll stabilize the
+ * mapping while we still hold the folio lock, so we can drop
+ * the folio lock to trigger writeback on the range currently
+ * covered by the folio instead.
+ */
+ mapping = folio->mapping;
+ lstart = folio_pos(folio);
+ lend = lstart + folio_size(folio) - 1;
+ inode = igrab(mapping->host);
folio_unlock(folio);
- if (rc != -EBUSY)
- return rc;
+ if (unlikely(!inode))
+ break;
+
+ filemap_write_and_wait_range(mapping, lstart, lend);
+ iput(mapping->host);
}
return -EAGAIN;
}
@@ -393,8 +449,11 @@ int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header
folio_walk_end(&fw, vma);
mmap_read_unlock(mm);
- if (rc == -E2BIG || rc == -EBUSY)
- rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG);
+ if (rc == -E2BIG || rc == -EBUSY) {
+ rc = s390_wiggle_split_folio(mm, folio);
+ if (!rc)
+ rc = -EAGAIN;
+ }
folio_put(folio);
return rc;
@@ -403,15 +462,15 @@ EXPORT_SYMBOL_GPL(make_hva_secure);
/*
* To be called with the folio locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the folio concurrently. Having 2
- * parallel arch_make_folio_accessible is fine, as the UV calls will become a
- * no-op if the folio is already exported.
+ * prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
+ * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will
+ * become a no-op if the folio is already exported.
*/
int arch_make_folio_accessible(struct folio *folio)
{
int rc = 0;
- /* See gmap_make_secure(): large folios cannot be secure */
+ /* Large folios cannot be secure */
if (unlikely(folio_test_large(folio)))
return 0;
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index f0ffe874adc2..9a723c48b05a 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 74f73141f9b9..53233dec8cad 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -11,12 +11,30 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
#include "gaccess.h"
+static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end)
+{
+ struct kvm_memslot_iter iter;
+ struct kvm_memory_slot *slot;
+ struct kvm_memslots *slots;
+ unsigned long start, end;
+
+ slots = kvm_vcpu_memslots(vcpu);
+
+ kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
+ slot = iter.slot;
+ start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn));
+ end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages));
+ gmap_helper_discard(vcpu->kvm->mm, start, end);
+ }
+}
+
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
unsigned long start, end;
@@ -32,12 +50,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
+ mmap_read_lock(vcpu->kvm->mm);
/*
* We checked for start >= end above, so lets check for the
* fast path (no prefix swap page involved)
*/
if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
- gmap_discard(vcpu->arch.gmap, start, end);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end));
} else {
/*
* This is slow path. gmap_discard will check for start
@@ -45,13 +64,14 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
* prefix and let gmap_discard make some of these calls
* NOPs.
*/
- gmap_discard(vcpu->arch.gmap, start, prefix);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix));
if (start <= prefix)
- gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+ do_discard_gfn_range(vcpu, 0, 1);
if (end > prefix + PAGE_SIZE)
- gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
- gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+ do_discard_gfn_range(vcpu, 1, 2);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end));
}
+ mmap_read_unlock(vcpu->kvm->mm);
return 0;
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index f6fded15633a..e23670e1949c 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -16,9 +16,10 @@
#include <asm/gmap.h>
#include <asm/dat-bits.h>
#include "kvm-s390.h"
-#include "gmap.h"
#include "gaccess.h"
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
/*
* vaddress union in order to easily decode a virtual address into its
* region first index, region second index etc. parts.
diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c
index a6d1dbb04c97..56ef153eb8fe 100644
--- a/arch/s390/kvm/gmap-vsie.c
+++ b/arch/s390/kvm/gmap-vsie.c
@@ -22,7 +22,6 @@
#include <asm/uv.h>
#include "kvm-s390.h"
-#include "gmap.h"
/**
* gmap_find_shadow - find a specific asce in the list of shadow tables
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
deleted file mode 100644
index 6d8944d1b4a0..000000000000
--- a/arch/s390/kvm/gmap.c
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Guest memory management for KVM/s390
- *
- * Copyright IBM Corp. 2008, 2020, 2024
- *
- * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
- * Martin Schwidefsky <schwidefsky@de.ibm.com>
- * David Hildenbrand <david@redhat.com>
- * Janosch Frank <frankja@linux.vnet.ibm.com>
- */
-
-#include <linux/compiler.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/pgtable.h>
-#include <linux/pagemap.h>
-
-#include <asm/lowcore.h>
-#include <asm/gmap.h>
-#include <asm/uv.h>
-
-#include "gmap.h"
-
-/**
- * gmap_make_secure() - make one guest page secure
- * @gmap: the guest gmap
- * @gaddr: the guest address that needs to be made secure
- * @uvcb: the UVCB specifying which operation needs to be performed
- *
- * Context: needs to be called with kvm->srcu held.
- * Return: 0 on success, < 0 in case of error.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
-{
- struct kvm *kvm = gmap->private;
- unsigned long vmaddr;
-
- lockdep_assert_held(&kvm->srcu);
-
- vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
- if (kvm_is_error_hva(vmaddr))
- return -EFAULT;
- return make_hva_secure(gmap->mm, vmaddr, uvcb);
-}
-
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
- struct uv_cb_cts uvcb = {
- .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
- .header.len = sizeof(uvcb),
- .guest_handle = gmap->guest_handle,
- .gaddr = gaddr,
- };
-
- return gmap_make_secure(gmap, gaddr, &uvcb);
-}
-
-/**
- * __gmap_destroy_page() - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @page: the page to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- *
- * Context: must be called holding the mm lock for gmap->mm
- */
-static int __gmap_destroy_page(struct gmap *gmap, struct page *page)
-{
- struct folio *folio = page_folio(page);
- int rc;
-
- /*
- * See gmap_make_secure(): large folios cannot be secure. Small
- * folio implies FW_LEVEL_PTE.
- */
- if (folio_test_large(folio))
- return -EFAULT;
-
- rc = uv_destroy_folio(folio);
- /*
- * Fault handlers can race; it is possible that two CPUs will fault
- * on the same secure page. One CPU can destroy the page, reboot,
- * re-enter secure mode and import it, while the second CPU was
- * stuck at the beginning of the handler. At some point the second
- * CPU will be able to progress, and it will not be able to destroy
- * the page. In that case we do not want to terminate the process,
- * we instead try to export the page.
- */
- if (rc)
- rc = uv_convert_from_secure_folio(folio);
-
- return rc;
-}
-
-/**
- * gmap_destroy_page() - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- *
- * Context: may sleep.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
-{
- struct page *page;
- int rc = 0;
-
- mmap_read_lock(gmap->mm);
- page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr));
- if (page)
- rc = __gmap_destroy_page(gmap, page);
- kvm_release_page_clean(page);
- mmap_read_unlock(gmap->mm);
- return rc;
-}
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
deleted file mode 100644
index c8f031c9ea5f..000000000000
--- a/arch/s390/kvm/gmap.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * KVM guest address space mapping code
- *
- * Copyright IBM Corp. 2007, 2016, 2025
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- * Claudio Imbrenda <imbrenda@linux.ibm.com>
- */
-
-#ifndef ARCH_KVM_S390_GMAP_H
-#define ARCH_KVM_S390_GMAP_H
-
-#define GMAP_SHADOW_FAKE_TABLE 1ULL
-
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
-
-/**
- * gmap_shadow_valid - check if a shadow guest address space matches the
- * given properties and is still valid
- * @sg: pointer to the shadow guest address space structure
- * @asce: ASCE for which the shadow table is requested
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns 1 if the gmap shadow is still valid and matches the given
- * properties, the caller can continue using it. Returns 0 otherwise, the
- * caller has to request a new shadow gmap in this case.
- *
- */
-static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
-{
- if (sg->removed)
- return 0;
- return sg->orig_asce == asce && sg->edat_level == edat_level;
-}
-
-#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a06a000f196c..c7908950c1f4 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -21,7 +21,6 @@
#include "gaccess.h"
#include "trace.h"
#include "trace-s390.h"
-#include "gmap.h"
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
@@ -545,7 +544,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
guest_uvcb->header.cmd);
return 0;
}
- rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+ rc = kvm_s390_pv_make_secure(vcpu->kvm, uvcb.gaddr, &uvcb);
/*
* If the unpin did not succeed, the guest will exit again for the UVC
* and we will retry the unpin.
@@ -653,10 +652,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
break;
case ICPT_PV_PREF:
rc = 0;
- gmap_convert_to_secure(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu));
- gmap_convert_to_secure(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+ kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu));
+ kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
break;
default:
return -EOPNOTSUPP;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3f3175193fd7..d5ad10791c25 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -40,6 +40,7 @@
#include <asm/machine.h>
#include <asm/stp.h>
#include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
#include <asm/nmi.h>
#include <asm/isc.h>
#include <asm/sclp.h>
@@ -52,7 +53,6 @@
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"
-#include "gmap.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -2674,7 +2674,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
if (r)
break;
- r = s390_disable_cow_sharing();
+ mmap_write_lock(kvm->mm);
+ r = gmap_helper_disable_cow_sharing();
+ mmap_write_unlock(kvm->mm);
if (r)
break;
@@ -4973,7 +4975,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
* previous protected guest. The old pages need to be destroyed
* so the new guest can use them.
*/
- if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) {
+ if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
/*
* Either KVM messed up the secure guest mapping or the
* same page is mapped into multiple secure guests.
@@ -4995,7 +4997,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
* guest has not been imported yet. Try to import the page into
* the protected guest.
*/
- rc = gmap_convert_to_secure(vcpu->arch.gmap, gaddr);
+ rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
if (rc == -EINVAL)
send_sig(SIGSEGV, current, 0);
if (rc != -ENXIO)
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 8d3bbb2dd8d2..c44fe0c3a097 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -308,6 +308,9 @@ int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
u16 *rc, u16 *rrc);
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb);
static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
{
@@ -319,6 +322,41 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
return vcpu->arch.pv.handle;
}
+/**
+ * __kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @page: the page to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: must be called holding the mm lock for gmap->mm
+ */
+static inline int __kvm_s390_pv_destroy_page(struct page *page)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /* Large folios cannot be secure. Small folio implies FW_LEVEL_PTE. */
+ if (folio_test_large(folio))
+ return -EFAULT;
+
+ rc = uv_destroy_folio(folio);
+ /*
+ * Fault handlers can race; it is possible that two CPUs will fault
+ * on the same secure page. One CPU can destroy the page, reboot,
+ * re-enter secure mode and import it, while the second CPU was
+ * stuck at the beginning of the handler. At some point the second
+ * CPU will be able to progress, and it will not be able to destroy
+ * the page. In that case we do not want to terminate the process,
+ * we instead try to export the page.
+ */
+ if (rc)
+ rc = uv_convert_from_secure_folio(folio);
+
+ return rc;
+}
+
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -398,6 +436,10 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
void kvm_s390_vsie_init(struct kvm *kvm);
void kvm_s390_vsie_destroy(struct kvm *kvm);
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+
+/* implemented in gmap-vsie.c */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 1a49b89706f8..9253c70897a8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1248,6 +1248,8 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
static int handle_essa(struct kvm_vcpu *vcpu)
{
+ lockdep_assert_held(&vcpu->kvm->srcu);
+
/* entries expected to be 1FF */
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
unsigned long *cbrlo;
@@ -1297,12 +1299,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* Retry the ESSA instruction */
kvm_s390_retry_instr(vcpu);
} else {
- int srcu_idx;
-
mmap_read_lock(vcpu->kvm->mm);
- srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
i = __do_essa(vcpu, orc);
- srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
mmap_read_unlock(vcpu->kvm->mm);
if (i < 0)
return i;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 22c012aa5206..14c330ec8ceb 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -17,7 +17,6 @@
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
-#include "gmap.h"
bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
@@ -34,6 +33,64 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
/**
+ * kvm_s390_pv_make_secure() - make one guest page secure
+ * @kvm: the guest
+ * @gaddr: the guest address that needs to be made secure
+ * @uvcb: the UVCB specifying which operation needs to be performed
+ *
+ * Context: needs to be called with kvm->srcu held.
+ * Return: 0 on success, < 0 in case of error.
+ */
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
+{
+ unsigned long vmaddr;
+
+ lockdep_assert_held(&kvm->srcu);
+
+ vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
+ if (kvm_is_error_hva(vmaddr))
+ return -EFAULT;
+ return make_hva_secure(kvm->mm, vmaddr, uvcb);
+}
+
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ .gaddr = gaddr,
+ };
+
+ return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
+}
+
+/**
+ * kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @kvm: the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: may sleep.
+ */
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
+{
+ struct page *page;
+ int rc = 0;
+
+ mmap_read_lock(kvm->mm);
+ page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+ if (page)
+ rc = __kvm_s390_pv_destroy_page(page);
+ kvm_release_page_clean(page);
+ mmap_read_unlock(kvm->mm);
+ return rc;
+}
+
+/**
* struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
* be destroyed
*
@@ -638,7 +695,7 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
.tweak[0] = tweak,
.tweak[1] = offset,
};
- int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+ int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
unsigned long vmaddr;
bool unlocked;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a78df3a4f353..13a9661d2b28 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -23,7 +23,6 @@
#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
-#include "gmap.h"
enum vsie_page_flags {
VSIE_PAGE_IN_USE = 0,
@@ -68,6 +67,24 @@ struct vsie_page {
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
+/**
+ * gmap_shadow_valid() - check if a shadow guest address space matches the
+ * given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise; the
+ * caller has to request a new shadow gmap in this case.
+ */
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+ if (sg->removed)
+ return 0;
+ return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+
/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
__u16 reason_code)
diff --git a/arch/s390/lib/crypto/Makefile b/arch/s390/lib/crypto/Makefile
index 920197967f46..5df30f1e7930 100644
--- a/arch/s390/lib/crypto/Makefile
+++ b/arch/s390/lib/crypto/Makefile
@@ -3,4 +3,5 @@
obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
chacha_s390-y := chacha-glue.o chacha-s390.o
-obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256.o
+obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256-s390.o
+sha256-s390-y := sha256.o
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 9726b91fe7e4..bd0401cc7ca5 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -12,3 +12,5 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_PGSTE) += gmap.o
obj-$(CONFIG_PFAULT) += pfault.o
+
+obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index d3e943752fa0..ac604b176660 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -147,11 +147,48 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
}
}
+static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
+{
+ note_page(pt_st, addr, 4, pte_val(pte));
+}
+
+static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
+{
+ note_page(pt_st, addr, 3, pmd_val(pmd));
+}
+
+static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
+{
+ note_page(pt_st, addr, 2, pud_val(pud));
+}
+
+static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
+{
+ note_page(pt_st, addr, 1, p4d_val(p4d));
+}
+
+static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
+{
+ note_page(pt_st, addr, 0, pgd_val(pgd));
+}
+
+static void note_page_flush(struct ptdump_state *pt_st)
+{
+ pte_t pte_zero = {0};
+
+ note_page(pt_st, 0, -1, pte_val(pte_zero));
+}
+
bool ptdump_check_wx(void)
{
struct pg_state st = {
.ptdump = {
- .note_page = note_page,
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
.range = (struct ptdump_range[]) {
{.start = 0, .end = max_addr},
{.start = 0, .end = 0},
@@ -190,7 +227,12 @@ static int ptdump_show(struct seq_file *m, void *v)
{
struct pg_state st = {
.ptdump = {
- .note_page = note_page,
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
.range = (struct ptdump_range[]) {
{.start = 0, .end = max_addr},
{.start = 0, .end = 0},
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index da84ff6770de..3829521450dd 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -40,7 +40,6 @@
#include <asm/ptrace.h>
#include <asm/fault.h>
#include <asm/diag.h>
-#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/facility.h>
#include <asm/uv.h>
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index a94bd4870c65..012a4366a2ad 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -22,9 +22,9 @@
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/machine.h>
+#include <asm/gmap_helpers.h>
#include <asm/gmap.h>
#include <asm/page.h>
-#include <asm/tlb.h>
/*
* The address is saved in a radix tree directly; NULL would be ambiguous,
@@ -620,63 +620,20 @@ EXPORT_SYMBOL(__gmap_link);
*/
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
- struct vm_area_struct *vma;
unsigned long vmaddr;
- spinlock_t *ptl;
- pte_t *ptep;
+
+ mmap_assert_locked(gmap->mm);
/* Find the vm address for the guest address */
vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
gaddr >> PMD_SHIFT);
if (vmaddr) {
vmaddr |= gaddr & ~PMD_MASK;
-
- vma = vma_lookup(gmap->mm, vmaddr);
- if (!vma || is_vm_hugetlb_page(vma))
- return;
-
- /* Get pointer to the page table entry */
- ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
- if (likely(ptep)) {
- ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
- pte_unmap_unlock(ptep, ptl);
- }
+ gmap_helper_zap_one_page(gmap->mm, vmaddr);
}
}
EXPORT_SYMBOL_GPL(__gmap_zap);
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
-{
- unsigned long gaddr, vmaddr, size;
- struct vm_area_struct *vma;
-
- mmap_read_lock(gmap->mm);
- for (gaddr = from; gaddr < to;
- gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
- /* Find the vm address for the guest address */
- vmaddr = (unsigned long)
- radix_tree_lookup(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- if (!vmaddr)
- continue;
- vmaddr |= gaddr & ~PMD_MASK;
- /* Find vma in the parent mm */
- vma = find_vma(gmap->mm, vmaddr);
- if (!vma)
- continue;
- /*
- * We do not discard pages that are backed by
- * hugetlbfs, so we don't have to refault them.
- */
- if (is_vm_hugetlb_page(vma))
- continue;
- size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
- zap_page_range_single(vma, vmaddr, size, NULL);
- }
- mmap_read_unlock(gmap->mm);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);
@@ -2269,138 +2226,6 @@ int s390_enable_sie(void)
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
-static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
- unsigned long end, struct mm_walk *walk)
-{
- unsigned long *found_addr = walk->private;
-
- /* Return 1 of the page is a zeropage. */
- if (is_zero_pfn(pte_pfn(*pte))) {
- /*
- * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
- * right thing and likely don't care: FAULT_FLAG_UNSHARE
- * currently only works in COW mappings, which is also where
- * mm_forbids_zeropage() is checked.
- */
- if (!is_cow_mapping(walk->vma->vm_flags))
- return -EFAULT;
-
- *found_addr = addr;
- return 1;
- }
- return 0;
-}
-
-static const struct mm_walk_ops find_zeropage_ops = {
- .pte_entry = find_zeropage_pte_entry,
- .walk_lock = PGWALK_WRLOCK,
-};
-
-/*
- * Unshare all shared zeropages, replacing them by anonymous pages. Note that
- * we cannot simply zap all shared zeropages, because this could later
- * trigger unexpected userfaultfd missing events.
- *
- * This must be called after mm->context.allow_cow_sharing was
- * set to 0, to avoid future mappings of shared zeropages.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * and racing with walk_page_range_vma() calling pte_offset_map_lock()
- * would fail, it will never insert a page table containing empty zero
- * pages once mm_forbids_zeropage(mm) i.e.
- * mm->context.allow_cow_sharing is set to 0.
- */
-static int __s390_unshare_zeropages(struct mm_struct *mm)
-{
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
- unsigned long addr;
- vm_fault_t fault;
- int rc;
-
- for_each_vma(vmi, vma) {
- /*
- * We could only look at COW mappings, but it's more future
- * proof to catch unexpected zeropages in other mappings and
- * fail.
- */
- if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
- continue;
- addr = vma->vm_start;
-
-retry:
- rc = walk_page_range_vma(vma, addr, vma->vm_end,
- &find_zeropage_ops, &addr);
- if (rc < 0)
- return rc;
- else if (!rc)
- continue;
-
- /* addr was updated by find_zeropage_pte_entry() */
- fault = handle_mm_fault(vma, addr,
- FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
- NULL);
- if (fault & VM_FAULT_OOM)
- return -ENOMEM;
- /*
- * See break_ksm(): even after handle_mm_fault() returned 0, we
- * must start the lookup from the current address, because
- * handle_mm_fault() may back out if there's any difficulty.
- *
- * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
- * maybe they could trigger in the future on concurrent
- * truncation. In that case, the shared zeropage would be gone
- * and we can simply retry and make progress.
- */
- cond_resched();
- goto retry;
- }
-
- return 0;
-}
-
-static int __s390_disable_cow_sharing(struct mm_struct *mm)
-{
- int rc;
-
- if (!mm->context.allow_cow_sharing)
- return 0;
-
- mm->context.allow_cow_sharing = 0;
-
- /* Replace all shared zeropages by anonymous pages. */
- rc = __s390_unshare_zeropages(mm);
- /*
- * Make sure to disable KSM (if enabled for the whole process or
- * individual VMAs). Note that nothing currently hinders user space
- * from re-enabling it.
- */
- if (!rc)
- rc = ksm_disable(mm);
- if (rc)
- mm->context.allow_cow_sharing = 1;
- return rc;
-}
-
-/*
- * Disable most COW-sharing of memory pages for the whole process:
- * (1) Disable KSM and unmerge/unshare any KSM pages.
- * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
- *
- * Not that we currently don't bother with COW-shared pages that are shared
- * with parent/child processes due to fork().
- */
-int s390_disable_cow_sharing(void)
-{
- int rc;
-
- mmap_write_lock(current->mm);
- rc = __s390_disable_cow_sharing(current->mm);
- mmap_write_unlock(current->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
-
/*
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
@@ -2468,7 +2293,7 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- rc = __s390_disable_cow_sharing(mm);
+ rc = gmap_helper_disable_cow_sharing();
if (rc) {
mm->context.uses_skeys = 0;
goto out_up;
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
new file mode 100644
index 000000000000..a45d417ad951
--- /dev/null
+++ b/arch/s390/mm/gmap_helpers.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helper functions for KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2025
+ */
+#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pagewalk.h>
+#include <linux/ksm.h>
+#include <asm/gmap_helpers.h>
+
+/**
+ * ptep_zap_swap_entry() - discard a swap entry.
+ * @mm: the mm
+ * @entry: the swap entry that needs to be zapped
+ *
+ * Discards the given swap entry. If the swap entry was an actual swap
+ * entry (and not a migration entry, for example), the actual swapped
+ * page is also discarded from swap.
+ */
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+{
+ if (!non_swap_entry(entry))
+ dec_mm_counter(mm, MM_SWAPENTS);
+ else if (is_migration_entry(entry))
+ dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
+ free_swap_and_cache(entry);
+}
+
+/**
+ * gmap_helper_zap_one_page() - discard a page if it was swapped.
+ * @mm: the mm
+ * @vmaddr: the userspace virtual address that needs to be discarded
+ *
+ * If the given address maps to a swap entry, discard it.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
+{
+ struct vm_area_struct *vma;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ mmap_assert_locked(mm);
+
+ /* Find the vm address for the guest address */
+ vma = vma_lookup(mm, vmaddr);
+ if (!vma || is_vm_hugetlb_page(vma))
+ return;
+
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(mm, vmaddr, &ptl);
+ if (unlikely(!ptep))
+ return;
+ if (pte_swap(*ptep))
+ ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+ pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
+
+/**
+ * gmap_helper_discard() - discard user pages in the given range
+ * @mm: the mm
+ * @vmaddr: starting userspace address
+ * @end: end address (first address outside the range)
+ *
+ * All userpace pages in the range [@vamddr, @end) are discarded and unmapped.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+ struct vm_area_struct *vma;
+
+ mmap_assert_locked(mm);
+
+ while (vmaddr < end) {
+ vma = find_vma_intersection(mm, vmaddr, end);
+ if (!vma)
+ return;
+ if (!is_vm_hugetlb_page(vma))
+ zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
+ vmaddr = vma->vm_end;
+ }
+}
+EXPORT_SYMBOL_GPL(gmap_helper_discard);
+
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long *found_addr = walk->private;
+
+ /* Return 1 of the page is a zeropage. */
+ if (is_zero_pfn(pte_pfn(*pte))) {
+ /*
+ * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+ * right thing and likely don't care: FAULT_FLAG_UNSHARE
+ * currently only works in COW mappings, which is also where
+ * mm_forbids_zeropage() is checked.
+ */
+ if (!is_cow_mapping(walk->vma->vm_flags))
+ return -EFAULT;
+
+ *found_addr = addr;
+ return 1;
+ }
+ return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+ .pte_entry = find_zeropage_pte_entry,
+ .walk_lock = PGWALK_WRLOCK,
+};
+
+/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages
+ * @mm: the mm whose zero pages are to be unshared
+ *
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
+ unsigned long addr;
+ vm_fault_t fault;
+ int rc;
+
+ for_each_vma(vmi, vma) {
+ /*
+ * We could only look at COW mappings, but it's more future
+ * proof to catch unexpected zeropages in other mappings and
+ * fail.
+ */
+ if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+ continue;
+ addr = vma->vm_start;
+
+retry:
+ rc = walk_page_range_vma(vma, addr, vma->vm_end,
+ &find_zeropage_ops, &addr);
+ if (rc < 0)
+ return rc;
+ else if (!rc)
+ continue;
+
+ /* addr was updated by find_zeropage_pte_entry() */
+ fault = handle_mm_fault(vma, addr,
+ FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+ NULL);
+ if (fault & VM_FAULT_OOM)
+ return -ENOMEM;
+ /*
+ * See break_ksm(): even after handle_mm_fault() returned 0, we
+ * must start the lookup from the current address, because
+ * handle_mm_fault() may back out if there's any difficulty.
+ *
+ * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+ * maybe they could trigger in the future on concurrent
+ * truncation. In that case, the shared zeropage would be gone
+ * and we can simply retry and make progress.
+ */
+ cond_resched();
+ goto retry;
+ }
+
+ return 0;
+}
+
+/**
+ * gmap_helper_disable_cow_sharing() - disable all COW sharing
+ *
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
+ *
+ * Not that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int gmap_helper_disable_cow_sharing(void)
+{
+ struct mm_struct *mm = current->mm;
+ int rc;
+
+ mmap_assert_write_locked(mm);
+
+ if (!mm->context.allow_cow_sharing)
+ return 0;
+
+ mm->context.allow_cow_sharing = 0;
+
+ /* Replace all shared zeropages by anonymous pages. */
+ rc = __gmap_helper_unshare_zeropages(mm);
+ /*
+ * Make sure to disable KSM (if enabled for the whole process or
+ * individual VMAs). Note that nothing currently hinders user space
+ * from re-enabling it.
+ */
+ if (!rc)
+ rc = ksm_disable(mm);
+ if (rc)
+ mm->context.allow_cow_sharing = 1;
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index afa085e8186c..074bf4fb4ce2 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -40,7 +40,6 @@
#include <asm/kfence.h>
#include <asm/dma.h>
#include <asm/abs_lowcore.h>
-#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/sclp.h>
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index d177bea0bd73..b449fd2605b0 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -12,8 +12,6 @@
#include <asm/mmu_context.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
-#include <asm/gmap.h>
-#include <asm/tlb.h>
#include <asm/tlbflush.h>
unsigned long *crst_table_alloc(struct mm_struct *mm)
@@ -144,7 +142,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (!ptdesc)
return NULL;
- if (!pagetable_pte_ctor(ptdesc)) {
+ if (!pagetable_pte_ctor(mm, ptdesc)) {
pagetable_free(ptdesc);
return NULL;
}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 9901934284ec..7df70cd8f739 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -20,7 +20,6 @@
#include <linux/ksm.h>
#include <linux/mman.h>
-#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 0776dfde2dba..c7f8313ba449 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -48,8 +48,6 @@ struct bpf_jit {
int lit64; /* Current position in 64-bit literal pool */
int base_ip; /* Base address for literal pool */
int exit_ip; /* Address of exit */
- int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
- int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int excnt; /* Number of exception table entries */
int prologue_plt_ret; /* Return address for prologue hotpatch PLT */
@@ -127,6 +125,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
jit->seen_regs |= (1 << r1);
}
+static s32 off_to_pcrel(struct bpf_jit *jit, u32 off)
+{
+ return off - jit->prg;
+}
+
+static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr)
+{
+ if (jit->prg_buf)
+ return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg);
+ return 0;
+}
+
#define REG_SET_SEEN(b1) \
({ \
reg_set_seen(jit, b1); \
@@ -201,7 +211,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT4_PCREL_RIC(op, mask, target) \
({ \
- int __rel = ((target) - jit->prg) / 2; \
+ int __rel = off_to_pcrel(jit, target) / 2; \
_EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \
})
@@ -239,7 +249,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \
({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
+ unsigned int rel = off_to_pcrel(jit, target) / 2; \
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
(op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
@@ -248,7 +258,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \
({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
+ unsigned int rel = off_to_pcrel(jit, target) / 2; \
_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
(rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
@@ -257,29 +267,41 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
- int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \
+ int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
+static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel)
+{
+ u32 pc32dbl = (s32)(pcrel / 2);
+
+ _EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff);
+}
+
+static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel)
+{
+ emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel);
+ REG_SET_SEEN(b);
+}
+
#define EMIT6_PCREL_RILB(op, b, target) \
-({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
- _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
- REG_SET_SEEN(b); \
-})
+ emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target))
-#define EMIT6_PCREL_RIL(op, target) \
-({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
- _EMIT6((op) | rel >> 16, rel & 0xffff); \
-})
+#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr) \
+ emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr))
+
+static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel)
+{
+ emit6_pcrel_ril(jit, op | mask << 20, pcrel);
+}
#define EMIT6_PCREL_RILC(op, mask, target) \
-({ \
- EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \
-})
+ emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target))
+
+#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr) \
+ emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr))
#define _EMIT6_IMM(op, imm) \
({ \
@@ -503,7 +525,7 @@ static void bpf_skip(struct bpf_jit *jit, int size)
{
if (size >= 6 && !is_valid_rel(size)) {
/* brcl 0xf,size */
- EMIT6_PCREL_RIL(0xc0f4000000, size);
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, size);
size -= 6;
} else if (size >= 4 && is_valid_rel(size)) {
/* brc 0xf,size */
@@ -605,43 +627,30 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
}
/* Setup stack and backchain */
if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* lgr %w1,%r15 (backchain) */
- EMIT4(0xb9040000, REG_W1, REG_15);
+ /* lgr %w1,%r15 (backchain) */
+ EMIT4(0xb9040000, REG_W1, REG_15);
/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* stg %w1,152(%r15) (backchain) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
- REG_15, 152);
+ /* stg %w1,152(%r15) (backchain) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+ REG_15, 152);
}
}
/*
- * Emit an expoline for a jump that follows
+ * Jump using a register either directly or via an expoline thunk
*/
-static void emit_expoline(struct bpf_jit *jit)
-{
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
-}
-
-/*
- * Emit __s390_indirect_jump_r1 thunk if necessary
- */
-static void emit_r1_thunk(struct bpf_jit *jit)
-{
- if (nospec_uses_trampoline()) {
- jit->r1_thunk_ip = jit->prg;
- emit_expoline(jit);
- /* br %r1 */
- _EMIT2(0x07f1);
- }
-}
+#define EMIT_JUMP_REG(reg) do { \
+ if (nospec_uses_trampoline()) \
+ /* brcl 0xf,__s390_indirect_jump_rN */ \
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f, \
+ __s390_indirect_jump_r ## reg); \
+ else \
+ /* br %rN */ \
+ _EMIT2(0x07f0 | reg); \
+} while (0)
/*
* Call r1 either directly or via __s390_indirect_jump_r1 thunk
@@ -650,7 +659,8 @@ static void call_r1(struct bpf_jit *jit)
{
if (nospec_uses_trampoline())
/* brasl %r14,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+ __s390_indirect_jump_r1);
else
/* basr %r14,%r1 */
EMIT2(0x0d00, REG_14, REG_1);
@@ -666,16 +676,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
- if (nospec_uses_trampoline()) {
- jit->r14_thunk_ip = jit->prg;
- /* Generate __s390_indirect_jump_r14 thunk */
- emit_expoline(jit);
- }
- /* br %r14 */
- _EMIT2(0x07fe);
-
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- emit_r1_thunk(jit);
+ EMIT_JUMP_REG(14);
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
@@ -1877,7 +1878,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* aghi %r1,tail_call_start */
EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
/* brcl 0xf,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
+ __s390_indirect_jump_r1);
} else {
/* bc 0xf,tail_call_start(%r1) */
_EMIT4(0x47f01000 + jit->tail_call_start);
@@ -2585,9 +2587,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
if (nr_stack_args > MAX_NR_STACK_ARGS)
return -ENOTSUPP;
- /* Return to %r14, since func_addr and %r0 are not available. */
- if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) ||
- (flags & BPF_TRAMP_F_INDIRECT))
+ /* Return to %r14 in the struct_ops case. */
+ if (flags & BPF_TRAMP_F_INDIRECT)
flags |= BPF_TRAMP_F_SKIP_FRAME;
/*
@@ -2847,17 +2848,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
0xf000 | tjit->tccnt_off);
/* aghi %r15,stack_size */
EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
- /* Emit an expoline for the following indirect jump. */
- if (nospec_uses_trampoline())
- emit_expoline(jit);
if (flags & BPF_TRAMP_F_SKIP_FRAME)
- /* br %r14 */
- _EMIT2(0x07fe);
+ EMIT_JUMP_REG(14);
else
- /* br %r1 */
- _EMIT2(0x07f1);
-
- emit_r1_thunk(jit);
+ EMIT_JUMP_REG(1);
return 0;
}