summary | refs | log | tree | commit | diff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r-- arch/s390/boot/decompressor.c 6
-rw-r--r-- arch/s390/configs/debug_defconfig 7
-rw-r--r-- arch/s390/configs/defconfig 6
-rw-r--r-- arch/s390/configs/zfcpdump_defconfig 2
-rw-r--r-- arch/s390/include/asm/cpu_mf.h 31
-rw-r--r-- arch/s390/include/asm/debug.h 6
-rw-r--r-- arch/s390/include/asm/kvm_host.h 14
-rw-r--r-- arch/s390/include/asm/mem_encrypt.h 4
-rw-r--r-- arch/s390/include/asm/pci.h 5
-rw-r--r-- arch/s390/include/asm/percpu.h 2
-rw-r--r-- arch/s390/include/asm/pgtable.h 2
-rw-r--r-- arch/s390/include/asm/stacktrace.h 1
-rw-r--r-- arch/s390/include/asm/tlb.h 11
-rw-r--r-- arch/s390/include/asm/uv.h 10
-rw-r--r-- arch/s390/kernel/asm-offsets.c 1
-rw-r--r-- arch/s390/kernel/entry.S 26
-rw-r--r-- arch/s390/kernel/machine_kexec_file.c 5
-rw-r--r-- arch/s390/kernel/perf_cpum_sf.c 101
-rw-r--r-- arch/s390/kernel/setup.c 6
-rw-r--r-- arch/s390/kernel/uv.c 7
-rw-r--r-- arch/s390/kernel/vmlinux.lds.S 4
-rw-r--r-- arch/s390/kvm/intercept.c 9
-rw-r--r-- arch/s390/kvm/interrupt.c 17
-rw-r--r-- arch/s390/kvm/irq.h 19
-rw-r--r-- arch/s390/kvm/kvm-s390.c 116
-rw-r--r-- arch/s390/kvm/kvm-s390.h 8
-rw-r--r-- arch/s390/kvm/pci.c 6
-rw-r--r-- arch/s390/kvm/priv.c 3
-rw-r--r-- arch/s390/kvm/pv.c 357
-rw-r--r-- arch/s390/kvm/vsie.c 4
-rw-r--r-- arch/s390/mm/gmap.c 152
-rw-r--r-- arch/s390/mm/init.c 12
-rw-r--r-- arch/s390/pci/pci.c 13
-rw-r--r-- arch/s390/pci/pci_dma.c 77
34 files changed, 750 insertions, 300 deletions
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index e27c2140d620..b519a1f045d8 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -23,9 +23,9 @@
#define memmove memmove
#define memzero(s, n) memset((s), 0, (n))
-#ifdef CONFIG_KERNEL_BZIP2
+#if defined(CONFIG_KERNEL_BZIP2)
#define BOOT_HEAP_SIZE 0x400000
-#elif CONFIG_KERNEL_ZSTD
+#elif defined(CONFIG_KERNEL_ZSTD)
#define BOOT_HEAP_SIZE 0x30000
#else
#define BOOT_HEAP_SIZE 0x10000
@@ -80,6 +80,6 @@ void *decompress_kernel(void)
void *output = (void *)decompress_offset;
__decompress(_compressed_start, _compressed_end - _compressed_start,
- NULL, NULL, output, 0, NULL, error);
+ NULL, NULL, output, vmlinux.image_size, NULL, error);
return output;
}
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index a7b4e1d82758..74b35ec2ad28 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -190,7 +190,6 @@ CONFIG_NFT_CT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_REJECT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
@@ -569,6 +568,7 @@ CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
+# CONFIG_LEGACY_TIOCSTI is not set
CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_HANGCHECK_TIMER=m
@@ -660,6 +660,7 @@ CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZ4=y
CONFIG_SQUASHFS_LZO=y
@@ -705,6 +706,7 @@ CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
CONFIG_SECURITY_LANDLOCK=y
CONFIG_INTEGRITY_SIGNATURE=y
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
@@ -781,6 +783,7 @@ CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
CONFIG_CORDIC=m
CONFIG_CRYPTO_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
@@ -848,7 +851,6 @@ CONFIG_PREEMPT_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_FTRACE_STARTUP_TEST=y
# CONFIG_EVENT_TRACE_STARTUP_TEST is not set
@@ -870,7 +872,6 @@ CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAIL_IO_TIMEOUT=y
CONFIG_FAIL_FUTEX=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
-CONFIG_FAIL_FUNCTION=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
CONFIG_LKDTM=m
CONFIG_TEST_MIN_HEAP=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 2bc2d0fe5774..cec71268e3bc 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -181,7 +181,6 @@ CONFIG_NFT_CT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_REJECT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
@@ -559,6 +558,7 @@ CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
+# CONFIG_LEGACY_TIOCSTI is not set
CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_HANGCHECK_TIMER=m
@@ -645,6 +645,7 @@ CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZ4=y
CONFIG_SQUASHFS_LZO=y
@@ -688,6 +689,7 @@ CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
CONFIG_SECURITY_LANDLOCK=y
CONFIG_INTEGRITY_SIGNATURE=y
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
@@ -766,6 +768,7 @@ CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
CONFIG_CRYPTO_LIB_CURVE25519=m
@@ -798,7 +801,6 @@ CONFIG_STACK_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_TRACE_PRINTK=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index ae14ab0b864d..a9c0c81d1de9 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -13,7 +13,6 @@ CONFIG_TUNE_ZEC12=y
# CONFIG_COMPAT is not set
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
-# CONFIG_RELOCATABLE is not set
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
CONFIG_CRASH_DUMP=y
@@ -50,6 +49,7 @@ CONFIG_ZFCP=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
+# CONFIG_LEGACY_TIOCSTI is not set
# CONFIG_HVC_IUCV is not set
# CONFIG_HW_RANDOM_S390 is not set
# CONFIG_HMC_DRV is not set
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index feaba12dbecb..efa103b52a1a 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -131,19 +131,21 @@ struct hws_combined_entry {
struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
} __packed;
-struct hws_trailer_entry {
- union {
- struct {
- unsigned int f:1; /* 0 - Block Full Indicator */
- unsigned int a:1; /* 1 - Alert request control */
- unsigned int t:1; /* 2 - Timestamp format */
- unsigned int :29; /* 3 - 31: Reserved */
- unsigned int bsdes:16; /* 32-47: size of basic SDE */
- unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
- };
- unsigned long long flags; /* 0 - 63: All indicators */
+union hws_trailer_header {
+ struct {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned int t:1; /* 2 - Timestamp format */
+ unsigned int :29; /* 3 - 31: Reserved */
+ unsigned int bsdes:16; /* 32-47: size of basic SDE */
+ unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
+ unsigned long long overflow; /* 64 - Overflow Count */
};
- unsigned long long overflow; /* 64 - sample Overflow count */
+ __uint128_t val;
+};
+
+struct hws_trailer_entry {
+ union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count */
unsigned char timestamp[16]; /* 16 - 31 timestamp */
unsigned long long reserved1; /* 32 -Reserved */
unsigned long long reserved2; /* */
@@ -290,14 +292,11 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
return USEC_PER_SEC * qsi->cpu_speed / rate;
}
-#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL
-#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
-
/* Return TOD timestamp contained in an trailer entry */
static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
{
/* TOD in STCKE format */
- if (te->t)
+ if (te->header.t)
return *((unsigned long long *) &te->timestamp[1]);
/* TOD in STCK format */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 77f24262c25c..ac665b9670c5 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -4,8 +4,8 @@
*
* Copyright IBM Corp. 1999, 2020
*/
-#ifndef DEBUG_H
-#define DEBUG_H
+#ifndef _ASM_S390_DEBUG_H
+#define _ASM_S390_DEBUG_H
#include <linux/string.h>
#include <linux/spinlock.h>
@@ -487,4 +487,4 @@ void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas);
#endif /* MODULE */
-#endif /* DEBUG_H */
+#endif /* _ASM_S390_DEBUG_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index b1e98a9ed152..d67ce719d16a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -142,8 +142,7 @@ struct mcck_volatile_info {
CR14_EXTERNAL_DAMAGE_SUBMASK)
#define SIDAD_SIZE_MASK 0xff
-#define sida_origin(sie_block) \
- ((sie_block)->sidad & PAGE_MASK)
+#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK)
#define sida_size(sie_block) \
((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
@@ -276,6 +275,7 @@ struct kvm_s390_sie_block {
#define ECB3_AES 0x04
#define ECB3_RI 0x01
__u8 ecb3; /* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
__u32 scaol; /* 0x0064 */
__u8 sdf; /* 0x0068 */
__u8 epdx; /* 0x0069 */
@@ -942,6 +942,8 @@ struct kvm_s390_pv {
unsigned long stor_base;
void *stor_var;
bool dumping;
+ void *set_aside;
+ struct list_head need_cleanup;
struct mmu_notifier mmu_notifier;
};
@@ -1017,7 +1019,13 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
-extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa);
+
+static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
+{
+ return __sie64a(virt_to_phys(sie_block), sie_block, rsa);
+}
+
extern char sie_exit;
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
index 08a8b96606d7..b85e13505a0f 100644
--- a/arch/s390/include/asm/mem_encrypt.h
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -4,8 +4,8 @@
#ifndef __ASSEMBLY__
-int set_memory_encrypted(unsigned long addr, int numpages);
-int set_memory_decrypted(unsigned long addr, int numpages);
+int set_memory_encrypted(unsigned long vaddr, int numpages);
+int set_memory_decrypted(unsigned long vaddr, int numpages);
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 108e732d7b14..b248694e0024 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -117,7 +117,9 @@ struct zpci_bus {
struct zpci_dev {
struct zpci_bus *zbus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
+ struct list_head iommu_list;
struct kref kref;
+ struct rcu_head rcu;
struct hotplug_slot hotplug_slot;
enum zpci_state state;
@@ -155,7 +157,6 @@ struct zpci_dev {
/* DMA stuff */
unsigned long *dma_table;
- spinlock_t dma_table_lock;
int tlb_refresh;
spinlock_t iommu_bitmap_lock;
@@ -220,7 +221,7 @@ void zpci_device_reserved(struct zpci_dev *zdev);
bool zpci_is_device_configured(struct zpci_dev *zdev);
int zpci_hot_reset_device(struct zpci_dev *zdev);
-int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
+int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *);
int zpci_unregister_ioat(struct zpci_dev *, u8);
void zpci_remove_reserved_devices(void);
void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index cb5fc0690435..081837b391e3 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -31,7 +31,7 @@
pcp_op_T__ *ptr__; \
preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
- prev__ = *ptr__; \
+ prev__ = READ_ONCE(*ptr__); \
do { \
old__ = prev__; \
new__ = old__ op (val); \
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 11e901286414..b26cbf1c533c 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1774,8 +1774,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#define kern_addr_valid(addr) (1)
-
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern void vmem_remove_mapping(unsigned long start, unsigned long size);
extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index b23c658dce77..1802be5abb5d 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -46,6 +46,7 @@ struct stack_frame {
unsigned long sie_savearea;
unsigned long sie_reason;
unsigned long sie_flags;
+ unsigned long sie_control_block_phys;
};
};
unsigned long gprs[10];
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 3a5c8fb590e5..b91f4a9b044c 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -25,7 +25,8 @@
void __tlb_remove_table(void *_table);
static inline void tlb_flush(struct mmu_gather *tlb);
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size);
+ struct encoded_page *page,
+ int page_size);
#define tlb_flush tlb_flush
#define pte_free_tlb pte_free_tlb
@@ -40,11 +41,15 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
* Release the page cache reference for a pte removed by
* tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
* has already been freed, so just do free_page_and_swap_cache.
+ *
+ * s390 doesn't delay rmap removal, so there is nothing encoded in
+ * the page pointer.
*/
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
+ struct encoded_page *page,
+ int page_size)
{
- free_page_and_swap_cache(page);
+ free_page_and_swap_cache(encoded_page_ptr(page));
return false;
}
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index be3ef9dd6972..28a9ad57b6f1 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -34,6 +34,7 @@
#define UVC_CMD_INIT_UV 0x000f
#define UVC_CMD_CREATE_SEC_CONF 0x0100
#define UVC_CMD_DESTROY_SEC_CONF 0x0101
+#define UVC_CMD_DESTROY_SEC_CONF_FAST 0x0102
#define UVC_CMD_CREATE_SEC_CPU 0x0120
#define UVC_CMD_DESTROY_SEC_CPU 0x0121
#define UVC_CMD_CONV_TO_SEC_STOR 0x0200
@@ -81,6 +82,7 @@ enum uv_cmds_inst {
BIT_UVC_CMD_UNSHARE_ALL = 20,
BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
+ BIT_UVC_CMD_DESTROY_SEC_CONF_FAST = 23,
BIT_UVC_CMD_DUMP_INIT = 24,
BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25,
BIT_UVC_CMD_DUMP_CPU = 26,
@@ -230,6 +232,14 @@ struct uv_cb_nodata {
u64 reserved20[4];
} __packed __aligned(8);
+/* Destroy Configuration Fast */
+struct uv_cb_destroy_fast {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 handle;
+ u64 reserved20[5];
+} __packed __aligned(8);
+
/* Set Shared Access */
struct uv_cb_share {
struct uv_cb_header header;
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index d8ce965c0a97..3f8e760298c2 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -62,6 +62,7 @@ int main(void)
OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea);
OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
+ OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
BLANK();
/* idle data offsets */
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index e0d11f3adfcc..0f423e9df095 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -207,18 +207,20 @@ ENDPROC(__switch_to)
#if IS_ENABLED(CONFIG_KVM)
/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
+ * __sie64a calling convention:
+ * %r2 pointer to sie control block phys
+ * %r3 pointer to sie control block virt
+ * %r4 guest register save area
*/
-ENTRY(sie64a)
+ENTRY(__sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
lg %r12,__LC_CURRENT
- stg %r2,__SF_SIE_CONTROL(%r15) # save control block pointer
- stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area
+ stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical..
+ stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses
+ stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area
xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
- lmg %r0,%r13,0(%r3) # load guest gprs 0-13
+ lmg %r0,%r13,0(%r4) # load guest gprs 0-13
lg %r14,__LC_GMAP # get gmap pointer
ltgr %r14,%r14
jz .Lsie_gmap
@@ -230,6 +232,7 @@ ENTRY(sie64a)
jnz .Lsie_skip
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lsie_skip # exit if fp/vx regs changed
+ lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr
BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_entry:
sie 0(%r14)
@@ -240,13 +243,14 @@ ENTRY(sie64a)
BPOFF
BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_skip:
+ lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
.Lsie_done:
# some program checks are suppressing. C code (e.g. do_protection_exception)
# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
-# Other instructions between sie64a and .Lsie_done should not cause program
+# Other instructions between __sie64a and .Lsie_done should not cause program
# interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
.Lrewind_pad6:
nopr 7
@@ -275,8 +279,8 @@ sie_exit:
EX_TABLE(.Lrewind_pad4,.Lsie_fault)
EX_TABLE(.Lrewind_pad2,.Lsie_fault)
EX_TABLE(sie_exit,.Lsie_fault)
-ENDPROC(sie64a)
-EXPORT_SYMBOL(sie64a)
+ENDPROC(__sie64a)
+EXPORT_SYMBOL(__sie64a)
EXPORT_SYMBOL(sie_exit)
#endif
@@ -355,7 +359,7 @@ ENTRY(pgm_check_handler)
j 3f # -> fault in user space
.Lpgm_skip_asce:
#if IS_ENABLED(CONFIG_KVM)
- # cleanup critical section for program checks in sie64a
+ # cleanup critical section for program checks in __sie64a
OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f
SIEEXIT
lghi %r10,_PIF_GUEST_FAULT
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index fc6d5f58debe..2df94d32140c 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -187,8 +187,6 @@ static int kexec_file_add_ipl_report(struct kimage *image,
data->memsz = ALIGN(data->memsz, PAGE_SIZE);
buf.mem = data->memsz;
- if (image->type == KEXEC_TYPE_CRASH)
- buf.mem += crashk_res.start;
ptr = (void *)ipl_cert_list_addr;
end = ptr + ipl_cert_list_size;
@@ -225,6 +223,9 @@ static int kexec_file_add_ipl_report(struct kimage *image,
data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
*lc_ipl_parmblock_ptr = (__u32)buf.mem;
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+
ret = kexec_add_buffer(&buf);
out:
return ret;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 332a49965130..ce886a03545a 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -163,14 +163,15 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
{
- unsigned long sdb, *trailer;
+ struct hws_trailer_entry *te;
+ unsigned long sdb;
/* Allocate and initialize sample-data-block */
sdb = get_zeroed_page(gfp_flags);
if (!sdb)
return -ENOMEM;
- trailer = trailer_entry_ptr(sdb);
- *trailer = SDB_TE_ALERT_REQ_MASK;
+ te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+ te->header.a = 1;
/* Link SDB into the sample-data-block-table */
*sdbt = sdb;
@@ -1206,7 +1207,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
"%s: Found unknown"
" sampling data entry: te->f %i"
" basic.def %#4x (%p)\n", __func__,
- te->f, sample->def, sample);
+ te->header.f, sample->def, sample);
/* Sample slot is not yet written or other record.
*
* This condition can occur if the buffer was reused
@@ -1217,7 +1218,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
* that are not full. Stop processing if the first
* invalid format was detected.
*/
- if (!te->f)
+ if (!te->header.f)
break;
}
@@ -1227,6 +1228,16 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
}
}
+static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new)
+{
+ asm volatile(
+ " cdsg %[old],%[new],%[ptr]\n"
+ : [old] "+d" (old), [ptr] "+QS" (*ptr)
+ : [new] "d" (new)
+ : "memory", "cc");
+ return old;
+}
+
/* hw_perf_event_update() - Process sampling buffer
* @event: The perf event
* @flush_all: Flag to also flush partially filled sample-data-blocks
@@ -1243,10 +1254,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
*/
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
+ unsigned long long event_overflow, sampl_overflow, num_sdb;
+ union hws_trailer_header old, prev, new;
struct hw_perf_event *hwc = &event->hw;
struct hws_trailer_entry *te;
unsigned long *sdbt;
- unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
int done;
/*
@@ -1266,25 +1278,25 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
/* Leave loop if no more work to do (block full indicator) */
- if (!te->f) {
+ if (!te->header.f) {
done = 1;
if (!flush_all)
break;
}
/* Check the sample overflow count */
- if (te->overflow)
+ if (te->header.overflow)
/* Account sample overflows and, if a particular limit
* is reached, extend the sampling buffer.
* For details, see sfb_account_overflows().
*/
- sampl_overflow += te->overflow;
+ sampl_overflow += te->header.overflow;
/* Timestamps are valid for full sample-data-blocks only */
debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
"overflow %llu timestamp %#llx\n",
- __func__, (unsigned long)sdbt, te->overflow,
- (te->f) ? trailer_timestamp(te) : 0ULL);
+ __func__, (unsigned long)sdbt, te->header.overflow,
+ (te->header.f) ? trailer_timestamp(te) : 0ULL);
/* Collect all samples from a single sample-data-block and
* flag if an (perf) event overflow happened. If so, the PMU
@@ -1294,12 +1306,16 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
num_sdb++;
/* Reset trailer (using compare-double-and-swap) */
+ /* READ_ONCE() 16 byte header */
+ prev.val = __cdsg(&te->header.val, 0, 0);
do {
- te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
- te_flags |= SDB_TE_ALERT_REQ_MASK;
- } while (!cmpxchg_double(&te->flags, &te->overflow,
- te->flags, te->overflow,
- te_flags, 0ULL));
+ old.val = prev.val;
+ new.val = prev.val;
+ new.f = 0;
+ new.a = 1;
+ new.overflow = 0;
+ prev.val = __cdsg(&te->header.val, old.val, new.val);
+ } while (prev.val != old.val);
/* Advance to next sample-data-block */
sdbt++;
@@ -1384,7 +1400,7 @@ static void aux_output_end(struct perf_output_handle *handle)
range_scan = AUX_SDB_NUM_ALERT(aux);
for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
- if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
+ if (!te->header.f)
break;
}
/* i is num of SDBs which are full */
@@ -1392,7 +1408,7 @@ static void aux_output_end(struct perf_output_handle *handle)
/* Remove alert indicators in the buffer */
te = aux_sdb_trailer(aux, aux->alert_mark);
- te->flags &= ~SDB_TE_ALERT_REQ_MASK;
+ te->header.a = 0;
debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
__func__, i, range_scan, aux->head);
@@ -1437,9 +1453,9 @@ static int aux_output_begin(struct perf_output_handle *handle,
idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
- te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
- SDB_TE_ALERT_REQ_MASK);
- te->overflow = 0;
+ te->header.f = 0;
+ te->header.a = 0;
+ te->header.overflow = 0;
}
/* Save the position of empty SDBs */
aux->empty_mark = aux->head + range - 1;
@@ -1448,7 +1464,7 @@ static int aux_output_begin(struct perf_output_handle *handle,
/* Set alert indicator */
aux->alert_mark = aux->head + range/2 - 1;
te = aux_sdb_trailer(aux, aux->alert_mark);
- te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
+ te->header.a = 1;
/* Reset hardware buffer head */
head = AUX_SDB_INDEX(aux, aux->head);
@@ -1475,14 +1491,17 @@ static int aux_output_begin(struct perf_output_handle *handle,
static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
unsigned long long *overflow)
{
- unsigned long long orig_overflow, orig_flags, new_flags;
+ union hws_trailer_header old, prev, new;
struct hws_trailer_entry *te;
te = aux_sdb_trailer(aux, alert_index);
+ /* READ_ONCE() 16 byte header */
+ prev.val = __cdsg(&te->header.val, 0, 0);
do {
- orig_flags = te->flags;
- *overflow = orig_overflow = te->overflow;
- if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
+ old.val = prev.val;
+ new.val = prev.val;
+ *overflow = old.overflow;
+ if (old.f) {
/*
* SDB is already set by hardware.
* Abort and try to set somewhere
@@ -1490,10 +1509,10 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
*/
return false;
}
- new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
- } while (!cmpxchg_double(&te->flags, &te->overflow,
- orig_flags, orig_overflow,
- new_flags, 0ULL));
+ new.a = 1;
+ new.overflow = 0;
+ prev.val = __cdsg(&te->header.val, old.val, new.val);
+ } while (prev.val != old.val);
return true;
}
@@ -1522,8 +1541,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
unsigned long long *overflow)
{
- unsigned long long orig_overflow, orig_flags, new_flags;
unsigned long i, range_scan, idx, idx_old;
+ union hws_trailer_header old, prev, new;
+ unsigned long long orig_overflow;
struct hws_trailer_entry *te;
debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
@@ -1554,17 +1574,20 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
idx_old = idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
+ /* READ_ONCE() 16 byte header */
+ prev.val = __cdsg(&te->header.val, 0, 0);
do {
- orig_flags = te->flags;
- orig_overflow = te->overflow;
- new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
+ old.val = prev.val;
+ new.val = prev.val;
+ orig_overflow = old.overflow;
+ new.f = 0;
+ new.overflow = 0;
if (idx == aux->alert_mark)
- new_flags |= SDB_TE_ALERT_REQ_MASK;
+ new.a = 1;
else
- new_flags &= ~SDB_TE_ALERT_REQ_MASK;
- } while (!cmpxchg_double(&te->flags, &te->overflow,
- orig_flags, orig_overflow,
- new_flags, 0ULL));
+ new.a = 0;
+ prev.val = __cdsg(&te->header.val, old.val, new.val);
+ } while (prev.val != old.val);
*overflow += orig_overflow;
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 2094f575c532..696c9e007a36 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -52,6 +52,7 @@
#include <linux/hugetlb.h>
#include <linux/kmemleak.h>
+#include <asm/archrandom.h>
#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
@@ -507,6 +508,7 @@ static void __init setup_lowcore_dat_on(void)
{
struct lowcore *abs_lc;
unsigned long flags;
+ int i;
__ctl_clear_bit(0, 28);
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
@@ -522,8 +524,8 @@ static void __init setup_lowcore_dat_on(void)
abs_lc = get_abs_lowcore(&flags);
abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
abs_lc->program_new_psw = S390_lowcore.program_new_psw;
- memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
- sizeof(abs_lc->cregs_save_area));
+ for (i = 0; i < 16; i++)
+ abs_lc->cregs_save_area[i] = S390_lowcore.cregs_save_area[i];
put_abs_lowcore(abs_lc, flags);
}
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index f9810d2a267c..9f18a4af9c13 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -255,6 +255,13 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
*/
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications))
+ return false;
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
return false;
return atomic_read(&mm->context.protected_count) > 1;
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 5ea3830af0cc..cbf9c1b0beda 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -17,6 +17,8 @@
/* Handle ro_after_init data on our own. */
#define RO_AFTER_INIT_DATA
+#define RUNTIME_DISCARD_EXIT
+
#define EMITS_PT_NOTE
#include <asm-generic/vmlinux.lds.h>
@@ -79,6 +81,7 @@ SECTIONS
_end_amode31_refs = .;
}
+ . = ALIGN(PAGE_SIZE);
_edata = .; /* End of data section */
/* will be freed after init */
@@ -193,6 +196,7 @@ SECTIONS
BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE)
+ . = ALIGN(PAGE_SIZE);
_end = . ;
/*
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 88112065d941..0ee02dae14b2 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -217,7 +217,7 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
return 0;
if (current->thread.per_flags & PER_FLAG_NO_TE)
return 0;
- itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+ itdb = phys_to_virt(vcpu->arch.sie_block->itdba);
rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
if (rc)
return rc;
@@ -409,8 +409,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
out:
if (!cc) {
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
- memcpy((void *)(sida_origin(vcpu->arch.sie_block)),
- sctns, PAGE_SIZE);
+ memcpy(sida_addr(vcpu->arch.sie_block), sctns, PAGE_SIZE);
} else {
r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
if (r) {
@@ -464,7 +463,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
static int handle_pv_spx(struct kvm_vcpu *vcpu)
{
- u32 pref = *(u32 *)vcpu->arch.sie_block->sidad;
+ u32 pref = *(u32 *)sida_addr(vcpu->arch.sie_block);
kvm_s390_set_prefix(vcpu, pref);
trace_kvm_s390_handle_prefix(vcpu, 1, pref);
@@ -497,7 +496,7 @@ static int handle_pv_sclp(struct kvm_vcpu *vcpu)
static int handle_pv_uvc(struct kvm_vcpu *vcpu)
{
- struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad;
+ struct uv_cb_share *guest_uvcb = sida_addr(vcpu->arch.sie_block);
struct uv_cb_cts uvcb = {
.header.cmd = UVC_CMD_UNPIN_PAGE_SHARED,
.header.len = sizeof(uvcb),
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index ab569faf0df2..ab26aa53ee37 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -83,8 +83,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+ union esca_sigp_ctrl new_val = {0}, old_val;
+ old_val = READ_ONCE(*sigp_ctrl);
new_val.scn = src_id;
new_val.c = 1;
old_val.c = 0;
@@ -95,8 +96,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+ union bsca_sigp_ctrl new_val = {0}, old_val;
+ old_val = READ_ONCE(*sigp_ctrl);
new_val.scn = src_id;
new_val.c = 1;
old_val.c = 0;
@@ -126,16 +128,18 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union esca_sigp_ctrl old = *sigp_ctrl;
+ union esca_sigp_ctrl old;
+ old = READ_ONCE(*sigp_ctrl);
expect = old.value;
rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union bsca_sigp_ctrl old = *sigp_ctrl;
+ union bsca_sigp_ctrl old;
+ old = READ_ONCE(*sigp_ctrl);
expect = old.value;
rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
}
@@ -314,11 +318,6 @@ static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa)
return READ_ONCE(gisa->ipm);
}
-static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
-{
- clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
-}
-
static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
{
return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
deleted file mode 100644
index 484608c71dd0..000000000000
--- a/arch/s390/kvm/irq.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * s390 irqchip routines
- *
- * Copyright IBM Corp. 2014
- *
- * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
- */
-#ifndef __KVM_IRQ_H
-#define __KVM_IRQ_H
-
-#include <linux/kvm_host.h>
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
- return 1;
-}
-
-#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bc491a73815c..e4890e04b210 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -210,6 +210,14 @@ module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
/*
+ * allow asynchronous deinit for protected guests; enable by default since
+ * the feature is opt-in anyway
+ */
+static int async_destroy = 1;
+module_param(async_destroy, int, 0444);
+MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
+
+/*
* For now we handle at most 16 double words as this is what the s390 base
* kernel handles and stores in the prefix page. If we ever need to go beyond
* this, this requires changes to code, but the external uapi can stay.
@@ -616,6 +624,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_BPB:
r = test_facility(82);
break;
+ case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
+ r = async_destroy && is_prot_virt_host();
+ break;
case KVM_CAP_S390_PROTECTED:
r = is_prot_virt_host();
break;
@@ -2519,9 +2530,13 @@ static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
+ const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
+ void __user *argp = (void __user *)cmd->data;
int r = 0;
u16 dummy;
- void __user *argp = (void __user *)cmd->data;
+
+ if (need_lock)
+ mutex_lock(&kvm->lock);
switch (cmd->cmd) {
case KVM_PV_ENABLE: {
@@ -2555,6 +2570,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
break;
}
+ case KVM_PV_ASYNC_CLEANUP_PREPARE:
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
+ break;
+
+ r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+ /*
+ * If a CPU could not be destroyed, destroy VM will also fail.
+ * There is no point in trying to destroy it. Instead return
+ * the rc and rrc from the first CPU that failed destroying.
+ */
+ if (r)
+ break;
+ r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
+
+ /* no need to block service interrupts any more */
+ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ case KVM_PV_ASYNC_CLEANUP_PERFORM:
+ r = -EINVAL;
+ if (!async_destroy)
+ break;
+ /* kvm->lock must not be held; this is asserted inside the function. */
+ r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
+ break;
case KVM_PV_DISABLE: {
r = -EINVAL;
if (!kvm_s390_pv_is_protected(kvm))
@@ -2568,7 +2608,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
*/
if (r)
break;
- r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+ r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
/* no need to block service interrupts any more */
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
@@ -2718,6 +2758,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
default:
r = -ENOTTY;
}
+ if (need_lock)
+ mutex_unlock(&kvm->lock);
+
return r;
}
@@ -2922,9 +2965,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EINVAL;
break;
}
- mutex_lock(&kvm->lock);
+ /* must be called without kvm->lock */
r = kvm_s390_handle_pv(kvm, &args);
- mutex_unlock(&kvm->lock);
if (copy_to_user(argp, &args, sizeof(args))) {
r = -EFAULT;
break;
@@ -3243,6 +3285,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_vsie_init(kvm);
if (use_gisa)
kvm_s390_gisa_init(kvm);
+ INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
+ kvm->arch.pv.set_aside = NULL;
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
@@ -3287,11 +3331,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
/*
* We are already at the end of life and kvm->lock is not taken.
* This is ok as the file descriptor is closed by now and nobody
- * can mess with the pv state. To avoid lockdep_assert_held from
- * complaining we do not use kvm_s390_pv_is_protected.
+ * can mess with the pv state.
*/
- if (kvm_s390_pv_get_handle(kvm))
- kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+ kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
/*
* Remove the mmu notifier only when the whole KVM VM is torn down,
* and only if one was registered to begin with. If the VM is
@@ -3344,28 +3386,30 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu)
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
if (!kvm_s390_use_sca_entries()) {
- struct bsca_block *sca = vcpu->kvm->arch.sca;
+ phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
/* we still need the basic sca for the ipte control */
- vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
- vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+ vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+ vcpu->arch.sie_block->scaol = sca_phys;
return;
}
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
+ phys_addr_t sca_phys = virt_to_phys(sca);
- sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
- vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
- vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
+ sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
+ vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+ vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
+ phys_addr_t sca_phys = virt_to_phys(sca);
- sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
- vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
- vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+ sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
+ vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+ vcpu->arch.sie_block->scaol = sca_phys;
set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
}
read_unlock(&vcpu->kvm->arch.sca_lock);
@@ -3396,6 +3440,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
struct kvm_vcpu *vcpu;
unsigned long vcpu_idx;
u32 scaol, scaoh;
+ phys_addr_t new_sca_phys;
if (kvm->arch.use_esca)
return 0;
@@ -3404,8 +3449,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
if (!new_sca)
return -ENOMEM;
- scaoh = (u32)((u64)(new_sca) >> 32);
- scaol = (u32)(u64)(new_sca) & ~0x3fU;
+ new_sca_phys = virt_to_phys(new_sca);
+ scaoh = new_sca_phys >> 32;
+ scaol = new_sca_phys & ESCA_SCAOL_MASK;
kvm_s390_vcpu_block_all(kvm);
write_lock(&kvm->arch.sca_lock);
@@ -3625,15 +3671,18 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
- free_page(vcpu->arch.sie_block->cbrlo);
+ free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
vcpu->arch.sie_block->cbrlo = 0;
}
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
- vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
- if (!vcpu->arch.sie_block->cbrlo)
+ void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+
+ if (!cbrlo_page)
return -ENOMEM;
+
+ vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
return 0;
}
@@ -3643,7 +3692,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->ibc = model->ibc;
if (test_kvm_facility(vcpu->kvm, 7))
- vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
+ vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
}
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -3700,9 +3749,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
}
- vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
- | SDNXC;
- vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
+ vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
+ vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
if (sclp.has_kss)
kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
@@ -3752,7 +3800,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
return -ENOMEM;
vcpu->arch.sie_block = &sie_page->sie_block;
- vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+ vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
/* the real guest size will always be smaller than msl */
vcpu->arch.sie_block->mso = 0;
@@ -5169,6 +5217,7 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
+ void *sida_addr;
int r = 0;
if (mop->flags || !mop->size)
@@ -5180,16 +5229,16 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
if (!kvm_s390_pv_cpu_is_protected(vcpu))
return -EINVAL;
+ sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
+
switch (mop->op) {
case KVM_S390_MEMOP_SIDA_READ:
- if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
- mop->sida_offset), mop->size))
+ if (copy_to_user(uaddr, sida_addr, mop->size))
r = -EFAULT;
break;
case KVM_S390_MEMOP_SIDA_WRITE:
- if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
- mop->sida_offset), uaddr, mop->size))
+ if (copy_from_user(sida_addr, uaddr, mop->size))
r = -EFAULT;
break;
}
@@ -5567,6 +5616,11 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+ return true;
+}
+
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *old,
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 4755492dfabc..d48588c207d8 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -23,7 +23,8 @@
/* Transactional Memory Execution related macros */
#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE))
#define TDB_FORMAT1 1
-#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
+#define IS_ITDB_VALID(vcpu) \
+ ((*(char *)phys_to_virt((vcpu)->arch.sie_block->itdba) == TDB_FORMAT1))
extern debug_info_t *kvm_s390_dbf;
extern debug_info_t *kvm_s390_dbf_uv;
@@ -233,7 +234,7 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots)
static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
{
- u32 gd = (u32)(u64)kvm->arch.gisa_int.origin;
+ u32 gd = virt_to_phys(kvm->arch.gisa_int.origin);
if (gd && sclp.has_gisaf)
gd |= GISA_FORMAT1;
@@ -243,6 +244,9 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
/* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc);
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index ded1af2ddae9..ec51e810e381 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -434,6 +434,7 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
{
struct zpci_dev *zdev = opaque;
+ u8 status;
int rc;
if (!zdev)
@@ -486,7 +487,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
/* Re-register the IOMMU that was already created */
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table));
+ virt_to_phys(zdev->dma_table), &status);
if (rc)
goto clear_gisa;
@@ -516,6 +517,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
{
struct zpci_dev *zdev = opaque;
struct kvm *kvm;
+ u8 status;
if (!zdev)
return;
@@ -554,7 +556,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
/* Re-register the IOMMU that was already created */
zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table));
+ virt_to_phys(zdev->dma_table), &status);
out:
spin_lock(&kvm->arch.kzdev_list_lock);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 3335fa09b6f1..9f8a192bd750 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -924,8 +924,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
return -EREMOTE;
}
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
- memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
- PAGE_SIZE);
+ memcpy(sida_addr(vcpu->arch.sie_block), (void *)mem, PAGE_SIZE);
rc = 0;
} else {
rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 7cb7799a0acb..e032ebbf51b9 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -18,6 +18,29 @@
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
+/**
+ * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
+ * be destroyed
+ *
+ * @list: list head for the list of leftover VMs
+ * @old_gmap_table: the gmap table of the leftover protected VM
+ * @handle: the handle of the leftover protected VM
+ * @stor_var: pointer to the variable storage of the leftover protected VM
+ * @stor_base: address of the base storage of the leftover protected VM
+ *
+ * Represents a protected VM that is still registered with the Ultravisor,
+ * but which does not correspond any longer to an active KVM VM. It should
+ * be destroyed at some point later, either asynchronously or when the
+ * process terminates.
+ */
+struct pv_vm_to_be_destroyed {
+ struct list_head list;
+ unsigned long old_gmap_table;
+ u64 handle;
+ void *stor_var;
+ unsigned long stor_base;
+};
+
static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
kvm->arch.pv.handle = 0;
@@ -44,7 +67,7 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
free_pages(vcpu->arch.pv.stor_base,
get_order(uv_info.guest_cpu_stor_len));
- free_page(sida_origin(vcpu->arch.sie_block));
+ free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
vcpu->arch.sie_block->pv_handle_cpu = 0;
vcpu->arch.sie_block->pv_handle_config = 0;
memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
@@ -66,6 +89,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
.header.cmd = UVC_CMD_CREATE_SEC_CPU,
.header.len = sizeof(uvcb),
};
+ void *sida_addr;
int cc;
if (kvm_s390_pv_cpu_get_handle(vcpu))
@@ -79,16 +103,17 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
/* Input */
uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
uvcb.num = vcpu->arch.sie_block->icpua;
- uvcb.state_origin = (u64)vcpu->arch.sie_block;
- uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
+ uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
+ uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);
/* Alloc Secure Instruction Data Area Designation */
- vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!vcpu->arch.sie_block->sidad) {
+ sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!sida_addr) {
free_pages(vcpu->arch.pv.stor_base,
get_order(uv_info.guest_cpu_stor_len));
return -ENOMEM;
}
+ vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);
cc = uv_call(0, (u64)&uvcb);
*rc = uvcb.header.rc;
@@ -159,23 +184,192 @@ out_err:
return -ENOMEM;
}
-/* this should not fail, but if it does, we must not free the donated memory */
-int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+/**
+ * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
+ * @kvm: the KVM that was associated with this leftover protected VM
+ * @leftover: details about the leftover protected VM that needs a clean up
+ * @rc: the RC code of the Destroy Secure Configuration UVC
+ * @rrc: the RRC code of the Destroy Secure Configuration UVC
+ *
+ * Destroy one leftover protected VM.
+ * On success, kvm->mm->context.protected_count will be decremented atomically
+ * and all other resources used by the VM will be freed.
+ *
+ * Return: 0 in case of success, otherwise 1
+ */
+static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
+ struct pv_vm_to_be_destroyed *leftover,
+ u16 *rc, u16 *rrc)
{
int cc;
- cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
- UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+ /* It used the destroy-fast UVC, nothing left to do here */
+ if (!leftover->handle)
+ goto done_fast;
+ cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
+ if (cc)
+ return cc;
+ /*
+ * Intentionally leak unusable memory. If the UVC fails, the memory
+ * used for the VM and its metadata is permanently unusable.
+ * This can only happen in case of a serious KVM or hardware bug; it
+ * is not expected to happen in normal operation.
+ */
+ free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
+ free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
+ vfree(leftover->stor_var);
+done_fast:
+ atomic_dec(&kvm->mm->context.protected_count);
+ return 0;
+}
+
+/**
+ * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
+ * @kvm: the VM whose memory is to be cleared.
+ *
+ * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
+ * The CPUs of the protected VM need to be destroyed beforehand.
+ */
+static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
+{
+ const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
+ struct kvm_memory_slot *slot;
+ unsigned long len;
+ int srcu_idx;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ /* Take the memslot containing guest absolute address 0 */
+ slot = gfn_to_memslot(kvm, 0);
+ /* Clear all slots or parts thereof that are below 2GB */
+ while (slot && slot->base_gfn < pages_2g) {
+ len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
+ s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
+ /* Take the next memslot */
+ slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
+ }
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
+static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_destroy_fast uvcb = {
+ .header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
+ .header.len = sizeof(uvcb),
+ .handle = kvm_s390_pv_get_handle(kvm),
+ };
+ int cc;
+
+ cc = uv_call_sched(0, (u64)&uvcb);
+ if (rc)
+ *rc = uvcb.header.rc;
+ if (rrc)
+ *rrc = uvcb.header.rrc;
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
+ uvcb.header.rc, uvcb.header.rrc);
+ WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
+ kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
+ /* Intended memory leak on "impossible" error */
+ if (!cc)
+ kvm_s390_pv_dealloc_vm(kvm);
+ return cc ? -EIO : 0;
+}
+
+static inline bool is_destroy_fast_available(void)
+{
+ return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
+}
+
+/**
+ * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
+ * @kvm: the VM
+ * @rc: return value for the RC field of the UVCB
+ * @rrc: return value for the RRC field of the UVCB
+ *
+ * Set aside the protected VM for a subsequent teardown. The VM will be able
+ * to continue immediately as a non-secure VM, and the information needed to
+ * properly tear down the protected VM is set aside. If another protected VM
+ * was already set aside without starting its teardown, this function will
+ * fail.
+ * The CPUs of the protected VM need to be destroyed beforehand.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return: 0 on success, -EINVAL if another protected VM was already set aside,
+ * -ENOMEM if the system ran out of memory, -EIO if the fast destroy UVC failed.
+ */
+int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ struct pv_vm_to_be_destroyed *priv;
+ int res = 0;
+
+ lockdep_assert_held(&kvm->lock);
/*
- * if the mm still has a mapping, make all its pages accessible
- * before destroying the guest
+ * If another protected VM was already prepared for teardown, refuse.
+ * A normal deinitialization has to be performed instead.
*/
- if (mmget_not_zero(kvm->mm)) {
- s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
- mmput(kvm->mm);
+ if (kvm->arch.pv.set_aside)
+ return -EINVAL;
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ if (is_destroy_fast_available()) {
+ res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
+ } else {
+ priv->stor_var = kvm->arch.pv.stor_var;
+ priv->stor_base = kvm->arch.pv.stor_base;
+ priv->handle = kvm_s390_pv_get_handle(kvm);
+ priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
+ WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+ if (s390_replace_asce(kvm->arch.gmap))
+ res = -ENOMEM;
}
+ if (res) {
+ kfree(priv);
+ return res;
+ }
+
+ kvm_s390_destroy_lower_2g(kvm);
+ kvm_s390_clear_pv_state(kvm);
+ kvm->arch.pv.set_aside = priv;
+
+ *rc = UVC_RC_EXECUTED;
+ *rrc = 42;
+ return 0;
+}
+
+/**
+ * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
+ * @kvm: the KVM whose protected VM needs to be deinitialized
+ * @rc: the RC code of the UVC
+ * @rrc: the RRC code of the UVC
+ *
+ * Deinitialize the current protected VM. This function will destroy and
+ * cleanup the current protected VM, but it will not cleanup the guest
+ * memory. This function should only be called when the protected VM has
+ * just been created and therefore does not have any guest memory, or when
+ * the caller cleans up the guest memory separately.
+ *
+ * This function should not fail, but if it does, the donated memory must
+ * not be freed.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ int cc;
+
+ cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+ WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
if (!cc) {
atomic_dec(&kvm->mm->context.protected_count);
kvm_s390_pv_dealloc_vm(kvm);
@@ -189,11 +383,137 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
return cc ? -EIO : 0;
}
+/**
+ * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
+ * with a specific KVM.
+ * @kvm: the KVM to be cleaned up
+ * @rc: the RC code of the first failing UVC
+ * @rrc: the RRC code of the first failing UVC
+ *
+ * This function will clean up all protected VMs associated with a KVM.
+ * This includes the active one, the one prepared for deinitialization with
+ * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
+ *
+ * Context: kvm->lock needs to be held unless being called from
+ * kvm_arch_destroy_vm.
+ *
+ * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
+ */
+int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ struct pv_vm_to_be_destroyed *cur;
+ bool need_zap = false;
+ u16 _rc, _rrc;
+ int cc = 0;
+
+ /* Make sure the counter does not reach 0 before calling s390_uv_destroy_range */
+ atomic_inc(&kvm->mm->context.protected_count);
+
+ *rc = 1;
+ /* If the current VM is protected, destroy it */
+ if (kvm_s390_pv_get_handle(kvm)) {
+ cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
+ need_zap = true;
+ }
+
+ /* If a previous protected VM was set aside, put it in the need_cleanup list */
+ if (kvm->arch.pv.set_aside) {
+ list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
+ kvm->arch.pv.set_aside = NULL;
+ }
+
+ /* Cleanup all protected VMs in the need_cleanup list */
+ while (!list_empty(&kvm->arch.pv.need_cleanup)) {
+ cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
+ need_zap = true;
+ if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
+ cc = 1;
+ /*
+ * Only return the first error rc and rrc, so make
+ * sure it is not overwritten. All destroys will
+ * additionally be reported via KVM_UV_EVENT().
+ */
+ if (*rc == UVC_RC_EXECUTED) {
+ *rc = _rc;
+ *rrc = _rrc;
+ }
+ }
+ list_del(&cur->list);
+ kfree(cur);
+ }
+
+ /*
+ * If the mm still has a mapping, try to mark all its pages as
+ * accessible. The counter should not reach zero before this
+ * cleanup has been performed.
+ */
+ if (need_zap && mmget_not_zero(kvm->mm)) {
+ s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
+ mmput(kvm->mm);
+ }
+
+ /* Now the counter can safely reach 0 */
+ atomic_dec(&kvm->mm->context.protected_count);
+ return cc ? -EIO : 0;
+}
+
+/**
+ * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
+ * @kvm: the VM previously associated with the protected VM
+ * @rc: return value for the RC field of the UVCB
+ * @rrc: return value for the RRC field of the UVCB
+ *
+ * Tear down the protected VM that had been previously prepared for teardown
+ * using kvm_s390_pv_set_aside. Ideally this should be called by
+ * userspace asynchronously from a separate thread.
+ *
+ * Context: kvm->lock must not be held.
+ *
+ * Return: 0 in case of success, -EINVAL if no protected VM had been
+ * prepared for asynchronous teardown, -EIO in case of other errors.
+ */
+int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ struct pv_vm_to_be_destroyed *p;
+ int ret = 0;
+
+ lockdep_assert_not_held(&kvm->lock);
+ mutex_lock(&kvm->lock);
+ p = kvm->arch.pv.set_aside;
+ kvm->arch.pv.set_aside = NULL;
+ mutex_unlock(&kvm->lock);
+ if (!p)
+ return -EINVAL;
+
+ /* When a fatal signal is received, stop immediately */
+ if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
+ goto done;
+ if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
+ ret = -EIO;
+ kfree(p);
+ p = NULL;
+done:
+ /*
+ * p is not NULL if we aborted because of a fatal signal, in which
+ * case queue the leftover for later cleanup.
+ */
+ if (p) {
+ mutex_lock(&kvm->lock);
+ list_add(&p->list, &kvm->arch.pv.need_cleanup);
+ mutex_unlock(&kvm->lock);
+ /* Did not finish, but pretend things went well */
+ *rc = UVC_RC_EXECUTED;
+ *rrc = 42;
+ }
+ return ret;
+}
+
static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
struct mm_struct *mm)
{
struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
u16 dummy;
+ int r;
/*
* No locking is needed since this is the last thread of the last user of this
@@ -202,7 +522,9 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
* unregistered. This means that if this notifier runs, then the
* struct kvm is still valid.
*/
- kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+ r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+ if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
+ kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
}
static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
@@ -226,8 +548,9 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
uvcb.guest_stor_len = kvm->arch.pv.guest_len;
uvcb.guest_asce = kvm->arch.gmap->asce;
- uvcb.guest_sca = (unsigned long)kvm->arch.sca;
- uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+ uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
+ uvcb.conf_base_stor_origin =
+ virt_to_phys((void *)kvm->arch.pv.stor_base);
uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
cc = uv_call_sched(0, (u64)&uvcb);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index ace2541ababd..b6a0219e470a 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -656,7 +656,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
page = gfn_to_page(kvm, gpa_to_gfn(gpa));
if (is_error_page(page))
return -EINVAL;
- *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
+ *hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK);
return 0;
}
@@ -871,7 +871,7 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
WARN_ON_ONCE(rc);
return 1;
}
- vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
+ vsie_page->scb_o = phys_to_virt(hpa);
return 0;
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 02d15c8dc92e..74e1d873dce0 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -72,7 +72,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
goto out_free;
page->index = 0;
list_add(&page->lru, &gmap->crst_list);
- table = (unsigned long *) page_to_phys(page);
+ table = page_to_virt(page);
crst_table_init(table, etype);
gmap->table = table;
gmap->asce = atype | _ASCE_TABLE_LENGTH |
@@ -311,12 +311,12 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
- new = (unsigned long *) page_to_phys(page);
+ new = page_to_virt(page);
crst_table_init(new, init);
spin_lock(&gmap->guest_table_lock);
if (*table & _REGION_ENTRY_INVALID) {
list_add(&page->lru, &gmap->crst_list);
- *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+ *table = __pa(new) | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
page->index = gaddr;
page = NULL;
@@ -336,12 +336,11 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
static unsigned long __gmap_segment_gaddr(unsigned long *entry)
{
struct page *page;
- unsigned long offset, mask;
+ unsigned long offset;
offset = (unsigned long) entry / sizeof(unsigned long);
offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
- mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
- page = virt_to_page((void *)((unsigned long) entry & mask));
+ page = pmd_pgtable_page((pmd_t *) entry);
return page->index + offset;
}
@@ -557,7 +556,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
gaddr & _REGION1_MASK))
return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
}
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
@@ -565,7 +564,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
gaddr & _REGION2_MASK))
return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
}
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
@@ -573,7 +572,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
gaddr & _REGION3_MASK))
return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
}
table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
/* Walk the parent mm page table */
@@ -813,7 +812,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
break;
if (*table & _REGION_ENTRY_INVALID)
return NULL;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_REGION2:
table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
@@ -821,7 +820,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
break;
if (*table & _REGION_ENTRY_INVALID)
return NULL;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_REGION3:
table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
@@ -829,7 +828,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
break;
if (*table & _REGION_ENTRY_INVALID)
return NULL;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_SEGMENT:
table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
@@ -837,7 +836,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
break;
if (*table & _REGION_ENTRY_INVALID)
return NULL;
- table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
}
return table;
@@ -1150,7 +1149,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
address = pte_val(pte) & PAGE_MASK;
address += gaddr & ~PAGE_MASK;
- *val = *(unsigned long *) address;
+ *val = *(unsigned long *)__va(address);
set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
/* Do *NOT* clear the _PAGE_INVALID bit! */
rc = 0;
@@ -1335,7 +1334,8 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
*/
static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
{
- unsigned long sto, *ste, *pgt;
+ unsigned long *ste;
+ phys_addr_t sto, pgt;
struct page *page;
BUG_ON(!gmap_is_shadow(sg));
@@ -1343,13 +1343,13 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
return;
gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
- sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
+ sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
- pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
+ pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
*ste = _SEGMENT_ENTRY_EMPTY;
- __gmap_unshadow_pgt(sg, raddr, pgt);
+ __gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
- page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
+ page = phys_to_page(pgt);
list_del(&page->lru);
page_table_free_pgste(page);
}
@@ -1365,19 +1365,19 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
unsigned long *sgt)
{
- unsigned long *pgt;
struct page *page;
+ phys_addr_t pgt;
int i;
BUG_ON(!gmap_is_shadow(sg));
for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
continue;
- pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
+ pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
sgt[i] = _SEGMENT_ENTRY_EMPTY;
- __gmap_unshadow_pgt(sg, raddr, pgt);
+ __gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
- page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
+ page = phys_to_page(pgt);
list_del(&page->lru);
page_table_free_pgste(page);
}
@@ -1392,7 +1392,8 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
*/
static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
{
- unsigned long r3o, *r3e, *sgt;
+ unsigned long r3o, *r3e;
+ phys_addr_t sgt;
struct page *page;
BUG_ON(!gmap_is_shadow(sg));
@@ -1401,12 +1402,12 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
return;
gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
- gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
- sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
+ gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
+ sgt = *r3e & _REGION_ENTRY_ORIGIN;
*r3e = _REGION3_ENTRY_EMPTY;
- __gmap_unshadow_sgt(sg, raddr, sgt);
+ __gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
- page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
+ page = phys_to_page(sgt);
list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1422,19 +1423,19 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
unsigned long *r3t)
{
- unsigned long *sgt;
struct page *page;
+ phys_addr_t sgt;
int i;
BUG_ON(!gmap_is_shadow(sg));
for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
continue;
- sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
+ sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
r3t[i] = _REGION3_ENTRY_EMPTY;
- __gmap_unshadow_sgt(sg, raddr, sgt);
+ __gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
- page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
+ page = phys_to_page(sgt);
list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1449,7 +1450,8 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
*/
static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
{
- unsigned long r2o, *r2e, *r3t;
+ unsigned long r2o, *r2e;
+ phys_addr_t r3t;
struct page *page;
BUG_ON(!gmap_is_shadow(sg));
@@ -1458,12 +1460,12 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
return;
gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
- gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
- r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
+ gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
+ r3t = *r2e & _REGION_ENTRY_ORIGIN;
*r2e = _REGION2_ENTRY_EMPTY;
- __gmap_unshadow_r3t(sg, raddr, r3t);
+ __gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
- page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
+ page = phys_to_page(r3t);
list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1479,7 +1481,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
unsigned long *r2t)
{
- unsigned long *r3t;
+ phys_addr_t r3t;
struct page *page;
int i;
@@ -1487,11 +1489,11 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
continue;
- r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
+ r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
r2t[i] = _REGION2_ENTRY_EMPTY;
- __gmap_unshadow_r3t(sg, raddr, r3t);
+ __gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
- page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
+ page = phys_to_page(r3t);
list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1506,8 +1508,9 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
*/
static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
{
- unsigned long r1o, *r1e, *r2t;
+ unsigned long r1o, *r1e;
struct page *page;
+ phys_addr_t r2t;
BUG_ON(!gmap_is_shadow(sg));
r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
@@ -1515,12 +1518,12 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
return;
gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
- gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
- r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
+ gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
+ r2t = *r1e & _REGION_ENTRY_ORIGIN;
*r1e = _REGION1_ENTRY_EMPTY;
- __gmap_unshadow_r2t(sg, raddr, r2t);
+ __gmap_unshadow_r2t(sg, raddr, __va(r2t));
/* Free region 2 table */
- page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
+ page = phys_to_page(r2t);
list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1536,22 +1539,23 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
unsigned long *r1t)
{
- unsigned long asce, *r2t;
+ unsigned long asce;
struct page *page;
+ phys_addr_t r2t;
int i;
BUG_ON(!gmap_is_shadow(sg));
- asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
+ asce = __pa(r1t) | _ASCE_TYPE_REGION1;
for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
continue;
- r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
- __gmap_unshadow_r2t(sg, raddr, r2t);
+ r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
+ __gmap_unshadow_r2t(sg, raddr, __va(r2t));
/* Clear entry and flush translation r1t -> r2t */
gmap_idte_one(asce, raddr);
r1t[i] = _REGION1_ENTRY_EMPTY;
/* Free region 2 table */
- page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
+ page = phys_to_page(r2t);
list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1573,7 +1577,7 @@ static void gmap_unshadow(struct gmap *sg)
sg->removed = 1;
gmap_call_notifier(sg, 0, -1UL);
gmap_flush_tlb(sg);
- table = (unsigned long *)(sg->asce & _ASCE_ORIGIN);
+ table = __va(sg->asce & _ASCE_ORIGIN);
switch (sg->asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
__gmap_unshadow_r1t(sg, 0, table);
@@ -1748,7 +1752,8 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
int fake)
{
unsigned long raddr, origin, offset, len;
- unsigned long *s_r2t, *table;
+ unsigned long *table;
+ phys_addr_t s_r2t;
struct page *page;
int rc;
@@ -1760,7 +1765,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
page->index = r2t & _REGION_ENTRY_ORIGIN;
if (fake)
page->index |= GMAP_SHADOW_FAKE_TABLE;
- s_r2t = (unsigned long *) page_to_phys(page);
+ s_r2t = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
@@ -1775,9 +1780,9 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
rc = -EAGAIN; /* Race with shadow */
goto out_free;
}
- crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY);
+ crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
/* mark as invalid as long as the parent table is not protected */
- *table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH |
+ *table = s_r2t | _REGION_ENTRY_LENGTH |
_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r2t & _REGION_ENTRY_PROTECT);
@@ -1798,8 +1803,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 4);
- if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
- (unsigned long) s_r2t)
+ if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
rc = -EAGAIN; /* Race with unshadow */
else
*table &= ~_REGION_ENTRY_INVALID;
@@ -1832,7 +1836,8 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
int fake)
{
unsigned long raddr, origin, offset, len;
- unsigned long *s_r3t, *table;
+ unsigned long *table;
+ phys_addr_t s_r3t;
struct page *page;
int rc;
@@ -1844,7 +1849,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
page->index = r3t & _REGION_ENTRY_ORIGIN;
if (fake)
page->index |= GMAP_SHADOW_FAKE_TABLE;
- s_r3t = (unsigned long *) page_to_phys(page);
+ s_r3t = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
@@ -1859,9 +1864,9 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
rc = -EAGAIN; /* Race with shadow */
goto out_free;
}
- crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY);
+ crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
/* mark as invalid as long as the parent table is not protected */
- *table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH |
+ *table = s_r3t | _REGION_ENTRY_LENGTH |
_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r3t & _REGION_ENTRY_PROTECT);
@@ -1882,8 +1887,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 3);
- if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
- (unsigned long) s_r3t)
+ if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
rc = -EAGAIN; /* Race with unshadow */
else
*table &= ~_REGION_ENTRY_INVALID;
@@ -1916,7 +1920,8 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
int fake)
{
unsigned long raddr, origin, offset, len;
- unsigned long *s_sgt, *table;
+ unsigned long *table;
+ phys_addr_t s_sgt;
struct page *page;
int rc;
@@ -1928,7 +1933,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
page->index = sgt & _REGION_ENTRY_ORIGIN;
if (fake)
page->index |= GMAP_SHADOW_FAKE_TABLE;
- s_sgt = (unsigned long *) page_to_phys(page);
+ s_sgt = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
@@ -1943,9 +1948,9 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
rc = -EAGAIN; /* Race with shadow */
goto out_free;
}
- crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY);
+ crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
/* mark as invalid as long as the parent table is not protected */
- *table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH |
+ *table = s_sgt | _REGION_ENTRY_LENGTH |
_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= sgt & _REGION_ENTRY_PROTECT;
@@ -1966,8 +1971,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 2);
- if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
- (unsigned long) s_sgt)
+ if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
rc = -EAGAIN; /* Race with unshadow */
else
*table &= ~_REGION_ENTRY_INVALID;
@@ -2040,8 +2044,9 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
int fake)
{
unsigned long raddr, origin;
- unsigned long *s_pgt, *table;
+ unsigned long *table;
struct page *page;
+ phys_addr_t s_pgt;
int rc;
BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
@@ -2052,7 +2057,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
if (fake)
page->index |= GMAP_SHADOW_FAKE_TABLE;
- s_pgt = (unsigned long *) page_to_phys(page);
+ s_pgt = page_to_phys(page);
/* Install shadow page table */
spin_lock(&sg->guest_table_lock);
table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
@@ -2085,8 +2090,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 1);
- if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) !=
- (unsigned long) s_pgt)
+ if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
rc = -EAGAIN; /* Race with unshadow */
else
*table &= ~_SEGMENT_ENTRY_INVALID;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 1a25d456d865..30ab55f868f6 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -141,25 +141,25 @@ void mark_rodata_ro(void)
debug_checkwx();
}
-int set_memory_encrypted(unsigned long addr, int numpages)
+int set_memory_encrypted(unsigned long vaddr, int numpages)
{
int i;
/* make specified pages unshared, (swiotlb, dma_free) */
for (i = 0; i < numpages; ++i) {
- uv_remove_shared(addr);
- addr += PAGE_SIZE;
+ uv_remove_shared(virt_to_phys((void *)vaddr));
+ vaddr += PAGE_SIZE;
}
return 0;
}
-int set_memory_decrypted(unsigned long addr, int numpages)
+int set_memory_decrypted(unsigned long vaddr, int numpages)
{
int i;
/* make specified pages shared (swiotlb, dma_alloca) */
for (i = 0; i < numpages; ++i) {
- uv_set_shared(addr);
- addr += PAGE_SIZE;
+ uv_set_shared(virt_to_phys((void *)vaddr));
+ vaddr += PAGE_SIZE;
}
return 0;
}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 73cdc5539384..ef38b1514c77 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -116,20 +116,20 @@ EXPORT_SYMBOL_GPL(pci_proc_domain);
/* Modify PCI: Register I/O address translation parameters */
int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
- u64 base, u64 limit, u64 iota)
+ u64 base, u64 limit, u64 iota, u8 *status)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
struct zpci_fib fib = {0};
- u8 cc, status;
+ u8 cc;
WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
fib.gd = zdev->gisa;
- cc = zpci_mod_fc(req, &fib, &status);
+ cc = zpci_mod_fc(req, &fib, status);
if (cc)
- zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
+ zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status);
return cc;
}
EXPORT_SYMBOL_GPL(zpci_register_ioat);
@@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
*/
int zpci_hot_reset_device(struct zpci_dev *zdev)
{
+ u8 status;
int rc;
zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
@@ -787,7 +788,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
if (zdev->dma_table)
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table));
+ virt_to_phys(zdev->dma_table), &status);
else
rc = zpci_dma_init_device(zdev);
if (rc) {
@@ -995,7 +996,7 @@ void zpci_release_device(struct kref *kref)
break;
}
zpci_dbg(3, "rem fid:%x\n", zdev->fid);
- kfree(zdev);
+ kfree_rcu(zdev, rcu);
}
int zpci_report_error(struct pci_dev *pdev,
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 227cf0a62800..ea478d11fbd1 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -63,37 +63,55 @@ static void dma_free_page_table(void *table)
kmem_cache_free(dma_page_table_cache, table);
}
-static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
+static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
{
+ unsigned long old_rte, rte;
unsigned long *sto;
- if (reg_entry_isvalid(*entry))
- sto = get_rt_sto(*entry);
- else {
+ rte = READ_ONCE(*rtep);
+ if (reg_entry_isvalid(rte)) {
+ sto = get_rt_sto(rte);
+ } else {
sto = dma_alloc_cpu_table();
if (!sto)
return NULL;
- set_rt_sto(entry, virt_to_phys(sto));
- validate_rt_entry(entry);
- entry_clr_protected(entry);
+ set_rt_sto(&rte, virt_to_phys(sto));
+ validate_rt_entry(&rte);
+ entry_clr_protected(&rte);
+
+ old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
+ if (old_rte != ZPCI_TABLE_INVALID) {
+ /* Someone else was faster, use theirs */
+ dma_free_cpu_table(sto);
+ sto = get_rt_sto(old_rte);
+ }
}
return sto;
}
-static unsigned long *dma_get_page_table_origin(unsigned long *entry)
+static unsigned long *dma_get_page_table_origin(unsigned long *step)
{
+ unsigned long old_ste, ste;
unsigned long *pto;
- if (reg_entry_isvalid(*entry))
- pto = get_st_pto(*entry);
- else {
+ ste = READ_ONCE(*step);
+ if (reg_entry_isvalid(ste)) {
+ pto = get_st_pto(ste);
+ } else {
pto = dma_alloc_page_table();
if (!pto)
return NULL;
- set_st_pto(entry, virt_to_phys(pto));
- validate_st_entry(entry);
- entry_clr_protected(entry);
+ set_st_pto(&ste, virt_to_phys(pto));
+ validate_st_entry(&ste);
+ entry_clr_protected(&ste);
+
+ old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
+ if (old_ste != ZPCI_TABLE_INVALID) {
+ /* Someone else was faster, use theirs */
+ dma_free_page_table(pto);
+ pto = get_st_pto(old_ste);
+ }
}
return pto;
}
@@ -117,19 +135,24 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
return &pto[px];
}
-void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags)
+void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
{
+ unsigned long pte;
+
+ pte = READ_ONCE(*ptep);
if (flags & ZPCI_PTE_INVALID) {
- invalidate_pt_entry(entry);
+ invalidate_pt_entry(&pte);
} else {
- set_pt_pfaa(entry, page_addr);
- validate_pt_entry(entry);
+ set_pt_pfaa(&pte, page_addr);
+ validate_pt_entry(&pte);
}
if (flags & ZPCI_TABLE_PROTECTED)
- entry_set_protected(entry);
+ entry_set_protected(&pte);
else
- entry_clr_protected(entry);
+ entry_clr_protected(&pte);
+
+ xchg(ptep, pte);
}
static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
@@ -137,18 +160,14 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
{
unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
phys_addr_t page_addr = (pa & PAGE_MASK);
- unsigned long irq_flags;
unsigned long *entry;
int i, rc = 0;
if (!nr_pages)
return -EINVAL;
- spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
- if (!zdev->dma_table) {
- rc = -EINVAL;
- goto out_unlock;
- }
+ if (!zdev->dma_table)
+ return -EINVAL;
for (i = 0; i < nr_pages; i++) {
entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
@@ -173,8 +192,6 @@ undo_cpu_trans:
dma_update_cpu_trans(entry, page_addr, flags);
}
}
-out_unlock:
- spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
return rc;
}
@@ -547,6 +564,7 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
int zpci_dma_init_device(struct zpci_dev *zdev)
{
+ u8 status;
int rc;
/*
@@ -557,7 +575,6 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
WARN_ON(zdev->s390_domain);
spin_lock_init(&zdev->iommu_bitmap_lock);
- spin_lock_init(&zdev->dma_table_lock);
zdev->dma_table = dma_alloc_cpu_table();
if (!zdev->dma_table) {
@@ -598,7 +615,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
}
if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table))) {
+ virt_to_phys(zdev->dma_table), &status)) {
rc = -EIO;
goto free_bitmap;
}