Diffstat (limited to 'arch/s390')
40 files changed, 3248 insertions, 457 deletions
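The bulk of this commit wires KVM host support into arch/s390 (the new arch/s390/kvm/ directory and the HAVE_KVM/PGSTE Kconfig symbols) and adds s390 guest support via virtio. As the KVM Kconfig help further down notes, the module exposes its capabilities through a character device node named /dev/kvm. For orientation, here is a minimal userspace sketch of the generic /dev/kvm ioctl sequence; it is illustrative only, and the s390-specific guest memory registration, initial register/PSW state, and error handling are all omitted:

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm, vm, vcpu;
            long mmap_size;
            struct kvm_run *run;

            /* /dev/kvm is the device node named in the KVM Kconfig help below */
            kvm = open("/dev/kvm", O_RDWR);
            if (kvm < 0 || ioctl(kvm, KVM_GET_API_VERSION, 0) != KVM_API_VERSION)
                    exit(1);

            vm = ioctl(kvm, KVM_CREATE_VM, 0);      /* fd representing the VM */
            vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);   /* vcpu number 0 */

            /* the kvm_run control block is shared with the kernel via mmap */
            mmap_size = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, 0);
            run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
                       MAP_SHARED, vcpu, 0);

            /* guest memory setup and initial register state omitted */

            ioctl(vcpu, KVM_RUN, 0);                /* run the guest until an exit */
            printf("exit_reason: %u\n", run->exit_reason);
            return 0;
    }

On s390 the KVM_RUN call enters the guest through the SIE instruction (see sie64a.o in the kvm Makefile below) and returns to userspace when an intercept needs attention, for example KVM_EXIT_S390_RESET from the diagnose handler.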
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f6a68e178fc5..29a7940f284f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK default y depends on SMP && PREEMPT +config PGSTE + bool + default y if KVM + mainmenu "Linux Kernel Configuration" config S390 @@ -69,6 +73,7 @@ config S390 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_KVM if 64BIT source "init/Kconfig" @@ -295,6 +300,14 @@ comment "Kernel preemption" source "kernel/Kconfig.preempt" +config ARCH_SPARSEMEM_ENABLE + def_bool y + select SPARSEMEM_VMEMMAP_ENABLE + select SPARSEMEM_VMEMMAP + +config ARCH_SPARSEMEM_DEFAULT + def_bool y + source "mm/Kconfig" comment "I/O subsystem configuration" @@ -515,6 +528,13 @@ config ZFCPDUMP Select this option if you want to build an zfcpdump enabled kernel. Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. +config S390_GUEST +bool "s390 guest support (EXPERIMENTAL)" + depends on 64BIT && EXPERIMENTAL + select VIRTIO + select VIRTIO_RING + help + Select this option if you want to run the kernel under s390 linux endmenu source "net/Kconfig" @@ -536,3 +556,5 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +source "arch/s390/kvm/Kconfig" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index f708be367b03..792a4e7743ce 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ - arch/s390/appldata/ arch/s390/hypfs/ + arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/ diff --git a/arch/s390/defconfig b/arch/s390/defconfig index a72f208e62d0..aa341d0ea1e6 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.25-rc4 -# Wed Mar 5 11:22:59 2008 +# Linux kernel version: 2.6.25 +# Wed Apr 30 11:07:45 2008 # CONFIG_SCHED_MC=y CONFIG_MMU=y @@ -14,10 +14,12 @@ CONFIG_RWSEM_XCHGADD_ALGORITHM=y # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_BUG=y CONFIG_NO_IOMEM=y CONFIG_NO_DMA=y CONFIG_GENERIC_LOCKBREAK=y +CONFIG_PGSTE=y CONFIG_S390=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -43,6 +45,7 @@ CONFIG_LOG_BUF_SHIFT=17 CONFIG_CGROUPS=y # CONFIG_CGROUP_DEBUG is not set CONFIG_CGROUP_NS=y +# CONFIG_CGROUP_DEVICE is not set # CONFIG_CPUSETS is not set CONFIG_GROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y @@ -65,6 +68,7 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -92,6 +96,7 @@ CONFIG_KPROBES=y CONFIG_KRETPROBES=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y +# CONFIG_HAVE_DMA_ATTRS is not set CONFIG_PROC_PAGE_MONITOR=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -121,8 +126,8 @@ CONFIG_DEFAULT_DEADLINE=y # CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="deadline" +CONFIG_PREEMPT_NOTIFIERS=y CONFIG_CLASSIC_RCU=y -# CONFIG_PREEMPT_RCU is not set # # Base setup @@ -131,6 +136,10 @@ CONFIG_CLASSIC_RCU=y # # Processor type and features # +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y 
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_64BIT=y CONFIG_SMP=y CONFIG_NR_CPUS=32 @@ -161,15 +170,20 @@ CONFIG_ARCH_POPULATES_NODE_MAP=y # CONFIG_PREEMPT_NONE is not set # CONFIG_PREEMPT_VOLUNTARY is not set CONFIG_PREEMPT=y -# CONFIG_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU is not set +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y +# CONFIG_FLATMEM_MANUAL is not set # CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set -# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +CONFIG_SPARSEMEM_VMEMMAP=y +CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 @@ -205,11 +219,10 @@ CONFIG_HZ_100=y # CONFIG_HZ_1000 is not set CONFIG_HZ=100 # CONFIG_SCHED_HRTICK is not set -CONFIG_NO_IDLE_HZ=y -CONFIG_NO_IDLE_HZ_INIT=y CONFIG_S390_HYPFS_FS=y CONFIG_KEXEC=y # CONFIG_ZFCPDUMP is not set +CONFIG_S390_GUEST=y # # Networking @@ -272,8 +285,10 @@ CONFIG_INET6_XFRM_MODE_TUNNEL=y CONFIG_INET6_XFRM_MODE_BEET=y # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_SIT=y +CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set +# CONFIG_IPV6_MROUTE is not set # CONFIG_NETWORK_SECMARK is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set @@ -289,6 +304,7 @@ CONFIG_NF_CONNTRACK=m # CONFIG_NF_CT_ACCT is not set # CONFIG_NF_CONNTRACK_MARK is not set # CONFIG_NF_CONNTRACK_EVENTS is not set +# CONFIG_NF_CT_PROTO_DCCP is not set # CONFIG_NF_CT_PROTO_SCTP is not set # CONFIG_NF_CT_PROTO_UDPLITE is not set # CONFIG_NF_CONNTRACK_AMANDA is not set @@ -439,6 +455,7 @@ CONFIG_DASD_ECKD=y CONFIG_DASD_FBA=y CONFIG_DASD_DIAG=y CONFIG_DASD_EER=y +CONFIG_VIRTIO_BLK=m CONFIG_MISC_DEVICES=y # CONFIG_EEPROM_93CX6 is not set # CONFIG_ENCLOSURE_SERVICES is not set @@ -533,7 +550,7 @@ CONFIG_NETDEV_10000=y # S/390 network device drivers # CONFIG_LCS=m -CONFIG_CTC=m +CONFIG_CTCM=m # CONFIG_NETIUCV is not set # CONFIG_SMSGIUCV is not set # CONFIG_CLAW is not set @@ -547,10 +564,12 @@ CONFIG_CCWGROUP=y # CONFIG_NETCONSOLE is not set # CONFIG_NETPOLL is not set # CONFIG_NET_POLL_CONTROLLER is not set +CONFIG_VIRTIO_NET=m # # Character devices # +CONFIG_DEVKMEM=y CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -600,6 +619,7 @@ CONFIG_S390_VMUR=m # Sonics Silicon Backplane # # CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set # # File systems @@ -652,6 +672,7 @@ CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_HUGETLBFS is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_CONFIGFS_FS=m @@ -678,12 +699,10 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y # CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set CONFIG_NFSD=y CONFIG_NFSD_V3=y # CONFIG_NFSD_V3_ACL is not set # CONFIG_NFSD_V4 is not set -CONFIG_NFSD_TCP=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=y @@ -731,6 +750,7 @@ CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set CONFIG_ENABLE_WARN_DEPRECATED=y CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_FS=y @@ -754,6 +774,7 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is 
not set +# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_FRAME_POINTER is not set @@ -775,58 +796,88 @@ CONFIG_SAMPLES=y # CONFIG_SECURITY is not set # CONFIG_SECURITY_FILE_CAPABILITIES is not set CONFIG_CRYPTO=y + +# +# Crypto core or helper +# CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_BLKCIPHER=y -CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_HASH=m CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_GF128MUL=m +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=m +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_SEQIV=m + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_CTR=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_ECB=m +# CONFIG_CRYPTO_LRW is not set +CONFIG_CRYPTO_PCBC=m +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# CONFIG_CRYPTO_HMAC=m # CONFIG_CRYPTO_XCBC is not set -# CONFIG_CRYPTO_NULL is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=m +# CONFIG_CRYPTO_MICHAEL_MIC is not set CONFIG_CRYPTO_SHA1=m # CONFIG_CRYPTO_SHA256 is not set # CONFIG_CRYPTO_SHA512 is not set -# CONFIG_CRYPTO_WP512 is not set # CONFIG_CRYPTO_TGR192 is not set -CONFIG_CRYPTO_GF128MUL=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_CBC=y -CONFIG_CRYPTO_PCBC=m -# CONFIG_CRYPTO_LRW is not set -# CONFIG_CRYPTO_XTS is not set -CONFIG_CRYPTO_CTR=m -CONFIG_CRYPTO_GCM=m -CONFIG_CRYPTO_CCM=m -# CONFIG_CRYPTO_CRYPTD is not set -# CONFIG_CRYPTO_DES is not set -CONFIG_CRYPTO_FCRYPT=m -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_TWOFISH is not set -# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# # CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +CONFIG_CRYPTO_CAMELLIA=m # CONFIG_CRYPTO_CAST5 is not set # CONFIG_CRYPTO_CAST6 is not set -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_DES is not set +CONFIG_CRYPTO_FCRYPT=m # CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_ANUBIS is not set -CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# # CONFIG_CRYPTO_DEFLATE is not set -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_CRC32C is not set -CONFIG_CRYPTO_CAMELLIA=m -# CONFIG_CRYPTO_TEST is not set -CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_HW=y CONFIG_ZCRYPT=m # CONFIG_ZCRYPT_MONOLITHIC is not set # CONFIG_CRYPTO_SHA1_S390 is not set # CONFIG_CRYPTO_SHA256_S390 is not set +CONFIG_CRYPTO_SHA512_S390=m # CONFIG_CRYPTO_DES_S390 is not set # CONFIG_CRYPTO_AES_S390 is not set CONFIG_S390_PRNG=m @@ -835,6 +886,8 @@ CONFIG_S390_PRNG=m # Library routines # CONFIG_BITREVERSE=m +# CONFIG_GENERIC_FIND_FIRST_BIT is not set +# CONFIG_GENERIC_FIND_NEXT_BIT is not set # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set # CONFIG_CRC_ITU_T is not set @@ -844,3 +897,9 @@ CONFIG_LIBCRC32C=m CONFIG_LZO_COMPRESS=m CONFIG_LZO_DECOMPRESS=m CONFIG_PLIST=y +CONFIG_HAVE_KVM=y +CONFIG_VIRTUALIZATION=y +CONFIG_KVM=m +CONFIG_VIRTIO=y +CONFIG_VIRTIO_RING=y +CONFIG_VIRTIO_BALLOON=m diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 77051cd27925..6302f5082588 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -2,8 +2,6 @@ # 
Makefile for the linux kernel. # -EXTRA_AFLAGS := -traditional - # # Passing null pointers is ok for smp code, since we access the lowcore here. # diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 1375f8a4469e..fa28ecae636b 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -5,44 +5,38 @@ */ #include <linux/sched.h> - -/* Use marker if you need to separate the values later */ - -#define DEFINE(sym, val, marker) \ - asm volatile("\n->" #sym " %0 " #val " " #marker : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) +#include <linux/kbuild.h> int main(void) { - DEFINE(__THREAD_info, offsetof(struct task_struct, stack),); - DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp),); - DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info),); + DEFINE(__THREAD_info, offsetof(struct task_struct, stack)); + DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp)); + DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info)); DEFINE(__THREAD_mm_segment, - offsetof(struct task_struct, thread.mm_segment),); + offsetof(struct task_struct, thread.mm_segment)); BLANK(); - DEFINE(__TASK_pid, offsetof(struct task_struct, pid),); + DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); BLANK(); - DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid),); - DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address),); - DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id),); + DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid)); + DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address)); + DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id)); BLANK(); - DEFINE(__TI_task, offsetof(struct thread_info, task),); - DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain),); - DEFINE(__TI_flags, offsetof(struct thread_info, flags),); - DEFINE(__TI_cpu, offsetof(struct thread_info, cpu),); - DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count),); + DEFINE(__TI_task, offsetof(struct thread_info, task)); + DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); + DEFINE(__TI_flags, offsetof(struct thread_info, flags)); + DEFINE(__TI_cpu, offsetof(struct thread_info, cpu)); + DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); BLANK(); - DEFINE(__PT_ARGS, offsetof(struct pt_regs, args),); - DEFINE(__PT_PSW, offsetof(struct pt_regs, psw),); - DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs),); - DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2),); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc),); - DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap),); - DEFINE(__PT_SIZE, sizeof(struct pt_regs),); + DEFINE(__PT_ARGS, offsetof(struct pt_regs, args)); + DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); + DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); + DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); + DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc)); + DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap)); + DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); - DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain),); - DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs),); - DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1),); + DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); + DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs)); + DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1)); return 0; } diff --git a/arch/s390/kernel/early.c 
b/arch/s390/kernel/early.c index 540a67f979b6..d0e09684b9ce 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -139,11 +139,15 @@ static noinline __init void detect_machine_type(void) /* Running under z/VM ? */ if (cpuinfo->cpu_id.version == 0xff) - machine_flags |= 1; + machine_flags |= MACHINE_FLAG_VM; /* Running on a P/390 ? */ if (cpuinfo->cpu_id.machine == 0x7490) - machine_flags |= 4; + machine_flags |= MACHINE_FLAG_P390; + + /* Running under KVM ? */ + if (cpuinfo->cpu_id.version == 0xfe) + machine_flags |= MACHINE_FLAG_KVM; } #ifdef CONFIG_64BIT @@ -264,6 +268,118 @@ static noinline __init void setup_lowcore_early(void) s390_base_pgm_handler_fn = early_pgm_check_handler; } +static noinline __init void setup_hpage(void) +{ +#ifndef CONFIG_DEBUG_PAGEALLOC + unsigned int facilities; + + facilities = stfl(); + if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29))) + return; + machine_flags |= MACHINE_FLAG_HPAGE; + __ctl_set_bit(0, 23); +#endif +} + +static __init void detect_mvpg(void) +{ +#ifndef CONFIG_64BIT + int rc; + + asm volatile( + " la 0,0\n" + " mvpg %2,%2\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0"); + if (!rc) + machine_flags |= MACHINE_FLAG_MVPG; +#endif +} + +static __init void detect_ieee(void) +{ +#ifndef CONFIG_64BIT + int rc, tmp; + + asm volatile( + " efpc %1,0\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc"); + if (!rc) + machine_flags |= MACHINE_FLAG_IEEE; +#endif +} + +static __init void detect_csp(void) +{ +#ifndef CONFIG_64BIT + int rc; + + asm volatile( + " la 0,0\n" + " la 1,0\n" + " la 2,4\n" + " csp 0,2\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2"); + if (!rc) + machine_flags |= MACHINE_FLAG_CSP; +#endif +} + +static __init void detect_diag9c(void) +{ + unsigned int cpu_address; + int rc; + + cpu_address = stap(); + asm volatile( + " diag %2,0,0x9c\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); + if (!rc) + machine_flags |= MACHINE_FLAG_DIAG9C; +} + +static __init void detect_diag44(void) +{ +#ifdef CONFIG_64BIT + int rc; + + asm volatile( + " diag 0,0,0x44\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); + if (!rc) + machine_flags |= MACHINE_FLAG_DIAG44; +#endif +} + +static __init void detect_machine_facilities(void) +{ +#ifdef CONFIG_64BIT + unsigned int facilities; + + facilities = stfl(); + if (facilities & (1 << 28)) + machine_flags |= MACHINE_FLAG_IDTE; + if (facilities & (1 << 23)) + machine_flags |= MACHINE_FLAG_PFMF; + if (facilities & (1 << 4)) + machine_flags |= MACHINE_FLAG_MVCOS; +#endif +} + /* * Save ipl parameters, clear bss memory, initialize storage keys * and create a kernel NSS at startup if the SAVESYS= parm is defined @@ -281,6 +397,13 @@ void __init startup_init(void) create_kernel_nss(); sort_main_extable(); setup_lowcore_early(); + detect_mvpg(); + detect_ieee(); + detect_csp(); + detect_diag9c(); + detect_diag44(); + detect_machine_facilities(); + setup_hpage(); sclp_read_info_early(); sclp_facilities_detect(); memsize = sclp_memory_detect(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6766e37fe8ea..bdbb3bcd78a5 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -49,9 +49,9 @@ SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP SP_SIZE = 
STACK_FRAME_OVERHEAD + __PT_SIZE -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ +_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ +_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER @@ -316,7 +316,7 @@ sysc_work: bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED bo BASED(sysc_reschedule) - tm __TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) + tm __TI_flags+3(%r9),_TIF_SIGPENDING bnz BASED(sysc_sigpending) tm __TI_flags+3(%r9),_TIF_RESTART_SVC bo BASED(sysc_restart) @@ -342,7 +342,7 @@ sysc_mcck_pending: br %r1 # TIF bit will be cleared by handler # -# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal +# _TIF_SIGPENDING is set, call do_signal # sysc_sigpending: ni __TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP @@ -657,7 +657,7 @@ io_work: lr %r15,%r1 # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_RESTORE_SIGMASK, _TIF_NEED_RESCHED +# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED # and _TIF_MCCK_PENDING # io_work_loop: @@ -665,7 +665,7 @@ io_work_loop: bo BASED(io_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED bo BASED(io_reschedule) - tm __TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) + tm __TI_flags+3(%r9),_TIF_SIGPENDING bnz BASED(io_sigpending) b BASED(io_restore) io_work_done: @@ -693,7 +693,7 @@ io_reschedule: b BASED(io_work_loop) # -# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal +# _TIF_SIGPENDING is set, call do_signal # io_sigpending: TRACE_IRQS_ON diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index cd959c0b2e16..5a4a7bcd2bba 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -52,9 +52,9 @@ SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ +_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ +_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -308,7 +308,7 @@ sysc_work: jo sysc_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED jo sysc_reschedule - tm __TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) + tm __TI_flags+7(%r9),_TIF_SIGPENDING jnz sysc_sigpending tm __TI_flags+7(%r9),_TIF_RESTART_SVC jo sysc_restart @@ -332,7 +332,7 @@ sysc_mcck_pending: jg s390_handle_mcck # TIF bit will be cleared by handler # -# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal +# _TIF_SIGPENDING is set, call do_signal # sysc_sigpending: ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP @@ -648,7 +648,7 @@ io_work_loop: jo io_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED jo io_reschedule - tm __TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) + tm __TI_flags+7(%r9),_TIF_SIGPENDING jnz io_sigpending j io_restore io_work_done: @@ -674,7 +674,7 @@ io_reschedule: j io_work_loop # -# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal +# _TIF_SIGPENDING or is set, call do_signal # io_sigpending: TRACE_IRQS_ON diff --git a/arch/s390/kernel/head31.S 
b/arch/s390/kernel/head31.S index dc364c1419af..a816e2de32b9 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -57,61 +57,6 @@ startup_continue: # l %r14,.Lstartup_init-.LPG1(%r13) basr %r14,%r14 - - l %r12,.Lmflags-.LPG1(%r13) # get address of machine_flags -# -# find out if we have an IEEE fpu -# - mvc __LC_PGM_NEW_PSW(8),.Lpcfpu-.LPG1(%r13) - efpc %r0,0 # test IEEE extract fpc instruction - oi 3(%r12),2 # set IEEE fpu flag -.Lchkfpu: - -# -# find out if we have the CSP instruction -# - mvc __LC_PGM_NEW_PSW(8),.Lpccsp-.LPG1(%r13) - la %r0,0 - lr %r1,%r0 - la %r2,4 - csp %r0,%r2 # Test CSP instruction - oi 3(%r12),8 # set CSP flag -.Lchkcsp: - -# -# find out if we have the MVPG instruction -# - mvc __LC_PGM_NEW_PSW(8),.Lpcmvpg-.LPG1(%r13) - sr %r0,%r0 - la %r1,0 - la %r2,0 - mvpg %r1,%r2 # Test CSP instruction - oi 3(%r12),16 # set MVPG flag -.Lchkmvpg: - -# -# find out if we have the IDTE instruction -# - mvc __LC_PGM_NEW_PSW(8),.Lpcidte-.LPG1(%r13) - .long 0xb2b10000 # store facility list - tm 0xc8,0x08 # check bit for clearing-by-ASCE - bno .Lchkidte-.LPG1(%r13) - lhi %r1,2094 - lhi %r2,0 - .long 0xb98e2001 - oi 3(%r12),0x80 # set IDTE flag -.Lchkidte: - -# -# find out if the diag 0x9c is available -# - mvc __LC_PGM_NEW_PSW(8),.Lpcdiag9c-.LPG1(%r13) - stap __LC_CPUID+4 # store cpu address - lh %r1,__LC_CPUID+4 - diag %r1,0,0x9c # test diag 0x9c - oi 2(%r12),1 # set diag9c flag -.Lchkdiag9c: - lpsw .Lentry-.LPG1(13) # jump to _stext in primary-space, # virtual and never return ... .align 8 @@ -132,13 +77,7 @@ startup_continue: .long 0 # cr13: home space segment table .long 0xc0000000 # cr14: machine check handling off .long 0 # cr15: linkage stack operations -.Lpcfpu:.long 0x00080000,0x80000000 + .Lchkfpu -.Lpccsp:.long 0x00080000,0x80000000 + .Lchkcsp -.Lpcmvpg:.long 0x00080000,0x80000000 + .Lchkmvpg -.Lpcidte:.long 0x00080000,0x80000000 + .Lchkidte -.Lpcdiag9c:.long 0x00080000,0x80000000 + .Lchkdiag9c .Lmchunk:.long memory_chunk -.Lmflags:.long machine_flags .Lbss_bgn: .long __bss_start .Lbss_end: .long _end .Lparmaddr: .long PARMAREA diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 79dccd206a6e..1d06961e87b3 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -125,73 +125,11 @@ startup_continue: # and create a kernel NSS if the SAVESYS= parm is defined # brasl %r14,startup_init - # set program check new psw mask - mvc __LC_PGM_NEW_PSW(8),.Lpcmsk-.LPG1(%r13) - larl %r12,machine_flags -# -# find out if we have the MVPG instruction -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - sgr %r0,%r0 - lghi %r1,0 - lghi %r2,0 - mvpg %r1,%r2 # test MVPG instruction - oi 7(%r12),16 # set MVPG flag -0: - -# -# find out if the diag 0x44 works in 64 bit mode -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - diag 0,0,0x44 # test diag 0x44 - oi 7(%r12),32 # set diag44 flag -0: - -# -# find out if we have the IDTE instruction -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - .long 0xb2b10000 # store facility list - tm 0xc8,0x08 # check bit for clearing-by-ASCE - bno 0f-.LPG1(%r13) - lhi %r1,2048 - lhi %r2,0 - .long 0xb98e2001 - oi 7(%r12),0x80 # set IDTE flag -0: - -# -# find out if the diag 0x9c is available -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - stap __LC_CPUID+4 # store cpu address - lh %r1,__LC_CPUID+4 - diag %r1,0,0x9c # test diag 0x9c - oi 6(%r12),1 # set diag9c flag -0: - -# 
-# find out if we have the MVCOS instruction -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - .short 0xc800 # mvcos 0(%r0),0(%r0),%r0 - .short 0x0000 - .short 0x0000 -0: tm 0x8f,0x13 # special-operation exception? - bno 1f-.LPG1(%r13) # if yes, MVCOS is present - oi 6(%r12),2 # set MVCOS flag -1: - lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, # virtual and never return ... .align 16 .Lentry:.quad 0x0000000180000000,_stext -.Lctl: .quad 0x04b50002 # cr0: various things +.Lctl: .quad 0x04350002 # cr0: various things .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table .quad 0 # cr3: instruction authorization diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index c36d8123ca14..c59a86dca584 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -60,8 +60,6 @@ init_IRQ(void) /* * Switch to the asynchronous interrupt stack for softirq execution. */ -extern void __do_softirq(void); - asmlinkage void do_softirq(void) { unsigned long flags, old, new; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index c1aff194141d..7920861109d2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -180,24 +180,6 @@ void cpu_idle(void) } } -void show_regs(struct pt_regs *regs) -{ - print_modules(); - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_registers(regs); - /* Show stack backtrace if pt_regs is from kernel mode */ - if (!(regs->psw.mask & PSW_MASK_PSTATE)) - show_trace(NULL, (unsigned long *) regs->gprs[15]); - show_last_breaking_event(regs); -} - extern void kernel_thread_starter(void); asm( diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 58a064296987..7f4270163744 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -607,38 +607,8 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) } #endif -#define PT32_IEEE_IP 0x13c - -static int -do_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - int ret; - - if (request == PTRACE_ATTACH) - return ptrace_attach(child); - - /* - * Special cases to get/store the ieee instructions pointer. 
- */ - if (child == current) { - if (request == PTRACE_PEEKUSR && addr == PT_IEEE_IP) - return peek_user(child, addr, data); - if (request == PTRACE_POKEUSR && addr == PT_IEEE_IP) - return poke_user(child, addr, data); -#ifdef CONFIG_COMPAT - if (request == PTRACE_PEEKUSR && - addr == PT32_IEEE_IP && test_thread_flag(TIF_31BIT)) - return peek_user_emu31(child, addr, data); - if (request == PTRACE_POKEUSR && - addr == PT32_IEEE_IP && test_thread_flag(TIF_31BIT)) - return poke_user_emu31(child, addr, data); -#endif - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - return ret; - switch (request) { case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ @@ -693,31 +663,6 @@ do_ptrace(struct task_struct *child, long request, long addr, long data) return -EIO; } -asmlinkage long -sys_ptrace(long request, long pid, long addr, long data) -{ - struct task_struct *child; - int ret; - - lock_kernel(); - if (request == PTRACE_TRACEME) { - ret = ptrace_traceme(); - goto out; - } - - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) { - ret = PTR_ERR(child); - goto out; - } - - ret = do_ptrace(child, request, addr, data); - put_task_struct(child); -out: - unlock_kernel(); - return ret; -} - asmlinkage void syscall_trace(struct pt_regs *regs, int entryexit) { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7141147e6b63..2bc70b6e876a 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL(uaccess); unsigned int console_mode = 0; unsigned int console_devno = -1; unsigned int console_irq = -1; -unsigned long machine_flags = 0; +unsigned long machine_flags; unsigned long elf_hwcap = 0; char elf_platform[ELF_PLATFORM_SIZE]; @@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p) early_param("ipldelay", early_parse_ipldelay); #ifdef CONFIG_S390_SWITCH_AMODE +#ifdef CONFIG_PGSTE +unsigned int switch_amode = 1; +#else unsigned int switch_amode = 0; +#endif EXPORT_SYMBOL_GPL(switch_amode); static void set_amode_and_uaccess(unsigned long user_amode, @@ -679,15 +683,6 @@ setup_memory(void) #endif } -static __init unsigned int stfl(void) -{ - asm volatile( - " .insn s,0xb2b10000,0(0)\n" /* stfl */ - "0:\n" - EX_TABLE(0b,0b)); - return S390_lowcore.stfl_fac_list; -} - static int __init __stfle(unsigned long long *list, int doublewords) { typedef struct { unsigned long long _[doublewords]; } addrtype; @@ -754,6 +749,9 @@ static void __init setup_hwcaps(void) elf_hwcap |= 1UL << 6; } + if (MACHINE_HAS_HPAGE) + elf_hwcap |= 1UL << 7; + switch (cpuinfo->cpu_id.machine) { case 0x9672: #if !defined(CONFIG_64BIT) @@ -797,9 +795,13 @@ setup_arch(char **cmdline_p) "This machine has an IEEE fpu\n" : "This machine has no IEEE fpu\n"); #else /* CONFIG_64BIT */ - printk((MACHINE_IS_VM) ? 
- "We are running under VM (64 bit mode)\n" : - "We are running native (64 bit mode)\n"); + if (MACHINE_IS_VM) + printk("We are running under VM (64 bit mode)\n"); + else if (MACHINE_IS_KVM) { + printk("We are running under KVM (64 bit mode)\n"); + add_preferred_console("ttyS", 1, NULL); + } else + printk("We are running native (64 bit mode)\n"); #endif /* CONFIG_64BIT */ /* Save unparsed command line copy for /proc/cmdline */ @@ -873,8 +875,9 @@ void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo) static int show_cpuinfo(struct seq_file *m, void *v) { - static const char *hwcap_str[7] = { - "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp" + static const char *hwcap_str[8] = { + "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", + "edat" }; struct cpuinfo_S390 *cpuinfo; unsigned long n = (unsigned long) v - 1; @@ -889,7 +892,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) num_online_cpus(), loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ))%100); seq_puts(m, "features\t: "); - for (i = 0; i < 7; i++) + for (i = 0; i < 8; i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); seq_puts(m, "\n"); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 0dfa988c1b26..0aeb290060d9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -505,7 +505,7 @@ out: return rc; } -static int smp_rescan_cpus(void) +static int __smp_rescan_cpus(void) { cpumask_t avail; @@ -570,7 +570,7 @@ out: kfree(info); printk(KERN_INFO "CPUs: %d configured, %d standby\n", c_cpus, s_cpus); get_online_cpus(); - smp_rescan_cpus(); + __smp_rescan_cpus(); put_online_cpus(); } @@ -890,8 +890,8 @@ static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf, if (val != 0 && val != 1) return -EINVAL; - mutex_lock(&smp_cpu_state_mutex); get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); rc = -EBUSY; if (cpu_online(cpu)) goto out; @@ -919,8 +919,8 @@ static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf, break; } out: - put_online_cpus(); mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); return rc ? rc : count; } static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); @@ -1088,17 +1088,17 @@ out: } #ifdef CONFIG_HOTPLUG_CPU -static ssize_t __ref rescan_store(struct sys_device *dev, - const char *buf, size_t count) + +int smp_rescan_cpus(void) { cpumask_t newcpus; int cpu; int rc; - mutex_lock(&smp_cpu_state_mutex); get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); newcpus = cpu_present_map; - rc = smp_rescan_cpus(); + rc = __smp_rescan_cpus(); if (rc) goto out; cpus_andnot(newcpus, cpu_present_map, newcpus); @@ -1109,10 +1109,19 @@ static ssize_t __ref rescan_store(struct sys_device *dev, } rc = 0; out: - put_online_cpus(); mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); if (!cpus_empty(newcpus)) topology_schedule_update(); + return rc; +} + +static ssize_t __ref rescan_store(struct sys_device *dev, const char *buf, + size_t count) +{ + int rc; + + rc = smp_rescan_cpus(); return rc ? 
rc : count; } static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store); @@ -1139,16 +1148,16 @@ static ssize_t dispatching_store(struct sys_device *dev, const char *buf, if (val != 0 && val != 1) return -EINVAL; rc = 0; - mutex_lock(&smp_cpu_state_mutex); get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); if (cpu_management == val) goto out; rc = topology_set_cpu_management(val); if (!rc) cpu_management = val; out: - put_online_cpus(); mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); return rc ? rc : count; } static SYSDEV_ATTR(dispatching, 0644, dispatching_show, dispatching_store); diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 988d0d64c2c8..5fdb799062b7 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -32,23 +32,6 @@ #include <asm/uaccess.h> #include "entry.h" -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage long sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 12b39b3d9c38..661a07217057 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/bootmem.h> #include <linux/sched.h> +#include <linux/kthread.h> #include <linux/workqueue.h> #include <linux/cpu.h> #include <linux/smp.h> @@ -66,6 +67,8 @@ static struct timer_list topology_timer; static void set_topology_timer(void); static DECLARE_WORK(topology_work, topology_work_fn); +cpumask_t cpu_core_map[NR_CPUS]; + cpumask_t cpu_coregroup_map(unsigned int cpu) { struct core_info *core = &core_info; @@ -199,6 +202,14 @@ int topology_set_cpu_management(int fc) return rc; } +static void update_cpu_core_map(void) +{ + int cpu; + + for_each_present_cpu(cpu) + cpu_core_map[cpu] = cpu_coregroup_map(cpu); +} + void arch_update_cpu_topology(void) { struct tl_info *info = tl_info; @@ -206,20 +217,33 @@ void arch_update_cpu_topology(void) int cpu; if (!machine_has_topology) { + update_cpu_core_map(); topology_update_polarization_simple(); return; } stsi(info, 15, 1, 2); tl_to_cores(info); + update_cpu_core_map(); for_each_online_cpu(cpu) { sysdev = get_cpu_sysdev(cpu); kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); } } -static void topology_work_fn(struct work_struct *work) +static int topology_kthread(void *data) { arch_reinit_sched_domains(); + return 0; +} + +static void topology_work_fn(struct work_struct *work) +{ + /* We can't call arch_reinit_sched_domains() from a multi-threaded + * workqueue context since it may deadlock in case of cpu hotplug. + * So we have to create a kernel thread in order to call + * arch_reinit_sched_domains(). 
+ */ + kthread_run(topology_kthread, NULL, "topology_update"); } void topology_schedule_update(void) @@ -251,20 +275,23 @@ static int __init init_topology_update(void) { int rc; + rc = 0; if (!machine_has_topology) { topology_update_polarization_simple(); - return 0; + goto out; } init_timer_deferrable(&topology_timer); if (machine_has_topology_irq) { rc = register_external_interrupt(0x2005, topology_interrupt); if (rc) - return rc; + goto out; ctl_set_bit(0, 8); } else set_topology_timer(); - return 0; +out: + update_cpu_core_map(); + return rc; } __initcall(init_topology_update); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 57b607b61100..4584d81984c0 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -113,7 +113,7 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) } } -void show_trace(struct task_struct *task, unsigned long *stack) +static void show_trace(struct task_struct *task, unsigned long *stack) { register unsigned long __r15 asm ("15"); unsigned long sp; @@ -161,14 +161,14 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_trace(task, sp); } -#ifdef CONFIG_64BIT -void show_last_breaking_event(struct pt_regs *regs) +static void show_last_breaking_event(struct pt_regs *regs) { +#ifdef CONFIG_64BIT printk("Last Breaking-Event-Address:\n"); printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN); print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN); -} #endif +} /* * The architecture-independent dump_stack generator @@ -223,6 +223,24 @@ void show_registers(struct pt_regs *regs) show_code(regs); } +void show_regs(struct pt_regs *regs) +{ + print_modules(); + printk("CPU: %d %s %s %.*s\n", + task_thread_info(current)->cpu, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + printk("Process %s (pid: %d, task: %p, ksp: %p)\n", + current->comm, current->pid, current, + (void *) current->thread.ksp); + show_registers(regs); + /* Show stack backtrace if pt_regs is from kernel mode */ + if (!(regs->psw.mask & PSW_MASK_PSTATE)) + show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_last_breaking_event(regs); +} + /* This is called from fs/proc/array.c */ void task_show_regs(struct seq_file *m, struct task_struct *task) { diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c5f05b3fb2c3..ca90ee3f930e 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -110,6 +110,7 @@ void account_system_vtime(struct task_struct *tsk) S390_lowcore.steal_clock -= cputime << 12; account_system_time(tsk, 0, cputime); } +EXPORT_SYMBOL_GPL(account_system_vtime); static inline void set_vtimer(__u64 expires) { diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig new file mode 100644 index 000000000000..1761b74d639b --- /dev/null +++ b/arch/s390/kvm/Kconfig @@ -0,0 +1,46 @@ +# +# KVM configuration +# +config HAVE_KVM + bool + +menuconfig VIRTUALIZATION + bool "Virtualization" + default y + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. 
+ +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + select S390_SWITCH_AMODE + select PREEMPT + ---help--- + Support hosting paravirtualized guest machines using the SIE + virtualization capability on the mainframe. This should work + on any 64bit machine. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_TRACE + bool + +# OK, it's a little counter-intuitive to do this, but it puts it neatly under +# the virtualization menu. +source drivers/virtio/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile new file mode 100644 index 000000000000..e5221ec0b8e3 --- /dev/null +++ b/arch/s390/kvm/Makefile @@ -0,0 +1,14 @@ +# Makefile for kernel virtual machines on s390 +# +# Copyright IBM Corp. 2008 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License (version 2 only) +# as published by the Free Software Foundation. + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm + +kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o +obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c new file mode 100644 index 000000000000..f639a152869f --- /dev/null +++ b/arch/s390/kvm/diag.c @@ -0,0 +1,67 @@ +/* + * diag.c - handling diagnose instructions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include "kvm-s390.h" + +static int __diag_time_slice_end(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); + vcpu->stat.diagnose_44++; + vcpu_put(vcpu); + schedule(); + vcpu_load(vcpu); + return 0; +} + +static int __diag_ipl_functions(struct kvm_vcpu *vcpu) +{ + unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; + unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff; + + VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); + switch (subcode) { + case 3: + vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; + break; + case 4: + vcpu->run->s390_reset_flags = 0; + break; + default: + return -ENOTSUPP; + } + + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; + vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; + vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; + vcpu->run->exit_reason = KVM_EXIT_S390_RESET; + VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx", + vcpu->run->s390_reset_flags); + return -EREMOTE; +} + +int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) +{ + int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + + switch (code) { + case 0x44: + return __diag_time_slice_end(vcpu); + case 0x308: + return __diag_ipl_functions(vcpu); + default: + return -ENOTSUPP; + } +} diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h new file mode 100644 index 000000000000..4e0633c413f3 --- /dev/null +++ b/arch/s390/kvm/gaccess.h @@ -0,0 +1,274 @@ +/* + * gaccess.h - access guest memory + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + +#ifndef __KVM_S390_GACCESS_H +#define __KVM_S390_GACCESS_H + +#include <linux/compiler.h> +#include <linux/kvm_host.h> +#include <asm/uaccess.h> + +static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, + u64 guestaddr) +{ + u64 prefix = vcpu->arch.sie_block->prefix; + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if (guestaddr < 2 * PAGE_SIZE) + guestaddr += prefix; + else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) + guestaddr -= prefix; + + if (guestaddr > memsize) + return (void __user __force *) ERR_PTR(-EFAULT); + + guestaddr += origin; + + return (void __user *) guestaddr; +} + +static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, + u64 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 7); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u64 __user *) uptr); +} + +static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, + u32 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 3); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u32 __user *) uptr); +} + +static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, + u16 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 1); + + if (IS_ERR(uptr)) + return PTR_ERR(uptr); + + return get_user(*result, (u16 __user *) uptr); +} + +static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, + u8 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u8 __user *) uptr); +} + +static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, + u64 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 7); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u64 __user *) uptr); +} + +static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, + u32 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 3); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u32 __user *) uptr); +} + +static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, + u16 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 1); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u16 __user *) uptr); +} + +static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, + u8 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u8 __user *) uptr); +} + + +static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest, + const void *from, unsigned long n) +{ + int rc; + unsigned long i; + const u8 *data = from; + + for (i = 0; i < n; i++) { + rc = put_guest_u8(vcpu, guestdest++, *(data++)); + if (rc < 0) + return rc; + } + return 0; +} + +static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest, + const void *from, unsigned long n) +{ + u64 prefix = 
vcpu->arch.sie_block->prefix; + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) + goto slowpath; + + if ((guestdest < prefix) && (guestdest + n > prefix)) + goto slowpath; + + if ((guestdest < prefix + 2 * PAGE_SIZE) + && (guestdest + n > prefix + 2 * PAGE_SIZE)) + goto slowpath; + + if (guestdest < 2 * PAGE_SIZE) + guestdest += prefix; + else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) + guestdest -= prefix; + + if (guestdest + n > memsize) + return -EFAULT; + + if (guestdest + n < guestdest) + return -EFAULT; + + guestdest += origin; + + return copy_to_user((void __user *) guestdest, from, n); +slowpath: + return __copy_to_guest_slow(vcpu, guestdest, from, n); +} + +static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, + u64 guestsrc, unsigned long n) +{ + int rc; + unsigned long i; + u8 *data = to; + + for (i = 0; i < n; i++) { + rc = get_guest_u8(vcpu, guestsrc++, data++); + if (rc < 0) + return rc; + } + return 0; +} + +static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, + u64 guestsrc, unsigned long n) +{ + u64 prefix = vcpu->arch.sie_block->prefix; + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) + goto slowpath; + + if ((guestsrc < prefix) && (guestsrc + n > prefix)) + goto slowpath; + + if ((guestsrc < prefix + 2 * PAGE_SIZE) + && (guestsrc + n > prefix + 2 * PAGE_SIZE)) + goto slowpath; + + if (guestsrc < 2 * PAGE_SIZE) + guestsrc += prefix; + else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) + guestsrc -= prefix; + + if (guestsrc + n > memsize) + return -EFAULT; + + if (guestsrc + n < guestsrc) + return -EFAULT; + + guestsrc += origin; + + return copy_from_user(to, (void __user *) guestsrc, n); +slowpath: + return __copy_from_guest_slow(vcpu, to, guestsrc, n); +} + +static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest, + const void *from, unsigned long n) +{ + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if (guestdest + n > memsize) + return -EFAULT; + + if (guestdest + n < guestdest) + return -EFAULT; + + guestdest += origin; + + return copy_to_user((void __user *) guestdest, from, n); +} + +static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, + u64 guestsrc, unsigned long n) +{ + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if (guestsrc + n > memsize) + return -EFAULT; + + if (guestsrc + n < guestsrc) + return -EFAULT; + + guestsrc += origin; + + return copy_from_user(to, (void __user *) guestsrc, n); +} +#endif diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c new file mode 100644 index 000000000000..349581a26103 --- /dev/null +++ b/arch/s390/kvm/intercept.c @@ -0,0 +1,216 @@ +/* + * intercept.c - in-kernel handling for sie intercepts + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/errno.h> +#include <linux/pagemap.h> + +#include <asm/kvm_host.h> + +#include "kvm-s390.h" +#include "gaccess.h" + +static int handle_lctg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + + ((vcpu->arch.sie_block->ipb & 0xff00) << 4); + u64 useraddr; + int reg, rc; + + vcpu->stat.instruction_lctg++; + if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f) + return -ENOTSUPP; + + useraddr = disp2; + if (base2) + useraddr += vcpu->arch.guest_gprs[base2]; + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, + disp2); + + do { + rc = get_guest_u64(vcpu, useraddr, + &vcpu->arch.sie_block->gcr[reg]); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + break; + } + useraddr += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + return 0; +} + +static int handle_lctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 useraddr; + u32 val = 0; + int reg, rc; + + vcpu->stat.instruction_lctl++; + + useraddr = disp2; + if (base2) + useraddr += vcpu->arch.guest_gprs[base2]; + + VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, + disp2); + + reg = reg1; + do { + rc = get_guest_u32(vcpu, useraddr, &val); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + break; + } + vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; + vcpu->arch.sie_block->gcr[reg] |= val; + useraddr += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + return 0; +} + +static intercept_handler_t instruction_handlers[256] = { + [0x83] = kvm_s390_handle_diag, + [0xae] = kvm_s390_handle_sigp, + [0xb2] = kvm_s390_handle_priv, + [0xb7] = handle_lctl, + [0xeb] = handle_lctg, +}; + +static int handle_noop(struct kvm_vcpu *vcpu) +{ + switch (vcpu->arch.sie_block->icptcode) { + case 0x10: + vcpu->stat.exit_external_request++; + break; + case 0x14: + vcpu->stat.exit_external_interrupt++; + break; + default: + break; /* nothing */ + } + return 0; +} + +static int handle_stop(struct kvm_vcpu *vcpu) +{ + int rc; + + vcpu->stat.exit_stop_request++; + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + spin_lock_bh(&vcpu->arch.local_int.lock); + if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; + rc = __kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_NOADDR); + if (rc >= 0) + rc = -ENOTSUPP; + } + + if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; + VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); + rc = -ENOTSUPP; + } else + rc = 0; + spin_unlock_bh(&vcpu->arch.local_int.lock); + return rc; +} + +static int handle_validity(struct kvm_vcpu *vcpu) +{ + int viwhy = vcpu->arch.sie_block->ipb >> 16; + vcpu->stat.exit_validity++; + if (viwhy == 0x37) { + fault_in_pages_writeable((char __user *) + vcpu->kvm->arch.guest_origin + + vcpu->arch.sie_block->prefix, + PAGE_SIZE); + return 0; + } + VCPU_EVENT(vcpu, 
2, "unhandled validity intercept code %d", + viwhy); + return -ENOTSUPP; +} + +static int handle_instruction(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + vcpu->stat.exit_instruction++; + handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; + if (handler) + return handler(vcpu); + return -ENOTSUPP; +} + +static int handle_prog(struct kvm_vcpu *vcpu) +{ + vcpu->stat.exit_program_interruption++; + return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc); +} + +static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) +{ + int rc, rc2; + + vcpu->stat.exit_instr_and_program++; + rc = handle_instruction(vcpu); + rc2 = handle_prog(vcpu); + + if (rc == -ENOTSUPP) + vcpu->arch.sie_block->icptcode = 0x04; + if (rc) + return rc; + return rc2; +} + +static const intercept_handler_t intercept_funcs[0x48 >> 2] = { + [0x00 >> 2] = handle_noop, + [0x04 >> 2] = handle_instruction, + [0x08 >> 2] = handle_prog, + [0x0C >> 2] = handle_instruction_and_prog, + [0x10 >> 2] = handle_noop, + [0x14 >> 2] = handle_noop, + [0x1C >> 2] = kvm_s390_handle_wait, + [0x20 >> 2] = handle_validity, + [0x28 >> 2] = handle_stop, +}; + +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) +{ + intercept_handler_t func; + u8 code = vcpu->arch.sie_block->icptcode; + + if (code & 3 || code > 0x48) + return -ENOTSUPP; + func = intercept_funcs[code >> 2]; + if (func) + return func(vcpu); + return -ENOTSUPP; +} diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c new file mode 100644 index 000000000000..fcd1ed8015c1 --- /dev/null +++ b/arch/s390/kvm/interrupt.c @@ -0,0 +1,592 @@ +/* + * interrupt.c - handling kvm guest interrupts + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + +#include <asm/lowcore.h> +#include <asm/uaccess.h> +#include <linux/kvm_host.h> +#include "kvm-s390.h" +#include "gaccess.h" + +static int psw_extint_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); +} + +static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) +{ + if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) || + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT)) + return 0; + return 1; +} + +static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, + struct interrupt_info *inti) +{ + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) + return 1; + return 0; + case KVM_S390_INT_SERVICE: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x200ul) + return 1; + return 0; + case KVM_S390_INT_VIRTIO: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x200ul) + return 1; + return 0; + case KVM_S390_PROGRAM_INT: + case KVM_S390_SIGP_STOP: + case KVM_S390_SIGP_SET_PREFIX: + case KVM_S390_RESTART: + return 1; + default: + BUG(); + } + return 0; +} + +static void __set_cpu_idle(struct kvm_vcpu *vcpu) +{ + BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); + atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); +} + +static void __unset_cpu_idle(struct kvm_vcpu *vcpu) +{ + BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); + atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); +} + +static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(CPUSTAT_ECALL_PEND | + CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, + &vcpu->arch.sie_block->cpuflags); + vcpu->arch.sie_block->lctl = 0x0000; +} + +static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) +{ + atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); +} + +static void __set_intercept_indicator(struct kvm_vcpu *vcpu, + struct interrupt_info *inti) +{ + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + case KVM_S390_INT_SERVICE: + case KVM_S390_INT_VIRTIO: + if (psw_extint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_EXT_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR0; + break; + case KVM_S390_SIGP_STOP: + __set_cpuflag(vcpu, CPUSTAT_STOP_INT); + break; + default: + BUG(); + } +} + +static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, + struct interrupt_info *inti) +{ + const unsigned short table[] = { 2, 4, 4, 6 }; + int rc, exception = 0; + + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); + vcpu->stat.deliver_emergency_signal++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_INT_SERVICE: + VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", + inti->ext.ext_params); + vcpu->stat.deliver_service_signal++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, 
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_INT_VIRTIO: + VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM, + inti->ext.ext_params2); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_SIGP_STOP: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); + vcpu->stat.deliver_stop_signal++; + __set_intercept_indicator(vcpu, inti); + break; + + case KVM_S390_SIGP_SET_PREFIX: + VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", + inti->prefix.address); + vcpu->stat.deliver_prefix_signal++; + vcpu->arch.sie_block->prefix = inti->prefix.address; + vcpu->arch.sie_block->ihcpu = 0xffff; + break; + + case KVM_S390_RESTART: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); + vcpu->stat.deliver_restart_signal++; + rc = copy_to_guest(vcpu, offsetof(struct _lowcore, + restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_PROGRAM_INT: + VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", + inti->pgm.code, + table[vcpu->arch.sie_block->ipa >> 14]); + vcpu->stat.deliver_program_int++; + rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_PGM_ILC, + table[vcpu->arch.sie_block->ipa >> 14]); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_PGM_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + default: + BUG(); + } + + if (exception) { + VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" + " interrupt"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (inti->type == KVM_S390_PROGRAM_INT) { + printk(KERN_WARNING "kvm: recursive program check\n"); + BUG(); + } + } +} + +static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) +{ + int rc, exception = 0; + + if (psw_extint_disabled(vcpu)) + return 0; + if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) + return 0; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); + if (rc == -EFAULT) + exception = 1; + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + 
__LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + if (exception) { + VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \ + " ckc interrupt"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + return 0; + } + + return 1; +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct local_interrupt *li = &vcpu->arch.local_int; + struct float_interrupt *fi = vcpu->arch.local_int.float_int; + struct interrupt_info *inti; + int rc = 0; + + if (atomic_read(&li->active)) { + spin_lock_bh(&li->lock); + list_for_each_entry(inti, &li->list, list) + if (__interrupt_is_deliverable(vcpu, inti)) { + rc = 1; + break; + } + spin_unlock_bh(&li->lock); + } + + if ((!rc) && atomic_read(&fi->active)) { + spin_lock_bh(&fi->lock); + list_for_each_entry(inti, &fi->list, list) + if (__interrupt_is_deliverable(vcpu, inti)) { + rc = 1; + break; + } + spin_unlock_bh(&fi->lock); + } + + if ((!rc) && (vcpu->arch.sie_block->ckc < + get_clock() + vcpu->arch.sie_block->epoch)) { + if ((!psw_extint_disabled(vcpu)) && + (vcpu->arch.sie_block->gcr[0] & 0x800ul)) + rc = 1; + } + + return rc; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) +{ + u64 now, sltime; + DECLARE_WAITQUEUE(wait, current); + + vcpu->stat.exit_wait_state++; + if (kvm_cpu_has_interrupt(vcpu)) + return 0; + + if (psw_interrupts_disabled(vcpu)) { + VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); + __unset_cpu_idle(vcpu); + return -ENOTSUPP; /* disabled wait */ + } + + if (psw_extint_disabled(vcpu) || + (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) { + VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); + goto no_timer; + } + + now = get_clock() + vcpu->arch.sie_block->epoch; + if (vcpu->arch.sie_block->ckc < now) { + __unset_cpu_idle(vcpu); + return 0; + } + + sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1; + + vcpu->arch.ckc_timer.expires = jiffies + sltime; + + add_timer(&vcpu->arch.ckc_timer); + VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime); +no_timer: + spin_lock_bh(&vcpu->arch.local_int.float_int->lock); + spin_lock_bh(&vcpu->arch.local_int.lock); + __set_cpu_idle(vcpu); + vcpu->arch.local_int.timer_due = 0; + add_wait_queue(&vcpu->arch.local_int.wq, &wait); + while (list_empty(&vcpu->arch.local_int.list) && + list_empty(&vcpu->arch.local_int.float_int->list) && + (!vcpu->arch.local_int.timer_due) && + !signal_pending(current)) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); + vcpu_put(vcpu); + schedule(); + vcpu_load(vcpu); + spin_lock_bh(&vcpu->arch.local_int.float_int->lock); + spin_lock_bh(&vcpu->arch.local_int.lock); + } + __unset_cpu_idle(vcpu); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&vcpu->arch.local_int.wq, &wait); + spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); + del_timer(&vcpu->arch.ckc_timer); + return 0; +} + +void kvm_s390_idle_wakeup(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + spin_lock_bh(&vcpu->arch.local_int.lock); + vcpu->arch.local_int.timer_due = 1; + if (waitqueue_active(&vcpu->arch.local_int.wq)) + wake_up_interruptible(&vcpu->arch.local_int.wq); + spin_unlock_bh(&vcpu->arch.local_int.lock); +} + + +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) +{ + struct local_interrupt *li = &vcpu->arch.local_int; + struct float_interrupt *fi = vcpu->arch.local_int.float_int; + 
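/* + * Local interrupts are delivered before floating ones: each pass takes + * at most one deliverable interrupt off its list and delivers it after + * dropping the lock, while interrupts that cannot be delivered yet only + * set intercept indicators so the guest is intercepted again once they + * become deliverable. + */ + 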
struct interrupt_info *n, *inti = NULL; + int deliver; + + __reset_intercept_indicators(vcpu); + if (atomic_read(&li->active)) { + do { + deliver = 0; + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + if (__interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&li->list)) + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } + + if ((vcpu->arch.sie_block->ckc < + get_clock() + vcpu->arch.sie_block->epoch)) + __try_deliver_ckc_interrupt(vcpu); + + if (atomic_read(&fi->active)) { + do { + deliver = 0; + spin_lock_bh(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + if (__interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock_bh(&fi->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } +} + +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +{ + struct local_interrupt *li = &vcpu->arch.local_int; + struct interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_PROGRAM_INT; + inti->pgm.code = code; + + VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); + spin_lock_bh(&li->lock); + list_add(&inti->list, &li->list); + atomic_set(&li->active, 1); + BUG_ON(waitqueue_active(&li->wq)); + spin_unlock_bh(&li->lock); + return 0; +} + +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int) +{ + struct local_interrupt *li; + struct float_interrupt *fi; + struct interrupt_info *inti; + int sigcpu; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + switch (s390int->type) { + case KVM_S390_INT_VIRTIO: + VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx", + s390int->parm, s390int->parm64); + inti->type = s390int->type; + inti->ext.ext_params = s390int->parm; + inti->ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_INT_SERVICE: + VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); + inti->type = s390int->type; + inti->ext.ext_params = s390int->parm; + break; + case KVM_S390_PROGRAM_INT: + case KVM_S390_SIGP_STOP: + case KVM_S390_INT_EMERGENCY: + default: + kfree(inti); + return -EINVAL; + } + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock_bh(&fi->lock); + list_add_tail(&inti->list, &fi->list); + atomic_set(&fi->active, 1); + sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); + if (sigcpu == KVM_MAX_VCPUS) { + do { + sigcpu = fi->next_rr_cpu++; + if (sigcpu == KVM_MAX_VCPUS) + sigcpu = fi->next_rr_cpu = 0; + } while (fi->local_int[sigcpu] == NULL); + } + li = fi->local_int[sigcpu]; + spin_lock_bh(&li->lock); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + spin_unlock_bh(&fi->lock); + mutex_unlock(&kvm->lock); + return 0; +} + +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int) +{ + struct local_interrupt *li; + struct interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + switch (s390int->type) { + case KVM_S390_PROGRAM_INT: + if (s390int->parm & 0xffff0000) { + kfree(inti); 
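+ /* the program interruption code has only 16 bits */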
+ return -EINVAL; + } + inti->type = s390int->type; + inti->pgm.code = s390int->parm; + VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", + s390int->parm); + break; + case KVM_S390_SIGP_STOP: + case KVM_S390_RESTART: + case KVM_S390_SIGP_SET_PREFIX: + case KVM_S390_INT_EMERGENCY: + VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); + inti->type = s390int->type; + break; + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + default: + kfree(inti); + return -EINVAL; + } + + mutex_lock(&vcpu->kvm->lock); + li = &vcpu->arch.local_int; + spin_lock_bh(&li->lock); + if (inti->type == KVM_S390_PROGRAM_INT) + list_add(&inti->list, &li->list); + else + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + if (inti->type == KVM_S390_SIGP_STOP) + li->action_bits |= ACTION_STOP_ON_STOP; + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&vcpu->arch.local_int.wq); + spin_unlock_bh(&li->lock); + mutex_unlock(&vcpu->kvm->lock); + return 0; +} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c new file mode 100644 index 000000000000..98d1e73e01f1 --- /dev/null +++ b/arch/s390/kvm/kvm-s390.c @@ -0,0 +1,685 @@ +/* + * s390host.c -- hosting zSeries kernel virtual machines + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + * Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/compiler.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <asm/lowcore.h> +#include <asm/pgtable.h> + +#include "kvm-s390.h" +#include "gaccess.h" + +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { "userspace_handled", VCPU_STAT(exit_userspace) }, + { "exit_validity", VCPU_STAT(exit_validity) }, + { "exit_stop_request", VCPU_STAT(exit_stop_request) }, + { "exit_external_request", VCPU_STAT(exit_external_request) }, + { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, + { "exit_instruction", VCPU_STAT(exit_instruction) }, + { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, + { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "instruction_lctg", VCPU_STAT(instruction_lctg) }, + { "instruction_lctl", VCPU_STAT(instruction_lctl) }, + { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, + { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, + { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, + { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, + { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, + { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, + { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, + { "exit_wait_state", VCPU_STAT(exit_wait_state) }, + { "instruction_stidp", VCPU_STAT(instruction_stidp) }, + { "instruction_spx", VCPU_STAT(instruction_spx) }, + { "instruction_stpx", VCPU_STAT(instruction_stpx) }, + { "instruction_stap", VCPU_STAT(instruction_stap) }, + { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, + { 
"instruction_stsch", VCPU_STAT(instruction_stsch) }, + { "instruction_chsc", VCPU_STAT(instruction_chsc) }, + { "instruction_stsi", VCPU_STAT(instruction_stsi) }, + { "instruction_stfl", VCPU_STAT(instruction_stfl) }, + { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, + { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, + { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, + { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, + { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_44", VCPU_STAT(diagnose_44) }, + { NULL } +}; + + +/* Section: not file related */ +void kvm_arch_hardware_enable(void *garbage) +{ + /* every s390 is virtualization enabled ;-) */ +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +void decache_vcpus_on_cpu(int cpu) +{ +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +void kvm_arch_check_processor_compat(void *rtn) +{ +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +void kvm_arch_exit(void) +{ +} + +/* Section: device related */ +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + if (ioctl == KVM_S390_ENABLE_SIE) + return s390_enable_sie(); + return -EINVAL; +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + return 0; +} + +/* Section: vm related */ +/* + * Get (and clear) the dirty memory log for a memory slot. + */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + return 0; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + int r; + + switch (ioctl) { + case KVM_S390_INTERRUPT: { + struct kvm_s390_interrupt s390int; + + r = -EFAULT; + if (copy_from_user(&s390int, argp, sizeof(s390int))) + break; + r = kvm_s390_inject_vm(kvm, &s390int); + break; + } + default: + r = -EINVAL; + } + + return r; +} + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm; + int rc; + char debug_name[16]; + + rc = s390_enable_sie(); + if (rc) + goto out_nokvm; + + rc = -ENOMEM; + kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); + if (!kvm) + goto out_nokvm; + + kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + if (!kvm->arch.sca) + goto out_nosca; + + sprintf(debug_name, "kvm-%u", current->pid); + + kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); + if (!kvm->arch.dbf) + goto out_nodbf; + + spin_lock_init(&kvm->arch.float_int.lock); + INIT_LIST_HEAD(&kvm->arch.float_int.list); + + debug_register_view(kvm->arch.dbf, &debug_sprintf_view); + VM_EVENT(kvm, 3, "%s", "vm created"); + + try_module_get(THIS_MODULE); + + return kvm; +out_nodbf: + free_page((unsigned long)(kvm->arch.sca)); +out_nosca: + kfree(kvm); +out_nokvm: + return ERR_PTR(rc); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + debug_unregister(kvm->arch.dbf); + free_page((unsigned long)(kvm->arch.sca)); + kfree(kvm); + module_put(THIS_MODULE); +} + +/* Section: vcpu related */ +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + /* kvm common code refers to this, but does'nt call it */ + BUG(); +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + save_fp_regs(&vcpu->arch.host_fpregs); + save_access_regs(vcpu->arch.host_acrs); + vcpu->arch.guest_fpregs.fpc &= 
FPC_VALID_MASK; + restore_fp_regs(&vcpu->arch.guest_fpregs); + restore_access_regs(vcpu->arch.guest_acrs); + + if (signal_pending(current)) + atomic_set_mask(CPUSTAT_STOP_INT, + &vcpu->arch.sie_block->cpuflags); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + save_fp_regs(&vcpu->arch.guest_fpregs); + save_access_regs(vcpu->arch.guest_acrs); + restore_fp_regs(&vcpu->arch.host_fpregs); + restore_access_regs(vcpu->arch.host_acrs); +} + +static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) +{ + /* this equals initial cpu reset in pop, but we don't switch to ESA */ + vcpu->arch.sie_block->gpsw.mask = 0UL; + vcpu->arch.sie_block->gpsw.addr = 0UL; + vcpu->arch.sie_block->prefix = 0UL; + vcpu->arch.sie_block->ihcpu = 0xffff; + vcpu->arch.sie_block->cputm = 0UL; + vcpu->arch.sie_block->ckc = 0UL; + vcpu->arch.sie_block->todpr = 0; + memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); + vcpu->arch.sie_block->gcr[0] = 0xE0UL; + vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; + vcpu->arch.guest_fpregs.fpc = 0; + asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); + vcpu->arch.sie_block->gbea = 1; +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); + vcpu->arch.sie_block->gmslm = 0xffffffffffUL; + vcpu->arch.sie_block->gmsor = 0x000000000000; + vcpu->arch.sie_block->ecb = 2; + vcpu->arch.sie_block->eca = 0xC1002001U; + setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, + (unsigned long) vcpu); + get_cpu_id(&vcpu->arch.cpu_id); + vcpu->arch.cpu_id.version = 0xfe; + return 0; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, + unsigned int id) +{ + struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); + int rc = -ENOMEM; + + if (!vcpu) + goto out_nomem; + + vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL); + + if (!vcpu->arch.sie_block) + goto out_free_cpu; + + vcpu->arch.sie_block->icpua = id; + BUG_ON(!kvm->arch.sca); + BUG_ON(kvm->arch.sca->cpu[id].sda); + kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; + + spin_lock_init(&vcpu->arch.local_int.lock); + INIT_LIST_HEAD(&vcpu->arch.local_int.list); + vcpu->arch.local_int.float_int = &kvm->arch.float_int; + spin_lock_bh(&kvm->arch.float_int.lock); + kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; + init_waitqueue_head(&vcpu->arch.local_int.wq); + vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; + spin_unlock_bh(&kvm->arch.float_int.lock); + + rc = kvm_vcpu_init(vcpu, kvm, id); + if (rc) + goto out_free_cpu; + VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + vcpu->arch.sie_block); + + try_module_get(THIS_MODULE); + + return vcpu; +out_free_cpu: + kfree(vcpu); +out_nomem: + return ERR_PTR(rc); +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 3, "%s", "destroy cpu"); + free_page((unsigned long)(vcpu->arch.sie_block)); + kfree(vcpu); + module_put(THIS_MODULE); +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + /* kvm common code refers to this, but never calls it */ + BUG(); + return 0; +} + +static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) +{ + vcpu_load(vcpu); + kvm_s390_vcpu_initial_reset(vcpu); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + 
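/* the guest's 16 general purpose registers are fully shadowed in arch.guest_gprs */ + 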
memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + vcpu_load(vcpu); + memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); + memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + vcpu_load(vcpu); + memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); + memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_load(vcpu); + memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); + vcpu->arch.guest_fpregs.fpc = fpu->fpc; + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_load(vcpu); + memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); + fpu->fpc = vcpu->arch.guest_fpregs.fpc; + vcpu_put(vcpu); + return 0; +} + +static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) +{ + int rc = 0; + + vcpu_load(vcpu); + if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) + rc = -EBUSY; + else + vcpu->arch.sie_block->gpsw = psw; + vcpu_put(vcpu); + return rc; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +static void __vcpu_run(struct kvm_vcpu *vcpu) +{ + memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); + + if (need_resched()) + schedule(); + + vcpu->arch.sie_block->icptcode = 0; + local_irq_disable(); + kvm_guest_enter(); + local_irq_enable(); + VCPU_EVENT(vcpu, 6, "entering sie flags %x", + atomic_read(&vcpu->arch.sie_block->cpuflags)); + sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs); + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", + vcpu->arch.sie_block->icptcode); + local_irq_disable(); + kvm_guest_exit(); + local_irq_enable(); + + memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16); +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int rc; + sigset_t sigsaved; + + vcpu_load(vcpu); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + + BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); + + switch (kvm_run->exit_reason) { + case KVM_EXIT_S390_SIEIC: + vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr; + break; + case KVM_EXIT_UNKNOWN: + case KVM_EXIT_S390_RESET: + break; + default: + BUG(); + } + + might_sleep(); + + do { + 
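/* keep running the guest via sie until a signal is pending or an intercept handler reports a reason to stop */ + 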
kvm_s390_deliver_pending_interrupts(vcpu); + __vcpu_run(vcpu); + rc = kvm_handle_sie_intercept(vcpu); + } while (!signal_pending(current) && !rc); + + if (signal_pending(current) && !rc) + rc = -EINTR; + + if (rc == -ENOTSUPP) { + /* intercept cannot be handled in-kernel, prepare kvm-run */ + kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; + kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask; + kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr; + kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + rc = 0; + } + + if (rc == -EREMOTE) { + /* intercept was handled, but userspace support is needed + * kvm_run has been prepared by the handler */ + rc = 0; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu_put(vcpu); + + vcpu->stat.exit_userspace++; + return rc; +} + +static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from, + unsigned long n, int prefix) +{ + if (prefix) + return copy_to_guest(vcpu, guestdest, from, n); + else + return copy_to_guest_absolute(vcpu, guestdest, from, n); +} + +/* + * store status at address + * we have two special cases: + * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit + * KVM_S390_STORE_STATUS_PREFIXED: -> prefix + */ +int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + const unsigned char archmode = 1; + int prefix; + + if (addr == KVM_S390_STORE_STATUS_NOADDR) { + if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) + return -EFAULT; + addr = SAVE_AREA_BASE; + prefix = 0; + } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) { + if (copy_to_guest(vcpu, 163ul, &archmode, 1)) + return -EFAULT; + addr = SAVE_AREA_BASE; + prefix = 1; + } else + prefix = 0; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs), + vcpu->arch.guest_fpregs.fprs, 128, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs), + vcpu->arch.guest_gprs, 128, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw), + &vcpu->arch.sie_block->gpsw, 16, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg), + &vcpu->arch.sie_block->prefix, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, + addr + offsetof(struct save_area_s390x, fp_ctrl_reg), + &vcpu->arch.guest_fpregs.fpc, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg), + &vcpu->arch.sie_block->todpr, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer), + &vcpu->arch.sie_block->cputm, 8, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp), + &vcpu->arch.sie_block->ckc, 8, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs), + &vcpu->arch.guest_acrs, 64, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, + addr + offsetof(struct save_area_s390x, ctrl_regs), + &vcpu->arch.sie_block->gcr, 128, prefix)) + return -EFAULT; + return 0; +} + +static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + int rc; + + vcpu_load(vcpu); + rc = __kvm_s390_vcpu_store_status(vcpu, addr); + vcpu_put(vcpu); + return rc; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = 
filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_S390_INTERRUPT: { + struct kvm_s390_interrupt s390int; + + if (copy_from_user(&s390int, argp, sizeof(s390int))) + return -EFAULT; + return kvm_s390_inject_vcpu(vcpu, &s390int); + } + case KVM_S390_STORE_STATUS: + return kvm_s390_vcpu_store_status(vcpu, arg); + case KVM_S390_SET_INITIAL_PSW: { + psw_t psw; + + if (copy_from_user(&psw, argp, sizeof(psw))) + return -EFAULT; + return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); + } + case KVM_S390_INITIAL_RESET: + return kvm_arch_vcpu_ioctl_initial_reset(vcpu); + default: + ; + } + return -EINVAL; +} + +/* Section: memory related */ +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + /* A few sanity checks. We can have exactly one memory slot which has + to start at guest virtual zero and which has to be located at a + page boundary in userland and which has to end at a page boundary. + The memory in userland is ok to be fragmented into various different + vmas. It is okay to mmap() and munmap() stuff in this slot after + doing this call at any time */ + + if (mem->slot) + return -EINVAL; + + if (mem->guest_phys_addr) + return -EINVAL; + + if (mem->userspace_addr & (PAGE_SIZE - 1)) + return -EINVAL; + + if (mem->memory_size & (PAGE_SIZE - 1)) + return -EINVAL; + + kvm->arch.guest_origin = mem->userspace_addr; + kvm->arch.guest_memsize = mem->memory_size; + + /* FIXME: we do want to interrupt running CPUs and update their memory + configuration now to avoid race conditions. But hey, changing the + memory layout while virtual CPUs are running is usually bad + programming practice. */ + + return 0; +} + +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn; +} + +static int __init kvm_s390_init(void) +{ + return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); +} + +static void __exit kvm_s390_exit(void) +{ + kvm_exit(); +} + +module_init(kvm_s390_init); +module_exit(kvm_s390_exit); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h new file mode 100644 index 000000000000..3893cf12eacf --- /dev/null +++ b/arch/s390/kvm/kvm-s390.h @@ -0,0 +1,64 @@ +/* + * kvm_s390.h - definition for kvm on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#ifndef ARCH_S390_KVM_S390_H +#define ARCH_S390_KVM_S390_H + +#include <linux/kvm.h> +#include <linux/kvm_host.h> + +typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); + +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); + +#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ + d_args); \ +} while (0) + +#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \ + "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \ + d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\ + d_args); \ +} while (0) + +static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) +{ + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; +} + +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); +void kvm_s390_idle_wakeup(unsigned long data); +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); + +/* implemented in priv.c */ +int kvm_s390_handle_priv(struct kvm_vcpu *vcpu); + +/* implemented in sigp.c */ +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); + +/* implemented in kvm-s390.c */ +int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, + unsigned long addr); +/* implemented in diag.c */ +int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); + +#endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c new file mode 100644 index 000000000000..c02286c6a931 --- /dev/null +++ b/arch/s390/kvm/priv.c @@ -0,0 +1,314 @@ +/* + * priv.c - handling privileged instructions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/errno.h> +#include <asm/current.h> +#include <asm/debug.h> +#include <asm/ebcdic.h> +#include <asm/sysinfo.h> +#include "gaccess.h" +#include "kvm-s390.h" + +static int handle_set_prefix(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + u32 address = 0; + u8 tmp; + + vcpu->stat.instruction_spx++; + + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + /* must be word boundary */ + if (operand2 & 3) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + /* get the value */ + if (get_guest_u32(vcpu, operand2, &address)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + address = address & 0x7fffe000u; + + /* make sure that the new value is valid memory */ + if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || + (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + vcpu->arch.sie_block->prefix = address; + vcpu->arch.sie_block->ihcpu = 0xffff; + + VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); +out: + return 0; +} + +static int handle_store_prefix(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + u32 address; + + vcpu->stat.instruction_stpx++; + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + /* must be word boundary */ + if (operand2 & 3) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + address = vcpu->arch.sie_block->prefix; + address = address & 0x7fffe000u; + + /* get the value */ + if (put_guest_u32(vcpu, operand2, address)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); +out: + return 0; +} + +static int handle_store_cpu_address(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 useraddr; + int rc; + + vcpu->stat.instruction_stap++; + useraddr = disp2; + if (base2) + useraddr += vcpu->arch.guest_gprs[base2]; + + if (useraddr & 1) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr); +out: + return 0; +} + +static int handle_skey(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_storage_key++; + vcpu->arch.sie_block->gpsw.addr -= 4; + VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); + return 0; +} + +static int handle_stsch(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_stsch++; + VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3"); + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + return 0; +} + +static int handle_chsc(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_chsc++; + VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3"); + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + return 0; +} + +static int handle_stfl(struct 
kvm_vcpu *vcpu) +{ + unsigned int facility_list = stfl(); + int rc; + + vcpu->stat.instruction_stfl++; + facility_list &= ~(1UL<<24); /* no stfle */ + + rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), + &facility_list, sizeof(facility_list)); + if (rc == -EFAULT) + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + else + VCPU_EVENT(vcpu, 5, "store facility list value %x", + facility_list); + return 0; +} + +static int handle_stidp(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + int rc; + + vcpu->stat.instruction_stidp++; + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + if (operand2 & 7) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); +out: + return 0; +} + +static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + int cpus = 0; + int n; + + spin_lock_bh(&fi->lock); + for (n = 0; n < KVM_MAX_VCPUS; n++) + if (fi->local_int[n]) + cpus++; + spin_unlock_bh(&fi->lock); + + /* deal with other level 3 hypervisors */ + if (stsi(mem, 3, 2, 2) == -ENOSYS) + mem->count = 0; + if (mem->count < 8) + mem->count++; + for (n = mem->count - 1; n > 0 ; n--) + memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0])); + + mem->vm[0].cpus_total = cpus; + mem->vm[0].cpus_configured = cpus; + mem->vm[0].cpus_standby = 0; + mem->vm[0].cpus_reserved = 0; + mem->vm[0].caf = 1000; + memcpy(mem->vm[0].name, "KVMguest", 8); + ASCEBC(mem->vm[0].name, 8); + memcpy(mem->vm[0].cpi, "KVM/Linux ", 16); + ASCEBC(mem->vm[0].cpi, 16); +} + +static int handle_stsi(struct kvm_vcpu *vcpu) +{ + int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28; + int sel1 = vcpu->arch.guest_gprs[0] & 0xff; + int sel2 = vcpu->arch.guest_gprs[1] & 0xffff; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + unsigned long mem; + + vcpu->stat.instruction_stsi++; + VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); + + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + if (operand2 & 0xfff && fc > 0) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (fc) { + case 0: + vcpu->arch.guest_gprs[0] = 3 << 28; + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + return 0; + case 1: /* same handling for 1 and 2 */ + case 2: + mem = get_zeroed_page(GFP_KERNEL); + if (!mem) + goto out_fail; + if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS) + goto out_mem; + break; + case 3: + if (sel1 != 2 || sel2 != 2) + goto out_fail; + mem = get_zeroed_page(GFP_KERNEL); + if (!mem) + goto out_fail; + handle_stsi_3_2_2(vcpu, (void *) mem); + break; + default: + goto out_fail; + } + + if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out_mem; + } + free_page(mem); + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.guest_gprs[0] = 0; + return 0; +out_mem: + free_page(mem); +out_fail: + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; + return 0; +} + +static intercept_handler_t priv_handlers[256] = { + [0x02] = handle_stidp, + [0x10] = 
handle_set_prefix, + [0x11] = handle_store_prefix, + [0x12] = handle_store_cpu_address, + [0x29] = handle_skey, + [0x2a] = handle_skey, + [0x2b] = handle_skey, + [0x34] = handle_stsch, + [0x5f] = handle_chsc, + [0x7d] = handle_stsi, + [0xb1] = handle_stfl, +}; + +int kvm_s390_handle_priv(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); + return -ENOTSUPP; +} diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S new file mode 100644 index 000000000000..934fd6a885f6 --- /dev/null +++ b/arch/s390/kvm/sie64a.S @@ -0,0 +1,47 @@ +/* + * sie64a.S - low level sie call + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/errno.h> +#include <asm/asm-offsets.h> + +SP_R5 = 5 * 8 # offset into stackframe +SP_R6 = 6 * 8 + +/* + * sie64a calling convention: + * %r2 pointer to sie control block + * %r3 guest register save area + */ + .globl sie64a +sie64a: + lgr %r5,%r3 + stmg %r5,%r14,SP_R5(%r15) # save registers on entry + lgr %r14,%r2 # pointer to sie control block + lmg %r0,%r13,0(%r3) # load guest gprs 0-13 +sie_inst: + sie 0(%r14) + lg %r14,SP_R5(%r15) + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lghi %r2,0 + lmg %r6,%r14,SP_R6(%r15) + br %r14 + +sie_err: + lg %r14,SP_R5(%r15) + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lghi %r2,-EFAULT + lmg %r6,%r14,SP_R6(%r15) + br %r14 + + .section __ex_table,"a" + .quad sie_inst,sie_err + .previous diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c new file mode 100644 index 000000000000..0a236acfb5f6 --- /dev/null +++ b/arch/s390/kvm/sigp.c @@ -0,0 +1,288 @@ +/* + * sigp.c - handling interprocessor communication + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include "gaccess.h" +#include "kvm-s390.h" + +/* sigp order codes */ +#define SIGP_SENSE 0x01 +#define SIGP_EXTERNAL_CALL 0x02 +#define SIGP_EMERGENCY 0x03 +#define SIGP_START 0x04 +#define SIGP_STOP 0x05 +#define SIGP_RESTART 0x06 +#define SIGP_STOP_STORE_STATUS 0x09 +#define SIGP_INITIAL_CPU_RESET 0x0b +#define SIGP_CPU_RESET 0x0c +#define SIGP_SET_PREFIX 0x0d +#define SIGP_STORE_STATUS_ADDR 0x0e +#define SIGP_SET_ARCH 0x12 + +/* cpu status bits */ +#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL +#define SIGP_STAT_INCORRECT_STATE 0x00000200UL +#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL +#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL +#define SIGP_STAT_STOPPED 0x00000040UL +#define SIGP_STAT_OPERATOR_INTERV 0x00000020UL +#define SIGP_STAT_CHECK_STOP 0x00000010UL +#define SIGP_STAT_INOPERATIVE 0x00000004UL +#define SIGP_STAT_INVALID_ORDER 0x00000002UL +#define SIGP_STAT_RECEIVER_CHECK 0x00000001UL + + +static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock_bh(&fi->lock); + if (fi->local_int[cpu_addr] == NULL) + rc = 3; /* not operational */ + else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_RUNNING) { + *reg &= 0xffffffff00000000UL; + rc = 1; /* status stored */ + } else { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_STOPPED; + rc = 1; /* status stored */ + } + spin_unlock_bh(&fi->lock); + + VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); + return rc; +} + +static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct local_interrupt *li; + struct interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_INT_EMERGENCY; + + spin_lock_bh(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ +unlock: + spin_unlock_bh(&fi->lock); + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); + return rc; +} + +static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct local_interrupt *li; + struct interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_SIGP_STOP; + + spin_lock_bh(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); + if (store) + li->action_bits |= ACTION_STORE_ON_STOP; + li->action_bits |= ACTION_STOP_ON_STOP; + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + 
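/* the woken vcpu will act on CPUSTAT_STOP_INT and the action_bits set above */ + 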
spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ +unlock: + spin_unlock_bh(&fi->lock); + VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + return rc; +} + +static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) +{ + int rc; + + switch (parameter & 0xff) { + case 0: + printk(KERN_WARNING "kvm: request to switch to ESA/390 mode" + " not supported"); + rc = 3; /* not operational */ + break; + case 1: + case 2: + rc = 0; /* order accepted */ + break; + default: + rc = -ENOTSUPP; + } + return rc; +} + +static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, + u64 *reg) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct local_interrupt *li; + struct interrupt_info *inti; + int rc; + u8 tmp; + + /* make sure that the new value is valid memory */ + address = address & 0x7fffe000u; + if ((copy_from_guest(vcpu, &tmp, + (u64) (address + vcpu->kvm->arch.guest_origin), 1)) || + (copy_from_guest(vcpu, &tmp, (u64) (address + + vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) { + *reg |= SIGP_STAT_INVALID_PARAMETER; + return 1; /* invalid parameter */ + } + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return 2; /* busy */ + + spin_lock_bh(&fi->lock); + li = fi->local_int[cpu_addr]; + + if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) { + rc = 1; /* incorrect state */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_INCORRECT_STATE; + kfree(inti); + goto out_fi; + } + + spin_lock_bh(&li->lock); + /* cpu must be in stopped state */ + if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + rc = 1; /* incorrect state */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_INCORRECT_STATE; + kfree(inti); + goto out_li; + } + + inti->type = KVM_S390_SIGP_SET_PREFIX; + inti->prefix.address = address; + + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + rc = 0; /* order accepted */ + + VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); +out_li: + spin_unlock_bh(&li->lock); +out_fi: + spin_unlock_bh(&fi->lock); + return rc; +} + +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +{ + int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int r3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u32 parameter; + u16 cpu_addr = vcpu->arch.guest_gprs[r3]; + u8 order_code; + int rc; + + order_code = disp2; + if (base2) + order_code += vcpu->arch.guest_gprs[base2]; + + if (r1 % 2) + parameter = vcpu->arch.guest_gprs[r1]; + else + parameter = vcpu->arch.guest_gprs[r1 + 1]; + + switch (order_code) { + case SIGP_SENSE: + vcpu->stat.instruction_sigp_sense++; + rc = __sigp_sense(vcpu, cpu_addr, + &vcpu->arch.guest_gprs[r1]); + break; + case SIGP_EMERGENCY: + vcpu->stat.instruction_sigp_emergency++; + rc = __sigp_emergency(vcpu, cpu_addr); + break; + case SIGP_STOP: + vcpu->stat.instruction_sigp_stop++; + rc = __sigp_stop(vcpu, cpu_addr, 0); + break; + case SIGP_STOP_STORE_STATUS: + vcpu->stat.instruction_sigp_stop++; + rc = __sigp_stop(vcpu, cpu_addr, 1); + break; + case SIGP_SET_ARCH: + vcpu->stat.instruction_sigp_arch++; + rc = __sigp_set_arch(vcpu, parameter); + break; + case SIGP_SET_PREFIX: + vcpu->stat.instruction_sigp_prefix++; + rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, + &vcpu->arch.guest_gprs[r1]); + break; + case SIGP_RESTART: + vcpu->stat.instruction_sigp_restart++; + /* user space must know about restart */ + default: + return -ENOTSUPP; + } + + if (rc < 0) + return rc; + + 
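/* fold the sigp condition code into psw bits 18 and 19 */ + 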
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44; + return 0; +} diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 52084436ab69..ab6735df2d21 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -2,8 +2,6 @@ # Makefile for s390-specific library files.. # -EXTRA_AFLAGS := -traditional - lib-y += delay.o string.o uaccess_std.o uaccess_pt.o obj-$(CONFIG_32BIT) += div64.o qrnnd.o lib-$(CONFIG_64BIT) += uaccess_mvcos.o diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index 6d8772339d76..3f15aaf54855 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -162,6 +162,7 @@ static size_t clear_user_mvcos(size_t size, void __user *to) return size; } +#ifdef CONFIG_S390_SWITCH_AMODE static size_t strnlen_user_mvcos(size_t count, const char __user *src) { char buf[256]; @@ -199,6 +200,7 @@ static size_t strncpy_from_user_mvcos(size_t count, const char __user *src, } while ((len_str == len) && (done < count)); return done; } +#endif /* CONFIG_S390_SWITCH_AMODE */ struct uaccess_ops uaccess_mvcos = { .copy_from_user = copy_from_user_mvcos_check, diff --git a/arch/s390/math-emu/Makefile b/arch/s390/math-emu/Makefile index 73b3e72efc46..c84890341052 100644 --- a/arch/s390/math-emu/Makefile +++ b/arch/s390/math-emu/Makefile @@ -5,4 +5,3 @@ obj-$(CONFIG_MATHEMU) := math.o EXTRA_CFLAGS := -I$(src) -Iinclude/math-emu -w -EXTRA_AFLAGS := -traditional diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 66401930f83e..fb988a48a754 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -4,4 +4,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o obj-$(CONFIG_CMM) += cmm.o - +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index ed2af0a3303b..f231f5ec74b6 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -287,7 +287,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long if (rc < 0) goto out_free; - rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); if (rc) goto out_free; @@ -351,7 +351,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long release_resource(seg->res); kfree(seg->res); out_shared: - remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); out_free: kfree(seg); out: @@ -474,7 +474,7 @@ segment_modify_shared (char *name, int do_nonshared) rc = 0; goto out_unlock; out_del: - remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); kfree(seg); @@ -508,7 +508,7 @@ segment_unload(char *name) goto out_unlock; release_resource(seg->res); kfree(seg->res); - remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); kfree(seg); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2650f46001d0..4d537205e83c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -28,6 +28,7 @@ #include <linux/hardirq.h> #include <linux/kprobes.h> #include <linux/uaccess.h> +#include <linux/hugetlb.h> #include 
<asm/system.h> #include <asm/pgtable.h> #include <asm/s390_ext.h> @@ -367,6 +368,8 @@ good_area: } survive: + if (is_vm_hugetlb_page(vma)) + address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c new file mode 100644 index 000000000000..f4b6124fdb75 --- /dev/null +++ b/arch/s390/mm/hugetlbpage.c @@ -0,0 +1,134 @@ +/* + * IBM System z Huge TLB Page Support for Kernel. + * + * Copyright 2007 IBM Corp. + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <linux/mm.h> +#include <linux/hugetlb.h> + + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *pteptr, pte_t pteval) +{ + pmd_t *pmdp = (pmd_t *) pteptr; + pte_t shadow_pteval = pteval; + unsigned long mask; + + if (!MACHINE_HAS_HPAGE) { + pteptr = (pte_t *) pte_page(pteval)[1].index; + mask = pte_val(pteval) & + (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); + pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; + if (mm->context.noexec) { + pteptr += PTRS_PER_PTE; + pte_val(shadow_pteval) = + (_SEGMENT_ENTRY + __pa(pteptr)) | mask; + } + } + + pmd_val(*pmdp) = pte_val(pteval); + if (mm->context.noexec) { + pmdp = get_shadow_table(pmdp); + pmd_val(*pmdp) = pte_val(shadow_pteval); + } +} + +int arch_prepare_hugepage(struct page *page) +{ + unsigned long addr = page_to_phys(page); + pte_t pte; + pte_t *ptep; + int i; + + if (MACHINE_HAS_HPAGE) + return 0; + + ptep = (pte_t *) pte_alloc_one(&init_mm, addr); + if (!ptep) + return -ENOMEM; + + pte = mk_pte(page, PAGE_RW); + for (i = 0; i < PTRS_PER_PTE; i++) { + set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); + pte_val(pte) += PAGE_SIZE; + } + page[1].index = (unsigned long) ptep; + return 0; +} + +void arch_release_hugepage(struct page *page) +{ + pte_t *ptep; + + if (MACHINE_HAS_HPAGE) + return; + + ptep = (pte_t *) page[1].index; + if (!ptep) + return; + pte_free(&init_mm, ptep); + page[1].index = 0; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp = NULL; + + pgdp = pgd_offset(mm, addr); + pudp = pud_alloc(mm, pgdp, addr); + if (pudp) + pmdp = pmd_alloc(mm, pudp, addr); + return (pte_t *) pmdp; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp = NULL; + + pgdp = pgd_offset(mm, addr); + if (pgd_present(*pgdp)) { + pudp = pud_offset(pgdp, addr); + if (pud_present(*pudp)) + pmdp = pmd_offset(pudp, addr); + } + return (pte_t *) pmdp; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + if (!MACHINE_HAS_HPAGE) + return 0; + + return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); +} + +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmdp, int write) +{ + struct page *page; + + if (!MACHINE_HAS_HPAGE) + return NULL; + + page = pmd_page(*pmdp); + if (page) + page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); + return page; +} diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 202c952a29b4..fa31de6ae97a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -77,28 +77,6 @@ void show_mem(void) printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE)); } -static void __init setup_ro_region(void) -{ - pgd_t 
*pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - pte_t new_pte; - unsigned long address, end; - - address = ((unsigned long)&_stext) & PAGE_MASK; - end = PFN_ALIGN((unsigned long)&_eshared); - - for (; address < end; address += PAGE_SIZE) { - pgd = pgd_offset_k(address); - pud = pud_offset(pgd, address); - pmd = pmd_offset(pud, address); - pte = pte_offset_kernel(pmd, address); - new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO)); - *pte = new_pte; - } -} - /* * paging_init() sets up the page tables */ @@ -121,7 +99,6 @@ void __init paging_init(void) clear_table((unsigned long *) init_mm.pgd, pgd_type, sizeof(unsigned long)*2048); vmem_map_init(); - setup_ro_region(); /* enable virtual mapping in kernel mode */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); @@ -129,6 +106,8 @@ void __init paging_init(void) __ctl_load(S390_lowcore.kernel_asce, 13, 13); __raw_local_irq_ssm(ssm_mask); + sparse_memory_present_with_active_regions(MAX_NUMNODES); + sparse_init(); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index fd072013f88c..5c1aea97cd12 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -30,11 +30,27 @@ #define TABLES_PER_PAGE 4 #define FRAG_MASK 15UL #define SECOND_HALVES 10UL + +void clear_table_pgstes(unsigned long *table) +{ + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); + memset(table + 256, 0, PAGE_SIZE/4); + clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); + memset(table + 768, 0, PAGE_SIZE/4); +} + #else #define ALLOC_ORDER 2 #define TABLES_PER_PAGE 2 #define FRAG_MASK 3UL #define SECOND_HALVES 2UL + +void clear_table_pgstes(unsigned long *table) +{ + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); + memset(table + 256, 0, PAGE_SIZE/2); +} + #endif unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) @@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long *table; unsigned long bits; - bits = mm->context.noexec ? 3UL : 1UL; + bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; spin_lock(&mm->page_table_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { @@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm) pgtable_page_ctor(page); page->flags &= ~FRAG_MASK; table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + if (mm->context.pgstes) + clear_table_pgstes(table); + else + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); spin_lock(&mm->page_table_lock); list_add(&page->lru, &mm->context.pgtable_list); } @@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned long bits; - bits = mm->context.noexec ? 3UL : 1UL; + bits = (mm->context.noexec || mm->context.pgstes) ? 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fd072013f88c..5c1aea97cd12 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
 #define TABLES_PER_PAGE	4
 #define FRAG_MASK	15UL
 #define SECOND_HALVES	10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 256, 0, PAGE_SIZE/4);
+	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 768, 0, PAGE_SIZE/4);
+}
+
 #else
 #define ALLOC_ORDER	2
 #define TABLES_PER_PAGE	2
 #define FRAG_MASK	3UL
 #define SECOND_HALVES	2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+	memset(table + 256, 0, PAGE_SIZE/2);
+}
+
 #endif
 
 unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	unsigned long *table;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	spin_lock(&mm->page_table_lock);
 	page = NULL;
 	if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 		pgtable_page_ctor(page);
 		page->flags &= ~FRAG_MASK;
 		table = (unsigned long *) page_to_phys(page);
-		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		if (mm->context.pgstes)
+			clear_table_pgstes(table);
+		else
+			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		spin_lock(&mm->page_table_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	struct page *page;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 	mm->context.noexec = 0;
 	update_mm(mm, tsk);
 }
+
+/*
+ * switch on pgstes for the current userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm;
+	int rc;
+
+	task_lock(tsk);
+
+	rc = 0;
+	if (tsk->mm->context.pgstes)
+		goto unlock;
+
+	rc = -EINVAL;
+	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+		goto unlock;
+
+	tsk->mm->context.pgstes = 1;	/* dirty little tricks .. */
+	mm = dup_mm(tsk);
+	tsk->mm->context.pgstes = 0;
+
+	rc = -ENOMEM;
+	if (!mm)
+		goto unlock;
+	mmput(tsk->mm);
+	tsk->mm = tsk->active_mm = mm;
+	preempt_disable();
+	update_mm(mm, tsk);
+	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+	preempt_enable();
+	rc = 0;
+unlock:
+	task_unlock(tsk);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
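A note on the layout, since the constants are easy to misread: with mm->context.pgstes set, every page table carries a parallel array of pgste entries that the SIE instruction uses to track guest storage keys and reference state. On 64-bit, clear_table_pgstes() therefore initializes the 256 ptes in the first half of the 4K page and zeroes the 256 pgstes in the second half (table + 256 in units of unsigned long is byte offset 2048); the 31-bit variant does the same for the two 1K tables that share a page. A hypothetical helper, for illustration only (pgste_of() appears nowhere in the patch; 64-bit layout assumed):

static inline unsigned long *pgste_of(pte_t *ptep)
{
	/* the pgste array starts PAGE_SIZE/2 above its pte array */
	return (unsigned long *) ptep + PTRS_PER_PTE;
}

s390_enable_sie() gets pgste-format tables retroactively by building a fresh address space with dup_mm() and swapping it in under the caller; the -EINVAL checks make that swap safe by refusing when a second thread or an outstanding aio context (ioctx_list) could still be using the old mm.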
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 35d90a4720fd..beccacf907f3 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -10,10 +10,12 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/list.h>
+#include <linux/hugetlb.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
+#include <asm/sections.h>
 
 static DEFINE_MUTEX(vmem_mutex);
 
@@ -25,43 +27,6 @@ struct memory_segment {
 
 static LIST_HEAD(mem_segs);
 
-void __meminit memmap_init(unsigned long size, int nid, unsigned long zone,
-			   unsigned long start_pfn)
-{
-	struct page *start, *end;
-	struct page *map_start, *map_end;
-	int i;
-
-	start = pfn_to_page(start_pfn);
-	end = start + size;
-
-	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
-		unsigned long cstart, cend;
-
-		cstart = PFN_DOWN(memory_chunk[i].addr);
-		cend = cstart + PFN_DOWN(memory_chunk[i].size);
-
-		map_start = mem_map + cstart;
-		map_end = mem_map + cend;
-
-		if (map_start < start)
-			map_start = start;
-		if (map_end > end)
-			map_end = end;
-
-		map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1))
-			/ sizeof(struct page);
-		map_end += ((PFN_ALIGN((unsigned long) map_end)
-			     - (unsigned long) map_end)
-			    / sizeof(struct page));
-
-		if (map_start < map_end)
-			memmap_init_zone((unsigned long)(map_end - map_start),
-					 nid, zone, page_to_pfn(map_start),
-					 MEMMAP_EARLY);
-	}
-}
-
 static void __ref *vmem_alloc_pages(unsigned int order)
 {
 	if (slab_is_available())
@@ -77,8 +42,7 @@
 	pud = vmem_alloc_pages(2);
 	if (!pud)
 		return NULL;
-	pud_val(*pud) = _REGION3_ENTRY_EMPTY;
-	memcpy(pud + 1, pud, (PTRS_PER_PUD - 1)*sizeof(pud_t));
+	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
 #endif
 	return pud;
 }
@@ -91,7 +55,7 @@
 	pmd = vmem_alloc_pages(2);
 	if (!pmd)
 		return NULL;
-	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE*4);
+	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
 #endif
 	return pmd;
 }
@@ -114,7 +78,7 @@ static pte_t __init_refok *vmem_pte_alloc(void)
 /*
  * Add a physical memory range to the 1:1 mapping.
  */
-static int vmem_add_range(unsigned long start, unsigned long size)
+static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 {
 	unsigned long address;
 	pgd_t *pg_dir;
@@ -141,7 +105,19 @@ static int vmem_add_range(unsigned long start, unsigned long size)
 			pud_populate_kernel(&init_mm, pu_dir, pm_dir);
 		}
 
+		pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
 		pm_dir = pmd_offset(pu_dir, address);
+
+#ifdef __s390x__
+		if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
+		    (address + HPAGE_SIZE <= start + size) &&
+		    (address >= HPAGE_SIZE)) {
+			pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
+			pmd_val(*pm_dir) = pte_val(pte);
+			address += HPAGE_SIZE - PAGE_SIZE;
+			continue;
+		}
+#endif
 		if (pmd_none(*pm_dir)) {
 			pt_dir = vmem_pte_alloc();
 			if (!pt_dir)
@@ -150,7 +126,6 @@ static int vmem_add_range(unsigned long start, unsigned long size)
 		}
 
 		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
 		*pt_dir = pte;
 	}
 	ret = 0;
@@ -181,6 +156,13 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 		pm_dir = pmd_offset(pu_dir, address);
 		if (pmd_none(*pm_dir))
 			continue;
+
+		if (pmd_huge(*pm_dir)) {
+			pmd_clear_kernel(pm_dir);
+			address += HPAGE_SIZE - PAGE_SIZE;
+			continue;
+		}
+
 		pt_dir = pte_offset_kernel(pm_dir, address);
 		*pt_dir = pte;
 	}
@@ -190,10 +172,9 @@
 /*
  * Add a backed mem_map array to the virtual mem_map array.
  */
-static int vmem_add_mem_map(unsigned long start, unsigned long size)
+int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
 {
 	unsigned long address, start_addr, end_addr;
-	struct page *map_start, *map_end;
 	pgd_t *pg_dir;
 	pud_t *pu_dir;
 	pmd_t *pm_dir;
@@ -201,11 +182,8 @@
 	pte_t pte;
 	int ret = -ENOMEM;
 
-	map_start = VMEM_MAP + PFN_DOWN(start);
-	map_end = VMEM_MAP + PFN_DOWN(start + size);
-
-	start_addr = (unsigned long) map_start & PAGE_MASK;
-	end_addr = PFN_ALIGN((unsigned long) map_end);
+	start_addr = (unsigned long) start;
+	end_addr = (unsigned long) (start + nr);
 
 	for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
 		pg_dir = pgd_offset_k(address);
@@ -249,16 +227,6 @@ out:
 	return ret;
 }
 
-static int vmem_add_mem(unsigned long start, unsigned long size)
-{
-	int ret;
-
-	ret = vmem_add_mem_map(start, size);
-	if (ret)
-		return ret;
-	return vmem_add_range(start, size);
-}
-
 /*
  * Add memory segment to the segment list if it doesn't overlap with
  * an already present segment.
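The new large-page branch in vmem_add_mem() lets the kernel's 1:1 mapping itself use 1 MB segments (HPAGE_SIZE on s390): one segment entry tagged _SEGMENT_ENTRY_LARGE replaces a whole pte table. Restated as a predicate, for illustration only (can_use_large_pte() is not part of the patch):

static int can_use_large_pte(unsigned long address,
			     unsigned long start, unsigned long size)
{
	return (address & ~HPAGE_MASK) == 0 &&		/* segment aligned */
	       address + HPAGE_SIZE <= start + size &&	/* whole segment fits the range */
	       address >= HPAGE_SIZE;			/* leave segment 0 small-paged */
}

Keeping the very first segment at 4K granularity presumably preserves page-level control over the mixed content at the bottom of memory; vmem_remove_range() mirrors the branch by clearing the whole segment entry and skipping ahead by HPAGE_SIZE.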
@@ -296,7 +264,7 @@ static void __remove_shared_memory(struct memory_segment *seg)
 	vmem_remove_range(seg->start, seg->size);
 }
 
-int remove_shared_memory(unsigned long start, unsigned long size)
+int vmem_remove_mapping(unsigned long start, unsigned long size)
 {
 	struct memory_segment *seg;
 	int ret;
@@ -320,11 +288,9 @@ out:
 	return ret;
 }
 
-int add_shared_memory(unsigned long start, unsigned long size)
+int vmem_add_mapping(unsigned long start, unsigned long size)
 {
 	struct memory_segment *seg;
-	struct page *page;
-	unsigned long pfn, num_pfn, end_pfn;
 	int ret;
 
 	mutex_lock(&vmem_mutex);
@@ -339,24 +305,9 @@
 	if (ret)
 		goto out_free;
 
-	ret = vmem_add_mem(start, size);
+	ret = vmem_add_mem(start, size, 0);
 	if (ret)
 		goto out_remove;
-
-	pfn = PFN_DOWN(start);
-	num_pfn = PFN_DOWN(size);
-	end_pfn = pfn + num_pfn;
-
-	page = pfn_to_page(pfn);
-	memset(page, 0, num_pfn * sizeof(struct page));
-
-	for (; pfn < end_pfn; pfn++) {
-		page = pfn_to_page(pfn);
-		init_page_count(page);
-		reset_page_mapcount(page);
-		SetPageReserved(page);
-		INIT_LIST_HEAD(&page->lru);
-	}
 	goto out;
 
 out_remove:
@@ -375,14 +326,34 @@ out:
  */
 void __init vmem_map_init(void)
 {
+	unsigned long ro_start, ro_end;
+	unsigned long start, end;
 	int i;
 
 	INIT_LIST_HEAD(&init_mm.context.crst_list);
 	INIT_LIST_HEAD(&init_mm.context.pgtable_list);
 	init_mm.context.noexec = 0;
-	NODE_DATA(0)->node_mem_map = VMEM_MAP;
-	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
-		vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
+	ro_start = ((unsigned long)&_stext) & PAGE_MASK;
+	ro_end = PFN_ALIGN((unsigned long)&_eshared);
+	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+		start = memory_chunk[i].addr;
+		end = memory_chunk[i].addr + memory_chunk[i].size;
+		if (start >= ro_end || end <= ro_start)
+			vmem_add_mem(start, end - start, 0);
+		else if (start >= ro_start && end <= ro_end)
+			vmem_add_mem(start, end - start, 1);
+		else if (start >= ro_start) {
+			vmem_add_mem(start, ro_end - start, 1);
+			vmem_add_mem(ro_end, end - ro_end, 0);
+		} else if (end < ro_end) {
+			vmem_add_mem(start, ro_start - start, 0);
+			vmem_add_mem(ro_start, end - ro_start, 1);
+		} else {
+			vmem_add_mem(start, ro_start - start, 0);
+			vmem_add_mem(ro_start, ro_end - ro_start, 1);
+			vmem_add_mem(ro_end, end - ro_end, 0);
+		}
+	}
 }
 
 /*
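The chunk splitting in vmem_map_init() replaces the removed setup_ro_region(): the range [ro_start, ro_end) from _stext to _eshared is mapped read-only and everything else read-write. A standalone sketch with hypothetical addresses, mirroring the branch structure above:

#include <stdio.h>

/* stand-in for the kernel's vmem_add_mem(); prints its arguments */
static void add(unsigned long start, unsigned long size, int ro)
{
	printf("vmem_add_mem(%#lx, %#lx, %d)\n", start, size, ro);
}

int main(void)
{
	unsigned long ro_start = 0x100000;		/* _stext, say */
	unsigned long ro_end = 0x500000;		/* _eshared, say */
	unsigned long start = 0, end = 0x10000000;	/* one 256 MB chunk */

	if (start >= ro_end || end <= ro_start)
		add(start, end - start, 0);
	else if (start >= ro_start && end <= ro_end)
		add(start, end - start, 1);
	else if (start >= ro_start) {
		add(start, ro_end - start, 1);
		add(ro_end, end - ro_end, 0);
	} else if (end < ro_end) {
		add(start, ro_start - start, 0);
		add(ro_start, end - ro_start, 1);
	} else {
		add(start, ro_start - start, 0);
		add(ro_start, ro_end - ro_start, 1);
		add(ro_end, end - ro_end, 0);
	}
	return 0;
}

For this chunk the final else branch fires and the sketch prints vmem_add_mem(0, 0x100000, 0), vmem_add_mem(0x100000, 0x400000, 1) and vmem_add_mem(0x500000, 0xfb00000, 0): writable below the kernel image, read-only across it, writable above it.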