From c99ec973a63e2249020d6d93a46d7572432da6a2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sat, 27 Oct 2012 19:26:14 +0200 Subject: PPC: ePAPR: Convert header to uapi The new uapi framework splits kernel internal and user space exported bits of header files more cleanly. Adjust the ePAPR header accordingly. Signed-off-by: Alexander Graf --- arch/powerpc/include/uapi/asm/Kbuild | 1 + arch/powerpc/include/uapi/asm/epapr_hcalls.h | 98 ++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 arch/powerpc/include/uapi/asm/epapr_hcalls.h (limited to 'arch/powerpc/include/uapi') diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index baebb3da1d44..e6b5be86e4fa 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -1,3 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +header-y += epapr_hcalls.h diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h new file mode 100644 index 000000000000..046c79364f83 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h @@ -0,0 +1,98 @@ +/* + * ePAPR hcall interface + * + * Copyright 2008-2011 Freescale Semiconductor, Inc. + * + * Author: Timur Tabi + * + * This file is provided under a dual BSD/GPL license. When using or + * redistributing this file, you may do so under either license. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _UAPI__EPAPR_HCALLS_H +#define _UAPI__EPAPR_HCALLS_H + +#define EV_BYTE_CHANNEL_SEND 1 +#define EV_BYTE_CHANNEL_RECEIVE 2 +#define EV_BYTE_CHANNEL_POLL 3 +#define EV_INT_SET_CONFIG 4 +#define EV_INT_GET_CONFIG 5 +#define EV_INT_SET_MASK 6 +#define EV_INT_GET_MASK 7 +#define EV_INT_IACK 9 +#define EV_INT_EOI 10 +#define EV_INT_SEND_IPI 11 +#define EV_INT_SET_TASK_PRIORITY 12 +#define EV_INT_GET_TASK_PRIORITY 13 +#define EV_DOORBELL_SEND 14 +#define EV_MSGSND 15 +#define EV_IDLE 16 + +/* vendor ID: epapr */ +#define EV_LOCAL_VENDOR_ID 0 /* for private use */ +#define EV_EPAPR_VENDOR_ID 1 +#define EV_FSL_VENDOR_ID 2 /* Freescale Semiconductor */ +#define EV_IBM_VENDOR_ID 3 /* IBM */ +#define EV_GHS_VENDOR_ID 4 /* Green Hills Software */ +#define EV_ENEA_VENDOR_ID 5 /* Enea */ +#define EV_WR_VENDOR_ID 6 /* Wind River Systems */ +#define EV_AMCC_VENDOR_ID 7 /* Applied Micro Circuits */ +#define EV_KVM_VENDOR_ID 42 /* KVM */ + +/* The max number of bytes that a byte channel can send or receive per call */ +#define EV_BYTE_CHANNEL_MAX_BYTES 16 + + +#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num)) +#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num) + +/* epapr return codes */ +#define EV_SUCCESS 0 +#define EV_EPERM 1 /* Operation not permitted */ +#define EV_ENOENT 2 /* Entry Not Found */ +#define EV_EIO 3 /* I/O error occured */ +#define EV_EAGAIN 4 /* The operation had insufficient + * resources to complete and should be + * retried + */ +#define EV_ENOMEM 5 /* There was insufficient memory to + * complete the operation */ +#define EV_EFAULT 6 /* Bad guest address */ +#define EV_ENODEV 7 /* No such device */ +#define EV_EINVAL 8 /* An argument supplied to the hcall + was out of range or invalid */ +#define EV_INTERNAL 9 /* An internal error occured */ +#define EV_CONFIG 10 /* A configuration error was detected */ +#define EV_INVALID_STATE 11 /* The object is in an invalid state */ +#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */ +#define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */ + +#endif /* _UAPI__EPAPR_HCALLS_H */ -- cgit v1.2.3 From 63a1909190a3baa0abb9463ef28c8d8c969be951 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 31 Oct 2012 13:37:59 +0100 Subject: PPC: ePAPR: Convert hcall header to uapi (round 2) The new uapi framework splits kernel internal and user space exported bits of header files more cleanly. Adjust the ePAPR header accordingly. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 458 +++++++++++++++++++++++++ arch/powerpc/include/uapi/asm/epapr_hcalls.h | 478 ++++----------------------- 2 files changed, 517 insertions(+), 419 deletions(-) create mode 100644 arch/powerpc/include/asm/epapr_hcalls.h (limited to 'arch/powerpc/include/uapi') diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h new file mode 100644 index 000000000000..d3d634274d2c --- /dev/null +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -0,0 +1,458 @@ +/* + * ePAPR hcall interface + * + * Copyright 2008-2011 Freescale Semiconductor, Inc. + * + * Author: Timur Tabi + * + * This file is provided under a dual BSD/GPL license. When using or + * redistributing this file, you may do so under either license. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* A "hypercall" is an "sc 1" instruction. This header file file provides C + * wrapper functions for the ePAPR hypervisor interface. It is inteded + * for use by Linux device drivers and other operating systems. + * + * The hypercalls are implemented as inline assembly, rather than assembly + * language functions in a .S file, for optimization. It allows + * the caller to issue the hypercall instruction directly, improving both + * performance and memory footprint. + */ + +#ifndef _EPAPR_HCALLS_H +#define _EPAPR_HCALLS_H + +#include + +#ifndef __ASSEMBLY__ +#include +#include +#include + +/* + * Hypercall register clobber list + * + * These macros are used to define the list of clobbered registers during a + * hypercall. Technically, registers r0 and r3-r12 are always clobbered, + * but the gcc inline assembly syntax does not allow us to specify registers + * on the clobber list that are also on the input/output list. Therefore, + * the lists of clobbered registers depends on the number of register + * parmeters ("+r" and "=r") passed to the hypercall. + * + * Each assembly block should use one of the HCALL_CLOBBERSx macros. As a + * general rule, 'x' is the number of parameters passed to the assembly + * block *except* for r11. + * + * If you're not sure, just use the smallest value of 'x' that does not + * generate a compilation error. Because these are static inline functions, + * the compiler will only check the clobber list for a function if you + * compile code that calls that function. + * + * r3 and r11 are not included in any clobbers list because they are always + * listed as output registers. + * + * XER, CTR, and LR are currently listed as clobbers because it's uncertain + * whether they will be clobbered. + * + * Note that r11 can be used as an output parameter. + * + * The "memory" clobber is only necessary for hcalls where the Hypervisor + * will read or write guest memory. However, we add it to all hcalls because + * the impact is minimal, and we want to ensure that it's present for the + * hcalls that need it. +*/ + +/* List of common clobbered registers. Do not use this macro. */ +#define EV_HCALL_CLOBBERS "r0", "r12", "xer", "ctr", "lr", "cc", "memory" + +#define EV_HCALL_CLOBBERS8 EV_HCALL_CLOBBERS +#define EV_HCALL_CLOBBERS7 EV_HCALL_CLOBBERS8, "r10" +#define EV_HCALL_CLOBBERS6 EV_HCALL_CLOBBERS7, "r9" +#define EV_HCALL_CLOBBERS5 EV_HCALL_CLOBBERS6, "r8" +#define EV_HCALL_CLOBBERS4 EV_HCALL_CLOBBERS5, "r7" +#define EV_HCALL_CLOBBERS3 EV_HCALL_CLOBBERS4, "r6" +#define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5" +#define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4" + +extern bool epapr_paravirt_enabled; +extern u32 epapr_hypercall_start[]; + +/* + * We use "uintptr_t" to define a register because it's guaranteed to be a + * 32-bit integer on a 32-bit platform, and a 64-bit integer on a 64-bit + * platform. + * + * All registers are either input/output or output only. Registers that are + * initialized before making the hypercall are input/output. All + * input/output registers are represented with "+r". Output-only registers + * are represented with "=r". Do not specify any unused registers. The + * clobber list will tell the compiler that the hypercall modifies those + * registers, which is good enough. + */ + +/** + * ev_int_set_config - configure the specified interrupt + * @interrupt: the interrupt number + * @config: configuration for this interrupt + * @priority: interrupt priority + * @destination: destination CPU number + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_set_config(unsigned int interrupt, + uint32_t config, unsigned int priority, uint32_t destination) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + register uintptr_t r5 __asm__("r5"); + register uintptr_t r6 __asm__("r6"); + + r11 = EV_HCALL_TOKEN(EV_INT_SET_CONFIG); + r3 = interrupt; + r4 = config; + r5 = priority; + r6 = destination; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6) + : : EV_HCALL_CLOBBERS4 + ); + + return r3; +} + +/** + * ev_int_get_config - return the config of the specified interrupt + * @interrupt: the interrupt number + * @config: returned configuration for this interrupt + * @priority: returned interrupt priority + * @destination: returned destination CPU number + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_get_config(unsigned int interrupt, + uint32_t *config, unsigned int *priority, uint32_t *destination) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + register uintptr_t r5 __asm__("r5"); + register uintptr_t r6 __asm__("r6"); + + r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG); + r3 = interrupt; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6) + : : EV_HCALL_CLOBBERS4 + ); + + *config = r4; + *priority = r5; + *destination = r6; + + return r3; +} + +/** + * ev_int_set_mask - sets the mask for the specified interrupt source + * @interrupt: the interrupt number + * @mask: 0=enable interrupts, 1=disable interrupts + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_set_mask(unsigned int interrupt, + unsigned int mask) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + + r11 = EV_HCALL_TOKEN(EV_INT_SET_MASK); + r3 = interrupt; + r4 = mask; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "+r" (r4) + : : EV_HCALL_CLOBBERS2 + ); + + return r3; +} + +/** + * ev_int_get_mask - returns the mask for the specified interrupt source + * @interrupt: the interrupt number + * @mask: returned mask for this interrupt (0=enabled, 1=disabled) + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_get_mask(unsigned int interrupt, + unsigned int *mask) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + + r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK); + r3 = interrupt; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "=r" (r4) + : : EV_HCALL_CLOBBERS2 + ); + + *mask = r4; + + return r3; +} + +/** + * ev_int_eoi - signal the end of interrupt processing + * @interrupt: the interrupt number + * + * This function signals the end of processing for the the specified + * interrupt, which must be the interrupt currently in service. By + * definition, this is also the highest-priority interrupt. + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_eoi(unsigned int interrupt) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + + r11 = EV_HCALL_TOKEN(EV_INT_EOI); + r3 = interrupt; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3) + : : EV_HCALL_CLOBBERS1 + ); + + return r3; +} + +/** + * ev_byte_channel_send - send characters to a byte stream + * @handle: byte stream handle + * @count: (input) num of chars to send, (output) num chars sent + * @buffer: pointer to a 16-byte buffer + * + * @buffer must be at least 16 bytes long, because all 16 bytes will be + * read from memory into registers, even if count < 16. + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_byte_channel_send(unsigned int handle, + unsigned int *count, const char buffer[EV_BYTE_CHANNEL_MAX_BYTES]) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + register uintptr_t r5 __asm__("r5"); + register uintptr_t r6 __asm__("r6"); + register uintptr_t r7 __asm__("r7"); + register uintptr_t r8 __asm__("r8"); + const uint32_t *p = (const uint32_t *) buffer; + + r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_SEND); + r3 = handle; + r4 = *count; + r5 = be32_to_cpu(p[0]); + r6 = be32_to_cpu(p[1]); + r7 = be32_to_cpu(p[2]); + r8 = be32_to_cpu(p[3]); + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), + "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8) + : : EV_HCALL_CLOBBERS6 + ); + + *count = r4; + + return r3; +} + +/** + * ev_byte_channel_receive - fetch characters from a byte channel + * @handle: byte channel handle + * @count: (input) max num of chars to receive, (output) num chars received + * @buffer: pointer to a 16-byte buffer + * + * The size of @buffer must be at least 16 bytes, even if you request fewer + * than 16 characters, because we always write 16 bytes to @buffer. This is + * for performance reasons. + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_byte_channel_receive(unsigned int handle, + unsigned int *count, char buffer[EV_BYTE_CHANNEL_MAX_BYTES]) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + register uintptr_t r5 __asm__("r5"); + register uintptr_t r6 __asm__("r6"); + register uintptr_t r7 __asm__("r7"); + register uintptr_t r8 __asm__("r8"); + uint32_t *p = (uint32_t *) buffer; + + r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_RECEIVE); + r3 = handle; + r4 = *count; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "+r" (r4), + "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8) + : : EV_HCALL_CLOBBERS6 + ); + + *count = r4; + p[0] = cpu_to_be32(r5); + p[1] = cpu_to_be32(r6); + p[2] = cpu_to_be32(r7); + p[3] = cpu_to_be32(r8); + + return r3; +} + +/** + * ev_byte_channel_poll - returns the status of the byte channel buffers + * @handle: byte channel handle + * @rx_count: returned count of bytes in receive queue + * @tx_count: returned count of free space in transmit queue + * + * This function reports the amount of data in the receive queue (i.e. the + * number of bytes you can read), and the amount of free space in the transmit + * queue (i.e. the number of bytes you can write). + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_byte_channel_poll(unsigned int handle, + unsigned int *rx_count, unsigned int *tx_count) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + register uintptr_t r5 __asm__("r5"); + + r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL); + r3 = handle; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5) + : : EV_HCALL_CLOBBERS3 + ); + + *rx_count = r4; + *tx_count = r5; + + return r3; +} + +/** + * ev_int_iack - acknowledge an interrupt + * @handle: handle to the target interrupt controller + * @vector: returned interrupt vector + * + * If handle is zero, the function returns the next interrupt source + * number to be handled irrespective of the hierarchy or cascading + * of interrupt controllers. If non-zero, specifies a handle to the + * interrupt controller that is the target of the acknowledge. + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_iack(unsigned int handle, + unsigned int *vector) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + + r11 = EV_HCALL_TOKEN(EV_INT_IACK); + r3 = handle; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "=r" (r4) + : : EV_HCALL_CLOBBERS2 + ); + + *vector = r4; + + return r3; +} + +/** + * ev_doorbell_send - send a doorbell to another partition + * @handle: doorbell send handle + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_doorbell_send(unsigned int handle) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + + r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND); + r3 = handle; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3) + : : EV_HCALL_CLOBBERS1 + ); + + return r3; +} + +/** + * ev_idle -- wait for next interrupt on this core + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_idle(void) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + + r11 = EV_HCALL_TOKEN(EV_IDLE); + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "=r" (r3) + : : EV_HCALL_CLOBBERS1 + ); + + return r3; +} +#endif /* !__ASSEMBLY__ */ +#endif /* _EPAPR_HCALLS_H */ diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h index 58997afcd085..7f9c74b46704 100644 --- a/arch/powerpc/include/uapi/asm/epapr_hcalls.h +++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h @@ -37,422 +37,62 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* A "hypercall" is an "sc 1" instruction. This header file file provides C - * wrapper functions for the ePAPR hypervisor interface. It is inteded - * for use by Linux device drivers and other operating systems. - * - * The hypercalls are implemented as inline assembly, rather than assembly - * language functions in a .S file, for optimization. It allows - * the caller to issue the hypercall instruction directly, improving both - * performance and memory footprint. - */ - -#ifndef _EPAPR_HCALLS_H -#define _EPAPR_HCALLS_H - -#include - -#ifndef __ASSEMBLY__ -#include -#include -#include - -/* - * Hypercall register clobber list - * - * These macros are used to define the list of clobbered registers during a - * hypercall. Technically, registers r0 and r3-r12 are always clobbered, - * but the gcc inline assembly syntax does not allow us to specify registers - * on the clobber list that are also on the input/output list. Therefore, - * the lists of clobbered registers depends on the number of register - * parmeters ("+r" and "=r") passed to the hypercall. - * - * Each assembly block should use one of the HCALL_CLOBBERSx macros. As a - * general rule, 'x' is the number of parameters passed to the assembly - * block *except* for r11. - * - * If you're not sure, just use the smallest value of 'x' that does not - * generate a compilation error. Because these are static inline functions, - * the compiler will only check the clobber list for a function if you - * compile code that calls that function. - * - * r3 and r11 are not included in any clobbers list because they are always - * listed as output registers. - * - * XER, CTR, and LR are currently listed as clobbers because it's uncertain - * whether they will be clobbered. - * - * Note that r11 can be used as an output parameter. - * - * The "memory" clobber is only necessary for hcalls where the Hypervisor - * will read or write guest memory. However, we add it to all hcalls because - * the impact is minimal, and we want to ensure that it's present for the - * hcalls that need it. -*/ - -/* List of common clobbered registers. Do not use this macro. */ -#define EV_HCALL_CLOBBERS "r0", "r12", "xer", "ctr", "lr", "cc", "memory" - -#define EV_HCALL_CLOBBERS8 EV_HCALL_CLOBBERS -#define EV_HCALL_CLOBBERS7 EV_HCALL_CLOBBERS8, "r10" -#define EV_HCALL_CLOBBERS6 EV_HCALL_CLOBBERS7, "r9" -#define EV_HCALL_CLOBBERS5 EV_HCALL_CLOBBERS6, "r8" -#define EV_HCALL_CLOBBERS4 EV_HCALL_CLOBBERS5, "r7" -#define EV_HCALL_CLOBBERS3 EV_HCALL_CLOBBERS4, "r6" -#define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5" -#define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4" - -extern bool epapr_paravirt_enabled; -extern u32 epapr_hypercall_start[]; - -/* - * We use "uintptr_t" to define a register because it's guaranteed to be a - * 32-bit integer on a 32-bit platform, and a 64-bit integer on a 64-bit - * platform. - * - * All registers are either input/output or output only. Registers that are - * initialized before making the hypercall are input/output. All - * input/output registers are represented with "+r". Output-only registers - * are represented with "=r". Do not specify any unused registers. The - * clobber list will tell the compiler that the hypercall modifies those - * registers, which is good enough. - */ - -/** - * ev_int_set_config - configure the specified interrupt - * @interrupt: the interrupt number - * @config: configuration for this interrupt - * @priority: interrupt priority - * @destination: destination CPU number - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_set_config(unsigned int interrupt, - uint32_t config, unsigned int priority, uint32_t destination) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - register uintptr_t r5 __asm__("r5"); - register uintptr_t r6 __asm__("r6"); - - r11 = EV_HCALL_TOKEN(EV_INT_SET_CONFIG); - r3 = interrupt; - r4 = config; - r5 = priority; - r6 = destination; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6) - : : EV_HCALL_CLOBBERS4 - ); - - return r3; -} - -/** - * ev_int_get_config - return the config of the specified interrupt - * @interrupt: the interrupt number - * @config: returned configuration for this interrupt - * @priority: returned interrupt priority - * @destination: returned destination CPU number - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_get_config(unsigned int interrupt, - uint32_t *config, unsigned int *priority, uint32_t *destination) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - register uintptr_t r5 __asm__("r5"); - register uintptr_t r6 __asm__("r6"); - - r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG); - r3 = interrupt; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6) - : : EV_HCALL_CLOBBERS4 - ); - - *config = r4; - *priority = r5; - *destination = r6; - - return r3; -} - -/** - * ev_int_set_mask - sets the mask for the specified interrupt source - * @interrupt: the interrupt number - * @mask: 0=enable interrupts, 1=disable interrupts - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_set_mask(unsigned int interrupt, - unsigned int mask) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - - r11 = EV_HCALL_TOKEN(EV_INT_SET_MASK); - r3 = interrupt; - r4 = mask; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "+r" (r4) - : : EV_HCALL_CLOBBERS2 - ); - - return r3; -} - -/** - * ev_int_get_mask - returns the mask for the specified interrupt source - * @interrupt: the interrupt number - * @mask: returned mask for this interrupt (0=enabled, 1=disabled) - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_get_mask(unsigned int interrupt, - unsigned int *mask) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - - r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK); - r3 = interrupt; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "=r" (r4) - : : EV_HCALL_CLOBBERS2 - ); - - *mask = r4; - - return r3; -} - -/** - * ev_int_eoi - signal the end of interrupt processing - * @interrupt: the interrupt number - * - * This function signals the end of processing for the the specified - * interrupt, which must be the interrupt currently in service. By - * definition, this is also the highest-priority interrupt. - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_eoi(unsigned int interrupt) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - - r11 = EV_HCALL_TOKEN(EV_INT_EOI); - r3 = interrupt; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3) - : : EV_HCALL_CLOBBERS1 - ); - - return r3; -} - -/** - * ev_byte_channel_send - send characters to a byte stream - * @handle: byte stream handle - * @count: (input) num of chars to send, (output) num chars sent - * @buffer: pointer to a 16-byte buffer - * - * @buffer must be at least 16 bytes long, because all 16 bytes will be - * read from memory into registers, even if count < 16. - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_byte_channel_send(unsigned int handle, - unsigned int *count, const char buffer[EV_BYTE_CHANNEL_MAX_BYTES]) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - register uintptr_t r5 __asm__("r5"); - register uintptr_t r6 __asm__("r6"); - register uintptr_t r7 __asm__("r7"); - register uintptr_t r8 __asm__("r8"); - const uint32_t *p = (const uint32_t *) buffer; - - r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_SEND); - r3 = handle; - r4 = *count; - r5 = be32_to_cpu(p[0]); - r6 = be32_to_cpu(p[1]); - r7 = be32_to_cpu(p[2]); - r8 = be32_to_cpu(p[3]); - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), - "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8) - : : EV_HCALL_CLOBBERS6 - ); - - *count = r4; - - return r3; -} - -/** - * ev_byte_channel_receive - fetch characters from a byte channel - * @handle: byte channel handle - * @count: (input) max num of chars to receive, (output) num chars received - * @buffer: pointer to a 16-byte buffer - * - * The size of @buffer must be at least 16 bytes, even if you request fewer - * than 16 characters, because we always write 16 bytes to @buffer. This is - * for performance reasons. - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_byte_channel_receive(unsigned int handle, - unsigned int *count, char buffer[EV_BYTE_CHANNEL_MAX_BYTES]) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - register uintptr_t r5 __asm__("r5"); - register uintptr_t r6 __asm__("r6"); - register uintptr_t r7 __asm__("r7"); - register uintptr_t r8 __asm__("r8"); - uint32_t *p = (uint32_t *) buffer; - - r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_RECEIVE); - r3 = handle; - r4 = *count; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "+r" (r4), - "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8) - : : EV_HCALL_CLOBBERS6 - ); - - *count = r4; - p[0] = cpu_to_be32(r5); - p[1] = cpu_to_be32(r6); - p[2] = cpu_to_be32(r7); - p[3] = cpu_to_be32(r8); - - return r3; -} - -/** - * ev_byte_channel_poll - returns the status of the byte channel buffers - * @handle: byte channel handle - * @rx_count: returned count of bytes in receive queue - * @tx_count: returned count of free space in transmit queue - * - * This function reports the amount of data in the receive queue (i.e. the - * number of bytes you can read), and the amount of free space in the transmit - * queue (i.e. the number of bytes you can write). - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_byte_channel_poll(unsigned int handle, - unsigned int *rx_count, unsigned int *tx_count) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - register uintptr_t r5 __asm__("r5"); - - r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL); - r3 = handle; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5) - : : EV_HCALL_CLOBBERS3 - ); - - *rx_count = r4; - *tx_count = r5; - - return r3; -} - -/** - * ev_int_iack - acknowledge an interrupt - * @handle: handle to the target interrupt controller - * @vector: returned interrupt vector - * - * If handle is zero, the function returns the next interrupt source - * number to be handled irrespective of the hierarchy or cascading - * of interrupt controllers. If non-zero, specifies a handle to the - * interrupt controller that is the target of the acknowledge. - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_iack(unsigned int handle, - unsigned int *vector) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - - r11 = EV_HCALL_TOKEN(EV_INT_IACK); - r3 = handle; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "=r" (r4) - : : EV_HCALL_CLOBBERS2 - ); - - *vector = r4; - - return r3; -} - -/** - * ev_doorbell_send - send a doorbell to another partition - * @handle: doorbell send handle - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_doorbell_send(unsigned int handle) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - - r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND); - r3 = handle; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3) - : : EV_HCALL_CLOBBERS1 - ); - - return r3; -} - -/** - * ev_idle -- wait for next interrupt on this core - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_idle(void) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - - r11 = EV_HCALL_TOKEN(EV_IDLE); - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "=r" (r3) - : : EV_HCALL_CLOBBERS1 - ); - - return r3; -} -#endif /* !__ASSEMBLY__ */ -#endif +#ifndef _UAPI_ASM_POWERPC_EPAPR_HCALLS_H +#define _UAPI_ASM_POWERPC_EPAPR_HCALLS_H + +#define EV_BYTE_CHANNEL_SEND 1 +#define EV_BYTE_CHANNEL_RECEIVE 2 +#define EV_BYTE_CHANNEL_POLL 3 +#define EV_INT_SET_CONFIG 4 +#define EV_INT_GET_CONFIG 5 +#define EV_INT_SET_MASK 6 +#define EV_INT_GET_MASK 7 +#define EV_INT_IACK 9 +#define EV_INT_EOI 10 +#define EV_INT_SEND_IPI 11 +#define EV_INT_SET_TASK_PRIORITY 12 +#define EV_INT_GET_TASK_PRIORITY 13 +#define EV_DOORBELL_SEND 14 +#define EV_MSGSND 15 +#define EV_IDLE 16 + +/* vendor ID: epapr */ +#define EV_LOCAL_VENDOR_ID 0 /* for private use */ +#define EV_EPAPR_VENDOR_ID 1 +#define EV_FSL_VENDOR_ID 2 /* Freescale Semiconductor */ +#define EV_IBM_VENDOR_ID 3 /* IBM */ +#define EV_GHS_VENDOR_ID 4 /* Green Hills Software */ +#define EV_ENEA_VENDOR_ID 5 /* Enea */ +#define EV_WR_VENDOR_ID 6 /* Wind River Systems */ +#define EV_AMCC_VENDOR_ID 7 /* Applied Micro Circuits */ +#define EV_KVM_VENDOR_ID 42 /* KVM */ + +/* The max number of bytes that a byte channel can send or receive per call */ +#define EV_BYTE_CHANNEL_MAX_BYTES 16 + + +#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num)) +#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num) + +/* epapr return codes */ +#define EV_SUCCESS 0 +#define EV_EPERM 1 /* Operation not permitted */ +#define EV_ENOENT 2 /* Entry Not Found */ +#define EV_EIO 3 /* I/O error occured */ +#define EV_EAGAIN 4 /* The operation had insufficient + * resources to complete and should be + * retried + */ +#define EV_ENOMEM 5 /* There was insufficient memory to + * complete the operation */ +#define EV_EFAULT 6 /* Bad guest address */ +#define EV_ENODEV 7 /* No such device */ +#define EV_EINVAL 8 /* An argument supplied to the hcall + was out of range or invalid */ +#define EV_INTERNAL 9 /* An internal error occured */ +#define EV_CONFIG 10 /* A configuration error was detected */ +#define EV_INVALID_STATE 11 /* The object is in an invalid state */ +#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */ +#define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */ + +#endif /* _UAPI_ASM_POWERPC_EPAPR_HCALLS_H */ -- cgit v1.2.3 From a2932923ccf63c419c77aaa18ac09be98f2c94d8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 19 Nov 2012 22:57:20 +0000 Subject: KVM: PPC: Book3S HV: Provide a method for userspace to read and write the HPT A new ioctl, KVM_PPC_GET_HTAB_FD, returns a file descriptor. Reads on this fd return the contents of the HPT (hashed page table), writes create and/or remove entries in the HPT. There is a new capability, KVM_CAP_PPC_HTAB_FD, to indicate the presence of the ioctl. The ioctl takes an argument structure with the index of the first HPT entry to read out and a set of flags. The flags indicate whether the user is intending to read or write the HPT, and whether to return all entries or only the "bolted" entries (those with the bolted bit, 0x10, set in the first doubleword). This is intended for use in implementing qemu's savevm/loadvm and for live migration. Therefore, on reads, the first pass returns information about all HPTEs (or all bolted HPTEs). When the first pass reaches the end of the HPT, it returns from the read. Subsequent reads only return information about HPTEs that have changed since they were last read. A read that finds no changed HPTEs in the HPT following where the last read finished will return 0 bytes. The format of the data provides a simple run-length compression of the invalid entries. Each block of data starts with a header that indicates the index (position in the HPT, which is just an array), the number of valid entries starting at that index (may be zero), and the number of invalid entries following those valid entries. The valid entries, 16 bytes each, follow the header. The invalid entries are not explicitly represented. Signed-off-by: Paul Mackerras [agraf: fix documentation] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 54 +++++ arch/powerpc/include/asm/kvm_book3s_64.h | 22 ++ arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/include/uapi/asm/kvm.h | 25 +++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 344 +++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 12 -- arch/powerpc/kvm/powerpc.c | 17 ++ include/uapi/linux/kvm.h | 3 + 8 files changed, 467 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6671fdc0afb1..a5607c571cb3 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2071,6 +2071,60 @@ KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm Note that the vcpu ioctl is asynchronous to vcpu execution. +4.78 KVM_PPC_GET_HTAB_FD + +Capability: KVM_CAP_PPC_HTAB_FD +Architectures: powerpc +Type: vm ioctl +Parameters: Pointer to struct kvm_get_htab_fd (in) +Returns: file descriptor number (>= 0) on success, -1 on error + +This returns a file descriptor that can be used either to read out the +entries in the guest's hashed page table (HPT), or to write entries to +initialize the HPT. The returned fd can only be written to if the +KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and +can only be read if that bit is clear. The argument struct looks like +this: + +/* For KVM_PPC_GET_HTAB_FD */ +struct kvm_get_htab_fd { + __u64 flags; + __u64 start_index; + __u64 reserved[2]; +}; + +/* Values for kvm_get_htab_fd.flags */ +#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1) +#define KVM_GET_HTAB_WRITE ((__u64)0x2) + +The `start_index' field gives the index in the HPT of the entry at +which to start reading. It is ignored when writing. + +Reads on the fd will initially supply information about all +"interesting" HPT entries. Interesting entries are those with the +bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise +all entries. When the end of the HPT is reached, the read() will +return. If read() is called again on the fd, it will start again from +the beginning of the HPT, but will only return HPT entries that have +changed since they were last read. + +Data read or written is structured as a header (8 bytes) followed by a +series of valid HPT entries (16 bytes) each. The header indicates how +many valid HPT entries there are and how many invalid entries follow +the valid entries. The invalid entries are not represented explicitly +in the stream. The header format is: + +struct kvm_get_htab_header { + __u32 index; + __u16 n_valid; + __u16 n_invalid; +}; + +Writes to the fd create HPT entries starting at the index given in the +header; first `n_valid' valid entries with contents from the data +written, then `n_invalid' invalid entries, invalidating any previously +valid entries found. + 5. The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index b322e5bd6964..38bec1dc9928 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -246,4 +246,26 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, return !(memslot->base_gfn & mask) && !(memslot->npages & mask); } +/* + * This works for 4k, 64k and 16M pages on POWER7, + * and 4k and 16M pages on PPC970. + */ +static inline unsigned long slb_pgsize_encoding(unsigned long psize) +{ + unsigned long senc = 0; + + if (psize > 0x1000) { + senc = SLB_VSID_L; + if (psize == 0x10000) + senc |= SLB_VSID_LP_01; + } + return senc; +} + +static inline int is_vrma_hpte(unsigned long hpte_v) +{ + return (hpte_v & ~0xffffffUL) == + (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 609cca3e9426..1ca31e92ee75 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -164,6 +164,8 @@ extern void kvmppc_bookehv_exit(void); extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); +extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index b89ae4db45ce..514883dd311e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -331,6 +331,31 @@ struct kvm_book3e_206_tlb_params { __u32 reserved[8]; }; +/* For KVM_PPC_GET_HTAB_FD */ +struct kvm_get_htab_fd { + __u64 flags; + __u64 start_index; + __u64 reserved[2]; +}; + +/* Values for kvm_get_htab_fd.flags */ +#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1) +#define KVM_GET_HTAB_WRITE ((__u64)0x2) + +/* + * Data read on the file descriptor is formatted as a series of + * records, each consisting of a header followed by a series of + * `n_valid' HPTEs (16 bytes each), which are all valid. Following + * those valid HPTEs there are `n_invalid' invalid HPTEs, which + * are not represented explicitly in the stream. The same format + * is used for writing. + */ +struct kvm_get_htab_header { + __u32 index; + __u16 n_valid; + __u16 n_invalid; +}; + #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 6ee6516a0bee..0aa40734c8f6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -1145,6 +1147,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) put_page(page); } +/* + * Functions for reading and writing the hash table via reads and + * writes on a file descriptor. + * + * Reads return the guest view of the hash table, which has to be + * pieced together from the real hash table and the guest_rpte + * values in the revmap array. + * + * On writes, each HPTE written is considered in turn, and if it + * is valid, it is written to the HPT as if an H_ENTER with the + * exact flag set was done. When the invalid count is non-zero + * in the header written to the stream, the kernel will make + * sure that that many HPTEs are invalid, and invalidate them + * if not. + */ + +struct kvm_htab_ctx { + unsigned long index; + unsigned long flags; + struct kvm *kvm; + int first_pass; +}; + +#define HPTE_SIZE (2 * sizeof(unsigned long)) + +static long record_hpte(unsigned long flags, unsigned long *hptp, + unsigned long *hpte, struct revmap_entry *revp, + int want_valid, int first_pass) +{ + unsigned long v, r; + int ok = 1; + int valid, dirty; + + /* Unmodified entries are uninteresting except on the first pass */ + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + if (!first_pass && !dirty) + return 0; + + valid = 0; + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { + valid = 1; + if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && + !(hptp[0] & HPTE_V_BOLTED)) + valid = 0; + } + if (valid != want_valid) + return 0; + + v = r = 0; + if (valid || dirty) { + /* lock the HPTE so it's stable and read it */ + preempt_disable(); + while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) + cpu_relax(); + v = hptp[0]; + if (v & HPTE_V_ABSENT) { + v &= ~HPTE_V_ABSENT; + v |= HPTE_V_VALID; + } + /* re-evaluate valid and dirty from synchronized HPTE value */ + valid = !!(v & HPTE_V_VALID); + if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) + valid = 0; + r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + /* only clear modified if this is the right sort of entry */ + if (valid == want_valid && dirty) { + r &= ~HPTE_GR_MODIFIED; + revp->guest_rpte = r; + } + asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); + hptp[0] &= ~HPTE_V_HVLOCK; + preempt_enable(); + if (!(valid == want_valid && (first_pass || dirty))) + ok = 0; + } + hpte[0] = v; + hpte[1] = r; + return ok; +} + +static ssize_t kvm_htab_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct kvm_htab_ctx *ctx = file->private_data; + struct kvm *kvm = ctx->kvm; + struct kvm_get_htab_header hdr; + unsigned long *hptp; + struct revmap_entry *revp; + unsigned long i, nb, nw; + unsigned long __user *lbuf; + struct kvm_get_htab_header __user *hptr; + unsigned long flags; + int first_pass; + unsigned long hpte[2]; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + first_pass = ctx->first_pass; + flags = ctx->flags; + + i = ctx->index; + hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + revp = kvm->arch.revmap + i; + lbuf = (unsigned long __user *)buf; + + nb = 0; + while (nb + sizeof(hdr) + HPTE_SIZE < count) { + /* Initialize header */ + hptr = (struct kvm_get_htab_header __user *)buf; + hdr.index = i; + hdr.n_valid = 0; + hdr.n_invalid = 0; + nw = nb; + nb += sizeof(hdr); + lbuf = (unsigned long __user *)(buf + sizeof(hdr)); + + /* Skip uninteresting entries, i.e. clean on not-first pass */ + if (!first_pass) { + while (i < kvm->arch.hpt_npte && + !(revp->guest_rpte & HPTE_GR_MODIFIED)) { + ++i; + hptp += 2; + ++revp; + } + } + + /* Grab a series of valid entries */ + while (i < kvm->arch.hpt_npte && + hdr.n_valid < 0xffff && + nb + HPTE_SIZE < count && + record_hpte(flags, hptp, hpte, revp, 1, first_pass)) { + /* valid entry, write it out */ + ++hdr.n_valid; + if (__put_user(hpte[0], lbuf) || + __put_user(hpte[1], lbuf + 1)) + return -EFAULT; + nb += HPTE_SIZE; + lbuf += 2; + ++i; + hptp += 2; + ++revp; + } + /* Now skip invalid entries while we can */ + while (i < kvm->arch.hpt_npte && + hdr.n_invalid < 0xffff && + record_hpte(flags, hptp, hpte, revp, 0, first_pass)) { + /* found an invalid entry */ + ++hdr.n_invalid; + ++i; + hptp += 2; + ++revp; + } + + if (hdr.n_valid || hdr.n_invalid) { + /* write back the header */ + if (__copy_to_user(hptr, &hdr, sizeof(hdr))) + return -EFAULT; + nw = nb; + buf = (char __user *)lbuf; + } else { + nb = nw; + } + + /* Check if we've wrapped around the hash table */ + if (i >= kvm->arch.hpt_npte) { + i = 0; + ctx->first_pass = 0; + break; + } + } + + ctx->index = i; + + return nb; +} + +static ssize_t kvm_htab_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct kvm_htab_ctx *ctx = file->private_data; + struct kvm *kvm = ctx->kvm; + struct kvm_get_htab_header hdr; + unsigned long i, j; + unsigned long v, r; + unsigned long __user *lbuf; + unsigned long *hptp; + unsigned long tmp[2]; + ssize_t nb; + long int err, ret; + int rma_setup; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + /* lock out vcpus from running while we're doing this */ + mutex_lock(&kvm->lock); + rma_setup = kvm->arch.rma_setup_done; + if (rma_setup) { + kvm->arch.rma_setup_done = 0; /* temporarily */ + /* order rma_setup_done vs. vcpus_running */ + smp_mb(); + if (atomic_read(&kvm->arch.vcpus_running)) { + kvm->arch.rma_setup_done = 1; + mutex_unlock(&kvm->lock); + return -EBUSY; + } + } + + err = 0; + for (nb = 0; nb + sizeof(hdr) <= count; ) { + err = -EFAULT; + if (__copy_from_user(&hdr, buf, sizeof(hdr))) + break; + + err = 0; + if (nb + hdr.n_valid * HPTE_SIZE > count) + break; + + nb += sizeof(hdr); + buf += sizeof(hdr); + + err = -EINVAL; + i = hdr.index; + if (i >= kvm->arch.hpt_npte || + i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) + break; + + hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + lbuf = (unsigned long __user *)buf; + for (j = 0; j < hdr.n_valid; ++j) { + err = -EFAULT; + if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) + goto out; + err = -EINVAL; + if (!(v & HPTE_V_VALID)) + goto out; + lbuf += 2; + nb += HPTE_SIZE; + + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + kvmppc_do_h_remove(kvm, 0, i, 0, tmp); + err = -EIO; + ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, + tmp); + if (ret != H_SUCCESS) { + pr_err("kvm_htab_write ret %ld i=%ld v=%lx " + "r=%lx\n", ret, i, v, r); + goto out; + } + if (!rma_setup && is_vrma_hpte(v)) { + unsigned long psize = hpte_page_size(v, r); + unsigned long senc = slb_pgsize_encoding(psize); + unsigned long lpcr; + + kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; + lpcr |= senc << (LPCR_VRMASD_SH - 4); + kvm->arch.lpcr = lpcr; + rma_setup = 1; + } + ++i; + hptp += 2; + } + + for (j = 0; j < hdr.n_invalid; ++j) { + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + kvmppc_do_h_remove(kvm, 0, i, 0, tmp); + ++i; + hptp += 2; + } + err = 0; + } + + out: + /* Order HPTE updates vs. rma_setup_done */ + smp_wmb(); + kvm->arch.rma_setup_done = rma_setup; + mutex_unlock(&kvm->lock); + + if (err) + return err; + return nb; +} + +static int kvm_htab_release(struct inode *inode, struct file *filp) +{ + struct kvm_htab_ctx *ctx = filp->private_data; + + filp->private_data = NULL; + if (!(ctx->flags & KVM_GET_HTAB_WRITE)) + atomic_dec(&ctx->kvm->arch.hpte_mod_interest); + kvm_put_kvm(ctx->kvm); + kfree(ctx); + return 0; +} + +static struct file_operations kvm_htab_fops = { + .read = kvm_htab_read, + .write = kvm_htab_write, + .llseek = default_llseek, + .release = kvm_htab_release, +}; + +int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) +{ + int ret; + struct kvm_htab_ctx *ctx; + int rwflag; + + /* reject flags we don't recognize */ + if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE)) + return -EINVAL; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + kvm_get_kvm(kvm); + ctx->kvm = kvm; + ctx->index = ghf->start_index; + ctx->flags = ghf->flags; + ctx->first_pass = 1; + + rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; + ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag); + if (ret < 0) { + kvm_put_kvm(kvm); + return ret; + } + + if (rwflag == O_RDONLY) { + mutex_lock(&kvm->slots_lock); + atomic_inc(&kvm->arch.hpte_mod_interest); + /* make sure kvmppc_do_h_enter etc. see the increment */ + synchronize_srcu_expedited(&kvm->srcu); + mutex_unlock(&kvm->slots_lock); + } + + return ret; +} + void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = &vcpu->arch.mmu; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 843eb754a1d5..a4f59dbcd800 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1563,18 +1563,6 @@ out: return r; } -static unsigned long slb_pgsize_encoding(unsigned long psize) -{ - unsigned long senc = 0; - - if (psize > 0x1000) { - senc = SLB_VSID_L; - if (psize == 0x10000) - senc |= SLB_VSID_LP_01; - } - return senc; -} - static void unpin_slot(struct kvm_memory_slot *memslot) { unsigned long *physp; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d583ea15e151..70739a089560 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -354,6 +354,12 @@ int kvm_dev_ioctl_check_extension(long ext) r = 1; #else r = 0; + break; +#endif +#ifdef CONFIG_KVM_BOOK3S_64_HV + case KVM_CAP_PPC_HTAB_FD: + r = 1; + break; #endif break; case KVM_CAP_NR_VCPUS: @@ -954,6 +960,17 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + + case KVM_PPC_GET_HTAB_FD: { + struct kvm *kvm = filp->private_data; + struct kvm_get_htab_fd ghf; + + r = -EFAULT; + if (copy_from_user(&ghf, argp, sizeof(ghf))) + break; + r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); + break; + } #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 494a84c37c3e..e6e5d4b13708 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_smmu_info { #endif #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 +#define KVM_CAP_PPC_HTAB_FD 84 #ifdef KVM_CAP_IRQ_ROUTING @@ -859,6 +860,8 @@ struct kvm_s390_ucas_mapping { #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) /* Available with KVM_CAP_RMA */ #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_PPC_HTAB_FD */ +#define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) /* * ioctls for vcpu fds -- cgit v1.2.3 From 352df1deb2e3c40e65ff94c8d7c62d9144446b1c Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:29 +0000 Subject: KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface Implement ONE_REG interface for EPCR register adding KVM_REG_PPC_EPCR to the list of ONE_REG PPC supported registers. Signed-off-by: Mihai Caraman [agraf: remove HV dependency, use get/put_user] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/uapi/asm/kvm.h | 2 ++ arch/powerpc/kvm/booke.c | 14 ++++++++++++++ 3 files changed, 17 insertions(+) (limited to 'arch/powerpc/include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a5607c571cb3..a4df5535996b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1773,6 +1773,7 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_ADDR | 64 PPC | KVM_REG_PPC_VPA_SLB | 128 PPC | KVM_REG_PPC_VPA_DTL | 128 + PPC | KVM_REG_PPC_EPCR | 32 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 514883dd311e..2fba8a66fb10 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -411,4 +411,6 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83) #define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84) +#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 037d045db3f1..69f114015780 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1388,6 +1388,11 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); break; } +#if defined(CONFIG_64BIT) + case KVM_REG_PPC_EPCR: + r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); + break; +#endif default: break; } @@ -1415,6 +1420,15 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) (u64 __user *)(long)reg->addr, sizeof(u64)); break; } +#if defined(CONFIG_64BIT) + case KVM_REG_PPC_EPCR: { + u32 new_epcr; + r = get_user(new_epcr, (u32 __user *)(long)reg->addr); + if (r == 0) + kvmppc_set_epcr(vcpu, new_epcr); + break; + } +#endif default: break; } -- cgit v1.2.3