From 5f1f79bbc9e26fa9412fa9522f957bb8f030c442 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 7 May 2020 16:01:25 +0200 Subject: virtio-mem: Paravirtualized memory hotplug Each virtio-mem device owns exactly one memory region. It is responsible for adding/removing memory from that memory region on request. When the device driver starts up, the requested amount of memory is queried and then plugged to Linux. On request, further memory can be plugged or unplugged. This patch only implements the plugging part. On x86-64, memory can currently be plugged in 4MB ("subblock") granularity. When required, a new memory block will be added (e.g., usually 128MB on x86-64) in order to plug more subblocks. Only x86-64 was tested for now. The online_page callback is used to keep unplugged subblocks offline when onlining memory - similar to the Hyper-V balloon driver. Unplugged pages are marked PG_offline, to tell dump tools (e.g., makedumpfile) to skip them. User space is usually responsible for onlining the added memory. The memory hotplug notifier is used to synchronize virtio-mem activity against memory onlining/offlining. Each virtio-mem device can belong to a NUMA node, which allows us to easily add/remove small chunks of memory to/from a specific NUMA node by using multiple virtio-mem devices. Something that works even when the guest has no idea about the NUMA topology. One way to view virtio-mem is as a "resizable DIMM" or a DIMM with many "sub-DIMMS". This patch directly introduces the basic infrastructure to implement memory unplug. Especially the memory block states and subblock bitmaps will be heavily used there. Notes: - In case memory is to be onlined by user space, we limit the amount of offline memory blocks, to not run out of memory. This is esp. an issue if memory is added faster than it is getting onlined. - Suspend/Hibernate is not supported due to the way virtio-mem devices behave. Limited support might be possible in the future. - Reloading the device driver is not supported. Reviewed-by: Pankaj Gupta Tested-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Oscar Salvador Cc: Michal Hocko Cc: Igor Mammedov Cc: Dave Young Cc: Andrew Morton Cc: Dan Williams Cc: Pavel Tatashin Cc: Stefan Hajnoczi Cc: Vlastimil Babka Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: linux-acpi@vger.kernel.org Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200507140139.17083-2-david@redhat.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_mem.h | 200 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 include/uapi/linux/virtio_mem.h (limited to 'include/uapi/linux/virtio_mem.h') diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h new file mode 100644 index 000000000000..1bfade78bdfd --- /dev/null +++ b/include/uapi/linux/virtio_mem.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Virtio Mem Device + * + * Copyright Red Hat, Inc. 2020 + * + * Authors: + * David Hildenbrand + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _LINUX_VIRTIO_MEM_H +#define _LINUX_VIRTIO_MEM_H + +#include +#include +#include +#include + +/* + * Each virtio-mem device manages a dedicated region in physical address + * space. Each device can belong to a single NUMA node, multiple devices + * for a single NUMA node are possible. A virtio-mem device is like a + * "resizable DIMM" consisting of small memory blocks that can be plugged + * or unplugged. The device driver is responsible for (un)plugging memory + * blocks on demand. + * + * Virtio-mem devices can only operate on their assigned memory region in + * order to (un)plug memory. A device cannot (un)plug memory belonging to + * other devices. + * + * The "region_size" corresponds to the maximum amount of memory that can + * be provided by a device. The "size" corresponds to the amount of memory + * that is currently plugged. "requested_size" corresponds to a request + * from the device to the device driver to (un)plug blocks. The + * device driver should try to (un)plug blocks in order to reach the + * "requested_size". It is impossible to plug more memory than requested. + * + * The "usable_region_size" represents the memory region that can actually + * be used to (un)plug memory. It is always at least as big as the + * "requested_size" and will grow dynamically. It will only shrink when + * explicitly triggered (VIRTIO_MEM_REQ_UNPLUG). + * + * There are no guarantees what will happen if unplugged memory is + * read/written. Such memory should, in general, not be touched. E.g., + * even writing might succeed, but the values will simply be discarded at + * random points in time. + * + * It can happen that the device cannot process a request, because it is + * busy. The device driver has to retry later. + * + * Usually, during system resets all memory will get unplugged, so the + * device driver can start with a clean state. However, in specific + * scenarios (if the device is busy) it can happen that the device still + * has memory plugged. The device driver can request to unplug all memory + * (VIRTIO_MEM_REQ_UNPLUG) - which might take a while to succeed if the + * device is busy. + */ + +/* --- virtio-mem: guest -> host requests --- */ + +/* request to plug memory blocks */ +#define VIRTIO_MEM_REQ_PLUG 0 +/* request to unplug memory blocks */ +#define VIRTIO_MEM_REQ_UNPLUG 1 +/* request to unplug all blocks and shrink the usable size */ +#define VIRTIO_MEM_REQ_UNPLUG_ALL 2 +/* request information about the plugged state of memory blocks */ +#define VIRTIO_MEM_REQ_STATE 3 + +struct virtio_mem_req_plug { + __virtio64 addr; + __virtio16 nb_blocks; +}; + +struct virtio_mem_req_unplug { + __virtio64 addr; + __virtio16 nb_blocks; +}; + +struct virtio_mem_req_state { + __virtio64 addr; + __virtio16 nb_blocks; +}; + +struct virtio_mem_req { + __virtio16 type; + __virtio16 padding[3]; + + union { + struct virtio_mem_req_plug plug; + struct virtio_mem_req_unplug unplug; + struct virtio_mem_req_state state; + } u; +}; + + +/* --- virtio-mem: host -> guest response --- */ + +/* + * Request processed successfully, applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_UNPLUG_ALL + * - VIRTIO_MEM_REQ_STATE + */ +#define VIRTIO_MEM_RESP_ACK 0 +/* + * Request denied - e.g. trying to plug more than requested, applicable for + * - VIRTIO_MEM_REQ_PLUG + */ +#define VIRTIO_MEM_RESP_NACK 1 +/* + * Request cannot be processed right now, try again later, applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_UNPLUG_ALL + */ +#define VIRTIO_MEM_RESP_BUSY 2 +/* + * Error in request (e.g. addresses/alignment), applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_STATE + */ +#define VIRTIO_MEM_RESP_ERROR 3 + + +/* State of memory blocks is "plugged" */ +#define VIRTIO_MEM_STATE_PLUGGED 0 +/* State of memory blocks is "unplugged" */ +#define VIRTIO_MEM_STATE_UNPLUGGED 1 +/* State of memory blocks is "mixed" */ +#define VIRTIO_MEM_STATE_MIXED 2 + +struct virtio_mem_resp_state { + __virtio16 state; +}; + +struct virtio_mem_resp { + __virtio16 type; + __virtio16 padding[3]; + + union { + struct virtio_mem_resp_state state; + } u; +}; + +/* --- virtio-mem: configuration --- */ + +struct virtio_mem_config { + /* Block size and alignment. Cannot change. */ + __u32 block_size; + __u32 padding; + /* Start address of the memory region. Cannot change. */ + __u64 addr; + /* Region size (maximum). Cannot change. */ + __u64 region_size; + /* + * Currently usable region size. Can grow up to region_size. Can + * shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config + * update will be sent). + */ + __u64 usable_region_size; + /* + * Currently used size. Changes due to plug/unplug requests, but no + * config updates will be sent. + */ + __u64 plugged_size; + /* Requested size. New plug requests cannot exceed it. Can change. */ + __u64 requested_size; +}; + +#endif /* _LINUX_VIRTIO_MEM_H */ -- cgit v1.2.3