diff options
author | Shuo Liu <shuo.a.liu@intel.com> | 2021-02-07 06:10:31 +0300 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2021-02-09 12:58:19 +0300 |
commit | 72f293de3ff40b57db573c1bf623f494f3446f74 (patch) | |
tree | 71e908768a307c9bf4560ee673e4a4adf38f2a12 /drivers/virt/acrn/ioreq.c | |
parent | 88f537d5e8ddc89c2622f4a2bc1eb28455e8339c (diff) | |
download | linux-72f293de3ff40b57db573c1bf623f494f3446f74.tar.xz |
virt: acrn: Introduce I/O request management
An I/O request of a User VM, which is constructed by the hypervisor, is
distributed by the ACRN Hypervisor Service Module to an I/O client
corresponding to the address range of the I/O request.
For each User VM, there is a shared 4-KByte memory region used for I/O
requests communication between the hypervisor and Service VM. An I/O
request is a 256-byte structure buffer, which is 'struct
acrn_io_request', that is filled by an I/O handler of the hypervisor
when a trapped I/O access happens in a User VM. ACRN userspace in the
Service VM first allocates a 4-KByte page and passes the GPA (Guest
Physical Address) of the buffer to the hypervisor. The buffer is used as
an array of 16 I/O request slots with each I/O request slot being 256
bytes. This array is indexed by vCPU ID.
An I/O client, which is 'struct acrn_ioreq_client', is responsible for
handling User VM I/O requests whose accessed GPA falls in a certain
range. Multiple I/O clients can be associated with each User VM. There
is a special client associated with each User VM, called the default
client, that handles all I/O requests that do not fit into the range of
any other I/O clients. The ACRN userspace acts as the default client for
each User VM.
The state transitions of an ACRN I/O request are as follows.
FREE -> PENDING -> PROCESSING -> COMPLETE -> FREE -> ...
FREE: this I/O request slot is empty
PENDING: a valid I/O request is pending in this slot
PROCESSING: the I/O request is being processed
COMPLETE: the I/O request has been processed
An I/O request in COMPLETE or FREE state is owned by the hypervisor. HSM
and ACRN userspace are in charge of processing the others.
The processing flow of I/O requests is as follows:
a) The I/O handler of the hypervisor will fill an I/O request with
PENDING state when a trapped I/O access happens in a User VM.
b) The hypervisor makes an upcall, which is a notification interrupt, to
the Service VM.
c) The upcall handler schedules a worker to dispatch I/O requests.
d) The worker looks for the PENDING I/O requests, assigns them to
different registered clients based on the address of the I/O accesses,
updates their state to PROCESSING, and notifies the corresponding
client to handle.
e) The notified client handles the assigned I/O requests.
f) The HSM updates the states of the I/O requests to COMPLETE and notifies the
hypervisor of the completion via hypercalls.
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Yu Wang <yu1.wang@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Zhi Wang <zhi.a.wang@intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Shuo Liu <shuo.a.liu@intel.com>
Link: https://lore.kernel.org/r/20210207031040.49576-10-shuo.a.liu@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/virt/acrn/ioreq.c')
-rw-r--r-- | drivers/virt/acrn/ioreq.c | 521 |
1 files changed, 521 insertions, 0 deletions
diff --git a/drivers/virt/acrn/ioreq.c b/drivers/virt/acrn/ioreq.c new file mode 100644 index 000000000000..51cb41ef7c72 --- /dev/null +++ b/drivers/virt/acrn/ioreq.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACRN_HSM: Handle I/O requests + * + * Copyright (C) 2020 Intel Corporation. All rights reserved. + * + * Authors: + * Jason Chen CJ <jason.cj.chen@intel.com> + * Fengwei Yin <fengwei.yin@intel.com> + */ + +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kthread.h> +#include <linux/mm.h> +#include <linux/slab.h> + +#include <asm/acrn.h> + +#include "acrn_drv.h" + +static void ioreq_pause(void); +static void ioreq_resume(void); + +static void ioreq_dispatcher(struct work_struct *work); +static struct workqueue_struct *ioreq_wq; +static DECLARE_WORK(ioreq_work, ioreq_dispatcher); + +static inline bool has_pending_request(struct acrn_ioreq_client *client) +{ + return !bitmap_empty(client->ioreqs_map, ACRN_IO_REQUEST_MAX); +} + +static inline bool is_destroying(struct acrn_ioreq_client *client) +{ + return test_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags); +} + +static int ioreq_complete_request(struct acrn_vm *vm, u16 vcpu, + struct acrn_io_request *acrn_req) +{ + bool polling_mode; + int ret = 0; + + polling_mode = acrn_req->completion_polling; + /* Add barrier() to make sure the writes are done before completion */ + smp_store_release(&acrn_req->processed, ACRN_IOREQ_STATE_COMPLETE); + + /* + * To fulfill the requirement of real-time in several industry + * scenarios, like automotive, ACRN can run under the partition mode, + * in which User VMs and Service VM are bound to dedicated CPU cores. + * Polling mode of handling the I/O request is introduced to achieve a + * faster I/O request handling. In polling mode, the hypervisor polls + * I/O request's completion. Once an I/O request is marked as + * ACRN_IOREQ_STATE_COMPLETE, hypervisor resumes from the polling point + * to continue the I/O request flow. 
Thus, the completion notification + * from HSM of I/O request is not needed. Please note, + * completion_polling needs to be read before the I/O request being + * marked as ACRN_IOREQ_STATE_COMPLETE to avoid racing with the + * hypervisor. + */ + if (!polling_mode) { + ret = hcall_notify_req_finish(vm->vmid, vcpu); + if (ret < 0) + dev_err(acrn_dev.this_device, + "Notify I/O request finished failed!\n"); + } + + return ret; +} + +static int acrn_ioreq_complete_request(struct acrn_ioreq_client *client, + u16 vcpu, + struct acrn_io_request *acrn_req) +{ + int ret; + + if (vcpu >= client->vm->vcpu_num) + return -EINVAL; + + clear_bit(vcpu, client->ioreqs_map); + if (!acrn_req) { + acrn_req = (struct acrn_io_request *)client->vm->ioreq_buf; + acrn_req += vcpu; + } + + ret = ioreq_complete_request(client->vm, vcpu, acrn_req); + + return ret; +} + +int acrn_ioreq_request_default_complete(struct acrn_vm *vm, u16 vcpu) +{ + int ret = 0; + + spin_lock_bh(&vm->ioreq_clients_lock); + if (vm->default_client) + ret = acrn_ioreq_complete_request(vm->default_client, + vcpu, NULL); + spin_unlock_bh(&vm->ioreq_clients_lock); + + return ret; +} + +/* + * ioreq_task() is the execution entity of handler thread of an I/O client. + * The handler callback of the I/O client is called within the handler thread. + */ +static int ioreq_task(void *data) +{ + struct acrn_ioreq_client *client = data; + struct acrn_io_request *req; + unsigned long *ioreqs_map; + int vcpu, ret; + + /* + * Lockless access to ioreqs_map is safe, because + * 1) set_bit() and clear_bit() are atomic operations. + * 2) I/O requests arrives serialized. 
The access flow of ioreqs_map is: + * set_bit() - in ioreq_work handler + * Handler callback handles corresponding I/O request + * clear_bit() - in handler thread (include ACRN userspace) + * Mark corresponding I/O request completed + * Loop again if a new I/O request occurs + */ + ioreqs_map = client->ioreqs_map; + while (!kthread_should_stop()) { + acrn_ioreq_client_wait(client); + while (has_pending_request(client)) { + vcpu = find_first_bit(ioreqs_map, client->vm->vcpu_num); + req = client->vm->ioreq_buf->req_slot + vcpu; + ret = client->handler(client, req); + if (ret < 0) { + dev_err(acrn_dev.this_device, + "IO handle failure: %d\n", ret); + break; + } + acrn_ioreq_complete_request(client, vcpu, req); + } + } + + return 0; +} + +/* + * For the non-default I/O clients, give them chance to complete the current + * I/O requests if there are any. For the default I/O client, it is safe to + * clear all pending I/O requests because the clearing request is from ACRN + * userspace. + */ +void acrn_ioreq_request_clear(struct acrn_vm *vm) +{ + struct acrn_ioreq_client *client; + bool has_pending = false; + unsigned long vcpu; + int retry = 10; + + /* + * IO requests of this VM will be completed directly in + * acrn_ioreq_dispatch if ACRN_VM_FLAG_CLEARING_IOREQ flag is set. + */ + set_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags); + + /* + * acrn_ioreq_request_clear is only called in VM reset case. Simply + * wait 100ms in total for the IO requests' completion. 
+ */ + do { + spin_lock_bh(&vm->ioreq_clients_lock); + list_for_each_entry(client, &vm->ioreq_clients, list) { + has_pending = has_pending_request(client); + if (has_pending) + break; + } + spin_unlock_bh(&vm->ioreq_clients_lock); + + if (has_pending) + schedule_timeout_interruptible(HZ / 100); + } while (has_pending && --retry > 0); + if (retry == 0) + dev_warn(acrn_dev.this_device, + "%s cannot flush pending request!\n", client->name); + + /* Clear all ioreqs belonging to the default client */ + spin_lock_bh(&vm->ioreq_clients_lock); + client = vm->default_client; + if (client) { + vcpu = find_next_bit(client->ioreqs_map, + ACRN_IO_REQUEST_MAX, 0); + while (vcpu < ACRN_IO_REQUEST_MAX) { + acrn_ioreq_complete_request(client, vcpu, NULL); + vcpu = find_next_bit(client->ioreqs_map, + ACRN_IO_REQUEST_MAX, vcpu + 1); + } + } + spin_unlock_bh(&vm->ioreq_clients_lock); + + /* Clear ACRN_VM_FLAG_CLEARING_IOREQ flag after the clearing */ + clear_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags); +} + +int acrn_ioreq_client_wait(struct acrn_ioreq_client *client) +{ + if (client->is_default) { + /* + * In the default client, a user space thread waits on the + * waitqueue. The is_destroying() check is used to notify user + * space the client is going to be destroyed. 
+ */ + wait_event_interruptible(client->wq, + has_pending_request(client) || + is_destroying(client)); + if (is_destroying(client)) + return -ENODEV; + } else { + wait_event_interruptible(client->wq, + has_pending_request(client) || + kthread_should_stop()); + } + + return 0; +} + +static bool in_range(struct acrn_ioreq_range *range, + struct acrn_io_request *req) +{ + bool ret = false; + + if (range->type == req->type) { + switch (req->type) { + case ACRN_IOREQ_TYPE_MMIO: + if (req->reqs.mmio_request.address >= range->start && + (req->reqs.mmio_request.address + + req->reqs.mmio_request.size - 1) <= range->end) + ret = true; + break; + case ACRN_IOREQ_TYPE_PORTIO: + if (req->reqs.pio_request.address >= range->start && + (req->reqs.pio_request.address + + req->reqs.pio_request.size - 1) <= range->end) + ret = true; + break; + default: + break; + } + } + + return ret; +} + +static struct acrn_ioreq_client *find_ioreq_client(struct acrn_vm *vm, + struct acrn_io_request *req) +{ + struct acrn_ioreq_client *client, *found = NULL; + struct acrn_ioreq_range *range; + + lockdep_assert_held(&vm->ioreq_clients_lock); + + list_for_each_entry(client, &vm->ioreq_clients, list) { + read_lock_bh(&client->range_lock); + list_for_each_entry(range, &client->range_list, list) { + if (in_range(range, req)) { + found = client; + break; + } + } + read_unlock_bh(&client->range_lock); + if (found) + break; + } + return found ? found : vm->default_client; +} + +/** + * acrn_ioreq_client_create() - Create an ioreq client + * @vm: The VM that this client belongs to + * @handler: The ioreq_handler of ioreq client acrn_hsm will create a kernel + * thread and call the handler to handle I/O requests. 
+ * @priv: Private data for the handler + * @is_default: If it is the default client + * @name: The name of ioreq client + * + * Return: acrn_ioreq_client pointer on success, NULL on error + */ +struct acrn_ioreq_client *acrn_ioreq_client_create(struct acrn_vm *vm, + ioreq_handler_t handler, + void *priv, bool is_default, + const char *name) +{ + struct acrn_ioreq_client *client; + + if (!handler && !is_default) { + dev_dbg(acrn_dev.this_device, + "Cannot create non-default client w/o handler!\n"); + return NULL; + } + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return NULL; + + client->handler = handler; + client->vm = vm; + client->priv = priv; + client->is_default = is_default; + if (name) + strncpy(client->name, name, sizeof(client->name) - 1); + rwlock_init(&client->range_lock); + INIT_LIST_HEAD(&client->range_list); + init_waitqueue_head(&client->wq); + + if (client->handler) { + client->thread = kthread_run(ioreq_task, client, "VM%u-%s", + client->vm->vmid, client->name); + if (IS_ERR(client->thread)) { + kfree(client); + return NULL; + } + } + + spin_lock_bh(&vm->ioreq_clients_lock); + if (is_default) + vm->default_client = client; + else + list_add(&client->list, &vm->ioreq_clients); + spin_unlock_bh(&vm->ioreq_clients_lock); + + dev_dbg(acrn_dev.this_device, "Created ioreq client %s.\n", name); + return client; +} + +/** + * acrn_ioreq_client_destroy() - Destroy an ioreq client + * @client: The ioreq client + */ +void acrn_ioreq_client_destroy(struct acrn_ioreq_client *client) +{ + struct acrn_ioreq_range *range, *next; + struct acrn_vm *vm = client->vm; + + dev_dbg(acrn_dev.this_device, + "Destroy ioreq client %s.\n", client->name); + ioreq_pause(); + set_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags); + if (client->is_default) + wake_up_interruptible(&client->wq); + else + kthread_stop(client->thread); + + spin_lock_bh(&vm->ioreq_clients_lock); + if (client->is_default) + vm->default_client = NULL; + else + 
list_del(&client->list); + spin_unlock_bh(&vm->ioreq_clients_lock); + + write_lock_bh(&client->range_lock); + list_for_each_entry_safe(range, next, &client->range_list, list) { + list_del(&range->list); + kfree(range); + } + write_unlock_bh(&client->range_lock); + kfree(client); + + ioreq_resume(); +} + +static int acrn_ioreq_dispatch(struct acrn_vm *vm) +{ + struct acrn_ioreq_client *client; + struct acrn_io_request *req; + int i; + + for (i = 0; i < vm->vcpu_num; i++) { + req = vm->ioreq_buf->req_slot + i; + + /* barrier the read of processed of acrn_io_request */ + if (smp_load_acquire(&req->processed) == + ACRN_IOREQ_STATE_PENDING) { + /* Complete the IO request directly in clearing stage */ + if (test_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags)) { + ioreq_complete_request(vm, i, req); + continue; + } + + spin_lock_bh(&vm->ioreq_clients_lock); + client = find_ioreq_client(vm, req); + if (!client) { + dev_err(acrn_dev.this_device, + "Failed to find ioreq client!\n"); + spin_unlock_bh(&vm->ioreq_clients_lock); + return -EINVAL; + } + if (!client->is_default) + req->kernel_handled = 1; + else + req->kernel_handled = 0; + /* + * Add barrier() to make sure the writes are done + * before setting ACRN_IOREQ_STATE_PROCESSING + */ + smp_store_release(&req->processed, + ACRN_IOREQ_STATE_PROCESSING); + set_bit(i, client->ioreqs_map); + wake_up_interruptible(&client->wq); + spin_unlock_bh(&vm->ioreq_clients_lock); + } + } + + return 0; +} + +static void ioreq_dispatcher(struct work_struct *work) +{ + struct acrn_vm *vm; + + read_lock(&acrn_vm_list_lock); + list_for_each_entry(vm, &acrn_vm_list, list) { + if (!vm->ioreq_buf) + break; + acrn_ioreq_dispatch(vm); + } + read_unlock(&acrn_vm_list_lock); +} + +static void ioreq_intr_handler(void) +{ + queue_work(ioreq_wq, &ioreq_work); +} + +static void ioreq_pause(void) +{ + /* Flush and unarm the handler to ensure no I/O requests pending */ + acrn_remove_intr_handler(); + drain_workqueue(ioreq_wq); +} + +static void 
ioreq_resume(void) +{ + /* Schedule after enabling in case other clients miss interrupt */ + acrn_setup_intr_handler(ioreq_intr_handler); + queue_work(ioreq_wq, &ioreq_work); +} + +int acrn_ioreq_intr_setup(void) +{ + acrn_setup_intr_handler(ioreq_intr_handler); + ioreq_wq = alloc_workqueue("ioreq_wq", + WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1); + if (!ioreq_wq) { + dev_err(acrn_dev.this_device, "Failed to alloc workqueue!\n"); + acrn_remove_intr_handler(); + return -ENOMEM; + } + return 0; +} + +void acrn_ioreq_intr_remove(void) +{ + if (ioreq_wq) + destroy_workqueue(ioreq_wq); + acrn_remove_intr_handler(); +} + +int acrn_ioreq_init(struct acrn_vm *vm, u64 buf_vma) +{ + struct acrn_ioreq_buffer *set_buffer; + struct page *page; + int ret; + + if (vm->ioreq_buf) + return -EEXIST; + + set_buffer = kzalloc(sizeof(*set_buffer), GFP_KERNEL); + if (!set_buffer) + return -ENOMEM; + + ret = pin_user_pages_fast(buf_vma, 1, + FOLL_WRITE | FOLL_LONGTERM, &page); + if (unlikely(ret != 1) || !page) { + dev_err(acrn_dev.this_device, "Failed to pin ioreq page!\n"); + ret = -EFAULT; + goto free_buf; + } + + vm->ioreq_buf = page_address(page); + vm->ioreq_page = page; + set_buffer->ioreq_buf = page_to_phys(page); + ret = hcall_set_ioreq_buffer(vm->vmid, virt_to_phys(set_buffer)); + if (ret < 0) { + dev_err(acrn_dev.this_device, "Failed to init ioreq buffer!\n"); + unpin_user_page(page); + vm->ioreq_buf = NULL; + goto free_buf; + } + + dev_dbg(acrn_dev.this_device, + "Init ioreq buffer %pK!\n", vm->ioreq_buf); + ret = 0; +free_buf: + kfree(set_buffer); + return ret; +} + +void acrn_ioreq_deinit(struct acrn_vm *vm) +{ + struct acrn_ioreq_client *client, *next; + + dev_dbg(acrn_dev.this_device, + "Deinit ioreq buffer %pK!\n", vm->ioreq_buf); + /* Destroy all clients belonging to this VM */ + list_for_each_entry_safe(client, next, &vm->ioreq_clients, list) + acrn_ioreq_client_destroy(client); + if (vm->default_client) + acrn_ioreq_client_destroy(vm->default_client); + + if 
(vm->ioreq_buf && vm->ioreq_page) { + unpin_user_page(vm->ioreq_page); + vm->ioreq_buf = NULL; + } +} |