author    | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-09 21:06:29 +0300
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-09 21:06:29 +0300
commit    | 1eb8df18677d197d7538583823c373d7f13cbebc (patch)
tree      | 32b76b7f2b84fea99ec64257bb4f5d95f6303866 /drivers/virtio
parent    | d8dc121eeab9abfbc510097f8db83e87560f753b (diff)
parent    | db7b337709a15d33cc5e901d2ee35d3bb3e42b2f (diff)
download  | linux-1eb8df18677d197d7538583823c373d7f13cbebc.tar.xz
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio,vhost,vdpa updates from Michael Tsirkin:
- Doorbell remapping for ifcvf, mlx5
- virtio_vdpa support for mlx5
- Validate device input in several drivers (for SEV and friends)
- ZONE_MOVABLE aware handling in virtio-mem
- Misc fixes, cleanups
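[Editorial note on the "validate device input" item above: under SEV and similar confidential-computing setups the device side sits outside the trust boundary, so values the device writes into shared memory must be checked before use. A minimal, hypothetical sketch of the pattern — none of these names are from this series:]

```c
#include <linux/errno.h>
#include <linux/minmax.h>
#include <linux/virtio.h>

struct demo_req {
	char buf[256];
};

/* Never trust the device-reported used length; clamp it to the size of
 * the buffer the driver actually posted before touching the data. */
static int demo_handle_used_buf(struct virtqueue *vq)
{
	unsigned int len;
	struct demo_req *req = virtqueue_get_buf(vq, &len);

	if (!req)
		return -ENOENT;

	len = min_t(unsigned int, len, sizeof(req->buf));
	/* ... process len bytes of req->buf ... */
	return 0;
}
```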
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (48 commits)
virtio-mem: prioritize unplug from ZONE_MOVABLE in Big Block Mode
virtio-mem: simplify high-level unplug handling in Big Block Mode
virtio-mem: prioritize unplug from ZONE_MOVABLE in Sub Block Mode
virtio-mem: simplify high-level unplug handling in Sub Block Mode
virtio-mem: simplify high-level plug handling in Sub Block Mode
virtio-mem: use page_zonenum() in virtio_mem_fake_offline()
virtio-mem: don't read big block size in Sub Block Mode
virtio/vdpa: clear the virtqueue state during probe
vp_vdpa: allow set vq state to initial state after reset
virtio-pci library: introduce vp_modern_get_driver_features()
vdpa: support packed virtqueue for set/get_vq_state()
virtio-ring: store DMA metadata in desc_extra for split virtqueue
virtio: use err label in __vring_new_virtqueue()
virtio_ring: introduce virtqueue_desc_add_split()
virtio_ring: secure handling of mapping errors
virtio-ring: factor out desc_extra allocation
virtio_ring: rename vring_desc_extra_packed
virtio-ring: maintain next in extra state for packed virtqueue
vdpa/mlx5: Clear vq ready indication upon device reset
vdpa/mlx5: Add support for doorbell bypassing
...
Diffstat (limited to 'drivers/virtio')
-rw-r--r-- | drivers/virtio/virtio_mem.c            | 338
-rw-r--r-- | drivers/virtio/virtio_pci_modern_dev.c |  21
-rw-r--r-- | drivers/virtio/virtio_ring.c           | 229
-rw-r--r-- | drivers/virtio/virtio_vdpa.c           |  15
4 files changed, 380 insertions, 223 deletions
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index dc2a2e2b2ff8..09ed55de07d7 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -75,10 +75,14 @@ enum virtio_mem_sbm_mb_state {
 	VIRTIO_MEM_SBM_MB_OFFLINE,
 	/* Partially plugged, fully added to Linux, offline. */
 	VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
-	/* Fully plugged, fully added to Linux, online. */
-	VIRTIO_MEM_SBM_MB_ONLINE,
-	/* Partially plugged, fully added to Linux, online. */
-	VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL,
+	/* Fully plugged, fully added to Linux, onlined to a kernel zone. */
+	VIRTIO_MEM_SBM_MB_KERNEL,
+	/* Partially plugged, fully added to Linux, online to a kernel zone */
+	VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
+	/* Fully plugged, fully added to Linux, onlined to ZONE_MOVABLE. */
+	VIRTIO_MEM_SBM_MB_MOVABLE,
+	/* Partially plugged, fully added to Linux, onlined to ZONE_MOVABLE. */
+	VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
 	VIRTIO_MEM_SBM_MB_COUNT
 };
 
@@ -699,18 +703,6 @@ static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id)
 }
 
 /*
- * See virtio_mem_remove_memory(): Try to remove all Linux memory blocks covered
- * by the big block.
- */
-static int virtio_mem_bbm_remove_bb(struct virtio_mem *vm, unsigned long bb_id)
-{
-	const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
-	const uint64_t size = vm->bbm.bb_size;
-
-	return virtio_mem_remove_memory(vm, addr, size);
-}
-
-/*
  * Try offlining and removing memory from Linux.
  *
  * Must not be called with the vm->hotplug_mutex held (possible deadlock with
@@ -832,11 +824,13 @@ static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm,
 					  unsigned long mb_id)
 {
 	switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
-	case VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL:
+	case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL:
+	case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL:
 		virtio_mem_sbm_set_mb_state(vm, mb_id,
 					    VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
 		break;
-	case VIRTIO_MEM_SBM_MB_ONLINE:
+	case VIRTIO_MEM_SBM_MB_KERNEL:
+	case VIRTIO_MEM_SBM_MB_MOVABLE:
 		virtio_mem_sbm_set_mb_state(vm, mb_id,
 					    VIRTIO_MEM_SBM_MB_OFFLINE);
 		break;
@@ -847,21 +841,29 @@ static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm,
 }
 
 static void virtio_mem_sbm_notify_online(struct virtio_mem *vm,
-					 unsigned long mb_id)
+					 unsigned long mb_id,
+					 unsigned long start_pfn)
 {
+	const bool is_movable = page_zonenum(pfn_to_page(start_pfn)) ==
+				ZONE_MOVABLE;
+	int new_state;
+
 	switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
 	case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
-		virtio_mem_sbm_set_mb_state(vm, mb_id,
-					VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL);
+		new_state = VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL;
+		if (is_movable)
+			new_state = VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL;
 		break;
 	case VIRTIO_MEM_SBM_MB_OFFLINE:
-		virtio_mem_sbm_set_mb_state(vm, mb_id,
-					    VIRTIO_MEM_SBM_MB_ONLINE);
+		new_state = VIRTIO_MEM_SBM_MB_KERNEL;
+		if (is_movable)
+			new_state = VIRTIO_MEM_SBM_MB_MOVABLE;
 		break;
 	default:
 		BUG();
 		break;
 	}
+	virtio_mem_sbm_set_mb_state(vm, mb_id, new_state);
 }
 
 static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm,
@@ -1015,7 +1017,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
 		break;
 	case MEM_ONLINE:
 		if (vm->in_sbm)
-			virtio_mem_sbm_notify_online(vm, id);
+			virtio_mem_sbm_notify_online(vm, id, mhp->start_pfn);
 
 		atomic64_sub(size, &vm->offline_size);
 		/*
@@ -1137,7 +1139,7 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
  */
 static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
 {
-	const bool is_movable = zone_idx(page_zone(pfn_to_page(pfn))) ==
+	const bool is_movable = page_zonenum(pfn_to_page(pfn)) ==
 				ZONE_MOVABLE;
 	int rc, retry_count;
 
@@ -1455,8 +1457,8 @@ static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id)
  *
  * Note: can fail after some subblocks were unplugged.
  */
-static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm,
-					unsigned long mb_id, uint64_t *nb_sb)
+static int virtio_mem_sbm_unplug_any_sb_raw(struct virtio_mem *vm,
+					    unsigned long mb_id, uint64_t *nb_sb)
 {
 	int sb_id, count;
 	int rc;
@@ -1498,7 +1500,7 @@ static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id)
 {
 	uint64_t nb_sb = vm->sbm.sbs_per_mb;
 
-	return virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb);
+	return virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, &nb_sb);
 }
 
 /*
@@ -1585,9 +1587,9 @@ static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm,
  * Note: Can fail after some subblocks were successfully plugged.
  */
 static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
-				      unsigned long mb_id, uint64_t *nb_sb,
-				      bool online)
+				      unsigned long mb_id, uint64_t *nb_sb)
 {
+	const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
 	unsigned long pfn, nr_pages;
 	int sb_id, count;
 	int rc;
@@ -1609,7 +1611,7 @@ static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
 		if (rc)
 			return rc;
 		*nb_sb -= count;
-		if (!online)
+		if (old_state == VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
 			continue;
 
 		/* fake-online the pages if the memory block is online */
@@ -1619,23 +1621,22 @@ static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
 		virtio_mem_fake_online(pfn, nr_pages);
 	}
 
-	if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
-		if (online)
-			virtio_mem_sbm_set_mb_state(vm, mb_id,
-						    VIRTIO_MEM_SBM_MB_ONLINE);
-		else
-			virtio_mem_sbm_set_mb_state(vm, mb_id,
-						    VIRTIO_MEM_SBM_MB_OFFLINE);
-	}
+	if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
+		virtio_mem_sbm_set_mb_state(vm, mb_id, old_state - 1);
 
 	return 0;
 }
 
 static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
 {
+	const int mb_states[] = {
+		VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
+		VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
+		VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
+	};
 	uint64_t nb_sb = diff / vm->sbm.sb_size;
 	unsigned long mb_id;
-	int rc;
+	int rc, i;
 
 	if (!nb_sb)
 		return 0;
@@ -1643,22 +1644,13 @@ static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
 	/* Don't race with onlining/offlining */
 	mutex_lock(&vm->hotplug_mutex);
 
-	/* Try to plug subblocks of partially plugged online blocks. */
-	virtio_mem_sbm_for_each_mb(vm, mb_id,
-				   VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) {
-		rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, true);
-		if (rc || !nb_sb)
-			goto out_unlock;
-		cond_resched();
-	}
-
-	/* Try to plug subblocks of partially plugged offline blocks. */
-	virtio_mem_sbm_for_each_mb(vm, mb_id,
-				   VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
-		rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, false);
-		if (rc || !nb_sb)
-			goto out_unlock;
-		cond_resched();
+	for (i = 0; i < ARRAY_SIZE(mb_states); i++) {
+		virtio_mem_sbm_for_each_mb(vm, mb_id, mb_states[i]) {
+			rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb);
+			if (rc || !nb_sb)
+				goto out_unlock;
+			cond_resched();
+		}
 	}
 
 	/*
@@ -1819,7 +1811,7 @@ static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm,
 {
 	int rc;
 
-	rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, nb_sb);
+	rc = virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, nb_sb);
 
 	/* some subblocks might have been unplugged even on failure */
 	if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
@@ -1856,6 +1848,7 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
 					   int count)
 {
 	const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count;
+	const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
 	unsigned long start_pfn;
 	int rc;
 
@@ -1874,8 +1867,17 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
 		return rc;
 	}
 
-	virtio_mem_sbm_set_mb_state(vm, mb_id,
-				    VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL);
+	switch (old_state) {
+	case VIRTIO_MEM_SBM_MB_KERNEL:
+		virtio_mem_sbm_set_mb_state(vm, mb_id,
+					    VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL);
+		break;
+	case VIRTIO_MEM_SBM_MB_MOVABLE:
+		virtio_mem_sbm_set_mb_state(vm, mb_id,
+					    VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL);
+		break;
+	}
+
 	return 0;
 }
 
@@ -1942,11 +1944,50 @@ unplugged:
 	return 0;
 }
 
+/*
+ * Unplug the desired number of plugged subblocks of a memory block that is
+ * already added to Linux. Will skip subblock of online memory blocks that are
+ * busy (by the OS). Will fail if any subblock that's not busy cannot get
+ * unplugged.
+ *
+ * Will modify the state of the memory block. Might temporarily drop the
+ * hotplug_mutex.
+ *
+ * Note: Can fail after some subblocks were successfully unplugged. Can
+ *       return 0 even if subblocks were busy and could not get unplugged.
+ */
+static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm,
+					unsigned long mb_id,
+					uint64_t *nb_sb)
+{
+	const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
+
+	switch (old_state) {
+	case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL:
+	case VIRTIO_MEM_SBM_MB_KERNEL:
+	case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL:
+	case VIRTIO_MEM_SBM_MB_MOVABLE:
+		return virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, nb_sb);
+	case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
+	case VIRTIO_MEM_SBM_MB_OFFLINE:
+		return virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, nb_sb);
+	}
+	return -EINVAL;
+}
+
 static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
 {
+	const int mb_states[] = {
+		VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
+		VIRTIO_MEM_SBM_MB_OFFLINE,
+		VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
+		VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
+		VIRTIO_MEM_SBM_MB_MOVABLE,
+		VIRTIO_MEM_SBM_MB_KERNEL,
+	};
 	uint64_t nb_sb = diff / vm->sbm.sb_size;
 	unsigned long mb_id;
-	int rc;
+	int rc, i;
 
 	if (!nb_sb)
 		return 0;
@@ -1958,47 +1999,26 @@ static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
 	 */
 	mutex_lock(&vm->hotplug_mutex);
 
-	/* Try to unplug subblocks of partially plugged offline blocks. */
-	virtio_mem_sbm_for_each_mb_rev(vm, mb_id,
-				       VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
-		rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb);
-		if (rc || !nb_sb)
-			goto out_unlock;
-		cond_resched();
-	}
-
-	/* Try to unplug subblocks of plugged offline blocks. */
-	virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE) {
-		rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb);
-		if (rc || !nb_sb)
-			goto out_unlock;
-		cond_resched();
-	}
-
-	if (!unplug_online) {
-		mutex_unlock(&vm->hotplug_mutex);
-		return 0;
-	}
-
-	/* Try to unplug subblocks of partially plugged online blocks. */
-	virtio_mem_sbm_for_each_mb_rev(vm, mb_id,
-				       VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) {
-		rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb);
-		if (rc || !nb_sb)
-			goto out_unlock;
-		mutex_unlock(&vm->hotplug_mutex);
-		cond_resched();
-		mutex_lock(&vm->hotplug_mutex);
-	}
-
-	/* Try to unplug subblocks of plugged online blocks. */
-	virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE) {
-		rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb);
-		if (rc || !nb_sb)
-			goto out_unlock;
-		mutex_unlock(&vm->hotplug_mutex);
-		cond_resched();
-		mutex_lock(&vm->hotplug_mutex);
+	/*
+	 * We try unplug from partially plugged blocks first, to try removing
+	 * whole memory blocks along with metadata. We prioritize ZONE_MOVABLE
+	 * as it's more reliable to unplug memory and remove whole memory
+	 * blocks, and we don't want to trigger a zone imbalances by
+	 * accidentially removing too much kernel memory.
+	 */
+	for (i = 0; i < ARRAY_SIZE(mb_states); i++) {
+		virtio_mem_sbm_for_each_mb_rev(vm, mb_id, mb_states[i]) {
+			rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb);
+			if (rc || !nb_sb)
+				goto out_unlock;
+			mutex_unlock(&vm->hotplug_mutex);
+			cond_resched();
+			mutex_lock(&vm->hotplug_mutex);
+		}
+		if (!unplug_online && i == 1) {
+			mutex_unlock(&vm->hotplug_mutex);
+			return 0;
+		}
 	}
 
 	mutex_unlock(&vm->hotplug_mutex);
@@ -2085,47 +2105,41 @@ rollback_safe_unplug:
 }
 
 /*
- * Try to remove a big block from Linux and unplug it. Will fail with
- * -EBUSY if some memory is online.
- *
- * Will modify the state of the memory block.
+ * Test if a big block is completely offline.
  */
-static int virtio_mem_bbm_remove_and_unplug_bb(struct virtio_mem *vm,
-					       unsigned long bb_id)
+static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm,
+					 unsigned long bb_id)
 {
-	int rc;
-
-	if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
-			 VIRTIO_MEM_BBM_BB_ADDED))
-		return -EINVAL;
+	const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
+	const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
+	unsigned long pfn;
 
-	rc = virtio_mem_bbm_remove_bb(vm, bb_id);
-	if (rc)
-		return -EBUSY;
+	for (pfn = start_pfn; pfn < start_pfn + nr_pages;
+	     pfn += PAGES_PER_SECTION) {
+		if (pfn_to_online_page(pfn))
+			return false;
+	}
 
-	rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
-	if (rc)
-		virtio_mem_bbm_set_bb_state(vm, bb_id,
-					    VIRTIO_MEM_BBM_BB_PLUGGED);
-	else
-		virtio_mem_bbm_set_bb_state(vm, bb_id,
-					    VIRTIO_MEM_BBM_BB_UNUSED);
-	return rc;
+	return true;
 }
 
 /*
- * Test if a big block is completely offline.
+ * Test if a big block is completely onlined to ZONE_MOVABLE (or offline).
  */
-static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm,
+static bool virtio_mem_bbm_bb_is_movable(struct virtio_mem *vm,
 					 unsigned long bb_id)
 {
 	const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
 	const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
+	struct page *page;
 	unsigned long pfn;
 
 	for (pfn = start_pfn; pfn < start_pfn + nr_pages;
 	     pfn += PAGES_PER_SECTION) {
-		if (pfn_to_online_page(pfn))
+		page = pfn_to_online_page(pfn);
+		if (!page)
+			continue;
+		if (page_zonenum(page) != ZONE_MOVABLE)
 			return false;
 	}
 
@@ -2136,42 +2150,37 @@ static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
 {
 	uint64_t nb_bb = diff / vm->bbm.bb_size;
 	uint64_t bb_id;
-	int rc;
+	int rc, i;
 
 	if (!nb_bb)
 		return 0;
 
-	/* Try to unplug completely offline big blocks first. */
-	virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
-		cond_resched();
-		/*
-		 * As we're holding no locks, this check is racy as memory
-		 * can get onlined in the meantime - but we'll fail gracefully.
-		 */
-		if (!virtio_mem_bbm_bb_is_offline(vm, bb_id))
-			continue;
-		rc = virtio_mem_bbm_remove_and_unplug_bb(vm, bb_id);
-		if (rc == -EBUSY)
-			continue;
-		if (!rc)
-			nb_bb--;
-		if (rc || !nb_bb)
-			return rc;
-	}
-
-	if (!unplug_online)
-		return 0;
+	/*
+	 * Try to unplug big blocks. Similar to SBM, start with offline
+	 * big blocks.
+	 */
+	for (i = 0; i < 3; i++) {
+		virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
+			cond_resched();
 
-	/* Try to unplug any big blocks. */
-	virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
-		cond_resched();
-		rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id);
-		if (rc == -EBUSY)
-			continue;
-		if (!rc)
-			nb_bb--;
-		if (rc || !nb_bb)
-			return rc;
+			/*
+			 * As we're holding no locks, these checks are racy,
+			 * but we don't care.
+			 */
+			if (i == 0 && !virtio_mem_bbm_bb_is_offline(vm, bb_id))
+				continue;
+			if (i == 1 && !virtio_mem_bbm_bb_is_movable(vm, bb_id))
+				continue;
+			rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id);
+			if (rc == -EBUSY)
+				continue;
+			if (!rc)
+				nb_bb--;
+			if (rc || !nb_bb)
+				return rc;
+		}
+		if (i == 0 && !unplug_online)
+			return 0;
 	}
 
 	return nb_bb ? -EBUSY : 0;
@@ -2422,6 +2431,10 @@ static int virtio_mem_init(struct virtio_mem *vm)
 		dev_warn(&vm->vdev->dev,
 			 "Some device memory is not addressable/pluggable. This can make some memory unusable.\n");
 
+	/* Prepare the offline threshold - make sure we can add two blocks. */
+	vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
+				      VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
+
 	/*
 	 * We want subblocks to span at least MAX_ORDER_NR_PAGES and
 	 * pageblock_nr_pages pages. This:
@@ -2468,14 +2481,11 @@
 			     vm->bbm.bb_size - 1;
 		vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr);
 		vm->bbm.next_bb_id = vm->bbm.first_bb_id;
-	}
 
-	/* Prepare the offline threshold - make sure we can add two blocks. */
-	vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
-				      VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
-	/* In BBM, we also want at least two big blocks. */
-	vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size,
-				      vm->offline_threshold);
+		/* Make sure we can add two big blocks. */
+		vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size,
+					      vm->offline_threshold);
+	}
 
 	dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
 	dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
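[A detail worth noting in the virtio_mem.c changes above: virtio_mem_sbm_plug_any_sb() now computes the fully-plugged state as old_state - 1, which only works because every fully-plugged enum value sits directly before its _PARTIAL counterpart. A compile-time assertion along these lines — not part of the patch, and only meaningful inside virtio_mem.c where the enum is defined — would document that invariant:]

```c
#include <linux/build_bug.h>

/* The "old_state - 1" trick relies on this enum layout. */
static_assert(VIRTIO_MEM_SBM_MB_OFFLINE + 1 == VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
static_assert(VIRTIO_MEM_SBM_MB_KERNEL + 1 == VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL);
static_assert(VIRTIO_MEM_SBM_MB_MOVABLE + 1 == VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL);
```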
diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c
index 54f297028586..e11ed748e661 100644
--- a/drivers/virtio/virtio_pci_modern_dev.c
+++ b/drivers/virtio/virtio_pci_modern_dev.c
@@ -384,6 +384,27 @@ u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev)
 EXPORT_SYMBOL_GPL(vp_modern_get_features);
 
 /*
+ * vp_modern_get_driver_features - get driver features from device
+ * @mdev: the modern virtio-pci device
+ *
+ * Returns the driver features read from the device
+ */
+u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev)
+{
+	struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
+
+	u64 features;
+
+	vp_iowrite32(0, &cfg->guest_feature_select);
+	features = vp_ioread32(&cfg->guest_feature);
+	vp_iowrite32(1, &cfg->guest_feature_select);
+	features |= ((u64)vp_ioread32(&cfg->guest_feature) << 32);
+
+	return features;
+}
+EXPORT_SYMBOL_GPL(vp_modern_get_driver_features);
+
+/*
  * vp_modern_set_features - set features to device
  * @mdev: the modern virtio-pci device
  * @features: the features set to device
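[The helper added above reads the 64-bit negotiated feature mask in two 32-bit halves through the guest_feature_select window. A sketch of a typical caller — the helper is real, the demo_ wrapper and its use are illustrative — e.g. checking whether the guest driver accepted packed virtqueues:]

```c
#include <linux/bits.h>
#include <linux/virtio_pci_modern.h>
#include <uapi/linux/virtio_config.h>

static bool demo_negotiated_packed(struct virtio_pci_modern_device *mdev)
{
	/* Both 32-bit halves are combined into one 64-bit mask. */
	u64 features = vp_modern_get_driver_features(mdev);

	return features & BIT_ULL(VIRTIO_F_RING_PACKED);
}
```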
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 71e16b53e9c1..89bfe46a8a7f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -74,14 +74,14 @@ struct vring_desc_state_packed {
 	void *data;			/* Data for callback. */
 	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
 	u16 num;			/* Descriptor list length. */
-	u16 next;			/* The next desc state in a list. */
 	u16 last;			/* The last desc state in a list. */
 };
 
-struct vring_desc_extra_packed {
+struct vring_desc_extra {
 	dma_addr_t addr;		/* Buffer DMA addr. */
 	u32 len;			/* Buffer length. */
 	u16 flags;			/* Descriptor flags. */
+	u16 next;			/* The next desc state in a list. */
 };
 
 struct vring_virtqueue {
@@ -113,6 +113,9 @@ struct vring_virtqueue {
 	/* Last used index we've seen. */
 	u16 last_used_idx;
 
+	/* Hint for event idx: already triggered no need to disable. */
+	bool event_triggered;
+
 	union {
 		/* Available for split ring */
 		struct {
@@ -130,6 +133,7 @@ struct vring_virtqueue {
 
 			/* Per-descriptor state. */
 			struct vring_desc_state_split *desc_state;
+			struct vring_desc_extra *desc_extra;
 
 			/* DMA address and size information */
 			dma_addr_t queue_dma_addr;
@@ -166,7 +170,7 @@ struct vring_virtqueue {
 
 			/* Per-descriptor state. */
 			struct vring_desc_state_packed *desc_state;
-			struct vring_desc_extra_packed *desc_extra;
+			struct vring_desc_extra *desc_extra;
 
 			/* DMA address and size information */
 			dma_addr_t ring_dma_addr;
@@ -364,8 +368,8 @@ static int vring_mapping_error(const struct vring_virtqueue *vq,
  * Split ring specific functions - *_split().
  */
 
-static void vring_unmap_one_split(const struct vring_virtqueue *vq,
-				  struct vring_desc *desc)
+static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
+					   struct vring_desc *desc)
 {
 	u16 flags;
 
@@ -389,6 +393,35 @@ static void vring_unmap_one_split(const struct vring_virtqueue *vq,
 	}
 }
 
+static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
+					  unsigned int i)
+{
+	struct vring_desc_extra *extra = vq->split.desc_extra;
+	u16 flags;
+
+	if (!vq->use_dma_api)
+		goto out;
+
+	flags = extra[i].flags;
+
+	if (flags & VRING_DESC_F_INDIRECT) {
+		dma_unmap_single(vring_dma_dev(vq),
+				 extra[i].addr,
+				 extra[i].len,
+				 (flags & VRING_DESC_F_WRITE) ?
+				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(vring_dma_dev(vq),
+			       extra[i].addr,
+			       extra[i].len,
+			       (flags & VRING_DESC_F_WRITE) ?
+			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	}
+
+out:
+	return extra[i].next;
+}
+
 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
 					       unsigned int total_sg,
 					       gfp_t gfp)
@@ -412,6 +445,35 @@ static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
 	return desc;
 }
 
+static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
+						    struct vring_desc *desc,
+						    unsigned int i,
+						    dma_addr_t addr,
+						    unsigned int len,
+						    u16 flags,
+						    bool indirect)
+{
+	struct vring_virtqueue *vring = to_vvq(vq);
+	struct vring_desc_extra *extra = vring->split.desc_extra;
+	u16 next;
+
+	desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
+	desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
+	desc[i].len = cpu_to_virtio32(vq->vdev, len);
+
+	if (!indirect) {
+		next = extra[i].next;
+		desc[i].next = cpu_to_virtio16(vq->vdev, next);
+
+		extra[i].addr = addr;
+		extra[i].len = len;
+		extra[i].flags = flags;
+	} else
+		next = virtio16_to_cpu(vq->vdev, desc[i].next);
+
+	return next;
+}
+
 static inline int virtqueue_add_split(struct virtqueue *_vq,
 				      struct scatterlist *sgs[],
 				      unsigned int total_sg,
@@ -484,11 +546,13 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 			if (vring_mapping_error(vq, addr))
 				goto unmap_release;
 
-			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
-			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
-			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 			prev = i;
-			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
+			/* Note that we trust indirect descriptor
+			 * table since it use stream DMA mapping.
+			 */
+			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
+						     VRING_DESC_F_NEXT,
+						     indirect);
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
@@ -497,15 +561,22 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 			if (vring_mapping_error(vq, addr))
 				goto unmap_release;
 
-			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
-			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
-			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 			prev = i;
-			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
+			/* Note that we trust indirect descriptor
+			 * table since it use stream DMA mapping.
+			 */
+			i = virtqueue_add_desc_split(_vq, desc, i, addr,
						     sg->length,
+						     VRING_DESC_F_NEXT |
+						     VRING_DESC_F_WRITE,
+						     indirect);
 		}
 	}
 	/* Last one doesn't continue. */
 	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
+	if (!indirect && vq->use_dma_api)
+		vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags =
+			~VRING_DESC_F_NEXT;
 
 	if (indirect) {
 		/* Now that the indirect table is filled in, map it. */
@@ -515,13 +586,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		if (vring_mapping_error(vq, addr))
 			goto unmap_release;
 
-		vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
-				VRING_DESC_F_INDIRECT);
-		vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
-				addr);
-
-		vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
-				total_sg * sizeof(struct vring_desc));
+		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
+					 head, addr,
+					 total_sg * sizeof(struct vring_desc),
+					 VRING_DESC_F_INDIRECT,
+					 false);
 	}
 
 	/* We're using some buffers from the free list. */
@@ -529,8 +598,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 
 	/* Update free pointer */
 	if (indirect)
-		vq->free_head = virtio16_to_cpu(_vq->vdev,
-					vq->split.vring.desc[head].next);
+		vq->free_head = vq->split.desc_extra[head].next;
 	else
 		vq->free_head = i;
 
@@ -575,8 +643,11 @@ unmap_release:
 	for (n = 0; n < total_sg; n++) {
 		if (i == err_idx)
 			break;
-		vring_unmap_one_split(vq, &desc[i]);
-		i = virtio16_to_cpu(_vq->vdev, desc[i].next);
+		if (indirect) {
+			vring_unmap_one_split_indirect(vq, &desc[i]);
+			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
+		} else
+			i = vring_unmap_one_split(vq, i);
 	}
 
 	if (indirect)
@@ -630,14 +701,13 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 	i = head;
 
 	while (vq->split.vring.desc[i].flags & nextflag) {
-		vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
-		i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
+		vring_unmap_one_split(vq, i);
+		i = vq->split.desc_extra[i].next;
 		vq->vq.num_free++;
 	}
 
-	vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
-	vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
-						       vq->free_head);
+	vring_unmap_one_split(vq, i);
+	vq->split.desc_extra[i].next = vq->free_head;
 	vq->free_head = head;
 
 	/* Plus final descriptor */
@@ -652,15 +722,14 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 		if (!indir_desc)
 			return;
 
-		len = virtio32_to_cpu(vq->vq.vdev,
-				vq->split.vring.desc[head].len);
+		len = vq->split.desc_extra[head].len;
 
-		BUG_ON(!(vq->split.vring.desc[head].flags &
-			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
+		BUG_ON(!(vq->split.desc_extra[head].flags &
+				VRING_DESC_F_INDIRECT));
 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 
 		for (j = 0; j < len / sizeof(struct vring_desc); j++)
-			vring_unmap_one_split(vq, &indir_desc[j]);
+			vring_unmap_one_split_indirect(vq, &indir_desc[j]);
 
 		kfree(indir_desc);
 		vq->split.desc_state[head].indir_desc = NULL;
@@ -739,7 +808,10 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq)
 
 	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
 		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
+		if (vq->event)
+			/* TODO: this is a hack. Figure out a cleaner value to write. */
+			vring_used_event(&vq->split.vring) = 0x0;
+		else
 			vq->split.vring.avail->flags =
 				cpu_to_virtio16(_vq->vdev,
 						vq->split.avail_flags_shadow);
@@ -912,7 +984,7 @@ static struct virtqueue *vring_create_virtqueue_split(
  */
 
 static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
-				     struct vring_desc_extra_packed *state)
+				     struct vring_desc_extra *state)
 {
 	u16 flags;
 
@@ -1061,7 +1133,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 				1 << VRING_PACKED_DESC_F_USED;
 	}
 	vq->packed.next_avail_idx = n;
-	vq->free_head = vq->packed.desc_state[id].next;
+	vq->free_head = vq->packed.desc_extra[id].next;
 
 	/* Store token and indirect buffer state. */
 	vq->packed.desc_state[id].num = 1;
@@ -1169,7 +1241,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 					le16_to_cpu(flags);
 		}
 		prev = curr;
-		curr = vq->packed.desc_state[curr].next;
+		curr = vq->packed.desc_extra[curr].next;
 
 		if ((unlikely(++i >= vq->packed.vring.num))) {
 			i = 0;
@@ -1213,13 +1285,16 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 unmap_release:
 	err_idx = i;
 	i = head;
+	curr = vq->free_head;
 
 	vq->packed.avail_used_flags = avail_used_flags;
 
 	for (n = 0; n < total_sg; n++) {
 		if (i == err_idx)
 			break;
-		vring_unmap_desc_packed(vq, &desc[i]);
+		vring_unmap_state_packed(vq,
+					 &vq->packed.desc_extra[curr]);
+		curr = vq->packed.desc_extra[curr].next;
 		i++;
 		if (i >= vq->packed.vring.num)
 			i = 0;
@@ -1290,7 +1365,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 	/* Clear data ptr. */
 	state->data = NULL;
 
-	vq->packed.desc_state[state->last].next = vq->free_head;
+	vq->packed.desc_extra[state->last].next = vq->free_head;
 	vq->free_head = id;
 	vq->vq.num_free += state->num;
 
@@ -1299,7 +1374,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 		for (i = 0; i < state->num; i++) {
 			vring_unmap_state_packed(vq,
 				&vq->packed.desc_extra[curr]);
-			curr = vq->packed.desc_state[curr].next;
+			curr = vq->packed.desc_extra[curr].next;
 		}
 	}
 
@@ -1550,6 +1625,25 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
 	return NULL;
 }
 
+static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
+						       unsigned int num)
+{
+	struct vring_desc_extra *desc_extra;
+	unsigned int i;
+
+	desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
+				   GFP_KERNEL);
+	if (!desc_extra)
+		return NULL;
+
+	memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
+
+	for (i = 0; i < num - 1; i++)
+		desc_extra[i].next = i + 1;
+
+	return desc_extra;
+}
+
 static struct virtqueue *vring_create_virtqueue_packed(
 	unsigned int index,
 	unsigned int num,
@@ -1567,7 +1661,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	struct vring_packed_desc_event *driver, *device;
 	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
 	size_t ring_size_in_bytes, event_size_in_bytes;
-	unsigned int i;
 
 	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
 
@@ -1605,6 +1698,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
+	vq->event_triggered = false;
 	vq->num_added = 0;
 	vq->packed_ring = true;
 	vq->use_dma_api = vring_use_dma_api(vdev);
@@ -1649,18 +1743,11 @@ static struct virtqueue *vring_create_virtqueue_packed(
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;
-	for (i = 0; i < num-1; i++)
-		vq->packed.desc_state[i].next = i + 1;
 
-	vq->packed.desc_extra = kmalloc_array(num,
-			sizeof(struct vring_desc_extra_packed),
-			GFP_KERNEL);
+	vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
 	if (!vq->packed.desc_extra)
 		goto err_desc_extra;
 
-	memset(vq->packed.desc_extra, 0,
-		num * sizeof(struct vring_desc_extra_packed));
-
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback) {
 		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
@@ -1875,7 +1962,7 @@ bool virtqueue_kick(struct virtqueue *vq)
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
 /**
- * virtqueue_get_buf - get the next used buffer
+ * virtqueue_get_buf_ctx - get the next used buffer
  * @_vq: the struct virtqueue we're talking about.
 * @len: the length written into the buffer
 * @ctx: extra context for the token
@@ -1919,6 +2006,12 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
+	/* If device triggered an event already it won't trigger one again:
+	 * no need to disable.
+	 */
+	if (vq->event_triggered)
+		return;
+
 	if (vq->packed_ring)
 		virtqueue_disable_cb_packed(_vq);
 	else
@@ -1942,6 +2035,9 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
+	if (vq->event_triggered)
+		vq->event_triggered = false;
+
 	return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
 				 virtqueue_enable_cb_prepare_split(_vq);
 }
@@ -2005,6 +2101,9 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
+	if (vq->event_triggered)
+		vq->event_triggered = false;
+
 	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
 				 virtqueue_enable_cb_delayed_split(_vq);
 }
@@ -2044,6 +2143,10 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
 	if (unlikely(vq->broken))
 		return IRQ_HANDLED;
 
+	/* Just a hint for performance: so it's ok that this can be racy! */
+	if (vq->event)
+		vq->event_triggered = true;
+
 	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
 	if (vq->vq.callback)
 		vq->vq.callback(&vq->vq);
@@ -2062,7 +2165,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					void (*callback)(struct virtqueue *),
 					const char *name)
 {
-	unsigned int i;
 	struct vring_virtqueue *vq;
 
 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
@@ -2083,6 +2185,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
+	vq->event_triggered = false;
 	vq->num_added = 0;
 	vq->use_dma_api = vring_use_dma_api(vdev);
 #ifdef DEBUG
@@ -2114,20 +2217,26 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 
 	vq->split.desc_state = kmalloc_array(vring.num,
 			sizeof(struct vring_desc_state_split), GFP_KERNEL);
-	if (!vq->split.desc_state) {
-		kfree(vq);
-		return NULL;
-	}
+	if (!vq->split.desc_state)
+		goto err_state;
+
+	vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
+	if (!vq->split.desc_extra)
+		goto err_extra;
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;
-	for (i = 0; i < vring.num-1; i++)
-		vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
 	memset(vq->split.desc_state, 0, vring.num *
 			sizeof(struct vring_desc_state_split));
 
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 	return &vq->vq;
+
+err_extra:
+	kfree(vq->split.desc_state);
+err_state:
+	kfree(vq);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
 
@@ -2208,8 +2317,10 @@ void vring_del_virtqueue(struct virtqueue *_vq)
 					 vq->split.queue_dma_addr);
 		}
 	}
-	if (!vq->packed_ring)
+	if (!vq->packed_ring) {
 		kfree(vq->split.desc_state);
+		kfree(vq->split.desc_extra);
+	}
 	list_del(&_vq->list);
 	kfree(vq);
 }
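[The virtio_ring.c changes above move the free-list next pointer and the DMA unmap metadata out of the device-visible descriptor table into the driver-private desc_extra array, so a buggy or malicious device can no longer steer the chain walk or the unmap path. A simplified, standalone sketch of the pattern — not the in-tree code, all names here are invented:]

```c
#include <stdint.h>

#define DEMO_F_NEXT 0x1 /* stands in for VRING_DESC_F_NEXT */

/* Driver-private shadow of each descriptor; the device never sees it. */
struct demo_desc_extra {
	uint64_t addr;  /* DMA address to unmap */
	uint32_t len;   /* mapped length */
	uint16_t flags; /* descriptor flags, private copy */
	uint16_t next;  /* chain / free-list link, private copy */
};

/* Walk a chain using only private state and splice it onto the free
 * list; returns the new free-list head. Device-writable memory is
 * never read, so the walk cannot be redirected. */
static uint16_t demo_detach_chain(struct demo_desc_extra *extra,
				  uint16_t head, uint16_t old_free_head)
{
	uint16_t i = head;

	while (extra[i].flags & DEMO_F_NEXT)
		i = extra[i].next;

	extra[i].next = old_free_head;
	return head;
}
```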
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index e28acf482e0c..e1a141135992 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -142,6 +142,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
 	struct vdpa_callback cb;
 	struct virtqueue *vq;
 	u64 desc_addr, driver_addr, device_addr;
+	/* Assume split virtqueue, switch to packed if necessary */
+	struct vdpa_vq_state state = {0};
 	unsigned long flags;
 	u32 align, num;
 	int err;
@@ -191,6 +193,19 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
 		goto err_vq;
 	}
 
+	/* reset virtqueue state index */
+	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+		struct vdpa_vq_state_packed *s = &state.packed;
+
+		s->last_avail_counter = 1;
+		s->last_avail_idx = 0;
+		s->last_used_counter = 1;
+		s->last_used_idx = 0;
+	}
+	err = ops->set_vq_state(vdpa, index, &state);
+	if (err)
+		goto err_vq;
+
 	ops->set_vq_ready(vdpa, index, 1);
 
 	vq->priv = info;
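[For reference, the state programmed above matches what a freshly reset virtqueue looks like: a zeroed vdpa_vq_state is already correct for split rings, while packed rings start with both wrap counters at 1 and both indexes at 0. Restated as a small helper — illustrative only, using the struct vdpa_vq_state types from include/linux/vdpa.h:]

```c
#include <linux/string.h>
#include <linux/vdpa.h>

static void demo_vq_state_after_reset(struct vdpa_vq_state *state, bool packed)
{
	memset(state, 0, sizeof(*state)); /* correct for split rings */

	if (packed) {
		/* Packed rings begin with wrap counters set to 1. */
		state->packed.last_avail_counter = 1;
		state->packed.last_used_counter = 1;
	}
}
```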