From ea024594b1dc5b6719c1400ae154690f5c203996 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:45 +0800 Subject: virtio_pci: struct virtio_pci_common_cfg add queue_notify_data Add queue_notify_data in struct virtio_pci_common_cfg, which comes from here https://github.com/oasis-tcs/virtio-spec/issues/89 In order not to affect the API, add a dedicated structure struct virtio_pci_modern_common_cfg to virtio_pci_modern.h. Since I want to add queue_reset after queue_notify_data, I submitted this patch first. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-26-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 3a86f36d7e3d..f5981a874481 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -202,6 +202,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_PCI_COMMON_Q_AVAILHI 44 #define VIRTIO_PCI_COMMON_Q_USEDLO 48 #define VIRTIO_PCI_COMMON_Q_USEDHI 52 +#define VIRTIO_PCI_COMMON_Q_NDATA 56 #endif /* VIRTIO_PCI_NO_MODERN */ -- cgit v1.2.3 From d94587b5bb5c4bba32fbc2bd92c86cc6de25167f Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:47 +0800 Subject: virtio: queue_reset: add VIRTIO_F_RING_RESET Added VIRTIO_F_RING_RESET, it came from here https://github.com/oasis-tcs/virtio-spec/issues/124 https://github.com/oasis-tcs/virtio-spec/issues/139 This feature indicates that the driver can reset a queue individually. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-28-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_config.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index f0fb0ae021c0..3c05162bc988 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -52,7 +52,7 @@ * rest are per-device feature bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 38 +#define VIRTIO_TRANSPORT_F_END 41 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -98,4 +98,9 @@ * Does the device support Single Root I/O Virtualization? */ #define VIRTIO_F_SR_IOV 37 + +/* + * This feature indicates that the driver can reset a queue individually. + */ +#define VIRTIO_F_RING_RESET 40 #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ -- cgit v1.2.3 From 0cdd450e70510c9e13af8099e9f6c1467e6a0b91 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:49 +0800 Subject: virtio_pci: struct virtio_pci_common_cfg add queue_reset Add queue_reset in virtio_pci_modern_common_cfg. https://github.com/oasis-tcs/virtio-spec/issues/124 https://github.com/oasis-tcs/virtio-spec/issues/139 Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-30-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_pci_modern.h | 2 +- include/uapi/linux/virtio_pci.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 41f5a018bd94..05123b9a606f 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -9,7 +9,7 @@ struct virtio_pci_modern_common_cfg { struct virtio_pci_common_cfg cfg; __le16 queue_notify_data; /* read-write */ - __le16 padding; + __le16 queue_reset; /* read-write */ }; struct virtio_pci_modern_device { diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index f5981a874481..f703afc7ad31 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -203,6 +203,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_PCI_COMMON_Q_USEDLO 48 #define VIRTIO_PCI_COMMON_Q_USEDHI 52 #define VIRTIO_PCI_COMMON_Q_NDATA 56 +#define VIRTIO_PCI_COMMON_Q_RESET 58 #endif /* VIRTIO_PCI_NO_MODERN */ -- cgit v1.2.3 From 699b045a8e43bd1063db4795be685bfd659649dc Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Mon, 18 Jul 2022 12:11:02 +0300 Subject: net: virtio_net: notifications coalescing support New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. Control a Virtio network device notifications coalescing parameters using the control virtqueue. A device that supports this fetature can receive VIRTIO_NET_CTRL_NOTF_COAL control commands. - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: Ask the network device to change the following parameters: - tx_usecs: Maximum number of usecs to delay a TX notification. - tx_max_packets: Maximum number of packets to send before a TX notification. - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: Ask the network device to change the following parameters: - rx_usecs: Maximum number of usecs to delay a RX notification. - rx_max_packets: Maximum number of packets to receive before a RX notification. VirtIO spec. patch: https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html Signed-off-by: Alvaro Karsz Message-Id: <20220718091102.498774-1-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Jakub Kicinski Acked-by: Jason Wang --- drivers/net/virtio_net.c | 111 ++++++++++++++++++++++++++++++++++------ include/uapi/linux/virtio_net.h | 34 +++++++++++- 2 files changed, 129 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 16783ed782c5..d9c434b00e9b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -270,6 +270,12 @@ struct virtnet_info { u8 duplex; u32 speed; + /* Interrupt coalescing settings */ + u32 tx_usecs; + u32 rx_usecs; + u32 tx_max_packets; + u32 rx_max_packets; + unsigned long guest_offloads; unsigned long guest_offloads_capable; @@ -2737,27 +2743,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, return 0; } +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, + struct ethtool_coalesce *ec) +{ + struct scatterlist sgs_tx, sgs_rx; + struct virtio_net_ctrl_coal_tx coal_tx; + struct virtio_net_ctrl_coal_rx coal_rx; + + coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); + coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); + sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, + VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, + &sgs_tx)) + return -EINVAL; + + /* Save parameters */ + vi->tx_usecs = ec->tx_coalesce_usecs; + vi->tx_max_packets = ec->tx_max_coalesced_frames; + + coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); + coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); + sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, + VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, + &sgs_rx)) + return -EINVAL; + + /* Save parameters */ + vi->rx_usecs = ec->rx_coalesce_usecs; + vi->rx_max_packets = ec->rx_max_coalesced_frames; + + return 0; +} + +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) +{ + /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL + * feature is negotiated. + */ + if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) + return -EOPNOTSUPP; + + if (ec->tx_max_coalesced_frames > 1 || + ec->rx_max_coalesced_frames != 1) + return -EINVAL; + + return 0; +} + static int virtnet_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); - int i, napi_weight; - - if (ec->tx_max_coalesced_frames > 1 || - ec->rx_max_coalesced_frames != 1) - return -EINVAL; + int ret, i, napi_weight; + bool update_napi = false; + /* Can't change NAPI weight if the link is up */ napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; if (napi_weight ^ vi->sq[0].napi.weight) { if (dev->flags & IFF_UP) return -EBUSY; + else + update_napi = true; + } + + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) + ret = virtnet_send_notf_coal_cmds(vi, ec); + else + ret = virtnet_coal_params_supported(ec); + + if (ret) + return ret; + + if (update_napi) { for (i = 0; i < vi->max_queue_pairs; i++) vi->sq[i].napi.weight = napi_weight; } - return 0; + return ret; } static int virtnet_get_coalesce(struct net_device *dev, @@ -2765,16 +2833,19 @@ static int virtnet_get_coalesce(struct net_device *dev, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - struct ethtool_coalesce ec_default = { - .cmd = ETHTOOL_GCOALESCE, - .rx_max_coalesced_frames = 1, - }; struct virtnet_info *vi = netdev_priv(dev); - memcpy(ec, &ec_default, sizeof(ec_default)); + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { + ec->rx_coalesce_usecs = vi->rx_usecs; + ec->tx_coalesce_usecs = vi->tx_usecs; + ec->tx_max_coalesced_frames = vi->tx_max_packets; + ec->rx_max_coalesced_frames = vi->rx_max_packets; + } else { + ec->rx_max_coalesced_frames = 1; - if (vi->sq[0].napi.weight) - ec->tx_max_coalesced_frames = 1; + if (vi->sq[0].napi.weight) + ec->tx_max_coalesced_frames = 1; + } return 0; } @@ -2893,7 +2964,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) } static const struct ethtool_ops virtnet_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USECS, .get_drvinfo = virtnet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = virtnet_get_ringparam, @@ -3606,6 +3678,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, "VIRTIO_NET_F_CTRL_VQ"))) { return false; } @@ -3742,6 +3816,13 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { + vi->rx_usecs = 0; + vi->tx_usecs = 0; + vi->tx_max_packets = 0; + vi->rx_max_packets = 0; + } + if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; @@ -3977,7 +4058,7 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL static unsigned int features[] = { VIRTNET_FEATURES, diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 3f55a4215f11..29ced55514d4 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -56,7 +56,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ - +#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ @@ -355,4 +355,36 @@ struct virtio_net_hash_config { #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 +/* + * Control notifications coalescing. + * + * Request the device to change the notifications coalescing parameters. + * + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit. + */ +#define VIRTIO_NET_CTRL_NOTF_COAL 6 +/* + * Set the tx-usecs/tx-max-packets patameters. + * tx-usecs - Maximum number of usecs to delay a TX notification. + * tx-max-packets - Maximum number of packets to send before a TX notification. + */ +struct virtio_net_ctrl_coal_tx { + __le32 tx_max_packets; + __le32 tx_usecs; +}; + +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 + +/* + * Set the rx-usecs/rx-max-packets patameters. + * rx-usecs - Maximum number of usecs to delay a RX notification. + * rx-max-frames - Maximum number of packets to receive before a RX notification. + */ +struct virtio_net_ctrl_coal_rx { + __le32 rx_max_packets; + __le32 rx_usecs; +}; + +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 + #endif /* _UAPI_LINUX_VIRTIO_NET_H */ -- cgit v1.2.3 From 79a463be9e0051997508d52cf411ed5e91d657f6 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 3 Aug 2022 12:55:22 +0800 Subject: vduse: Support registering userspace memory for IOVA regions Introduce two ioctls: VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM to support registering and de-registering userspace memory for IOVA regions. Now it only supports registering userspace memory for bounce buffer region in virtio-vdpa case. Signed-off-by: Xie Yongji Acked-by: Jason Wang Message-Id: <20220803045523.23851-5-xieyongji@bytedance.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 141 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/vduse.h | 23 ++++++ 2 files changed, 164 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 3bc27de58f46..eedff0a3885a 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -64,6 +66,13 @@ struct vduse_vdpa { struct vduse_dev *dev; }; +struct vduse_umem { + unsigned long iova; + unsigned long npages; + struct page **pages; + struct mm_struct *mm; +}; + struct vduse_dev { struct vduse_vdpa *vdev; struct device *dev; @@ -95,6 +104,8 @@ struct vduse_dev { u8 status; u32 vq_num; u32 vq_align; + struct vduse_umem *umem; + struct mutex mem_lock; }; struct vduse_dev_msg { @@ -917,6 +928,102 @@ unlock: return ret; } +static int vduse_dev_dereg_umem(struct vduse_dev *dev, + u64 iova, u64 size) +{ + int ret; + + mutex_lock(&dev->mem_lock); + ret = -ENOENT; + if (!dev->umem) + goto unlock; + + ret = -EINVAL; + if (dev->umem->iova != iova || size != dev->domain->bounce_size) + goto unlock; + + vduse_domain_remove_user_bounce_pages(dev->domain); + unpin_user_pages_dirty_lock(dev->umem->pages, + dev->umem->npages, true); + atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm); + mmdrop(dev->umem->mm); + vfree(dev->umem->pages); + kfree(dev->umem); + dev->umem = NULL; + ret = 0; +unlock: + mutex_unlock(&dev->mem_lock); + return ret; +} + +static int vduse_dev_reg_umem(struct vduse_dev *dev, + u64 iova, u64 uaddr, u64 size) +{ + struct page **page_list = NULL; + struct vduse_umem *umem = NULL; + long pinned = 0; + unsigned long npages, lock_limit; + int ret; + + if (!dev->domain->bounce_map || + size != dev->domain->bounce_size || + iova != 0 || uaddr & ~PAGE_MASK) + return -EINVAL; + + mutex_lock(&dev->mem_lock); + ret = -EEXIST; + if (dev->umem) + goto unlock; + + ret = -ENOMEM; + npages = size >> PAGE_SHIFT; + page_list = __vmalloc(array_size(npages, sizeof(struct page *)), + GFP_KERNEL_ACCOUNT); + umem = kzalloc(sizeof(*umem), GFP_KERNEL); + if (!page_list || !umem) + goto unlock; + + mmap_read_lock(current->mm); + + lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK)); + if (npages + atomic64_read(¤t->mm->pinned_vm) > lock_limit) + goto out; + + pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE, + page_list, NULL); + if (pinned != npages) { + ret = pinned < 0 ? pinned : -ENOMEM; + goto out; + } + + ret = vduse_domain_add_user_bounce_pages(dev->domain, + page_list, pinned); + if (ret) + goto out; + + atomic64_add(npages, ¤t->mm->pinned_vm); + + umem->pages = page_list; + umem->npages = pinned; + umem->iova = iova; + umem->mm = current->mm; + mmgrab(current->mm); + + dev->umem = umem; +out: + if (ret && pinned > 0) + unpin_user_pages(page_list, pinned); + + mmap_read_unlock(current->mm); +unlock: + if (ret) { + vfree(page_list); + kfree(umem); + } + mutex_unlock(&dev->mem_lock); + return ret; +} + static long vduse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1089,6 +1196,38 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject); break; } + case VDUSE_IOTLB_REG_UMEM: { + struct vduse_iova_umem umem; + + ret = -EFAULT; + if (copy_from_user(&umem, argp, sizeof(umem))) + break; + + ret = -EINVAL; + if (!is_mem_zero((const char *)umem.reserved, + sizeof(umem.reserved))) + break; + + ret = vduse_dev_reg_umem(dev, umem.iova, + umem.uaddr, umem.size); + break; + } + case VDUSE_IOTLB_DEREG_UMEM: { + struct vduse_iova_umem umem; + + ret = -EFAULT; + if (copy_from_user(&umem, argp, sizeof(umem))) + break; + + ret = -EINVAL; + if (!is_mem_zero((const char *)umem.reserved, + sizeof(umem.reserved))) + break; + + ret = vduse_dev_dereg_umem(dev, umem.iova, + umem.size); + break; + } default: ret = -ENOIOCTLCMD; break; @@ -1101,6 +1240,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file) { struct vduse_dev *dev = file->private_data; + vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size); spin_lock(&dev->msg_lock); /* Make sure the inflight messages can processed after reconncection */ list_splice_init(&dev->recv_list, &dev->send_list); @@ -1163,6 +1303,7 @@ static struct vduse_dev *vduse_dev_create(void) return NULL; mutex_init(&dev->lock); + mutex_init(&dev->mem_lock); spin_lock_init(&dev->msg_lock); INIT_LIST_HEAD(&dev->send_list); INIT_LIST_HEAD(&dev->recv_list); diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 7cfe1c1280c0..9885e0571f09 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -210,6 +210,29 @@ struct vduse_vq_eventfd { */ #define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32) +/** + * struct vduse_iova_umem - userspace memory configuration for one IOVA region + * @uaddr: start address of userspace memory, it must be aligned to page size + * @iova: start of the IOVA region + * @size: size of the IOVA region + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM + * ioctls to register/de-register userspace memory for IOVA regions + */ +struct vduse_iova_umem { + __u64 uaddr; + __u64 iova; + __u64 size; + __u64 reserved[3]; +}; + +/* Register userspace memory for IOVA regions */ +#define VDUSE_IOTLB_REG_UMEM _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem) + +/* De-register the userspace memory. Caller should set iova and size field. */ +#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem) + /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ /** -- cgit v1.2.3 From ad146355bfad627bd0717ece73997c6c93b1b82e Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 3 Aug 2022 12:55:23 +0800 Subject: vduse: Support querying information of IOVA regions This introduces a new ioctl: VDUSE_IOTLB_GET_INFO to support querying some information of IOVA regions. Now it can be used to query whether the IOVA region supports userspace memory registration. Signed-off-by: Xie Yongji Message-Id: <20220803045523.23851-6-xieyongji@bytedance.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_user/vduse_dev.c | 39 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/vduse.h | 24 +++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index eedff0a3885a..41c0b29739f1 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1228,6 +1228,45 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, umem.size); break; } + case VDUSE_IOTLB_GET_INFO: { + struct vduse_iova_info info; + struct vhost_iotlb_map *map; + struct vduse_iova_domain *domain = dev->domain; + + ret = -EFAULT; + if (copy_from_user(&info, argp, sizeof(info))) + break; + + ret = -EINVAL; + if (info.start > info.last) + break; + + if (!is_mem_zero((const char *)info.reserved, + sizeof(info.reserved))) + break; + + spin_lock(&domain->iotlb_lock); + map = vhost_iotlb_itree_first(domain->iotlb, + info.start, info.last); + if (map) { + info.start = map->start; + info.last = map->last; + info.capability = 0; + if (domain->bounce_map && map->start == 0 && + map->last == domain->bounce_size - 1) + info.capability |= VDUSE_IOVA_CAP_UMEM; + } + spin_unlock(&domain->iotlb_lock); + if (!map) + break; + + ret = -EFAULT; + if (copy_to_user(argp, &info, sizeof(info))) + break; + + ret = 0; + break; + } default: ret = -ENOIOCTLCMD; break; diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 9885e0571f09..11bd48c72c6c 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -233,6 +233,30 @@ struct vduse_iova_umem { /* De-register the userspace memory. Caller should set iova and size field. */ #define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem) +/** + * struct vduse_iova_info - information of one IOVA region + * @start: start of the IOVA region + * @last: last of the IOVA region + * @capability: capability of the IOVA regsion + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of + * one IOVA region. + */ +struct vduse_iova_info { + __u64 start; + __u64 last; +#define VDUSE_IOVA_CAP_UMEM (1 << 0) + __u64 capability; + __u64 reserved[3]; +}; + +/* + * Find the first IOVA region that overlaps with the range [start, last] + * and return some information on it. Caller should set start and last fields. + */ +#define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info) + /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ /** -- cgit v1.2.3 From 0723f1df5c3ec8a1112d150dab98e149361ef488 Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Wed, 10 Aug 2022 19:15:10 +0200 Subject: vhost-vdpa: introduce SUSPEND backend feature bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userland knows if it can suspend the device or not by checking this feature bit. It's only offered if the vdpa driver backend implements the suspend() operation callback, and to offer it or userland to ack it if the backend does not offer that callback is an error. Signed-off-by: Eugenio Pérez Message-Id: <20220810171512.2343333-3-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 16 +++++++++++++++- include/uapi/linux/vhost_types.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 2c997d77d266..092752fea8e1 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, return 0; } +static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->suspend; +} + static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -577,7 +585,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if (cmd == VHOST_SET_BACKEND_FEATURES) { if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; - if (features & ~VHOST_VDPA_BACKEND_FEATURES) + if (features & ~(VHOST_VDPA_BACKEND_FEATURES | + BIT_ULL(VHOST_BACKEND_F_SUSPEND))) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && + !vhost_vdpa_can_suspend(v)) return -EOPNOTSUPP; vhost_set_backend_features(&v->vdev, features); return 0; @@ -628,6 +640,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, break; case VHOST_GET_BACKEND_FEATURES: features = VHOST_VDPA_BACKEND_FEATURES; + if (vhost_vdpa_can_suspend(v)) + features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; break; diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 634cee485abb..1bdd6e363f4c 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -161,5 +161,7 @@ struct vhost_vdpa_iova_range { * message */ #define VHOST_BACKEND_F_IOTLB_ASID 0x3 +/* Device can be suspended */ +#define VHOST_BACKEND_F_SUSPEND 0x4 #endif -- cgit v1.2.3 From f345a0143b4dd1cfc850009c6979a3801b86a06f Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Wed, 10 Aug 2022 19:15:11 +0200 Subject: vhost-vdpa: uAPI to suspend the device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ioctl adds support for suspending the device from userspace. This is a must before getting virtqueue indexes (base) for live migration, since the device could modify them after userland gets them. There are individual ways to perform that action for some devices (VHOST_NET_SET_BACKEND, VHOST_VSOCK_SET_RUNNING, ...) but there was no way to perform it for any vhost device (and, in particular, vhost-vdpa). After a successful return of the ioctl call the device must not process more virtqueue descriptors. The device can answer to read or writes of config fields as if it were not suspended. In particular, writing to "queue_enable" with a value of 1 will not make the device start processing buffers of the virtqueue. Signed-off-by: Eugenio Pérez Message-Id: <20220810171512.2343333-4-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 19 +++++++++++++++++++ include/uapi/linux/vhost.h | 9 +++++++++ 2 files changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 092752fea8e1..166044642fd5 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -478,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) return 0; } +/* After a successful return of ioctl the device must not process more + * virtqueue descriptors. The device can answer to read or writes of config + * fields as if it were not suspended. In particular, writing to "queue_enable" + * with a value of 1 will not make the device start processing buffers. + */ +static long vhost_vdpa_suspend(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!ops->suspend) + return -EOPNOTSUPP; + + return ops->suspend(vdpa); +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -654,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_GET_VQS_COUNT: r = vhost_vdpa_get_vqs_count(v, argp); break; + case VHOST_VDPA_SUSPEND: + r = vhost_vdpa_suspend(v); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index cab645d4a645..f9f115a7c75b 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -171,4 +171,13 @@ #define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ struct vhost_vring_state) +/* Suspend a device so it does not process virtqueue requests anymore + * + * After the return of ioctl the device must preserve all the necessary state + * (the virtqueue vring base plus the possible device specific states) that is + * required for restoring in the future. The device must not change its + * configuration after that point. + */ +#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D) + #endif -- cgit v1.2.3