Diffstat (limited to 'drivers/vhost')
-rw-r--r--  drivers/vhost/net.c     202
-rw-r--r--  drivers/vhost/scsi.c     20
-rw-r--r--  drivers/vhost/vdpa.c     10
-rw-r--r--  drivers/vhost/vhost.c   133
-rw-r--r--  drivers/vhost/vhost.h    12
-rw-r--r--  drivers/vhost/vringh.c  132
-rw-r--r--  drivers/vhost/vsock.c    11
7 files changed, 285 insertions, 235 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c5902cc261e5..35ded4330431 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -69,12 +69,15 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
#define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN)
-enum {
- VHOST_NET_FEATURES = VHOST_FEATURES |
- (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
- (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
- (1ULL << VIRTIO_F_RING_RESET)
+static const u64 vhost_net_features[VIRTIO_FEATURES_DWORDS] = {
+ VHOST_FEATURES |
+ (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
+ (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
+ (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
+ (1ULL << VIRTIO_F_RING_RESET) |
+ (1ULL << VIRTIO_F_IN_ORDER),
+ VIRTIO_BIT(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) |
+ VIRTIO_BIT(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO),
};
enum {
@@ -379,7 +382,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
while (j) {
add = min(UIO_MAXIOV - nvq->done_idx, j);
vhost_add_used_and_signal_n(vq->dev, vq,
- &vq->heads[nvq->done_idx], add);
+ &vq->heads[nvq->done_idx],
+ NULL, add);
nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
j -= add;
}
@@ -454,7 +458,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
return vhost_poll_start(poll, sock->file);
}
-static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
+static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
+ unsigned int count)
{
struct vhost_virtqueue *vq = &nvq->vq;
struct vhost_dev *dev = vq->dev;
@@ -462,7 +467,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
if (!nvq->done_idx)
return;
- vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+ vhost_add_used_and_signal_n(dev, vq, vq->heads,
+ vq->nheads, count);
nvq->done_idx = 0;
}
@@ -471,6 +477,8 @@ static void vhost_tx_batch(struct vhost_net *net,
struct socket *sock,
struct msghdr *msghdr)
{
+ struct vhost_virtqueue *vq = &nvq->vq;
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
struct tun_msg_ctl ctl = {
.type = TUN_MSG_PTR,
.num = nvq->batched_xdp,
@@ -478,6 +486,11 @@ static void vhost_tx_batch(struct vhost_net *net,
};
int i, err;
+ if (in_order) {
+ vq->heads[0].len = 0;
+ vq->nheads[0] = nvq->done_idx;
+ }
+
if (nvq->batched_xdp == 0)
goto signal_used;
@@ -499,7 +512,7 @@ static void vhost_tx_batch(struct vhost_net *net,
}
signal_used:
- vhost_net_signal_used(nvq);
+ vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
nvq->batched_xdp = 0;
}
@@ -673,7 +686,6 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
struct socket *sock = vhost_vq_get_backend(vq);
struct virtio_net_hdr *gso;
struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp];
- struct tun_xdp_hdr *hdr;
size_t len = iov_iter_count(from);
int headroom = vhost_sock_xdp(sock) ? XDP_PACKET_HEADROOM : 0;
int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -696,15 +708,13 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
if (unlikely(!buf))
return -ENOMEM;
- copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso),
- sock_hlen, from);
- if (copied != sock_hlen) {
+ copied = copy_from_iter(buf + pad - sock_hlen, len, from);
+ if (copied != len) {
ret = -EFAULT;
goto err;
}
- hdr = buf;
- gso = &hdr->gso;
+ gso = buf + pad - sock_hlen;
if (!sock_hlen)
memset(buf, 0, pad);
@@ -723,16 +733,11 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
}
}
- len -= sock_hlen;
- copied = copy_from_iter(buf + pad, len, from);
- if (copied != len) {
- ret = -EFAULT;
- goto err;
- }
+ /* pad contains sock_hlen */
+ memcpy(buf, buf + pad - sock_hlen, sock_hlen);
xdp_init_buff(xdp, buflen, NULL);
- xdp_prepare_buff(xdp, buf, pad, len, true);
- hdr->buflen = buflen;
+ xdp_prepare_buff(xdp, buf, pad, len - sock_hlen, true);
++nvq->batched_xdp;
@@ -760,6 +765,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
int err;
int sent_pkts = 0;
bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
do {
bool busyloop_intr = false;
@@ -805,11 +811,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
break;
}
- /* We can't build XDP buff, go for single
- * packet path but let's flush batched
- * packets.
- */
- vhost_tx_batch(net, nvq, sock, &msg);
+ if (nvq->batched_xdp) {
+ /* We can't build XDP buff, go for single
+ * packet path but let's flush batched
+ * packets.
+ */
+ vhost_tx_batch(net, nvq, sock, &msg);
+ }
msg.msg_control = NULL;
} else {
if (tx_can_batch(vq, total_len))
@@ -830,8 +838,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
pr_debug("Truncated TX packet: len %d != %zd\n",
err, len);
done:
- vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
- vq->heads[nvq->done_idx].len = 0;
+ if (in_order) {
+ vq->heads[0].id = cpu_to_vhost32(vq, head);
+ } else {
+ vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
+ vq->heads[nvq->done_idx].len = 0;
+ }
++nvq->done_idx;
} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
@@ -995,7 +1007,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
}
static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
- bool *busyloop_intr)
+ bool *busyloop_intr, unsigned int *count)
{
struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
@@ -1005,7 +1017,8 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
if (!len && rvq->busyloop_timeout) {
/* Flush batched heads first */
- vhost_net_signal_used(rnvq);
+ vhost_net_signal_used(rnvq, *count);
+ *count = 0;
/* Both tx vq and rx socket were polled here */
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
@@ -1017,7 +1030,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
/* This is a multi-buffer version of vhost_get_desc, that works if
* vq has read descriptors only.
- * @vq - the relevant virtqueue
+ * @nvq - the relevant vhost_net virtqueue
* @datalen - data length we'll be reading
* @iovcount - returned count of io vectors we fill
* @log - vhost log
@@ -1025,14 +1038,17 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
* @quota - headcount quota, 1 for big buffer
* returns number of buffer heads allocated, negative on error
*/
-static int get_rx_bufs(struct vhost_virtqueue *vq,
+static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
struct vring_used_elem *heads,
+ u16 *nheads,
int datalen,
unsigned *iovcount,
struct vhost_log *log,
unsigned *log_num,
unsigned int quota)
{
+ struct vhost_virtqueue *vq = &nvq->vq;
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
unsigned int out, in;
int seg = 0;
int headcount = 0;
@@ -1069,14 +1085,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
nlogs += *log_num;
log += *log_num;
}
- heads[headcount].id = cpu_to_vhost32(vq, d);
len = iov_length(vq->iov + seg, in);
- heads[headcount].len = cpu_to_vhost32(vq, len);
- datalen -= len;
+ if (!in_order) {
+ heads[headcount].id = cpu_to_vhost32(vq, d);
+ heads[headcount].len = cpu_to_vhost32(vq, len);
+ }
++headcount;
+ datalen -= len;
seg += in;
}
- heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+
*iovcount = seg;
if (unlikely(log))
*log_num = nlogs;
@@ -1086,6 +1104,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
r = UIO_MAXIOV + 1;
goto err;
}
+
+ if (!in_order)
+ heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+ else {
+ heads[0].len = cpu_to_vhost32(vq, len + datalen);
+ heads[0].id = cpu_to_vhost32(vq, d);
+ nheads[0] = headcount;
+ }
+
return headcount;
err:
vhost_discard_vq_desc(vq, headcount);
@@ -1098,6 +1125,8 @@ static void handle_rx(struct vhost_net *net)
{
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_virtqueue *vq = &nvq->vq;
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+ unsigned int count = 0;
unsigned in, log;
struct vhost_log *vq_log;
struct msghdr msg = {
@@ -1145,12 +1174,13 @@ static void handle_rx(struct vhost_net *net)
do {
sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
- &busyloop_intr);
+ &busyloop_intr, &count);
if (!sock_len)
break;
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
- headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
+ headcount = get_rx_bufs(nvq, vq->heads + count,
+ vq->nheads + count,
vhost_len, &in, vq_log, &log,
likely(mergeable) ? UIO_MAXIOV : 1);
/* On error, stop handling until the next kick. */
@@ -1226,8 +1256,11 @@ static void handle_rx(struct vhost_net *net)
goto out;
}
nvq->done_idx += headcount;
- if (nvq->done_idx > VHOST_NET_BATCH)
- vhost_net_signal_used(nvq);
+ count += in_order ? 1 : headcount;
+ if (nvq->done_idx > VHOST_NET_BATCH) {
+ vhost_net_signal_used(nvq, count);
+ count = 0;
+ }
if (unlikely(vq_log))
vhost_log_write(vq, vq_log, log, vhost_len,
vq->iov, in);
@@ -1239,7 +1272,7 @@ static void handle_rx(struct vhost_net *net)
else if (!sock_len)
vhost_net_enable_vq(net, vq);
out:
- vhost_net_signal_used(nvq);
+ vhost_net_signal_used(nvq, count);
mutex_unlock(&vq->mutex);
}
@@ -1612,16 +1645,23 @@ done:
return err;
}
-static int vhost_net_set_features(struct vhost_net *n, u64 features)
+static int vhost_net_set_features(struct vhost_net *n, const u64 *features)
{
size_t vhost_hlen, sock_hlen, hdr_len;
int i;
- hdr_len = (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_F_VERSION_1))) ?
- sizeof(struct virtio_net_hdr_mrg_rxbuf) :
- sizeof(struct virtio_net_hdr);
- if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {
+ hdr_len = virtio_features_test_bit(features, VIRTIO_NET_F_MRG_RXBUF) ||
+ virtio_features_test_bit(features, VIRTIO_F_VERSION_1) ?
+ sizeof(struct virtio_net_hdr_mrg_rxbuf) :
+ sizeof(struct virtio_net_hdr);
+
+ if (virtio_features_test_bit(features,
+ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO) ||
+ virtio_features_test_bit(features,
+ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO))
+ hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel);
+
+ if (virtio_features_test_bit(features, VHOST_NET_F_VIRTIO_NET_HDR)) {
/* vhost provides vnet_hdr */
vhost_hlen = hdr_len;
sock_hlen = 0;
@@ -1631,18 +1671,19 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
sock_hlen = hdr_len;
}
mutex_lock(&n->dev.mutex);
- if ((features & (1 << VHOST_F_LOG_ALL)) &&
+ if (virtio_features_test_bit(features, VHOST_F_LOG_ALL) &&
!vhost_log_access_ok(&n->dev))
goto out_unlock;
- if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
+ if (virtio_features_test_bit(features, VIRTIO_F_ACCESS_PLATFORM)) {
if (vhost_init_device_iotlb(&n->dev))
goto out_unlock;
}
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].vq.mutex);
- n->vqs[i].vq.acked_features = features;
+ virtio_features_copy(n->vqs[i].vq.acked_features_array,
+ features);
n->vqs[i].vhost_hlen = vhost_hlen;
n->vqs[i].sock_hlen = sock_hlen;
mutex_unlock(&n->vqs[i].vq.mutex);
@@ -1679,12 +1720,13 @@ out:
static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
unsigned long arg)
{
+ u64 all_features[VIRTIO_FEATURES_DWORDS];
struct vhost_net *n = f->private_data;
void __user *argp = (void __user *)arg;
u64 __user *featurep = argp;
struct vhost_vring_file backend;
- u64 features;
- int r;
+ u64 features, count, copied;
+ int r, i;
switch (ioctl) {
case VHOST_NET_SET_BACKEND:
@@ -1692,16 +1734,60 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
return -EFAULT;
return vhost_net_set_backend(n, backend.index, backend.fd);
case VHOST_GET_FEATURES:
- features = VHOST_NET_FEATURES;
+ features = vhost_net_features[0];
if (copy_to_user(featurep, &features, sizeof features))
return -EFAULT;
return 0;
case VHOST_SET_FEATURES:
if (copy_from_user(&features, featurep, sizeof features))
return -EFAULT;
- if (features & ~VHOST_NET_FEATURES)
+ if (features & ~vhost_net_features[0])
return -EOPNOTSUPP;
- return vhost_net_set_features(n, features);
+
+ virtio_features_from_u64(all_features, features);
+ return vhost_net_set_features(n, all_features);
+ case VHOST_GET_FEATURES_ARRAY:
+ if (copy_from_user(&count, featurep, sizeof(count)))
+ return -EFAULT;
+
+ /* Copy the net features, up to the user-provided buffer size */
+ argp += sizeof(u64);
+ copied = min(count, VIRTIO_FEATURES_DWORDS);
+ if (copy_to_user(argp, vhost_net_features,
+ copied * sizeof(u64)))
+ return -EFAULT;
+
+ /* Zero the trailing space provided by user-space, if any */
+ if (clear_user(argp, size_mul(count - copied, sizeof(u64))))
+ return -EFAULT;
+ return 0;
+ case VHOST_SET_FEATURES_ARRAY:
+ if (copy_from_user(&count, featurep, sizeof(count)))
+ return -EFAULT;
+
+ virtio_features_zero(all_features);
+ argp += sizeof(u64);
+ copied = min(count, VIRTIO_FEATURES_DWORDS);
+ if (copy_from_user(all_features, argp, copied * sizeof(u64)))
+ return -EFAULT;
+
+ /*
+ * Any feature specified by user-space above
+ * VIRTIO_FEATURES_MAX is not supported by definition.
+ */
+ for (i = copied; i < count; ++i) {
+ if (copy_from_user(&features, featurep + 1 + i,
+ sizeof(features)))
+ return -EFAULT;
+ if (features)
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++)
+ if (all_features[i] & ~vhost_net_features[i])
+ return -EOPNOTSUPP;
+
+ return vhost_net_set_features(n, all_features);
case VHOST_GET_BACKEND_FEATURES:
features = VHOST_NET_BACKEND_FEATURES;
if (copy_to_user(featurep, &features, sizeof(features)))
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 63b0829391eb..98e4f68f4e3c 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -152,7 +152,7 @@ struct vhost_scsi_nexus {
struct vhost_scsi_tpg {
/* Vhost port target portal group tag for TCM */
u16 tport_tpgt;
- /* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
+ /* Used to track number of TPG Port/Lun Links wrt to explicit I_T Nexus shutdown */
int tv_tpg_port_count;
/* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
int tv_tpg_vhost_count;
@@ -311,12 +311,12 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs,
mutex_lock(&vq->mutex);
- /* store old infight */
+ /* store old inflight */
idx = vs->vqs[i].inflight_idx;
if (old_inflight)
old_inflight[i] = &vs->vqs[i].inflights[idx];
- /* setup new infight */
+ /* setup new inflight */
vs->vqs[i].inflight_idx = idx ^ 1;
new_inflight = &vs->vqs[i].inflights[idx ^ 1];
kref_init(&new_inflight->kref);
@@ -1247,7 +1247,7 @@ vhost_scsi_setup_resp_iovs(struct vhost_scsi_cmd *cmd, struct iovec *in_iovs,
if (!in_iovs_cnt)
return 0;
/*
- * Initiator's normally just put the virtio_scsi_cmd_resp in the first
+ * Initiators normally just put the virtio_scsi_cmd_resp in the first
* iov, but just in case they wedged in some data with it we check for
* greater than or equal to the response struct.
*/
@@ -1455,7 +1455,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
cmd = vhost_scsi_get_cmd(vq, tag);
if (IS_ERR(cmd)) {
ret = PTR_ERR(cmd);
- vq_err(vq, "vhost_scsi_get_tag failed %dd\n", ret);
+ vq_err(vq, "vhost_scsi_get_tag failed %d\n", ret);
goto err;
}
cmd->tvc_vq = vq;
@@ -2607,7 +2607,7 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
return -ENOMEM;
}
/*
- * Since we are running in 'demo mode' this call with generate a
+ * Since we are running in 'demo mode' this call will generate a
* struct se_node_acl for the vhost_scsi struct se_portal_group with
* the SCSI Initiator port name of the passed configfs group 'name'.
*/
@@ -2884,7 +2884,7 @@ vhost_scsi_make_tport(struct target_fabric_configfs *tf,
check_len:
if (strlen(name) >= VHOST_SCSI_NAMELEN) {
pr_err("Emulated %s Address: %s, exceeds"
- " max: %d\n", name, vhost_scsi_dump_proto_id(tport),
+ " max: %d\n", vhost_scsi_dump_proto_id(tport), name,
VHOST_SCSI_NAMELEN);
kfree(tport);
return ERR_PTR(-EINVAL);
@@ -2913,7 +2913,7 @@ static ssize_t
vhost_scsi_wwn_version_show(struct config_item *item, char *page)
{
return sysfs_emit(page, "TCM_VHOST fabric module %s on %s/%s"
- "on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname,
+ " on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname,
utsname()->machine);
}
@@ -2981,13 +2981,13 @@ out_vhost_scsi_deregister:
vhost_scsi_deregister();
out:
return ret;
-};
+}
static void vhost_scsi_exit(void)
{
target_unregister_template(&vhost_scsi_ops);
vhost_scsi_deregister();
-};
+}
MODULE_DESCRIPTION("VHOST_SCSI series fabric driver");
MODULE_ALIAS("tcm_vhost");
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 5a49b5a6d496..af1e1fdfd9ed 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -212,11 +212,11 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
if (!vq->call_ctx.ctx)
return;
- vq->call_ctx.producer.irq = irq;
- ret = irq_bypass_register_producer(&vq->call_ctx.producer);
+ ret = irq_bypass_register_producer(&vq->call_ctx.producer,
+ vq->call_ctx.ctx, irq);
if (unlikely(ret))
- dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
- qid, vq->call_ctx.producer.token, ret);
+ dev_info(&v->dev, "vq %u, irq bypass producer (eventfd %p) registration fails, ret = %d\n",
+ qid, vq->call_ctx.ctx, ret);
}
static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
@@ -712,7 +712,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
if (ops->get_status(vdpa) &
VIRTIO_CONFIG_S_DRIVER_OK)
vhost_vdpa_unsetup_vq_irq(v, idx);
- vq->call_ctx.producer.token = NULL;
}
break;
}
@@ -753,7 +752,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
cb.callback = vhost_vdpa_virtqueue_cb;
cb.private = vq;
cb.trigger = vq->call_ctx.ctx;
- vq->call_ctx.producer.token = vq->call_ctx.ctx;
if (ops->get_status(vdpa) &
VIRTIO_CONFIG_S_DRIVER_OK)
vhost_vdpa_setup_vq_irq(v, idx);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 478eca3cf113..8570fdf2e14a 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -372,6 +372,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->avail = NULL;
vq->used = NULL;
vq->last_avail_idx = 0;
+ vq->next_avail_head = 0;
vq->avail_idx = 0;
vq->last_used_idx = 0;
vq->signalled_used = 0;
@@ -380,7 +381,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->log_used = false;
vq->log_addr = -1ull;
vq->private_data = NULL;
- vq->acked_features = 0;
+ virtio_features_zero(vq->acked_features_array);
vq->acked_backend_features = 0;
vq->log_base = NULL;
vq->error_ctx = NULL;
@@ -501,6 +502,8 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
vq->log = NULL;
kfree(vq->heads);
vq->heads = NULL;
+ kfree(vq->nheads);
+ vq->nheads = NULL;
}
/* Helper to allocate iovec buffers for all vqs. */
@@ -518,7 +521,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
GFP_KERNEL);
vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
GFP_KERNEL);
- if (!vq->indirect || !vq->log || !vq->heads)
+ vq->nheads = kmalloc_array(dev->iov_limit, sizeof(*vq->nheads),
+ GFP_KERNEL);
+ if (!vq->indirect || !vq->log || !vq->heads || !vq->nheads)
goto err_nomem;
}
return 0;
@@ -610,6 +615,7 @@ void vhost_dev_init(struct vhost_dev *dev,
vq->log = NULL;
vq->indirect = NULL;
vq->heads = NULL;
+ vq->nheads = NULL;
vq->dev = dev;
mutex_init(&vq->mutex);
vhost_vq_reset(dev, vq);
@@ -681,10 +687,10 @@ static void vhost_attach_mm(struct vhost_dev *dev)
if (dev->use_worker) {
dev->mm = get_task_mm(current);
} else {
- /* vDPA device does not use worker thead, so there's
- * no need to hold the address space for mm. This help
+ /* vDPA device does not use worker thread, so there's
+ * no need to hold the address space for mm. This helps
* to avoid deadlock in the case of mmap() which may
- * held the refcnt of the file and depends on release
+ * hold the refcnt of the file and depends on release
* method to remove vma.
*/
dev->mm = current->mm;
@@ -891,7 +897,7 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
* We don't want to call synchronize_rcu for every vq during setup
* because it will slow down VM startup. If we haven't done
* VHOST_SET_VRING_KICK and not done the driver specific
- * SET_ENDPOINT/RUNNUNG then we can skip the sync since there will
+ * SET_ENDPOINT/RUNNING then we can skip the sync since there will
* not be any works queued for scsi and net.
*/
mutex_lock(&vq->mutex);
@@ -2159,14 +2165,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
break;
}
if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
- vq->last_avail_idx = s.num & 0xffff;
+ vq->next_avail_head = vq->last_avail_idx =
+ s.num & 0xffff;
vq->last_used_idx = (s.num >> 16) & 0xffff;
} else {
if (s.num > 0xffff) {
r = -EINVAL;
break;
}
- vq->last_avail_idx = s.num;
+ vq->next_avail_head = vq->last_avail_idx = s.num;
}
/* Forget the cached index value. */
vq->avail_idx = vq->last_avail_idx;
@@ -2798,11 +2805,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
unsigned int *out_num, unsigned int *in_num,
struct vhost_log *log, unsigned int *log_num)
{
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
struct vring_desc desc;
unsigned int i, head, found = 0;
u16 last_avail_idx = vq->last_avail_idx;
__virtio16 ring_head;
- int ret, access;
+ int ret, access, c = 0;
if (vq->avail_idx == vq->last_avail_idx) {
ret = vhost_get_avail_idx(vq);
@@ -2813,17 +2821,21 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
return vq->num;
}
- /* Grab the next descriptor number they're advertising, and increment
- * the index we've seen. */
- if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
- vq_err(vq, "Failed to read head: idx %d address %p\n",
- last_avail_idx,
- &vq->avail->ring[last_avail_idx % vq->num]);
- return -EFAULT;
+ if (in_order)
+ head = vq->next_avail_head & (vq->num - 1);
+ else {
+ /* Grab the next descriptor number they're
+ * advertising, and increment the index we've seen. */
+ if (unlikely(vhost_get_avail_head(vq, &ring_head,
+ last_avail_idx))) {
+ vq_err(vq, "Failed to read head: idx %d address %p\n",
+ last_avail_idx,
+ &vq->avail->ring[last_avail_idx % vq->num]);
+ return -EFAULT;
+ }
+ head = vhost16_to_cpu(vq, ring_head);
}
- head = vhost16_to_cpu(vq, ring_head);
-
/* If their number is silly, that's an error. */
if (unlikely(head >= vq->num)) {
vq_err(vq, "Guest says index %u > %u is available",
@@ -2866,6 +2878,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
"in indirect descriptor at idx %d\n", i);
return ret;
}
+ ++c;
continue;
}
@@ -2901,10 +2914,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
}
*out_num += ret;
}
+ ++c;
} while ((i = next_desc(vq, &desc)) != -1);
/* On success, increment avail index. */
vq->last_avail_idx++;
+ vq->next_avail_head += c;
/* Assume notifications from guest are disabled at this point,
* if they aren't we would need to update avail_event index. */
@@ -2928,8 +2943,9 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
cpu_to_vhost32(vq, head),
cpu_to_vhost32(vq, len)
};
+ u16 nheads = 1;
- return vhost_add_used_n(vq, &heads, 1);
+ return vhost_add_used_n(vq, &heads, &nheads, 1);
}
EXPORT_SYMBOL_GPL(vhost_add_used);
@@ -2965,10 +2981,9 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
return 0;
}
-/* After we've used one of their buffers, we tell them about it. We'll then
- * want to notify the guest, using eventfd. */
-int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
- unsigned count)
+static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq,
+ struct vring_used_elem *heads,
+ unsigned count)
{
int start, n, r;
@@ -2981,7 +2996,69 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
heads += n;
count -= n;
}
- r = __vhost_add_used_n(vq, heads, count);
+ return __vhost_add_used_n(vq, heads, count);
+}
+
+static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq,
+ struct vring_used_elem *heads,
+ const u16 *nheads,
+ unsigned count)
+{
+ vring_used_elem_t __user *used;
+ u16 old, new = vq->last_used_idx;
+ int start, i;
+
+ if (!nheads)
+ return -EINVAL;
+
+ start = vq->last_used_idx & (vq->num - 1);
+ used = vq->used->ring + start;
+
+ for (i = 0; i < count; i++) {
+ if (vhost_put_used(vq, &heads[i], start, 1)) {
+ vq_err(vq, "Failed to write used");
+ return -EFAULT;
+ }
+ start += nheads[i];
+ new += nheads[i];
+ if (start >= vq->num)
+ start -= vq->num;
+ }
+
+ if (unlikely(vq->log_used)) {
+ /* Make sure data is seen before log. */
+ smp_wmb();
+ /* Log used ring entry write. */
+ log_used(vq, ((void __user *)used - (void __user *)vq->used),
+ (vq->num - start) * sizeof *used);
+ if (start + count > vq->num)
+ log_used(vq, 0,
+ (start + count - vq->num) * sizeof *used);
+ }
+
+ old = vq->last_used_idx;
+ vq->last_used_idx = new;
+ /* If the driver never bothers to signal in a very long while,
+ * used index might wrap around. If that happens, invalidate
+ * signalled_used index we stored. TODO: make sure driver
+ * signals at least once in 2^16 and remove this. */
+ if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
+ vq->signalled_used_valid = false;
+ return 0;
+}
+
+/* After we've used one of their buffers, we tell them about it. We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
+ u16 *nheads, unsigned count)
+{
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+ int r;
+
+ if (!in_order || !nheads)
+ r = vhost_add_used_n_ooo(vq, heads, count);
+ else
+ r = vhost_add_used_n_in_order(vq, heads, nheads, count);
if (r < 0)
return r;
@@ -3064,14 +3141,16 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
/* multi-buffer version of vhost_add_used_and_signal */
void vhost_add_used_and_signal_n(struct vhost_dev *dev,
struct vhost_virtqueue *vq,
- struct vring_used_elem *heads, unsigned count)
+ struct vring_used_elem *heads,
+ u16 *nheads,
+ unsigned count)
{
- vhost_add_used_n(vq, heads, count);
+ vhost_add_used_n(vq, heads, nheads, count);
vhost_signal(dev, vq);
}
EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
-/* return true if we're sure that avaiable ring is empty */
+/* return true if we're sure that available ring is empty */
bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
int r;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index ab704d84fb34..621a6d9a8791 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -115,6 +115,8 @@ struct vhost_virtqueue {
* Values are limited to 0x7fff, and the high bit is used as
* a wrap counter when using VIRTIO_F_RING_PACKED. */
u16 last_avail_idx;
+ /* Next avail ring head when VIRTIO_F_IN_ORDER is negotiated */
+ u16 next_avail_head;
/* Caches available index value from user. */
u16 avail_idx;
@@ -141,11 +143,12 @@ struct vhost_virtqueue {
struct iovec iotlb_iov[64];
struct iovec *indirect;
struct vring_used_elem *heads;
+ u16 *nheads;
/* Protected by virtqueue mutex. */
struct vhost_iotlb *umem;
struct vhost_iotlb *iotlb;
void *private_data;
- u64 acked_features;
+ VIRTIO_DECLARE_FEATURES(acked_features);
u64 acked_backend_features;
/* Log write descriptors */
void __user *log_base;
@@ -235,11 +238,12 @@ bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
int vhost_vq_init_access(struct vhost_virtqueue *);
int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
- unsigned count);
+ u16 *nheads, unsigned count);
void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
unsigned int id, int len);
void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
- struct vring_used_elem *heads, unsigned count);
+ struct vring_used_elem *heads, u16 *nheads,
+ unsigned count);
void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);
@@ -313,7 +317,7 @@ static inline void *vhost_vq_get_backend(struct vhost_virtqueue *vq)
static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
{
- return vq->acked_features & (1ULL << bit);
+ return virtio_features_test_bit(vq->acked_features_array, bit);
}
static inline bool vhost_backend_has_feature(struct vhost_virtqueue *vq, int bit)
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index bbce65452701..925858cc6096 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -780,22 +780,6 @@ ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
EXPORT_SYMBOL(vringh_iov_push_user);
/**
- * vringh_abandon_user - we've decided not to handle the descriptor(s).
- * @vrh: the vring.
- * @num: the number of descriptors to put back (ie. num
- * vringh_get_user() to undo).
- *
- * The next vringh_get_user() will return the old descriptor(s) again.
- */
-void vringh_abandon_user(struct vringh *vrh, unsigned int num)
-{
- /* We only update vring_avail_event(vr) when we want to be notified,
- * so we haven't changed that yet. */
- vrh->last_avail_idx -= num;
-}
-EXPORT_SYMBOL(vringh_abandon_user);
-
-/**
* vringh_complete_user - we've finished with descriptor, publish it.
* @vrh: the vring.
* @head: the head as filled in by vringh_getdesc_user.
@@ -900,20 +884,6 @@ static inline int putused_kern(const struct vringh *vrh,
return 0;
}
-static inline int xfer_kern(const struct vringh *vrh, void *src,
- void *dst, size_t len)
-{
- memcpy(dst, src, len);
- return 0;
-}
-
-static inline int kern_xfer(const struct vringh *vrh, void *dst,
- void *src, size_t len)
-{
- memcpy(dst, src, len);
- return 0;
-}
-
/**
* vringh_init_kern - initialize a vringh for a kernelspace vring.
* @vrh: the vringh to initialize.
@@ -999,51 +969,6 @@ int vringh_getdesc_kern(struct vringh *vrh,
EXPORT_SYMBOL(vringh_getdesc_kern);
/**
- * vringh_iov_pull_kern - copy bytes from vring_iov.
- * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
- * @dst: the place to copy.
- * @len: the maximum length to copy.
- *
- * Returns the bytes copied <= len or a negative errno.
- */
-ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
-{
- return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
-}
-EXPORT_SYMBOL(vringh_iov_pull_kern);
-
-/**
- * vringh_iov_push_kern - copy bytes into vring_iov.
- * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
- * @src: the place to copy from.
- * @len: the maximum length to copy.
- *
- * Returns the bytes copied <= len or a negative errno.
- */
-ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
- const void *src, size_t len)
-{
- return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
-}
-EXPORT_SYMBOL(vringh_iov_push_kern);
-
-/**
- * vringh_abandon_kern - we've decided not to handle the descriptor(s).
- * @vrh: the vring.
- * @num: the number of descriptors to put back (ie. num
- * vringh_get_kern() to undo).
- *
- * The next vringh_get_kern() will return the old descriptor(s) again.
- */
-void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
-{
- /* We only update vring_avail_event(vr) when we want to be notified,
- * so we haven't changed that yet. */
- vrh->last_avail_idx -= num;
-}
-EXPORT_SYMBOL(vringh_abandon_kern);
-
-/**
* vringh_complete_kern - we've finished with descriptor, publish it.
* @vrh: the vring.
* @head: the head as filled in by vringh_getdesc_kern.
@@ -1190,6 +1115,7 @@ static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
struct iov_iter iter;
u64 translated;
int ret;
+ size_t size;
ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
len - total_translated, &translated,
@@ -1207,9 +1133,9 @@ static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
translated);
}
- ret = copy_from_iter(dst, translated, &iter);
- if (ret < 0)
- return ret;
+ size = copy_from_iter(dst, translated, &iter);
+ if (size != translated)
+ return -EFAULT;
src += translated;
dst += translated;
@@ -1236,6 +1162,7 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
struct iov_iter iter;
u64 translated;
int ret;
+ size_t size;
ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
len - total_translated, &translated,
@@ -1253,9 +1180,9 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
translated);
}
- ret = copy_to_iter(src, translated, &iter);
- if (ret < 0)
- return ret;
+ size = copy_to_iter(src, translated, &iter);
+ if (size != translated)
+ return -EFAULT;
src += translated;
dst += translated;
@@ -1535,23 +1462,6 @@ ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
EXPORT_SYMBOL(vringh_iov_push_iotlb);
/**
- * vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
- * @vrh: the vring.
- * @num: the number of descriptors to put back (ie. num
- * vringh_get_iotlb() to undo).
- *
- * The next vringh_get_iotlb() will return the old descriptor(s) again.
- */
-void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
-{
- /* We only update vring_avail_event(vr) when we want to be notified,
- * so we haven't changed that yet.
- */
- vrh->last_avail_idx -= num;
-}
-EXPORT_SYMBOL(vringh_abandon_iotlb);
-
-/**
* vringh_complete_iotlb - we've finished with descriptor, publish it.
* @vrh: the vring.
* @head: the head as filled in by vringh_getdesc_iotlb.
@@ -1572,32 +1482,6 @@ int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
EXPORT_SYMBOL(vringh_complete_iotlb);
/**
- * vringh_notify_enable_iotlb - we want to know if something changes.
- * @vrh: the vring.
- *
- * This always enables notifications, but returns false if there are
- * now more buffers available in the vring.
- */
-bool vringh_notify_enable_iotlb(struct vringh *vrh)
-{
- return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
-}
-EXPORT_SYMBOL(vringh_notify_enable_iotlb);
-
-/**
- * vringh_notify_disable_iotlb - don't tell us if something changes.
- * @vrh: the vring.
- *
- * This is our normal running state: we disable and then only enable when
- * we're going to sleep.
- */
-void vringh_notify_disable_iotlb(struct vringh *vrh)
-{
- __vringh_notify_disable(vrh, putu16_iotlb);
-}
-EXPORT_SYMBOL(vringh_notify_disable_iotlb);
-
-/**
* vringh_need_notify_iotlb - must we tell the other side about used buffers?
* @vrh: the vring we've called vringh_complete_iotlb() on.
*
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 66a0f060770e..ae01457ea2cd 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -344,7 +344,8 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
len = iov_length(vq->iov, out);
- if (len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM)
+ if (len < VIRTIO_VSOCK_SKB_HEADROOM ||
+ len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM)
return NULL;
/* len contains both payload and hdr */
@@ -375,12 +376,10 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
return NULL;
}
- virtio_vsock_skb_rx_put(skb);
+ virtio_vsock_skb_put(skb, payload_len);
- nbytes = copy_from_iter(skb->data, payload_len, &iov_iter);
- if (nbytes != payload_len) {
- vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
- payload_len, nbytes);
+ if (skb_copy_datagram_from_iter(skb, 0, &iov_iter, payload_len)) {
+ vq_err(vq, "Failed to copy %zu byte payload\n", payload_len);
kfree_skb(skb);
return NULL;
}