diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-11 23:20:31 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-11 23:20:31 +0300 |
commit | 6b9e2cea428cf7af93a84bcb865e478d8bf1c165 (patch) | |
tree | 11be387e37129fce0c4c111803df1a2e56637b60 /drivers | |
parent | 14ba9a2e4bacc6f5a0dbe0de5390daedd544508f (diff) | |
parent | f01a2a811ae04124fc9382925038fcbbd2f0b7c8 (diff) | |
download | linux-6b9e2cea428cf7af93a84bcb865e478d8bf1c165.tar.xz |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
"virtio: virtio 1.0 support, misc patches
This adds a lot of infrastructure for virtio 1.0 support. Notable
missing pieces: virtio pci, virtio balloon (needs spec extension),
vhost scsi.
Plus, there are some minor fixes in a couple of places.
Note: some net drivers are affected by these patches. David said he's
fine with merging these patches through my tree.
Rusty's on vacation, he acked using my tree for these, too"
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (70 commits)
virtio_ccw: finalize_features error handling
virtio_ccw: future-proof finalize_features
virtio_pci: rename virtio_pci -> virtio_pci_common
virtio_pci: update file descriptions and copyright
virtio_pci: split out legacy device support
virtio_pci: setup config vector indirectly
virtio_pci: setup vqs indirectly
virtio_pci: delete vqs indirectly
virtio_pci: use priv for vq notification
virtio_pci: free up vq->priv
virtio_pci: fix coding style for structs
virtio_pci: add isr field
virtio: drop legacy_only driver flag
virtio_balloon: drop legacy_only driver flag
virtio_ccw: rev 1 devices set VIRTIO_F_VERSION_1
virtio: allow finalize_features to fail
virtio_ccw: legacy: don't negotiate rev 1/features
virtio: add API to detect legacy devices
virtio_console: fix sparse warnings
vhost: remove unnecessary forward declarations in vhost.h
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/virtio_blk.c | 74 | ||||
-rw-r--r-- | drivers/char/virtio_console.c | 39 | ||||
-rw-r--r-- | drivers/lguest/lguest_device.c | 17 | ||||
-rw-r--r-- | drivers/misc/mic/card/mic_virtio.c | 14 | ||||
-rw-r--r-- | drivers/net/macvtap.c | 68 | ||||
-rw-r--r-- | drivers/net/tun.c | 168 | ||||
-rw-r--r-- | drivers/net/virtio_net.c | 161 | ||||
-rw-r--r-- | drivers/remoteproc/remoteproc_virtio.c | 11 | ||||
-rw-r--r-- | drivers/s390/kvm/kvm_virtio.c | 11 | ||||
-rw-r--r-- | drivers/s390/kvm/virtio_ccw.c | 203 | ||||
-rw-r--r-- | drivers/scsi/virtio_scsi.c | 50 | ||||
-rw-r--r-- | drivers/vhost/net.c | 31 | ||||
-rw-r--r-- | drivers/vhost/scsi.c | 22 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 93 | ||||
-rw-r--r-- | drivers/vhost/vhost.h | 41 | ||||
-rw-r--r-- | drivers/virtio/Makefile | 1 | ||||
-rw-r--r-- | drivers/virtio/virtio.c | 102 | ||||
-rw-r--r-- | drivers/virtio/virtio_mmio.c | 17 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci.c | 802 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_common.c | 464 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_common.h | 136 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_legacy.c | 326 | ||||
-rw-r--r-- | drivers/virtio/virtio_ring.c | 109 |
23 files changed, 1696 insertions, 1264 deletions
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c6a27d54ad62..1fb9e09fbbc5 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -80,7 +80,7 @@ static int __virtblk_add_req(struct virtqueue *vq, { struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6]; unsigned int num_out = 0, num_in = 0; - int type = vbr->out_hdr.type & ~VIRTIO_BLK_T_OUT; + __virtio32 type = vbr->out_hdr.type & ~cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT); sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); sgs[num_out++] = &hdr; @@ -91,19 +91,19 @@ static int __virtblk_add_req(struct virtqueue *vq, * block, and before the normal inhdr we put the sense data and the * inhdr with additional status information. */ - if (type == VIRTIO_BLK_T_SCSI_CMD) { + if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) { sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len); sgs[num_out++] = &cmd; } if (have_data) { - if (vbr->out_hdr.type & VIRTIO_BLK_T_OUT) + if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) sgs[num_out++] = data_sg; else sgs[num_out + num_in++] = data_sg; } - if (type == VIRTIO_BLK_T_SCSI_CMD) { + if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) { sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE); sgs[num_out + num_in++] = &sense; sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr)); @@ -119,12 +119,13 @@ static int __virtblk_add_req(struct virtqueue *vq, static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + struct virtio_blk *vblk = req->q->queuedata; int error = virtblk_result(vbr); if (req->cmd_type == REQ_TYPE_BLOCK_PC) { - req->resid_len = vbr->in_hdr.residual; - req->sense_len = vbr->in_hdr.sense_len; - req->errors = vbr->in_hdr.errors; + req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); + req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); + req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); } else if (req->cmd_type == REQ_TYPE_SPECIAL) { req->errors = (error != 0); } @@ -173,25 +174,25 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req, vbr->req = req; if (req->cmd_flags & REQ_FLUSH) { - vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; + vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_FLUSH); vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); + vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); } else { switch (req->cmd_type) { case REQ_TYPE_FS: vbr->out_hdr.type = 0; - vbr->out_hdr.sector = blk_rq_pos(vbr->req); - vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); + vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, blk_rq_pos(vbr->req)); + vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); break; case REQ_TYPE_BLOCK_PC: - vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; + vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_SCSI_CMD); vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); + vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); break; case REQ_TYPE_SPECIAL: - vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID; + vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); + vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); break; default: /* We don't put anything else in the queue. */ @@ -204,9 +205,9 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req, num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg); if (num) { if (rq_data_dir(vbr->req) == WRITE) - vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; + vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); else - vbr->out_hdr.type |= VIRTIO_BLK_T_IN; + vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); } spin_lock_irqsave(&vblk->vqs[qid].lock, flags); @@ -331,7 +332,8 @@ static ssize_t virtblk_serial_show(struct device *dev, return err; } -DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); + +static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); static void virtblk_config_changed_work(struct work_struct *work) { @@ -476,7 +478,8 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev) struct virtio_blk_config, wce, &writeback); if (err) - writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE); + writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE) || + virtio_has_feature(vdev, VIRTIO_F_VERSION_1); return writeback; } @@ -821,25 +824,34 @@ static const struct virtio_device_id id_table[] = { { 0 }, }; -static unsigned int features[] = { +static unsigned int features_legacy[] = { VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, +} +; +static unsigned int features[] = { + VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, + VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, + VIRTIO_BLK_F_TOPOLOGY, + VIRTIO_BLK_F_MQ, }; static struct virtio_driver virtio_blk = { - .feature_table = features, - .feature_table_size = ARRAY_SIZE(features), - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = virtblk_probe, - .remove = virtblk_remove, - .config_changed = virtblk_config_changed, + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .feature_table_legacy = features_legacy, + .feature_table_size_legacy = ARRAY_SIZE(features_legacy), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtblk_probe, + .remove = virtblk_remove, + .config_changed = virtblk_config_changed, #ifdef CONFIG_PM_SLEEP - .freeze = virtblk_freeze, - .restore = virtblk_restore, + .freeze = virtblk_freeze, + .restore = virtblk_restore, #endif }; @@ -871,8 +883,8 @@ out_destroy_workqueue: static void __exit fini(void) { - unregister_blkdev(major, "virtblk"); unregister_virtio_driver(&virtio_blk); + unregister_blkdev(major, "virtblk"); destroy_workqueue(virtblk_wq); } module_init(init); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index cf7a561fad7c..de03df9dd7c9 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -355,7 +355,7 @@ static inline bool use_multiport(struct ports_device *portdev) */ if (!portdev->vdev) return 0; - return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT); + return __virtio_test_bit(portdev->vdev, VIRTIO_CONSOLE_F_MULTIPORT); } static DEFINE_SPINLOCK(dma_bufs_lock); @@ -566,9 +566,9 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, if (!use_multiport(portdev)) return 0; - cpkt.id = port_id; - cpkt.event = event; - cpkt.value = value; + cpkt.id = cpu_to_virtio32(portdev->vdev, port_id); + cpkt.event = cpu_to_virtio16(portdev->vdev, event); + cpkt.value = cpu_to_virtio16(portdev->vdev, value); vq = portdev->c_ovq; @@ -669,8 +669,8 @@ done: * Give out the data that's requested from the buffer that we have * queued up. */ -static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count, - bool to_user) +static ssize_t fill_readbuf(struct port *port, char __user *out_buf, + size_t out_count, bool to_user) { struct port_buffer *buf; unsigned long flags; @@ -688,7 +688,8 @@ static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count, if (ret) return -EFAULT; } else { - memcpy(out_buf, buf->buf + buf->offset, out_count); + memcpy((__force char *)out_buf, buf->buf + buf->offset, + out_count); } buf->offset += out_count; @@ -1162,7 +1163,7 @@ static int get_chars(u32 vtermno, char *buf, int count) /* If we don't have an input queue yet, we can't get input. */ BUG_ON(!port->in_vq); - return fill_readbuf(port, buf, count, false); + return fill_readbuf(port, (__force char __user *)buf, count, false); } static void resize_console(struct port *port) @@ -1602,7 +1603,8 @@ static void unplug_port(struct port *port) } /* Any private messages that the Host and Guest want to share */ -static void handle_control_message(struct ports_device *portdev, +static void handle_control_message(struct virtio_device *vdev, + struct ports_device *portdev, struct port_buffer *buf) { struct virtio_console_control *cpkt; @@ -1612,15 +1614,16 @@ static void handle_control_message(struct ports_device *portdev, cpkt = (struct virtio_console_control *)(buf->buf + buf->offset); - port = find_port_by_id(portdev, cpkt->id); - if (!port && cpkt->event != VIRTIO_CONSOLE_PORT_ADD) { + port = find_port_by_id(portdev, virtio32_to_cpu(vdev, cpkt->id)); + if (!port && + cpkt->event != cpu_to_virtio16(vdev, VIRTIO_CONSOLE_PORT_ADD)) { /* No valid header at start of buffer. Drop it. */ dev_dbg(&portdev->vdev->dev, "Invalid index %u in control packet\n", cpkt->id); return; } - switch (cpkt->event) { + switch (virtio16_to_cpu(vdev, cpkt->event)) { case VIRTIO_CONSOLE_PORT_ADD: if (port) { dev_dbg(&portdev->vdev->dev, @@ -1628,13 +1631,15 @@ static void handle_control_message(struct ports_device *portdev, send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1); break; } - if (cpkt->id >= portdev->config.max_nr_ports) { + if (virtio32_to_cpu(vdev, cpkt->id) >= + portdev->config.max_nr_ports) { dev_warn(&portdev->vdev->dev, - "Request for adding port with out-of-bound id %u, max. supported id: %u\n", + "Request for adding port with " + "out-of-bound id %u, max. supported id: %u\n", cpkt->id, portdev->config.max_nr_ports - 1); break; } - add_port(portdev, cpkt->id); + add_port(portdev, virtio32_to_cpu(vdev, cpkt->id)); break; case VIRTIO_CONSOLE_PORT_REMOVE: unplug_port(port); @@ -1670,7 +1675,7 @@ static void handle_control_message(struct ports_device *portdev, break; } case VIRTIO_CONSOLE_PORT_OPEN: - port->host_connected = cpkt->value; + port->host_connected = virtio16_to_cpu(vdev, cpkt->value); wake_up_interruptible(&port->waitqueue); /* * If the host port got closed and the host had any @@ -1752,7 +1757,7 @@ static void control_work_handler(struct work_struct *work) buf->len = len; buf->offset = 0; - handle_control_message(portdev, buf); + handle_control_message(vq->vdev, portdev, buf); spin_lock(&portdev->c_ivq_lock); if (add_inbuf(portdev->c_ivq, buf) < 0) { diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index d0a1d8a45c81..89088d6538fd 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -94,7 +94,7 @@ static unsigned desc_size(const struct lguest_device_desc *desc) } /* This gets the device's feature bits. */ -static u32 lg_get_features(struct virtio_device *vdev) +static u64 lg_get_features(struct virtio_device *vdev) { unsigned int i; u32 features = 0; @@ -126,7 +126,7 @@ static void status_notify(struct virtio_device *vdev) * sorted out, this routine is called so we can tell the Host which features we * understand and accept. */ -static void lg_finalize_features(struct virtio_device *vdev) +static int lg_finalize_features(struct virtio_device *vdev) { unsigned int i, bits; struct lguest_device_desc *desc = to_lgdev(vdev)->desc; @@ -136,20 +136,25 @@ static void lg_finalize_features(struct virtio_device *vdev) /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); + /* Make sure we don't have any features > 32 bits! */ + BUG_ON((u32)vdev->features != vdev->features); + /* - * The vdev->feature array is a Linux bitmask: this isn't the same as a - * the simple array of bits used by lguest devices for features. So we - * do this slow, manual conversion which is completely general. + * Since lguest is currently x86-only, we're little-endian. That + * means we could just memcpy. But it's not time critical, and in + * case someone copies this code, we do it the slow, obvious way. */ memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { - if (test_bit(i, vdev->features)) + if (__virtio_test_bit(vdev, i)) out_features[i / 8] |= (1 << (i % 8)); } /* Tell Host we've finished with this device's feature negotiation */ status_notify(vdev); + + return 0; } /* Once they've found a field, getting a copy of it is easy. */ diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c index e64794730e21..e486a0c26267 100644 --- a/drivers/misc/mic/card/mic_virtio.c +++ b/drivers/misc/mic/card/mic_virtio.c @@ -68,7 +68,7 @@ static inline struct device *mic_dev(struct mic_vdev *mvdev) } /* This gets the device's feature bits. */ -static u32 mic_get_features(struct virtio_device *vdev) +static u64 mic_get_features(struct virtio_device *vdev) { unsigned int i, bits; u32 features = 0; @@ -76,8 +76,7 @@ static u32 mic_get_features(struct virtio_device *vdev) u8 __iomem *in_features = mic_vq_features(desc); int feature_len = ioread8(&desc->feature_len); - bits = min_t(unsigned, feature_len, - sizeof(vdev->features)) * 8; + bits = min_t(unsigned, feature_len, sizeof(features)) * 8; for (i = 0; i < bits; i++) if (ioread8(&in_features[i / 8]) & (BIT(i % 8))) features |= BIT(i); @@ -85,7 +84,7 @@ static u32 mic_get_features(struct virtio_device *vdev) return features; } -static void mic_finalize_features(struct virtio_device *vdev) +static int mic_finalize_features(struct virtio_device *vdev) { unsigned int i, bits; struct mic_device_desc __iomem *desc = to_micvdev(vdev)->desc; @@ -97,14 +96,19 @@ static void mic_finalize_features(struct virtio_device *vdev) /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); + /* Make sure we don't have any features > 32 bits! */ + BUG_ON((u32)vdev->features != vdev->features); + memset_io(out_features, 0, feature_len); bits = min_t(unsigned, feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { - if (test_bit(i, vdev->features)) + if (__virtio_test_bit(vdev, i)) iowrite8(ioread8(&out_features[i / 8]) | (1 << (i % 8)), &out_features[i / 8]); } + + return 0; } /* diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 880cc090dc44..af90ab5e5768 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -45,6 +45,18 @@ struct macvtap_queue { struct list_head next; }; +#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_VNET_LE | IFF_MULTI_QUEUE) + +static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) +{ + return __virtio16_to_cpu(q->flags & IFF_VNET_LE, val); +} + +static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val) +{ + return __cpu_to_virtio16(q->flags & IFF_VNET_LE, val); +} + static struct proto macvtap_proto = { .name = "macvtap", .owner = THIS_MODULE, @@ -557,7 +569,8 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad, * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should * be shared with the tun/tap driver. */ -static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, +static int macvtap_skb_from_vnet_hdr(struct macvtap_queue *q, + struct sk_buff *skb, struct virtio_net_hdr *vnet_hdr) { unsigned short gso_type = 0; @@ -588,13 +601,13 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, } if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - if (!skb_partial_csum_set(skb, vnet_hdr->csum_start, - vnet_hdr->csum_offset)) + if (!skb_partial_csum_set(skb, macvtap16_to_cpu(q, vnet_hdr->csum_start), + macvtap16_to_cpu(q, vnet_hdr->csum_offset))) return -EINVAL; } if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { - skb_shinfo(skb)->gso_size = vnet_hdr->gso_size; + skb_shinfo(skb)->gso_size = macvtap16_to_cpu(q, vnet_hdr->gso_size); skb_shinfo(skb)->gso_type = gso_type; /* Header must be checked, and gso_segs computed. */ @@ -604,8 +617,9 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, return 0; } -static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, - struct virtio_net_hdr *vnet_hdr) +static void macvtap_skb_to_vnet_hdr(struct macvtap_queue *q, + const struct sk_buff *skb, + struct virtio_net_hdr *vnet_hdr) { memset(vnet_hdr, 0, sizeof(*vnet_hdr)); @@ -613,8 +627,8 @@ static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, struct skb_shared_info *sinfo = skb_shinfo(skb); /* This is a hint as to how much should be linear. */ - vnet_hdr->hdr_len = skb_headlen(skb); - vnet_hdr->gso_size = sinfo->gso_size; + vnet_hdr->hdr_len = cpu_to_macvtap16(q, skb_headlen(skb)); + vnet_hdr->gso_size = cpu_to_macvtap16(q, sinfo->gso_size); if (sinfo->gso_type & SKB_GSO_TCPV4) vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) @@ -628,10 +642,13 @@ static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_PARTIAL) { vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - vnet_hdr->csum_start = skb_checksum_start_offset(skb); if (vlan_tx_tag_present(skb)) - vnet_hdr->csum_start += VLAN_HLEN; - vnet_hdr->csum_offset = skb->csum_offset; + vnet_hdr->csum_start = cpu_to_macvtap16(q, + skb_checksum_start_offset(skb) + VLAN_HLEN); + else + vnet_hdr->csum_start = cpu_to_macvtap16(q, + skb_checksum_start_offset(skb)); + vnet_hdr->csum_offset = cpu_to_macvtap16(q, skb->csum_offset); } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ @@ -666,12 +683,14 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (err < 0) goto err; if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > - vnet_hdr.hdr_len) - vnet_hdr.hdr_len = vnet_hdr.csum_start + - vnet_hdr.csum_offset + 2; + macvtap16_to_cpu(q, vnet_hdr.csum_start) + + macvtap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > + macvtap16_to_cpu(q, vnet_hdr.hdr_len)) + vnet_hdr.hdr_len = cpu_to_macvtap16(q, + macvtap16_to_cpu(q, vnet_hdr.csum_start) + + macvtap16_to_cpu(q, vnet_hdr.csum_offset) + 2); err = -EINVAL; - if (vnet_hdr.hdr_len > len) + if (macvtap16_to_cpu(q, vnet_hdr.hdr_len) > len) goto err; } @@ -684,7 +703,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, goto err; if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { - copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN; + copylen = vnet_hdr.hdr_len ? + macvtap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; if (copylen > good_linear) copylen = good_linear; linear = copylen; @@ -695,10 +715,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (!zerocopy) { copylen = len; - if (vnet_hdr.hdr_len > good_linear) + if (macvtap16_to_cpu(q, vnet_hdr.hdr_len) > good_linear) linear = good_linear; else - linear = vnet_hdr.hdr_len; + linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len); } skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, @@ -725,7 +745,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, skb->protocol = eth_hdr(skb)->h_proto; if (vnet_hdr_len) { - err = macvtap_skb_from_vnet_hdr(skb, &vnet_hdr); + err = macvtap_skb_from_vnet_hdr(q, skb, &vnet_hdr); if (err) goto err_kfree; } @@ -791,7 +811,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, if ((len -= vnet_hdr_len) < 0) return -EINVAL; - macvtap_skb_to_vnet_hdr(skb, &vnet_hdr); + macvtap_skb_to_vnet_hdr(q, skb, &vnet_hdr); if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr))) return -EFAULT; @@ -1003,8 +1023,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return -EFAULT; ret = 0; - if ((u & ~(IFF_VNET_HDR | IFF_MULTI_QUEUE)) != - (IFF_NO_PI | IFF_TAP)) + if ((u & ~MACVTAP_FEATURES) != (IFF_NO_PI | IFF_TAP)) ret = -EINVAL; else q->flags = u; @@ -1036,8 +1055,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return ret; case TUNGETFEATURES: - if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | - IFF_MULTI_QUEUE, up)) + if (put_user(IFF_TAP | IFF_NO_PI | MACVTAP_FEATURES, up)) return -EFAULT; return 0; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 4d332dc93b70..798ce70e3d61 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -103,6 +103,15 @@ do { \ } while (0) #endif +/* TUN device flags */ + +/* IFF_ATTACH_QUEUE is never stored in device flags, + * overload it to mean fasync when stored there. + */ +#define TUN_FASYNC IFF_ATTACH_QUEUE + +#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ + IFF_VNET_LE | IFF_MULTI_QUEUE) #define GOODCOPY_LEN 128 #define FLT_EXACT_COUNT 8 @@ -196,6 +205,16 @@ struct tun_struct { u32 flow_count; }; +static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) +{ + return __virtio16_to_cpu(tun->flags & IFF_VNET_LE, val); +} + +static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) +{ + return __cpu_to_virtio16(tun->flags & IFF_VNET_LE, val); +} + static inline u32 tun_hashfn(u32 rxhash) { return rxhash & 0x3ff; @@ -472,7 +491,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (tun && tun->numqueues == 0 && tun->numdisabled == 0) { netif_carrier_off(tun->dev); - if (!(tun->flags & TUN_PERSIST) && + if (!(tun->flags & IFF_PERSIST) && tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } @@ -523,7 +542,7 @@ static void tun_detach_all(struct net_device *dev) } BUG_ON(tun->numdisabled != 0); - if (tun->flags & TUN_PERSIST) + if (tun->flags & IFF_PERSIST) module_put(THIS_MODULE); } @@ -541,7 +560,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte goto out; err = -EBUSY; - if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1) + if (!(tun->flags & IFF_MULTI_QUEUE) && tun->numqueues == 1) goto out; err = -E2BIG; @@ -920,7 +939,7 @@ static void tun_net_init(struct net_device *dev) struct tun_struct *tun = netdev_priv(dev); switch (tun->flags & TUN_TYPE_MASK) { - case TUN_TUN_DEV: + case IFF_TUN: dev->netdev_ops = &tun_netdev_ops; /* Point-to-Point TUN Device */ @@ -934,7 +953,7 @@ static void tun_net_init(struct net_device *dev) dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ break; - case TUN_TAP_DEV: + case IFF_TAP: dev->netdev_ops = &tap_netdev_ops; /* Ethernet TAP Device */ ether_setup(dev); @@ -1025,7 +1044,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, int err; u32 rxhash; - if (!(tun->flags & TUN_NO_PI)) { + if (!(tun->flags & IFF_NO_PI)) { if (len < sizeof(pi)) return -EINVAL; len -= sizeof(pi); @@ -1035,7 +1054,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, offset += sizeof(pi); } - if (tun->flags & TUN_VNET_HDR) { + if (tun->flags & IFF_VNET_HDR) { if (len < tun->vnet_hdr_sz) return -EINVAL; len -= tun->vnet_hdr_sz; @@ -1044,18 +1063,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, return -EFAULT; if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - gso.csum_start + gso.csum_offset + 2 > gso.hdr_len) - gso.hdr_len = gso.csum_start + gso.csum_offset + 2; + tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) + gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2); - if (gso.hdr_len > len) + if (tun16_to_cpu(tun, gso.hdr_len) > len) return -EINVAL; offset += tun->vnet_hdr_sz; } - if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) { + if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { align += NET_IP_ALIGN; if (unlikely(len < ETH_HLEN || - (gso.hdr_len && gso.hdr_len < ETH_HLEN))) + (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN))) return -EINVAL; } @@ -1066,7 +1085,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ - copylen = gso.hdr_len ? gso.hdr_len : GOODCOPY_LEN; + copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN; if (copylen > good_linear) copylen = good_linear; linear = copylen; @@ -1076,10 +1095,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (!zerocopy) { copylen = len; - if (gso.hdr_len > good_linear) + if (tun16_to_cpu(tun, gso.hdr_len) > good_linear) linear = good_linear; else - linear = gso.hdr_len; + linear = tun16_to_cpu(tun, gso.hdr_len); } skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); @@ -1106,8 +1125,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - if (!skb_partial_csum_set(skb, gso.csum_start, - gso.csum_offset)) { + if (!skb_partial_csum_set(skb, tun16_to_cpu(tun, gso.csum_start), + tun16_to_cpu(tun, gso.csum_offset))) { tun->dev->stats.rx_frame_errors++; kfree_skb(skb); return -EINVAL; @@ -1115,8 +1134,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } switch (tun->flags & TUN_TYPE_MASK) { - case TUN_TUN_DEV: - if (tun->flags & TUN_NO_PI) { + case IFF_TUN: + if (tun->flags & IFF_NO_PI) { switch (skb->data[0] & 0xf0) { case 0x40: pi.proto = htons(ETH_P_IP); @@ -1135,7 +1154,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb->protocol = pi.proto; skb->dev = tun->dev; break; - case TUN_TAP_DEV: + case IFF_TAP: skb->protocol = eth_type_trans(skb, tun->dev); break; } @@ -1175,7 +1194,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - skb_shinfo(skb)->gso_size = gso.gso_size; + skb_shinfo(skb)->gso_size = tun16_to_cpu(tun, gso.gso_size); if (skb_shinfo(skb)->gso_size == 0) { tun->dev->stats.rx_frame_errors++; kfree_skb(skb); @@ -1241,10 +1260,10 @@ static ssize_t tun_put_user(struct tun_struct *tun, if (vlan_tx_tag_present(skb)) vlan_hlen = VLAN_HLEN; - if (tun->flags & TUN_VNET_HDR) + if (tun->flags & IFF_VNET_HDR) vnet_hdr_sz = tun->vnet_hdr_sz; - if (!(tun->flags & TUN_NO_PI)) { + if (!(tun->flags & IFF_NO_PI)) { if ((len -= sizeof(pi)) < 0) return -EINVAL; @@ -1267,8 +1286,8 @@ static ssize_t tun_put_user(struct tun_struct *tun, struct skb_shared_info *sinfo = skb_shinfo(skb); /* This is a hint as to how much should be linear. */ - gso.hdr_len = skb_headlen(skb); - gso.gso_size = sinfo->gso_size; + gso.hdr_len = cpu_to_tun16(tun, skb_headlen(skb)); + gso.gso_size = cpu_to_tun16(tun, sinfo->gso_size); if (sinfo->gso_type & SKB_GSO_TCPV4) gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) @@ -1276,12 +1295,12 @@ static ssize_t tun_put_user(struct tun_struct *tun, else { pr_err("unexpected GSO type: " "0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, gso.gso_size, - gso.hdr_len); + sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), + tun16_to_cpu(tun, gso.hdr_len)); print_hex_dump(KERN_ERR, "tun: ", DUMP_PREFIX_NONE, 16, 1, skb->head, - min((int)gso.hdr_len, 64), true); + min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); WARN_ON_ONCE(1); return -EINVAL; } @@ -1292,9 +1311,9 @@ static ssize_t tun_put_user(struct tun_struct *tun, if (skb->ip_summed == CHECKSUM_PARTIAL) { gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - gso.csum_start = skb_checksum_start_offset(skb) + - vlan_hlen; - gso.csum_offset = skb->csum_offset; + gso.csum_start = cpu_to_tun16(tun, skb_checksum_start_offset(skb) + + vlan_hlen); + gso.csum_offset = cpu_to_tun16(tun, skb->csum_offset); } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { gso.flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ @@ -1521,32 +1540,7 @@ static struct proto tun_proto = { static int tun_flags(struct tun_struct *tun) { - int flags = 0; - - if (tun->flags & TUN_TUN_DEV) - flags |= IFF_TUN; - else - flags |= IFF_TAP; - - if (tun->flags & TUN_NO_PI) - flags |= IFF_NO_PI; - - /* This flag has no real effect. We track the value for backwards - * compatibility. - */ - if (tun->flags & TUN_ONE_QUEUE) - flags |= IFF_ONE_QUEUE; - - if (tun->flags & TUN_VNET_HDR) - flags |= IFF_VNET_HDR; - - if (tun->flags & TUN_TAP_MQ) - flags |= IFF_MULTI_QUEUE; - - if (tun->flags & TUN_PERSIST) - flags |= IFF_PERSIST; - - return flags; + return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP); } static ssize_t tun_show_flags(struct device *dev, struct device_attribute *attr, @@ -1602,7 +1596,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) return -EINVAL; if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) != - !!(tun->flags & TUN_TAP_MQ)) + !!(tun->flags & IFF_MULTI_QUEUE)) return -EINVAL; if (tun_not_capable(tun)) @@ -1615,7 +1609,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (err < 0) return err; - if (tun->flags & TUN_TAP_MQ && + if (tun->flags & IFF_MULTI_QUEUE && (tun->numqueues + tun->numdisabled > 1)) { /* One or more queue has already been attached, no need * to initialize the device again. @@ -1638,11 +1632,11 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) /* Set dev type */ if (ifr->ifr_flags & IFF_TUN) { /* TUN device */ - flags |= TUN_TUN_DEV; + flags |= IFF_TUN; name = "tun%d"; } else if (ifr->ifr_flags & IFF_TAP) { /* TAP device */ - flags |= TUN_TAP_DEV; + flags |= IFF_TAP; name = "tap%d"; } else return -EINVAL; @@ -1706,28 +1700,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun_debug(KERN_INFO, tun, "tun_set_iff\n"); - if (ifr->ifr_flags & IFF_NO_PI) - tun->flags |= TUN_NO_PI; - else - tun->flags &= ~TUN_NO_PI; - - /* This flag has no real effect. We track the value for backwards - * compatibility. - */ - if (ifr->ifr_flags & IFF_ONE_QUEUE) - tun->flags |= TUN_ONE_QUEUE; - else - tun->flags &= ~TUN_ONE_QUEUE; - - if (ifr->ifr_flags & IFF_VNET_HDR) - tun->flags |= TUN_VNET_HDR; - else - tun->flags &= ~TUN_VNET_HDR; - - if (ifr->ifr_flags & IFF_MULTI_QUEUE) - tun->flags |= TUN_TAP_MQ; - else - tun->flags &= ~TUN_TAP_MQ; + tun->flags = (tun->flags & ~TUN_FEATURES) | + (ifr->ifr_flags & TUN_FEATURES); /* Make sure persistent devices do not get stuck in * xoff state. @@ -1855,7 +1829,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) ret = tun_attach(tun, file, false); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); - if (!tun || !(tun->flags & TUN_TAP_MQ) || tfile->detached) + if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) ret = -EINVAL; else __tun_detach(tfile, false); @@ -1890,9 +1864,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, if (cmd == TUNGETFEATURES) { /* Currently this just means: "what IFF flags are valid?". * This is needed because we never checked for invalid flags on - * TUNSETIFF. */ - return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | - IFF_VNET_HDR | IFF_MULTI_QUEUE, + * TUNSETIFF. + */ + return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES, (unsigned int __user*)argp); } else if (cmd == TUNSETQUEUE) return tun_set_queue(file, &ifr); @@ -1959,12 +1933,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, /* Disable/Enable persist mode. Keep an extra reference to the * module to prevent the module being unprobed. */ - if (arg && !(tun->flags & TUN_PERSIST)) { - tun->flags |= TUN_PERSIST; + if (arg && !(tun->flags & IFF_PERSIST)) { + tun->flags |= IFF_PERSIST; __module_get(THIS_MODULE); } - if (!arg && (tun->flags & TUN_PERSIST)) { - tun->flags &= ~TUN_PERSIST; + if (!arg && (tun->flags & IFF_PERSIST)) { + tun->flags &= ~IFF_PERSIST; module_put(THIS_MODULE); } @@ -2022,7 +1996,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case TUNSETTXFILTER: /* Can be set only for TAPs */ ret = -EINVAL; - if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) + if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break; ret = update_filter(&tun->txflt, (void __user *)arg); break; @@ -2081,7 +2055,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; - if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) + if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break; ret = -EFAULT; if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog))) @@ -2093,7 +2067,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case TUNDETACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; - if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) + if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break; ret = 0; tun_detach_filter(tun, tun->numqueues); @@ -2101,7 +2075,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case TUNGETFILTER: ret = -EINVAL; - if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) + if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break; ret = -EFAULT; if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog))) @@ -2294,10 +2268,10 @@ static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info strlcpy(info->version, DRV_VERSION, sizeof(info->version)); switch (tun->flags & TUN_TYPE_MASK) { - case TUN_TUN_DEV: + case IFF_TUN: strlcpy(info->bus_info, "tun", sizeof(info->bus_info)); break; - case TUN_TAP_DEV: + case IFF_TAP: strlcpy(info->bus_info, "tap", sizeof(info->bus_info)); break; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b0bc8ead47de..b8bd7191572d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -123,6 +123,9 @@ struct virtnet_info { /* Host can handle any s/g split between our header and packet data */ bool any_header_sg; + /* Packet virtio header size */ + u8 hdr_len; + /* Active statistics */ struct virtnet_stats __percpu *stats; @@ -139,21 +142,14 @@ struct virtnet_info { struct notifier_block nb; }; -struct skb_vnet_hdr { - union { - struct virtio_net_hdr hdr; - struct virtio_net_hdr_mrg_rxbuf mhdr; - }; -}; - struct padded_vnet_hdr { - struct virtio_net_hdr hdr; + struct virtio_net_hdr_mrg_rxbuf hdr; /* - * virtio_net_hdr should be in a separated sg buffer because of a - * QEMU bug, and data sg buffer shares same page with this header sg. - * This padding makes next sg 16 byte aligned after virtio_net_hdr. + * hdr is in a separate sg buffer, and data sg buffer shares same page + * with this header sg. This padding makes next sg 16 byte aligned + * after the header. */ - char padding[6]; + char padding[4]; }; /* Converting between virtqueue no. and kernel tx/rx queue no. @@ -179,9 +175,9 @@ static int rxq2vq(int rxq) return rxq * 2; } -static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) +static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb) { - return (struct skb_vnet_hdr *)skb->cb; + return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb; } /* @@ -241,13 +237,13 @@ static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize) } /* Called from bottom half context */ -static struct sk_buff *page_to_skb(struct receive_queue *rq, +static struct sk_buff *page_to_skb(struct virtnet_info *vi, + struct receive_queue *rq, struct page *page, unsigned int offset, unsigned int len, unsigned int truesize) { - struct virtnet_info *vi = rq->vq->vdev->priv; struct sk_buff *skb; - struct skb_vnet_hdr *hdr; + struct virtio_net_hdr_mrg_rxbuf *hdr; unsigned int copy, hdr_len, hdr_padded_len; char *p; @@ -260,13 +256,11 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq, hdr = skb_vnet_hdr(skb); - if (vi->mergeable_rx_bufs) { - hdr_len = sizeof hdr->mhdr; - hdr_padded_len = sizeof hdr->mhdr; - } else { - hdr_len = sizeof hdr->hdr; + hdr_len = vi->hdr_len; + if (vi->mergeable_rx_bufs) + hdr_padded_len = sizeof *hdr; + else hdr_padded_len = sizeof(struct padded_vnet_hdr); - } memcpy(hdr, p, hdr_len); @@ -317,23 +311,24 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq, return skb; } -static struct sk_buff *receive_small(void *buf, unsigned int len) +static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len) { struct sk_buff * skb = buf; - len -= sizeof(struct virtio_net_hdr); + len -= vi->hdr_len; skb_trim(skb, len); return skb; } static struct sk_buff *receive_big(struct net_device *dev, + struct virtnet_info *vi, struct receive_queue *rq, void *buf, unsigned int len) { struct page *page = buf; - struct sk_buff *skb = page_to_skb(rq, page, 0, len, PAGE_SIZE); + struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); if (unlikely(!skb)) goto err; @@ -347,18 +342,20 @@ err: } static struct sk_buff *receive_mergeable(struct net_device *dev, + struct virtnet_info *vi, struct receive_queue *rq, unsigned long ctx, unsigned int len) { void *buf = mergeable_ctx_to_buf_address(ctx); - struct skb_vnet_hdr *hdr = buf; - int num_buf = hdr->mhdr.num_buffers; + struct virtio_net_hdr_mrg_rxbuf *hdr = buf; + u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); struct page *page = virt_to_head_page(buf); int offset = buf - page_address(page); unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx)); - struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize); + struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len, + truesize); struct sk_buff *curr_skb = head_skb; if (unlikely(!curr_skb)) @@ -369,7 +366,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len); if (unlikely(!ctx)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", - dev->name, num_buf, hdr->mhdr.num_buffers); + dev->name, num_buf, + virtio16_to_cpu(vi->vdev, + hdr->num_buffers)); dev->stats.rx_length_errors++; goto err_buf; } @@ -430,15 +429,15 @@ err_buf: return NULL; } -static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) +static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, + void *buf, unsigned int len) { - struct virtnet_info *vi = rq->vq->vdev->priv; struct net_device *dev = vi->dev; struct virtnet_stats *stats = this_cpu_ptr(vi->stats); struct sk_buff *skb; - struct skb_vnet_hdr *hdr; + struct virtio_net_hdr_mrg_rxbuf *hdr; - if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { + if (unlikely(len < vi->hdr_len + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); dev->stats.rx_length_errors++; if (vi->mergeable_rx_bufs) { @@ -454,11 +453,11 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) } if (vi->mergeable_rx_bufs) - skb = receive_mergeable(dev, rq, (unsigned long)buf, len); + skb = receive_mergeable(dev, vi, rq, (unsigned long)buf, len); else if (vi->big_packets) - skb = receive_big(dev, rq, buf, len); + skb = receive_big(dev, vi, rq, buf, len); else - skb = receive_small(buf, len); + skb = receive_small(vi, buf, len); if (unlikely(!skb)) return; @@ -473,8 +472,8 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { pr_debug("Needs csum!\n"); if (!skb_partial_csum_set(skb, - hdr->hdr.csum_start, - hdr->hdr.csum_offset)) + virtio16_to_cpu(vi->vdev, hdr->hdr.csum_start), + virtio16_to_cpu(vi->vdev, hdr->hdr.csum_offset))) goto frame_err; } else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -514,7 +513,8 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; + skb_shinfo(skb)->gso_size = virtio16_to_cpu(vi->vdev, + hdr->hdr.gso_size); if (skb_shinfo(skb)->gso_size == 0) { net_warn_ratelimited("%s: zero gso size.\n", dev->name); goto frame_err; @@ -535,11 +535,11 @@ frame_err: dev_kfree_skb(skb); } -static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp) +static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, + gfp_t gfp) { - struct virtnet_info *vi = rq->vq->vdev->priv; struct sk_buff *skb; - struct skb_vnet_hdr *hdr; + struct virtio_net_hdr_mrg_rxbuf *hdr; int err; skb = __netdev_alloc_skb_ip_align(vi->dev, GOOD_PACKET_LEN, gfp); @@ -550,7 +550,7 @@ static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp) hdr = skb_vnet_hdr(skb); sg_init_table(rq->sg, MAX_SKB_FRAGS + 2); - sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr); + sg_set_buf(rq->sg, hdr, vi->hdr_len); skb_to_sgvec(skb, rq->sg + 1, 0, skb->len); err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp); @@ -560,7 +560,8 @@ static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp) return err; } -static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp) +static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, + gfp_t gfp) { struct page *first, *list = NULL; char *p; @@ -591,8 +592,8 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp) p = page_address(first); /* rq->sg[0], rq->sg[1] share the same page */ - /* a separated rq->sg[0] for virtio_net_hdr only due to QEMU bug */ - sg_set_buf(&rq->sg[0], p, sizeof(struct virtio_net_hdr)); + /* a separated rq->sg[0] for header - required in case !any_header_sg */ + sg_set_buf(&rq->sg[0], p, vi->hdr_len); /* rq->sg[1] for data packet, from offset */ offset = sizeof(struct padded_vnet_hdr); @@ -660,9 +661,9 @@ static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp) * before we're receiving packets, or from refill_work which is * careful to disable receiving (using napi_disable). */ -static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) +static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, + gfp_t gfp) { - struct virtnet_info *vi = rq->vq->vdev->priv; int err; bool oom; @@ -671,9 +672,9 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) if (vi->mergeable_rx_bufs) err = add_recvbuf_mergeable(rq, gfp); else if (vi->big_packets) - err = add_recvbuf_big(rq, gfp); + err = add_recvbuf_big(vi, rq, gfp); else - err = add_recvbuf_small(rq, gfp); + err = add_recvbuf_small(vi, rq, gfp); oom = err == -ENOMEM; if (err) @@ -722,7 +723,7 @@ static void refill_work(struct work_struct *work) struct receive_queue *rq = &vi->rq[i]; napi_disable(&rq->napi); - still_empty = !try_fill_recv(rq, GFP_KERNEL); + still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); virtnet_napi_enable(rq); /* In theory, this can happen: if we don't get any buffers in @@ -741,12 +742,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget) while (received < budget && (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { - receive_buf(rq, buf, len); + receive_buf(vi, rq, buf, len); received++; } if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) { - if (!try_fill_recv(rq, GFP_ATOMIC)) + if (!try_fill_recv(vi, rq, GFP_ATOMIC)) schedule_delayed_work(&vi->refill, 0); } @@ -822,7 +823,7 @@ static int virtnet_open(struct net_device *dev) for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs) /* Make sure we have some buffers: if oom use wq. */ - if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) + if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) schedule_delayed_work(&vi->refill, 0); virtnet_napi_enable(&vi->rq[i]); } @@ -851,18 +852,14 @@ static void free_old_xmit_skbs(struct send_queue *sq) static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) { - struct skb_vnet_hdr *hdr; + struct virtio_net_hdr_mrg_rxbuf *hdr; const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; struct virtnet_info *vi = sq->vq->vdev->priv; unsigned num_sg; - unsigned hdr_len; + unsigned hdr_len = vi->hdr_len; bool can_push; pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); - if (vi->mergeable_rx_bufs) - hdr_len = sizeof hdr->mhdr; - else - hdr_len = sizeof hdr->hdr; can_push = vi->any_header_sg && !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && @@ -870,22 +867,25 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) /* Even if we can, don't push here yet as this would skew * csum_start offset below. */ if (can_push) - hdr = (struct skb_vnet_hdr *)(skb->data - hdr_len); + hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); else hdr = skb_vnet_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->hdr.csum_start = skb_checksum_start_offset(skb); - hdr->hdr.csum_offset = skb->csum_offset; + hdr->hdr.csum_start = cpu_to_virtio16(vi->vdev, + skb_checksum_start_offset(skb)); + hdr->hdr.csum_offset = cpu_to_virtio16(vi->vdev, + skb->csum_offset); } else { hdr->hdr.flags = 0; hdr->hdr.csum_offset = hdr->hdr.csum_start = 0; } if (skb_is_gso(skb)) { - hdr->hdr.hdr_len = skb_headlen(skb); - hdr->hdr.gso_size = skb_shinfo(skb)->gso_size; + hdr->hdr.hdr_len = cpu_to_virtio16(vi->vdev, skb_headlen(skb)); + hdr->hdr.gso_size = cpu_to_virtio16(vi->vdev, + skb_shinfo(skb)->gso_size); if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) @@ -900,7 +900,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) } if (vi->mergeable_rx_bufs) - hdr->mhdr.num_buffers = 0; + hdr->num_buffers = 0; sg_init_table(sq->sg, MAX_SKB_FRAGS + 2); if (can_push) { @@ -1030,7 +1030,8 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) "Failed to set mac address by vq command.\n"); return -EINVAL; } - } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { + } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && + !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { unsigned int i; /* Naturally, this has an atomicity problem. */ @@ -1112,7 +1113,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) return 0; - s.virtqueue_pairs = queue_pairs; + s.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); sg_init_one(&sg, &s, sizeof(s)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, @@ -1189,7 +1190,7 @@ static void virtnet_set_rx_mode(struct net_device *dev) sg_init_table(sg, 2); /* Store the unicast list and count in the front of the buffer */ - mac_data->entries = uc_count; + mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); i = 0; netdev_for_each_uc_addr(ha, dev) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); @@ -1200,7 +1201,7 @@ static void virtnet_set_rx_mode(struct net_device *dev) /* multicast list and count fill the end */ mac_data = (void *)&mac_data->macs[uc_count][0]; - mac_data->entries = mc_count; + mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); i = 0; netdev_for_each_mc_addr(ha, dev) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); @@ -1805,18 +1806,20 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; + if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || + virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) + vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + else + vi->hdr_len = sizeof(struct virtio_net_hdr); + if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT)) vi->any_header_sg = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) vi->has_cvq = true; - if (vi->any_header_sg) { - if (vi->mergeable_rx_bufs) - dev->needed_headroom = sizeof(struct virtio_net_hdr_mrg_rxbuf); - else - dev->needed_headroom = sizeof(struct virtio_net_hdr); - } + if (vi->any_header_sg) + dev->needed_headroom = vi->hdr_len; /* Use single tx/rx queue pair as default */ vi->curr_queue_pairs = 1; @@ -1844,7 +1847,7 @@ static int virtnet_probe(struct virtio_device *vdev) /* Last of all, set up some receive buffers. */ for (i = 0; i < vi->curr_queue_pairs; i++) { - try_fill_recv(&vi->rq[i], GFP_KERNEL); + try_fill_recv(vi, &vi->rq[i], GFP_KERNEL); /* If we didn't even get one input buffer, we're useless. */ if (vi->rq[i].vq->num_free == @@ -1964,7 +1967,7 @@ static int virtnet_restore(struct virtio_device *vdev) if (netif_running(vi->dev)) { for (i = 0; i < vi->curr_queue_pairs; i++) - if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) + if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) schedule_delayed_work(&vi->refill, 0); for (i = 0; i < vi->max_queue_pairs; i++) diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index a34b50690b4e..e1a10232a943 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -207,7 +207,7 @@ static void rproc_virtio_reset(struct virtio_device *vdev) } /* provide the vdev features as retrieved from the firmware */ -static u32 rproc_virtio_get_features(struct virtio_device *vdev) +static u64 rproc_virtio_get_features(struct virtio_device *vdev) { struct rproc_vdev *rvdev = vdev_to_rvdev(vdev); struct fw_rsc_vdev *rsc; @@ -217,7 +217,7 @@ static u32 rproc_virtio_get_features(struct virtio_device *vdev) return rsc->dfeatures; } -static void rproc_virtio_finalize_features(struct virtio_device *vdev) +static int rproc_virtio_finalize_features(struct virtio_device *vdev) { struct rproc_vdev *rvdev = vdev_to_rvdev(vdev); struct fw_rsc_vdev *rsc; @@ -227,11 +227,16 @@ static void rproc_virtio_finalize_features(struct virtio_device *vdev) /* Give virtio_ring a chance to accept features */ vring_transport_features(vdev); + /* Make sure we don't have any features > 32 bits! */ + BUG_ON((u32)vdev->features != vdev->features); + /* * Remember the finalized features of our vdev, and provide it * to the remote processor once it is powered on. */ - rsc->gfeatures = vdev->features[0]; + rsc->gfeatures = vdev->features; + + return 0; } static void rproc_virtio_get(struct virtio_device *vdev, unsigned offset, diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 643129070c51..dd65c8b4c7fe 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -80,7 +80,7 @@ static unsigned desc_size(const struct kvm_device_desc *desc) } /* This gets the device's feature bits. */ -static u32 kvm_get_features(struct virtio_device *vdev) +static u64 kvm_get_features(struct virtio_device *vdev) { unsigned int i; u32 features = 0; @@ -93,7 +93,7 @@ static u32 kvm_get_features(struct virtio_device *vdev) return features; } -static void kvm_finalize_features(struct virtio_device *vdev) +static int kvm_finalize_features(struct virtio_device *vdev) { unsigned int i, bits; struct kvm_device_desc *desc = to_kvmdev(vdev)->desc; @@ -103,12 +103,17 @@ static void kvm_finalize_features(struct virtio_device *vdev) /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); + /* Make sure we don't have any features > 32 bits! */ + BUG_ON((u32)vdev->features != vdev->features); + memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { - if (test_bit(i, vdev->features)) + if (__virtio_test_bit(vdev, i)) out_features[i / 8] |= (1 << (i % 8)); } + + return 0; } /* diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index bda52f18e967..71d7802aa8b4 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -55,6 +55,7 @@ struct virtio_ccw_device { struct ccw_device *cdev; __u32 curr_io; int err; + unsigned int revision; /* Transport revision */ wait_queue_head_t wait_q; spinlock_t lock; struct list_head virtqueues; @@ -67,13 +68,22 @@ struct virtio_ccw_device { void *airq_info; }; -struct vq_info_block { +struct vq_info_block_legacy { __u64 queue; __u32 align; __u16 index; __u16 num; } __packed; +struct vq_info_block { + __u64 desc; + __u32 res0; + __u16 index; + __u16 num; + __u64 avail; + __u64 used; +} __packed; + struct virtio_feature_desc { __u32 features; __u8 index; @@ -86,11 +96,23 @@ struct virtio_thinint_area { u8 isc; } __packed; +struct virtio_rev_info { + __u16 revision; + __u16 length; + __u8 data[]; +}; + +/* the highest virtio-ccw revision we support */ +#define VIRTIO_CCW_REV_MAX 1 + struct virtio_ccw_vq_info { struct virtqueue *vq; int num; void *queue; - struct vq_info_block *info_block; + union { + struct vq_info_block s; + struct vq_info_block_legacy l; + } *info_block; int bit_nr; struct list_head node; long cookie; @@ -122,6 +144,7 @@ static struct airq_info *airq_areas[MAX_AIRQ_AREAS]; #define CCW_CMD_WRITE_STATUS 0x31 #define CCW_CMD_READ_VQ_CONF 0x32 #define CCW_CMD_SET_IND_ADAPTER 0x73 +#define CCW_CMD_SET_VIRTIO_REV 0x83 #define VIRTIO_CCW_DOING_SET_VQ 0x00010000 #define VIRTIO_CCW_DOING_RESET 0x00040000 @@ -134,6 +157,7 @@ static struct airq_info *airq_areas[MAX_AIRQ_AREAS]; #define VIRTIO_CCW_DOING_READ_VQ_CONF 0x02000000 #define VIRTIO_CCW_DOING_SET_CONF_IND 0x04000000 #define VIRTIO_CCW_DOING_SET_IND_ADAPTER 0x08000000 +#define VIRTIO_CCW_DOING_SET_VIRTIO_REV 0x10000000 #define VIRTIO_CCW_INTPARM_MASK 0xffff0000 static struct virtio_ccw_device *to_vc_device(struct virtio_device *vdev) @@ -399,13 +423,22 @@ static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw) spin_unlock_irqrestore(&vcdev->lock, flags); /* Release from host. */ - info->info_block->queue = 0; - info->info_block->align = 0; - info->info_block->index = index; - info->info_block->num = 0; + if (vcdev->revision == 0) { + info->info_block->l.queue = 0; + info->info_block->l.align = 0; + info->info_block->l.index = index; + info->info_block->l.num = 0; + ccw->count = sizeof(info->info_block->l); + } else { + info->info_block->s.desc = 0; + info->info_block->s.index = index; + info->info_block->s.num = 0; + info->info_block->s.avail = 0; + info->info_block->s.used = 0; + ccw->count = sizeof(info->info_block->s); + } ccw->cmd_code = CCW_CMD_SET_VQ; ccw->flags = 0; - ccw->count = sizeof(*info->info_block); ccw->cda = (__u32)(unsigned long)(info->info_block); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_VQ | index); @@ -488,13 +521,22 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, } /* Register it with the host. */ - info->info_block->queue = (__u64)info->queue; - info->info_block->align = KVM_VIRTIO_CCW_RING_ALIGN; - info->info_block->index = i; - info->info_block->num = info->num; + if (vcdev->revision == 0) { + info->info_block->l.queue = (__u64)info->queue; + info->info_block->l.align = KVM_VIRTIO_CCW_RING_ALIGN; + info->info_block->l.index = i; + info->info_block->l.num = info->num; + ccw->count = sizeof(info->info_block->l); + } else { + info->info_block->s.desc = (__u64)info->queue; + info->info_block->s.index = i; + info->info_block->s.num = info->num; + info->info_block->s.avail = (__u64)virtqueue_get_avail(vq); + info->info_block->s.used = (__u64)virtqueue_get_used(vq); + ccw->count = sizeof(info->info_block->s); + } ccw->cmd_code = CCW_CMD_SET_VQ; ccw->flags = 0; - ccw->count = sizeof(*info->info_block); ccw->cda = (__u32)(unsigned long)(info->info_block); err = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_VQ | i); if (err) { @@ -660,11 +702,12 @@ static void virtio_ccw_reset(struct virtio_device *vdev) kfree(ccw); } -static u32 virtio_ccw_get_features(struct virtio_device *vdev) +static u64 virtio_ccw_get_features(struct virtio_device *vdev) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); struct virtio_feature_desc *features; - int ret, rc; + int ret; + u64 rc; struct ccw1 *ccw; ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); @@ -677,7 +720,6 @@ static u32 virtio_ccw_get_features(struct virtio_device *vdev) goto out_free; } /* Read the feature bits from the host. */ - /* TODO: Features > 32 bits */ features->index = 0; ccw->cmd_code = CCW_CMD_READ_FEAT; ccw->flags = 0; @@ -691,46 +733,79 @@ static u32 virtio_ccw_get_features(struct virtio_device *vdev) rc = le32_to_cpu(features->features); + if (vcdev->revision == 0) + goto out_free; + + /* Read second half of the feature bits from the host. */ + features->index = 1; + ccw->cmd_code = CCW_CMD_READ_FEAT; + ccw->flags = 0; + ccw->count = sizeof(*features); + ccw->cda = (__u32)(unsigned long)features; + ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_FEAT); + if (ret == 0) + rc |= (u64)le32_to_cpu(features->features) << 32; + out_free: kfree(features); kfree(ccw); return rc; } -static void virtio_ccw_finalize_features(struct virtio_device *vdev) +static int virtio_ccw_finalize_features(struct virtio_device *vdev) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); struct virtio_feature_desc *features; - int i; struct ccw1 *ccw; + int ret; + + if (vcdev->revision >= 1 && + !__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) { + dev_err(&vdev->dev, "virtio: device uses revision 1 " + "but does not have VIRTIO_F_VERSION_1\n"); + return -EINVAL; + } ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); if (!ccw) - return; + return -ENOMEM; features = kzalloc(sizeof(*features), GFP_DMA | GFP_KERNEL); - if (!features) + if (!features) { + ret = -ENOMEM; goto out_free; - + } /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); - for (i = 0; i < sizeof(*vdev->features) / sizeof(features->features); - i++) { - int highbits = i % 2 ? 32 : 0; - features->index = i; - features->features = cpu_to_le32(vdev->features[i / 2] - >> highbits); - /* Write the feature bits to the host. */ - ccw->cmd_code = CCW_CMD_WRITE_FEAT; - ccw->flags = 0; - ccw->count = sizeof(*features); - ccw->cda = (__u32)(unsigned long)features; - ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_FEAT); - } + features->index = 0; + features->features = cpu_to_le32((u32)vdev->features); + /* Write the first half of the feature bits to the host. */ + ccw->cmd_code = CCW_CMD_WRITE_FEAT; + ccw->flags = 0; + ccw->count = sizeof(*features); + ccw->cda = (__u32)(unsigned long)features; + ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_FEAT); + if (ret) + goto out_free; + + if (vcdev->revision == 0) + goto out_free; + + features->index = 1; + features->features = cpu_to_le32(vdev->features >> 32); + /* Write the second half of the feature bits to the host. */ + ccw->cmd_code = CCW_CMD_WRITE_FEAT; + ccw->flags = 0; + ccw->count = sizeof(*features); + ccw->cda = (__u32)(unsigned long)features; + ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_FEAT); + out_free: kfree(features); kfree(ccw); + + return ret; } static void virtio_ccw_get_config(struct virtio_device *vdev, @@ -806,7 +881,9 @@ static u8 virtio_ccw_get_status(struct virtio_device *vdev) static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); + u8 old_status = *vcdev->status; struct ccw1 *ccw; + int ret; ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); if (!ccw) @@ -818,7 +895,10 @@ static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status) ccw->flags = 0; ccw->count = sizeof(status); ccw->cda = (__u32)(unsigned long)vcdev->status; - ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS); + ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS); + /* Write failed? We assume status is unchanged. */ + if (ret) + *vcdev->status = old_status; kfree(ccw); } @@ -919,6 +999,7 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, case VIRTIO_CCW_DOING_RESET: case VIRTIO_CCW_DOING_READ_VQ_CONF: case VIRTIO_CCW_DOING_SET_IND_ADAPTER: + case VIRTIO_CCW_DOING_SET_VIRTIO_REV: vcdev->curr_io &= ~activity; wake_up(&vcdev->wait_q); break; @@ -1034,6 +1115,51 @@ static int virtio_ccw_offline(struct ccw_device *cdev) return 0; } +static int virtio_ccw_set_transport_rev(struct virtio_ccw_device *vcdev) +{ + struct virtio_rev_info *rev; + struct ccw1 *ccw; + int ret; + + ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); + if (!ccw) + return -ENOMEM; + rev = kzalloc(sizeof(*rev), GFP_DMA | GFP_KERNEL); + if (!rev) { + kfree(ccw); + return -ENOMEM; + } + + /* Set transport revision */ + ccw->cmd_code = CCW_CMD_SET_VIRTIO_REV; + ccw->flags = 0; + ccw->count = sizeof(*rev); + ccw->cda = (__u32)(unsigned long)rev; + + vcdev->revision = VIRTIO_CCW_REV_MAX; + do { + rev->revision = vcdev->revision; + /* none of our supported revisions carry payload */ + rev->length = 0; + ret = ccw_io_helper(vcdev, ccw, + VIRTIO_CCW_DOING_SET_VIRTIO_REV); + if (ret == -EOPNOTSUPP) { + if (vcdev->revision == 0) + /* + * The host device does not support setting + * the revision: let's operate it in legacy + * mode. + */ + ret = 0; + else + vcdev->revision--; + } + } while (ret == -EOPNOTSUPP); + + kfree(ccw); + kfree(rev); + return ret; +} static int virtio_ccw_online(struct ccw_device *cdev) { @@ -1074,6 +1200,15 @@ static int virtio_ccw_online(struct ccw_device *cdev) spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); vcdev->vdev.id.vendor = cdev->id.cu_type; vcdev->vdev.id.device = cdev->id.cu_model; + + if (virtio_device_is_legacy_only(vcdev->vdev.id)) { + vcdev->revision = 0; + } else { + ret = virtio_ccw_set_transport_rev(vcdev); + if (ret) + goto out_free; + } + ret = register_virtio_device(&vcdev->vdev); if (ret) { dev_warn(&cdev->dev, "Failed to register virtio device: %d\n", diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 22e70126425b..c52bb5dfaedb 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -158,7 +158,7 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) sc, resp->response, resp->status, resp->sense_len); sc->result = resp->status; - virtscsi_compute_resid(sc, resp->resid); + virtscsi_compute_resid(sc, virtio32_to_cpu(vscsi->vdev, resp->resid)); switch (resp->response) { case VIRTIO_SCSI_S_OK: set_host_byte(sc, DID_OK); @@ -196,10 +196,13 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) break; } - WARN_ON(resp->sense_len > VIRTIO_SCSI_SENSE_SIZE); + WARN_ON(virtio32_to_cpu(vscsi->vdev, resp->sense_len) > + VIRTIO_SCSI_SENSE_SIZE); if (sc->sense_buffer) { memcpy(sc->sense_buffer, resp->sense, - min_t(u32, resp->sense_len, VIRTIO_SCSI_SENSE_SIZE)); + min_t(u32, + virtio32_to_cpu(vscsi->vdev, resp->sense_len), + VIRTIO_SCSI_SENSE_SIZE)); if (resp->sense_len) set_driver_byte(sc, DRIVER_SENSE); } @@ -323,7 +326,7 @@ static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi, unsigned int target = event->lun[1]; unsigned int lun = (event->lun[2] << 8) | event->lun[3]; - switch (event->reason) { + switch (virtio32_to_cpu(vscsi->vdev, event->reason)) { case VIRTIO_SCSI_EVT_RESET_RESCAN: scsi_add_device(shost, 0, target, lun); break; @@ -349,8 +352,8 @@ static void virtscsi_handle_param_change(struct virtio_scsi *vscsi, struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev); unsigned int target = event->lun[1]; unsigned int lun = (event->lun[2] << 8) | event->lun[3]; - u8 asc = event->reason & 255; - u8 ascq = event->reason >> 8; + u8 asc = virtio32_to_cpu(vscsi->vdev, event->reason) & 255; + u8 ascq = virtio32_to_cpu(vscsi->vdev, event->reason) >> 8; sdev = scsi_device_lookup(shost, 0, target, lun); if (!sdev) { @@ -374,12 +377,14 @@ static void virtscsi_handle_event(struct work_struct *work) struct virtio_scsi *vscsi = event_node->vscsi; struct virtio_scsi_event *event = &event_node->event; - if (event->event & VIRTIO_SCSI_T_EVENTS_MISSED) { - event->event &= ~VIRTIO_SCSI_T_EVENTS_MISSED; + if (event->event & + cpu_to_virtio32(vscsi->vdev, VIRTIO_SCSI_T_EVENTS_MISSED)) { + event->event &= ~cpu_to_virtio32(vscsi->vdev, + VIRTIO_SCSI_T_EVENTS_MISSED); scsi_scan_host(virtio_scsi_host(vscsi->vdev)); } - switch (event->event) { + switch (virtio32_to_cpu(vscsi->vdev, event->event)) { case VIRTIO_SCSI_T_NO_EVENT: break; case VIRTIO_SCSI_T_TRANSPORT_RESET: @@ -482,26 +487,28 @@ static int virtscsi_kick_cmd(struct virtio_scsi_vq *vq, return err; } -static void virtio_scsi_init_hdr(struct virtio_scsi_cmd_req *cmd, +static void virtio_scsi_init_hdr(struct virtio_device *vdev, + struct virtio_scsi_cmd_req *cmd, struct scsi_cmnd *sc) { cmd->lun[0] = 1; cmd->lun[1] = sc->device->id; cmd->lun[2] = (sc->device->lun >> 8) | 0x40; cmd->lun[3] = sc->device->lun & 0xff; - cmd->tag = (unsigned long)sc; + cmd->tag = cpu_to_virtio64(vdev, (unsigned long)sc); cmd->task_attr = VIRTIO_SCSI_S_SIMPLE; cmd->prio = 0; cmd->crn = 0; } -static void virtio_scsi_init_hdr_pi(struct virtio_scsi_cmd_req_pi *cmd_pi, +static void virtio_scsi_init_hdr_pi(struct virtio_device *vdev, + struct virtio_scsi_cmd_req_pi *cmd_pi, struct scsi_cmnd *sc) { struct request *rq = sc->request; struct blk_integrity *bi; - virtio_scsi_init_hdr((struct virtio_scsi_cmd_req *)cmd_pi, sc); + virtio_scsi_init_hdr(vdev, (struct virtio_scsi_cmd_req *)cmd_pi, sc); if (!rq || !scsi_prot_sg_count(sc)) return; @@ -509,9 +516,13 @@ static void virtio_scsi_init_hdr_pi(struct virtio_scsi_cmd_req_pi *cmd_pi, bi = blk_get_integrity(rq->rq_disk); if (sc->sc_data_direction == DMA_TO_DEVICE) - cmd_pi->pi_bytesout = blk_rq_sectors(rq) * bi->tuple_size; + cmd_pi->pi_bytesout = cpu_to_virtio32(vdev, + blk_rq_sectors(rq) * + bi->tuple_size); else if (sc->sc_data_direction == DMA_FROM_DEVICE) - cmd_pi->pi_bytesin = blk_rq_sectors(rq) * bi->tuple_size; + cmd_pi->pi_bytesin = cpu_to_virtio32(vdev, + blk_rq_sectors(rq) * + bi->tuple_size); } static int virtscsi_queuecommand(struct virtio_scsi *vscsi, @@ -536,11 +547,11 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi, BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE); if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_T10_PI)) { - virtio_scsi_init_hdr_pi(&cmd->req.cmd_pi, sc); + virtio_scsi_init_hdr_pi(vscsi->vdev, &cmd->req.cmd_pi, sc); memcpy(cmd->req.cmd_pi.cdb, sc->cmnd, sc->cmd_len); req_size = sizeof(cmd->req.cmd_pi); } else { - virtio_scsi_init_hdr(&cmd->req.cmd, sc); + virtio_scsi_init_hdr(vscsi->vdev, &cmd->req.cmd, sc); memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len); req_size = sizeof(cmd->req.cmd); } @@ -669,7 +680,8 @@ static int virtscsi_device_reset(struct scsi_cmnd *sc) cmd->sc = sc; cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){ .type = VIRTIO_SCSI_T_TMF, - .subtype = VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET, + .subtype = cpu_to_virtio32(vscsi->vdev, + VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET), .lun[0] = 1, .lun[1] = sc->device->id, .lun[2] = (sc->device->lun >> 8) | 0x40, @@ -710,7 +722,7 @@ static int virtscsi_abort(struct scsi_cmnd *sc) .lun[1] = sc->device->id, .lun[2] = (sc->device->lun >> 8) | 0x40, .lun[3] = sc->device->lun & 0xff, - .tag = (unsigned long)sc, + .tag = cpu_to_virtio64(vscsi->vdev, (unsigned long)sc), }; return virtscsi_tmf(vscsi, cmd); } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 8dae2f724a35..a935c254749e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -48,20 +48,21 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" * status internally; used for zerocopy tx only. */ /* Lower device DMA failed */ -#define VHOST_DMA_FAILED_LEN 3 +#define VHOST_DMA_FAILED_LEN ((__force __virtio32)3) /* Lower device DMA done */ -#define VHOST_DMA_DONE_LEN 2 +#define VHOST_DMA_DONE_LEN ((__force __virtio32)2) /* Lower device DMA in progress */ -#define VHOST_DMA_IN_PROGRESS 1 +#define VHOST_DMA_IN_PROGRESS ((__force __virtio32)1) /* Buffer unused */ -#define VHOST_DMA_CLEAR_LEN 0 +#define VHOST_DMA_CLEAR_LEN ((__force __virtio32)0) -#define VHOST_DMA_IS_DONE(len) ((len) >= VHOST_DMA_DONE_LEN) +#define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN) enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF), + (1ULL << VIRTIO_NET_F_MRG_RXBUF) | + (1ULL << VIRTIO_F_VERSION_1), }; enum { @@ -416,7 +417,7 @@ static void handle_tx(struct vhost_net *net) struct ubuf_info *ubuf; ubuf = nvq->ubuf_info + nvq->upend_idx; - vq->heads[nvq->upend_idx].id = head; + vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; ubuf->callback = vhost_zerocopy_callback; ubuf->ctx = nvq->ubufs; @@ -500,6 +501,10 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, int headcount = 0; unsigned d; int r, nlogs = 0; + /* len is always initialized before use since we are always called with + * datalen > 0. + */ + u32 uninitialized_var(len); while (datalen > 0 && headcount < quota) { if (unlikely(seg >= UIO_MAXIOV)) { @@ -527,13 +532,14 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, nlogs += *log_num; log += *log_num; } - heads[headcount].id = d; - heads[headcount].len = iov_length(vq->iov + seg, in); - datalen -= heads[headcount].len; + heads[headcount].id = cpu_to_vhost32(vq, d); + len = iov_length(vq->iov + seg, in); + heads[headcount].len = cpu_to_vhost32(vq, len); + datalen -= len; ++headcount; seg += in; } - heads[headcount - 1].len += datalen; + heads[headcount - 1].len = cpu_to_vhost32(vq, len - datalen); *iovcount = seg; if (unlikely(log)) *log_num = nlogs; @@ -1025,7 +1031,8 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) size_t vhost_hlen, sock_hlen, hdr_len; int i; - hdr_len = (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? + hdr_len = (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | + (1ULL << VIRTIO_F_VERSION_1))) ? sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr); if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) { diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index a17f11850669..01c01cb3933f 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -168,6 +168,7 @@ enum { VHOST_SCSI_VQ_IO = 2, }; +/* Note: can't set VIRTIO_F_VERSION_1 yet, since that implies ANY_LAYOUT. */ enum { VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) | (1ULL << VIRTIO_SCSI_F_T10_PI) @@ -577,8 +578,8 @@ tcm_vhost_allocate_evt(struct vhost_scsi *vs, return NULL; } - evt->event.event = event; - evt->event.reason = reason; + evt->event.event = cpu_to_vhost32(vq, event); + evt->event.reason = cpu_to_vhost32(vq, reason); vs->vs_events_nr++; return evt; @@ -636,7 +637,7 @@ again: } if (vs->vs_events_missed) { - event->event |= VIRTIO_SCSI_T_EVENTS_MISSED; + event->event |= cpu_to_vhost32(vq, VIRTIO_SCSI_T_EVENTS_MISSED); vs->vs_events_missed = false; } @@ -695,12 +696,13 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) cmd, se_cmd->residual_count, se_cmd->scsi_status); memset(&v_rsp, 0, sizeof(v_rsp)); - v_rsp.resid = se_cmd->residual_count; + v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, se_cmd->residual_count); /* TODO is status_qualifier field needed? */ v_rsp.status = se_cmd->scsi_status; - v_rsp.sense_len = se_cmd->scsi_sense_length; + v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq, + se_cmd->scsi_sense_length); memcpy(v_rsp.sense, cmd->tvc_sense_buf, - v_rsp.sense_len); + se_cmd->scsi_sense_length); ret = copy_to_user(cmd->tvc_resp, &v_rsp, sizeof(v_rsp)); if (likely(ret == 0)) { struct vhost_scsi_virtqueue *q; @@ -1095,14 +1097,14 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) ", but wrong data_direction\n"); goto err_cmd; } - prot_bytes = v_req_pi.pi_bytesout; + prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout); } else if (v_req_pi.pi_bytesin) { if (data_direction != DMA_FROM_DEVICE) { vq_err(vq, "Received non zero di_pi_niov" ", but wrong data_direction\n"); goto err_cmd; } - prot_bytes = v_req_pi.pi_bytesin; + prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin); } if (prot_bytes) { int tmp = 0; @@ -1117,12 +1119,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) data_first += prot_niov; data_niov = data_num - prot_niov; } - tag = v_req_pi.tag; + tag = vhost64_to_cpu(vq, v_req_pi.tag); task_attr = v_req_pi.task_attr; cdb = &v_req_pi.cdb[0]; lun = ((v_req_pi.lun[2] << 8) | v_req_pi.lun[3]) & 0x3FFF; } else { - tag = v_req.tag; + tag = vhost64_to_cpu(vq, v_req.tag); task_attr = v_req.task_attr; cdb = &v_req.cdb[0]; lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c90f4374442a..ed71b5347a76 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -33,8 +33,8 @@ enum { VHOST_MEMORY_F_LOG = 0x1, }; -#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) -#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) +#define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) +#define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) @@ -1001,7 +1001,7 @@ EXPORT_SYMBOL_GPL(vhost_log_write); static int vhost_update_used_flags(struct vhost_virtqueue *vq) { void __user *used; - if (__put_user(vq->used_flags, &vq->used->flags) < 0) + if (__put_user(cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags) < 0) return -EFAULT; if (unlikely(vq->log_used)) { /* Make sure the flag is seen before log. */ @@ -1019,7 +1019,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) { - if (__put_user(vq->avail_idx, vhost_avail_event(vq))) + if (__put_user(cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq))) return -EFAULT; if (unlikely(vq->log_used)) { void __user *used; @@ -1038,6 +1038,7 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) int vhost_init_used(struct vhost_virtqueue *vq) { + __virtio16 last_used_idx; int r; if (!vq->private_data) return 0; @@ -1046,7 +1047,13 @@ int vhost_init_used(struct vhost_virtqueue *vq) if (r) return r; vq->signalled_used_valid = false; - return get_user(vq->last_used_idx, &vq->used->idx); + if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) + return -EFAULT; + r = __get_user(last_used_idx, &vq->used->idx); + if (r) + return r; + vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); + return 0; } EXPORT_SYMBOL_GPL(vhost_init_used); @@ -1087,16 +1094,16 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, /* Each buffer in the virtqueues is actually a chain of descriptors. This * function returns the next descriptor in the chain, * or -1U if we're at the end. */ -static unsigned next_desc(struct vring_desc *desc) +static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc) { unsigned int next; /* If this descriptor says it doesn't chain, we're done. */ - if (!(desc->flags & VRING_DESC_F_NEXT)) + if (!(desc->flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT))) return -1U; /* Check they're not leading us off end of descriptors. */ - next = desc->next; + next = vhost16_to_cpu(vq, desc->next); /* Make sure compiler knows to grab that: we don't want it changing! */ /* We will use the result as an index in an array, so most * architectures only need a compiler barrier here. */ @@ -1113,18 +1120,19 @@ static int get_indirect(struct vhost_virtqueue *vq, { struct vring_desc desc; unsigned int i = 0, count, found = 0; + u32 len = vhost32_to_cpu(vq, indirect->len); int ret; /* Sanity check */ - if (unlikely(indirect->len % sizeof desc)) { + if (unlikely(len % sizeof desc)) { vq_err(vq, "Invalid length in indirect descriptor: " "len 0x%llx not multiple of 0x%zx\n", - (unsigned long long)indirect->len, + (unsigned long long)len, sizeof desc); return -EINVAL; } - ret = translate_desc(vq, indirect->addr, indirect->len, vq->indirect, + ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, UIO_MAXIOV); if (unlikely(ret < 0)) { vq_err(vq, "Translation failure %d in indirect.\n", ret); @@ -1135,7 +1143,7 @@ static int get_indirect(struct vhost_virtqueue *vq, * architectures only need a compiler barrier here. */ read_barrier_depends(); - count = indirect->len / sizeof desc; + count = len / sizeof desc; /* Buffers are chained via a 16 bit next field, so * we can have at most 2^16 of these. */ if (unlikely(count > USHRT_MAX + 1)) { @@ -1155,16 +1163,17 @@ static int get_indirect(struct vhost_virtqueue *vq, if (unlikely(memcpy_fromiovec((unsigned char *)&desc, vq->indirect, sizeof desc))) { vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", - i, (size_t)indirect->addr + i * sizeof desc); + i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); return -EINVAL; } - if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) { + if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) { vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", - i, (size_t)indirect->addr + i * sizeof desc); + i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); return -EINVAL; } - ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count, + ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), + vhost32_to_cpu(vq, desc.len), iov + iov_count, iov_size - iov_count); if (unlikely(ret < 0)) { vq_err(vq, "Translation failure %d indirect idx %d\n", @@ -1172,11 +1181,11 @@ static int get_indirect(struct vhost_virtqueue *vq, return ret; } /* If this is an input descriptor, increment that count. */ - if (desc.flags & VRING_DESC_F_WRITE) { + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) { *in_num += ret; if (unlikely(log)) { - log[*log_num].addr = desc.addr; - log[*log_num].len = desc.len; + log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); + log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; } } else { @@ -1189,7 +1198,7 @@ static int get_indirect(struct vhost_virtqueue *vq, } *out_num += ret; } - } while ((i = next_desc(&desc)) != -1); + } while ((i = next_desc(vq, &desc)) != -1); return 0; } @@ -1209,15 +1218,18 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, struct vring_desc desc; unsigned int i, head, found = 0; u16 last_avail_idx; + __virtio16 avail_idx; + __virtio16 ring_head; int ret; /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vq->last_avail_idx; - if (unlikely(__get_user(vq->avail_idx, &vq->avail->idx))) { + if (unlikely(__get_user(avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; } + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { vq_err(vq, "Guest moved used index from %u to %u", @@ -1234,7 +1246,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (unlikely(__get_user(head, + if (unlikely(__get_user(ring_head, &vq->avail->ring[last_avail_idx % vq->num]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, @@ -1242,6 +1254,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, return -EFAULT; } + head = vhost16_to_cpu(vq, ring_head); + /* If their number is silly, that's an error. */ if (unlikely(head >= vq->num)) { vq_err(vq, "Guest says index %u > %u is available", @@ -1274,7 +1288,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, i, vq->desc + i); return -EFAULT; } - if (desc.flags & VRING_DESC_F_INDIRECT) { + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT)) { ret = get_indirect(vq, iov, iov_size, out_num, in_num, log, log_num, &desc); @@ -1286,20 +1300,21 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, continue; } - ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count, + ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), + vhost32_to_cpu(vq, desc.len), iov + iov_count, iov_size - iov_count); if (unlikely(ret < 0)) { vq_err(vq, "Translation failure %d descriptor idx %d\n", ret, i); return ret; } - if (desc.flags & VRING_DESC_F_WRITE) { + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) { /* If this is an input descriptor, * increment that count. */ *in_num += ret; if (unlikely(log)) { - log[*log_num].addr = desc.addr; - log[*log_num].len = desc.len; + log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); + log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; } } else { @@ -1312,7 +1327,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, } *out_num += ret; } - } while ((i = next_desc(&desc)) != -1); + } while ((i = next_desc(vq, &desc)) != -1); /* On success, increment avail index. */ vq->last_avail_idx++; @@ -1335,7 +1350,10 @@ EXPORT_SYMBOL_GPL(vhost_discard_vq_desc); * want to notify the guest, using eventfd. */ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) { - struct vring_used_elem heads = { head, len }; + struct vring_used_elem heads = { + cpu_to_vhost32(vq, head), + cpu_to_vhost32(vq, len) + }; return vhost_add_used_n(vq, &heads, 1); } @@ -1404,7 +1422,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, /* Make sure buffer is written before we update index. */ smp_wmb(); - if (put_user(vq->last_used_idx, &vq->used->idx)) { + if (__put_user(cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx)) { vq_err(vq, "Failed to increment used idx"); return -EFAULT; } @@ -1422,7 +1440,8 @@ EXPORT_SYMBOL_GPL(vhost_add_used_n); static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { - __u16 old, new, event; + __u16 old, new; + __virtio16 event; bool v; /* Flush out used index updates. This is paired * with the barrier that the Guest executes when enabling @@ -1434,12 +1453,12 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) return true; if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { - __u16 flags; + __virtio16 flags; if (__get_user(flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return true; } - return !(flags & VRING_AVAIL_F_NO_INTERRUPT); + return !(flags & cpu_to_vhost16(vq, VRING_AVAIL_F_NO_INTERRUPT)); } old = vq->signalled_used; v = vq->signalled_used_valid; @@ -1449,11 +1468,11 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (unlikely(!v)) return true; - if (get_user(event, vhost_used_event(vq))) { + if (__get_user(event, vhost_used_event(vq))) { vq_err(vq, "Failed to get used event idx"); return true; } - return vring_need_event(event, new, old); + return vring_need_event(vhost16_to_cpu(vq, event), new, old); } /* This actually signals the guest, using eventfd. */ @@ -1488,7 +1507,7 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); /* OK, now we need to know about added descriptors. */ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { - u16 avail_idx; + __virtio16 avail_idx; int r; if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) @@ -1519,7 +1538,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) return false; } - return avail_idx != vq->avail_idx; + return vhost16_to_cpu(vq, avail_idx) != vq->avail_idx; } EXPORT_SYMBOL_GPL(vhost_enable_notify); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 3eda654b8f5a..8c1c792900ba 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -12,8 +12,6 @@ #include <linux/virtio_ring.h> #include <linux/atomic.h> -struct vhost_device; - struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); @@ -54,8 +52,6 @@ struct vhost_log { u64 len; }; -struct vhost_virtqueue; - /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -106,7 +102,7 @@ struct vhost_virtqueue { /* Protected by virtqueue mutex. */ struct vhost_memory *memory; void *private_data; - unsigned acked_features; + u64 acked_features; /* Log write descriptors */ void __user *log_base; struct vhost_log *log; @@ -172,8 +168,39 @@ enum { (1ULL << VHOST_F_LOG_ALL), }; -static inline int vhost_has_feature(struct vhost_virtqueue *vq, int bit) +static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) +{ + return vq->acked_features & (1ULL << bit); +} + +/* Memory accessors */ +static inline u16 vhost16_to_cpu(struct vhost_virtqueue *vq, __virtio16 val) +{ + return __virtio16_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); +} + +static inline __virtio16 cpu_to_vhost16(struct vhost_virtqueue *vq, u16 val) +{ + return __cpu_to_virtio16(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); +} + +static inline u32 vhost32_to_cpu(struct vhost_virtqueue *vq, __virtio32 val) +{ + return __virtio32_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); +} + +static inline __virtio32 cpu_to_vhost32(struct vhost_virtqueue *vq, u32 val) +{ + return __cpu_to_virtio32(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); +} + +static inline u64 vhost64_to_cpu(struct vhost_virtqueue *vq, __virtio64 val) +{ + return __virtio64_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); +} + +static inline __virtio64 cpu_to_vhost64(struct vhost_virtqueue *vq, u64 val) { - return vq->acked_features & (1 << bit); + return __cpu_to_virtio64(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); } #endif diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 9076635697bb..bf5104b56894 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o +virtio_pci-y := virtio_pci_legacy.o virtio_pci_common.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index df598dd8c5c8..f22665868781 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -3,6 +3,7 @@ #include <linux/virtio_config.h> #include <linux/module.h> #include <linux/idr.h> +#include <uapi/linux/virtio_ids.h> /* Unique numbering for virtio devices. */ static DEFINE_IDA(virtio_index_ida); @@ -49,9 +50,9 @@ static ssize_t features_show(struct device *_d, /* We actually represent this as a bitstring, as it could be * arbitrary length in future. */ - for (i = 0; i < ARRAY_SIZE(dev->features)*BITS_PER_LONG; i++) + for (i = 0; i < sizeof(dev->features)*8; i++) len += sprintf(buf+len, "%c", - test_bit(i, dev->features) ? '1' : '0'); + __virtio_test_bit(dev, i) ? '1' : '0'); len += sprintf(buf+len, "\n"); return len; } @@ -113,6 +114,13 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev, for (i = 0; i < drv->feature_table_size; i++) if (drv->feature_table[i] == fbit) return; + + if (drv->feature_table_legacy) { + for (i = 0; i < drv->feature_table_size_legacy; i++) + if (drv->feature_table_legacy[i] == fbit) + return; + } + BUG(); } EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature); @@ -159,7 +167,10 @@ static int virtio_dev_probe(struct device *_d) int err, i; struct virtio_device *dev = dev_to_virtio(_d); struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); - u32 device_features; + u64 device_features; + u64 driver_features; + u64 driver_features_legacy; + unsigned status; /* We have a driver! */ add_status(dev, VIRTIO_CONFIG_S_DRIVER); @@ -167,34 +178,66 @@ static int virtio_dev_probe(struct device *_d) /* Figure out what features the device supports. */ device_features = dev->config->get_features(dev); - /* Features supported by both device and driver into dev->features. */ - memset(dev->features, 0, sizeof(dev->features)); + /* Figure out what features the driver supports. */ + driver_features = 0; for (i = 0; i < drv->feature_table_size; i++) { unsigned int f = drv->feature_table[i]; - BUG_ON(f >= 32); - if (device_features & (1 << f)) - set_bit(f, dev->features); + BUG_ON(f >= 64); + driver_features |= (1ULL << f); + } + + /* Some drivers have a separate feature table for virtio v1.0 */ + if (drv->feature_table_legacy) { + driver_features_legacy = 0; + for (i = 0; i < drv->feature_table_size_legacy; i++) { + unsigned int f = drv->feature_table_legacy[i]; + BUG_ON(f >= 64); + driver_features_legacy |= (1ULL << f); + } + } else { + driver_features_legacy = driver_features; } + if (device_features & (1ULL << VIRTIO_F_VERSION_1)) + dev->features = driver_features & device_features; + else + dev->features = driver_features_legacy & device_features; + /* Transport features always preserved to pass to finalize_features. */ for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) - if (device_features & (1 << i)) - set_bit(i, dev->features); + if (device_features & (1ULL << i)) + __virtio_set_bit(dev, i); - dev->config->finalize_features(dev); + err = dev->config->finalize_features(dev); + if (err) + goto err; + + if (virtio_has_feature(dev, VIRTIO_F_VERSION_1)) { + add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); + status = dev->config->get_status(dev); + if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { + dev_err(_d, "virtio: device refuses features: %x\n", + status); + err = -ENODEV; + goto err; + } + } err = drv->probe(dev); if (err) - add_status(dev, VIRTIO_CONFIG_S_FAILED); - else { - add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); - if (drv->scan) - drv->scan(dev); + goto err; - virtio_config_enable(dev); - } + add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + if (drv->scan) + drv->scan(dev); + + virtio_config_enable(dev); + return 0; +err: + add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; + } static int virtio_dev_remove(struct device *_d) @@ -223,6 +266,12 @@ static struct bus_type virtio_bus = { .remove = virtio_dev_remove, }; +bool virtio_device_is_legacy_only(struct virtio_device_id id) +{ + return id.device == VIRTIO_ID_BALLOON; +} +EXPORT_SYMBOL_GPL(virtio_device_is_legacy_only); + int register_virtio_driver(struct virtio_driver *driver) { /* Catch this early. */ @@ -303,6 +352,7 @@ EXPORT_SYMBOL_GPL(virtio_device_freeze); int virtio_device_restore(struct virtio_device *dev) { struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + int ret; /* We always start by resetting the device, in case a previous * driver messed it up. */ @@ -322,14 +372,14 @@ int virtio_device_restore(struct virtio_device *dev) /* We have a driver! */ add_status(dev, VIRTIO_CONFIG_S_DRIVER); - dev->config->finalize_features(dev); + ret = dev->config->finalize_features(dev); + if (ret) + goto err; if (drv->restore) { - int ret = drv->restore(dev); - if (ret) { - add_status(dev, VIRTIO_CONFIG_S_FAILED); - return ret; - } + ret = drv->restore(dev); + if (ret) + goto err; } /* Finally, tell the device we're all set */ @@ -338,6 +388,10 @@ int virtio_device_restore(struct virtio_device *dev) virtio_config_enable(dev); return 0; + +err: + add_status(dev, VIRTIO_CONFIG_S_FAILED); + return ret; } EXPORT_SYMBOL_GPL(virtio_device_restore); #endif diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index ef9a1650bb80..5219210d31ce 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -142,7 +142,7 @@ struct virtio_mmio_vq_info { /* Configuration interface */ -static u32 vm_get_features(struct virtio_device *vdev) +static u64 vm_get_features(struct virtio_device *vdev) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); @@ -152,19 +152,20 @@ static u32 vm_get_features(struct virtio_device *vdev) return readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES); } -static void vm_finalize_features(struct virtio_device *vdev) +static int vm_finalize_features(struct virtio_device *vdev) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); - int i; /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); - for (i = 0; i < ARRAY_SIZE(vdev->features); i++) { - writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL); - writel(vdev->features[i], - vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES); - } + /* Make sure we don't have any features > 32 bits! */ + BUG_ON((u32)vdev->features != vdev->features); + + writel(0, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL); + writel(vdev->features, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES); + + return 0; } static void vm_get(struct virtio_device *vdev, unsigned offset, diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c deleted file mode 100644 index d34ebfa604f3..000000000000 --- a/drivers/virtio/virtio_pci.c +++ /dev/null @@ -1,802 +0,0 @@ -/* - * Virtio PCI driver - * - * This module allows virtio devices to be used over a virtual PCI device. - * This can be used with QEMU based VMMs like KVM or Xen. - * - * Copyright IBM Corp. 2007 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include <linux/module.h> -#include <linux/list.h> -#include <linux/pci.h> -#include <linux/slab.h> -#include <linux/interrupt.h> -#include <linux/virtio.h> -#include <linux/virtio_config.h> -#include <linux/virtio_ring.h> -#include <linux/virtio_pci.h> -#include <linux/highmem.h> -#include <linux/spinlock.h> - -MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>"); -MODULE_DESCRIPTION("virtio-pci"); -MODULE_LICENSE("GPL"); -MODULE_VERSION("1"); - -/* Our device structure */ -struct virtio_pci_device -{ - struct virtio_device vdev; - struct pci_dev *pci_dev; - - /* the IO mapping for the PCI config space */ - void __iomem *ioaddr; - - /* a list of queues so we can dispatch IRQs */ - spinlock_t lock; - struct list_head virtqueues; - - /* MSI-X support */ - int msix_enabled; - int intx_enabled; - struct msix_entry *msix_entries; - cpumask_var_t *msix_affinity_masks; - /* Name strings for interrupts. This size should be enough, - * and I'm too lazy to allocate each name separately. */ - char (*msix_names)[256]; - /* Number of available vectors */ - unsigned msix_vectors; - /* Vectors allocated, excluding per-vq vectors if any */ - unsigned msix_used_vectors; - - /* Whether we have vector per vq */ - bool per_vq_vectors; -}; - -/* Constants for MSI-X */ -/* Use first vector for configuration changes, second and the rest for - * virtqueues Thus, we need at least 2 vectors for MSI. */ -enum { - VP_MSIX_CONFIG_VECTOR = 0, - VP_MSIX_VQ_VECTOR = 1, -}; - -struct virtio_pci_vq_info -{ - /* the actual virtqueue */ - struct virtqueue *vq; - - /* the number of entries in the queue */ - int num; - - /* the virtual address of the ring queue */ - void *queue; - - /* the list node for the virtqueues list */ - struct list_head node; - - /* MSI-X vector (or none) */ - unsigned msix_vector; -}; - -/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ -static const struct pci_device_id virtio_pci_id_table[] = { - { PCI_DEVICE(0x1af4, PCI_ANY_ID) }, - { 0 } -}; - -MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); - -/* Convert a generic virtio device to our structure */ -static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) -{ - return container_of(vdev, struct virtio_pci_device, vdev); -} - -/* virtio config->get_features() implementation */ -static u32 vp_get_features(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - - /* When someone needs more than 32 feature bits, we'll need to - * steal a bit to indicate that the rest are somewhere else. */ - return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); -} - -/* virtio config->finalize_features() implementation */ -static void vp_finalize_features(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - - /* Give virtio_ring a chance to accept features. */ - vring_transport_features(vdev); - - /* We only support 32 feature bits. */ - BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1); - iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES); -} - -/* virtio config->get() implementation */ -static void vp_get(struct virtio_device *vdev, unsigned offset, - void *buf, unsigned len) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + - VIRTIO_PCI_CONFIG(vp_dev) + offset; - u8 *ptr = buf; - int i; - - for (i = 0; i < len; i++) - ptr[i] = ioread8(ioaddr + i); -} - -/* the config->set() implementation. it's symmetric to the config->get() - * implementation */ -static void vp_set(struct virtio_device *vdev, unsigned offset, - const void *buf, unsigned len) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + - VIRTIO_PCI_CONFIG(vp_dev) + offset; - const u8 *ptr = buf; - int i; - - for (i = 0; i < len; i++) - iowrite8(ptr[i], ioaddr + i); -} - -/* config->{get,set}_status() implementations */ -static u8 vp_get_status(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); -} - -static void vp_set_status(struct virtio_device *vdev, u8 status) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - /* We should never be setting status to 0. */ - BUG_ON(status == 0); - iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS); -} - -/* wait for pending irq handlers */ -static void vp_synchronize_vectors(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - int i; - - if (vp_dev->intx_enabled) - synchronize_irq(vp_dev->pci_dev->irq); - - for (i = 0; i < vp_dev->msix_vectors; ++i) - synchronize_irq(vp_dev->msix_entries[i].vector); -} - -static void vp_reset(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - /* 0 status means a reset. */ - iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); - /* Flush out the status write, and flush in device writes, - * including MSi-X interrupts, if any. */ - ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); - /* Flush pending VQ/configuration callbacks. */ - vp_synchronize_vectors(vdev); -} - -/* the notify function used when creating a virt queue */ -static bool vp_notify(struct virtqueue *vq) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - - /* we write the queue's selector into the notification register to - * signal the other end */ - iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); - return true; -} - -/* Handle a configuration change: Tell driver if it wants to know. */ -static irqreturn_t vp_config_changed(int irq, void *opaque) -{ - struct virtio_pci_device *vp_dev = opaque; - - virtio_config_changed(&vp_dev->vdev); - return IRQ_HANDLED; -} - -/* Notify all virtqueues on an interrupt. */ -static irqreturn_t vp_vring_interrupt(int irq, void *opaque) -{ - struct virtio_pci_device *vp_dev = opaque; - struct virtio_pci_vq_info *info; - irqreturn_t ret = IRQ_NONE; - unsigned long flags; - - spin_lock_irqsave(&vp_dev->lock, flags); - list_for_each_entry(info, &vp_dev->virtqueues, node) { - if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) - ret = IRQ_HANDLED; - } - spin_unlock_irqrestore(&vp_dev->lock, flags); - - return ret; -} - -/* A small wrapper to also acknowledge the interrupt when it's handled. - * I really need an EIO hook for the vring so I can ack the interrupt once we - * know that we'll be handling the IRQ but before we invoke the callback since - * the callback may notify the host which results in the host attempting to - * raise an interrupt that we would then mask once we acknowledged the - * interrupt. */ -static irqreturn_t vp_interrupt(int irq, void *opaque) -{ - struct virtio_pci_device *vp_dev = opaque; - u8 isr; - - /* reading the ISR has the effect of also clearing it so it's very - * important to save off the value. */ - isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); - - /* It's definitely not us if the ISR was not high */ - if (!isr) - return IRQ_NONE; - - /* Configuration change? Tell driver if it wants to know. */ - if (isr & VIRTIO_PCI_ISR_CONFIG) - vp_config_changed(irq, opaque); - - return vp_vring_interrupt(irq, opaque); -} - -static void vp_free_vectors(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - int i; - - if (vp_dev->intx_enabled) { - free_irq(vp_dev->pci_dev->irq, vp_dev); - vp_dev->intx_enabled = 0; - } - - for (i = 0; i < vp_dev->msix_used_vectors; ++i) - free_irq(vp_dev->msix_entries[i].vector, vp_dev); - - for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); - - if (vp_dev->msix_enabled) { - /* Disable the vector used for configuration */ - iowrite16(VIRTIO_MSI_NO_VECTOR, - vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - /* Flush the write out to device */ - ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - - pci_disable_msix(vp_dev->pci_dev); - vp_dev->msix_enabled = 0; - } - - vp_dev->msix_vectors = 0; - vp_dev->msix_used_vectors = 0; - kfree(vp_dev->msix_names); - vp_dev->msix_names = NULL; - kfree(vp_dev->msix_entries); - vp_dev->msix_entries = NULL; - kfree(vp_dev->msix_affinity_masks); - vp_dev->msix_affinity_masks = NULL; -} - -static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, - bool per_vq_vectors) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - const char *name = dev_name(&vp_dev->vdev.dev); - unsigned i, v; - int err = -ENOMEM; - - vp_dev->msix_vectors = nvectors; - - vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, - GFP_KERNEL); - if (!vp_dev->msix_entries) - goto error; - vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, - GFP_KERNEL); - if (!vp_dev->msix_names) - goto error; - vp_dev->msix_affinity_masks - = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, - GFP_KERNEL); - if (!vp_dev->msix_affinity_masks) - goto error; - for (i = 0; i < nvectors; ++i) - if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], - GFP_KERNEL)) - goto error; - - for (i = 0; i < nvectors; ++i) - vp_dev->msix_entries[i].entry = i; - - err = pci_enable_msix_exact(vp_dev->pci_dev, - vp_dev->msix_entries, nvectors); - if (err) - goto error; - vp_dev->msix_enabled = 1; - - /* Set the vector used for configuration */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-config", name); - err = request_irq(vp_dev->msix_entries[v].vector, - vp_config_changed, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - - iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - /* Verify we had enough resources to assign the vector */ - v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - if (v == VIRTIO_MSI_NO_VECTOR) { - err = -EBUSY; - goto error; - } - - if (!per_vq_vectors) { - /* Shared vector for all VQs */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-virtqueues", name); - err = request_irq(vp_dev->msix_entries[v].vector, - vp_vring_interrupt, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - } - return 0; -error: - vp_free_vectors(vdev); - return err; -} - -static int vp_request_intx(struct virtio_device *vdev) -{ - int err; - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, - IRQF_SHARED, dev_name(&vdev->dev), vp_dev); - if (!err) - vp_dev->intx_enabled = 1; - return err; -} - -static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq), - const char *name, - u16 msix_vec) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_vq_info *info; - struct virtqueue *vq; - unsigned long flags, size; - u16 num; - int err; - - /* Select the queue we're interested in */ - iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - - /* Check if queue is either not available or already active. */ - num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); - if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) - return ERR_PTR(-ENOENT); - - /* allocate and fill out our structure the represents an active - * queue */ - info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL); - if (!info) - return ERR_PTR(-ENOMEM); - - info->num = num; - info->msix_vector = msix_vec; - - size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); - info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); - if (info->queue == NULL) { - err = -ENOMEM; - goto out_info; - } - - /* activate the queue */ - iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, - vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); - - /* create the vring */ - vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev, - true, info->queue, vp_notify, callback, name); - if (!vq) { - err = -ENOMEM; - goto out_activate_queue; - } - - vq->priv = info; - info->vq = vq; - - if (msix_vec != VIRTIO_MSI_NO_VECTOR) { - iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); - msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); - if (msix_vec == VIRTIO_MSI_NO_VECTOR) { - err = -EBUSY; - goto out_assign; - } - } - - if (callback) { - spin_lock_irqsave(&vp_dev->lock, flags); - list_add(&info->node, &vp_dev->virtqueues); - spin_unlock_irqrestore(&vp_dev->lock, flags); - } else { - INIT_LIST_HEAD(&info->node); - } - - return vq; - -out_assign: - vring_del_virtqueue(vq); -out_activate_queue: - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); - free_pages_exact(info->queue, size); -out_info: - kfree(info); - return ERR_PTR(err); -} - -static void vp_del_vq(struct virtqueue *vq) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - struct virtio_pci_vq_info *info = vq->priv; - unsigned long flags, size; - - spin_lock_irqsave(&vp_dev->lock, flags); - list_del(&info->node); - spin_unlock_irqrestore(&vp_dev->lock, flags); - - iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - - if (vp_dev->msix_enabled) { - iowrite16(VIRTIO_MSI_NO_VECTOR, - vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); - /* Flush the write out to device */ - ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); - } - - vring_del_virtqueue(vq); - - /* Select and deactivate the queue */ - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); - - size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); - free_pages_exact(info->queue, size); - kfree(info); -} - -/* the config->del_vqs() implementation */ -static void vp_del_vqs(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtqueue *vq, *n; - struct virtio_pci_vq_info *info; - - list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - info = vq->priv; - if (vp_dev->per_vq_vectors && - info->msix_vector != VIRTIO_MSI_NO_VECTOR) - free_irq(vp_dev->msix_entries[info->msix_vector].vector, - vq); - vp_del_vq(vq); - } - vp_dev->per_vq_vectors = false; - - vp_free_vectors(vdev); -} - -static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char *names[], - bool use_msix, - bool per_vq_vectors) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - u16 msix_vec; - int i, err, nvectors, allocated_vectors; - - if (!use_msix) { - /* Old style: one normal interrupt for change and all vqs. */ - err = vp_request_intx(vdev); - if (err) - goto error_request; - } else { - if (per_vq_vectors) { - /* Best option: one for change interrupt, one per vq. */ - nvectors = 1; - for (i = 0; i < nvqs; ++i) - if (callbacks[i]) - ++nvectors; - } else { - /* Second best: one for change, shared for all vqs. */ - nvectors = 2; - } - - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); - if (err) - goto error_request; - } - - vp_dev->per_vq_vectors = per_vq_vectors; - allocated_vectors = vp_dev->msix_used_vectors; - for (i = 0; i < nvqs; ++i) { - if (!names[i]) { - vqs[i] = NULL; - continue; - } else if (!callbacks[i] || !vp_dev->msix_enabled) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) - msix_vec = allocated_vectors++; - else - msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec); - if (IS_ERR(vqs[i])) { - err = PTR_ERR(vqs[i]); - goto error_find; - } - - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) - continue; - - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", - dev_name(&vp_dev->vdev.dev), names[i]); - err = request_irq(vp_dev->msix_entries[msix_vec].vector, - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); - if (err) { - vp_del_vq(vqs[i]); - goto error_find; - } - } - return 0; - -error_find: - vp_del_vqs(vdev); - -error_request: - return err; -} - -/* the config->find_vqs() implementation */ -static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char *names[]) -{ - int err; - - /* Try MSI-X with one vector per queue. */ - err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true); - if (!err) - return 0; - /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, - true, false); - if (!err) - return 0; - /* Finally fall back to regular interrupts. */ - return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, - false, false); -} - -static const char *vp_bus_name(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - - return pci_name(vp_dev->pci_dev); -} - -/* Setup the affinity for a virtqueue: - * - force the affinity for per vq vector - * - OR over all affinities for shared MSI - * - ignore the affinity request if we're using INTX - */ -static int vp_set_vq_affinity(struct virtqueue *vq, int cpu) -{ - struct virtio_device *vdev = vq->vdev; - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_vq_info *info = vq->priv; - struct cpumask *mask; - unsigned int irq; - - if (!vq->callback) - return -EINVAL; - - if (vp_dev->msix_enabled) { - mask = vp_dev->msix_affinity_masks[info->msix_vector]; - irq = vp_dev->msix_entries[info->msix_vector].vector; - if (cpu == -1) - irq_set_affinity_hint(irq, NULL); - else { - cpumask_set_cpu(cpu, mask); - irq_set_affinity_hint(irq, mask); - } - } - return 0; -} - -static const struct virtio_config_ops virtio_pci_config_ops = { - .get = vp_get, - .set = vp_set, - .get_status = vp_get_status, - .set_status = vp_set_status, - .reset = vp_reset, - .find_vqs = vp_find_vqs, - .del_vqs = vp_del_vqs, - .get_features = vp_get_features, - .finalize_features = vp_finalize_features, - .bus_name = vp_bus_name, - .set_vq_affinity = vp_set_vq_affinity, -}; - -static void virtio_pci_release_dev(struct device *_d) -{ - /* - * No need for a release method as we allocate/free - * all devices together with the pci devices. - * Provide an empty one to avoid getting a warning from core. - */ -} - -/* the PCI probing function */ -static int virtio_pci_probe(struct pci_dev *pci_dev, - const struct pci_device_id *id) -{ - struct virtio_pci_device *vp_dev; - int err; - - /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ - if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) - return -ENODEV; - - if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) { - printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n", - VIRTIO_PCI_ABI_VERSION, pci_dev->revision); - return -ENODEV; - } - - /* allocate our structure and fill it out */ - vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); - if (vp_dev == NULL) - return -ENOMEM; - - vp_dev->vdev.dev.parent = &pci_dev->dev; - vp_dev->vdev.dev.release = virtio_pci_release_dev; - vp_dev->vdev.config = &virtio_pci_config_ops; - vp_dev->pci_dev = pci_dev; - INIT_LIST_HEAD(&vp_dev->virtqueues); - spin_lock_init(&vp_dev->lock); - - /* Disable MSI/MSIX to bring device to a known good state. */ - pci_msi_off(pci_dev); - - /* enable the device */ - err = pci_enable_device(pci_dev); - if (err) - goto out; - - err = pci_request_regions(pci_dev, "virtio-pci"); - if (err) - goto out_enable_device; - - vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); - if (vp_dev->ioaddr == NULL) { - err = -ENOMEM; - goto out_req_regions; - } - - pci_set_drvdata(pci_dev, vp_dev); - pci_set_master(pci_dev); - - /* we use the subsystem vendor/device id as the virtio vendor/device - * id. this allows us to use the same PCI vendor/device id for all - * virtio devices and to identify the particular virtio driver by - * the subsystem ids */ - vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; - vp_dev->vdev.id.device = pci_dev->subsystem_device; - - /* finally register the virtio device */ - err = register_virtio_device(&vp_dev->vdev); - if (err) - goto out_set_drvdata; - - return 0; - -out_set_drvdata: - pci_iounmap(pci_dev, vp_dev->ioaddr); -out_req_regions: - pci_release_regions(pci_dev); -out_enable_device: - pci_disable_device(pci_dev); -out: - kfree(vp_dev); - return err; -} - -static void virtio_pci_remove(struct pci_dev *pci_dev) -{ - struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); - - unregister_virtio_device(&vp_dev->vdev); - - vp_del_vqs(&vp_dev->vdev); - pci_iounmap(pci_dev, vp_dev->ioaddr); - pci_release_regions(pci_dev); - pci_disable_device(pci_dev); - kfree(vp_dev); -} - -#ifdef CONFIG_PM_SLEEP -static int virtio_pci_freeze(struct device *dev) -{ - struct pci_dev *pci_dev = to_pci_dev(dev); - struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); - int ret; - - ret = virtio_device_freeze(&vp_dev->vdev); - - if (!ret) - pci_disable_device(pci_dev); - return ret; -} - -static int virtio_pci_restore(struct device *dev) -{ - struct pci_dev *pci_dev = to_pci_dev(dev); - struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); - int ret; - - ret = pci_enable_device(pci_dev); - if (ret) - return ret; - - pci_set_master(pci_dev); - return virtio_device_restore(&vp_dev->vdev); -} - -static const struct dev_pm_ops virtio_pci_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore) -}; -#endif - -static struct pci_driver virtio_pci_driver = { - .name = "virtio-pci", - .id_table = virtio_pci_id_table, - .probe = virtio_pci_probe, - .remove = virtio_pci_remove, -#ifdef CONFIG_PM_SLEEP - .driver.pm = &virtio_pci_pm_ops, -#endif -}; - -module_pci_driver(virtio_pci_driver); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c new file mode 100644 index 000000000000..953057d84185 --- /dev/null +++ b/drivers/virtio/virtio_pci_common.c @@ -0,0 +1,464 @@ +/* + * Virtio PCI driver - common functionality for all device versions + * + * This module allows virtio devices to be used over a virtual PCI device. + * This can be used with QEMU based VMMs like KVM or Xen. + * + * Copyright IBM Corp. 2007 + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Rusty Russell <rusty@rustcorp.com.au> + * Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "virtio_pci_common.h" + +/* wait for pending irq handlers */ +void vp_synchronize_vectors(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev->intx_enabled) + synchronize_irq(vp_dev->pci_dev->irq); + + for (i = 0; i < vp_dev->msix_vectors; ++i) + synchronize_irq(vp_dev->msix_entries[i].vector); +} + +/* the notify function used when creating a virt queue */ +bool vp_notify(struct virtqueue *vq) +{ + /* we write the queue's selector into the notification register to + * signal the other end */ + iowrite16(vq->index, (void __iomem *)vq->priv); + return true; +} + +/* Handle a configuration change: Tell driver if it wants to know. */ +static irqreturn_t vp_config_changed(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + + virtio_config_changed(&vp_dev->vdev); + return IRQ_HANDLED; +} + +/* Notify all virtqueues on an interrupt. */ +static irqreturn_t vp_vring_interrupt(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + struct virtio_pci_vq_info *info; + irqreturn_t ret = IRQ_NONE; + unsigned long flags; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_for_each_entry(info, &vp_dev->virtqueues, node) { + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + spin_unlock_irqrestore(&vp_dev->lock, flags); + + return ret; +} + +/* A small wrapper to also acknowledge the interrupt when it's handled. + * I really need an EIO hook for the vring so I can ack the interrupt once we + * know that we'll be handling the IRQ but before we invoke the callback since + * the callback may notify the host which results in the host attempting to + * raise an interrupt that we would then mask once we acknowledged the + * interrupt. */ +static irqreturn_t vp_interrupt(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + u8 isr; + + /* reading the ISR has the effect of also clearing it so it's very + * important to save off the value. */ + isr = ioread8(vp_dev->isr); + + /* It's definitely not us if the ISR was not high */ + if (!isr) + return IRQ_NONE; + + /* Configuration change? Tell driver if it wants to know. */ + if (isr & VIRTIO_PCI_ISR_CONFIG) + vp_config_changed(irq, opaque); + + return vp_vring_interrupt(irq, opaque); +} + +static void vp_free_vectors(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev->intx_enabled) { + free_irq(vp_dev->pci_dev->irq, vp_dev); + vp_dev->intx_enabled = 0; + } + + for (i = 0; i < vp_dev->msix_used_vectors; ++i) + free_irq(vp_dev->msix_entries[i].vector, vp_dev); + + for (i = 0; i < vp_dev->msix_vectors; i++) + if (vp_dev->msix_affinity_masks[i]) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + + if (vp_dev->msix_enabled) { + /* Disable the vector used for configuration */ + vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); + + pci_disable_msix(vp_dev->pci_dev); + vp_dev->msix_enabled = 0; + } + + vp_dev->msix_vectors = 0; + vp_dev->msix_used_vectors = 0; + kfree(vp_dev->msix_names); + vp_dev->msix_names = NULL; + kfree(vp_dev->msix_entries); + vp_dev->msix_entries = NULL; + kfree(vp_dev->msix_affinity_masks); + vp_dev->msix_affinity_masks = NULL; +} + +static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, + bool per_vq_vectors) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(&vp_dev->vdev.dev); + unsigned i, v; + int err = -ENOMEM; + + vp_dev->msix_vectors = nvectors; + + vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, + GFP_KERNEL); + if (!vp_dev->msix_entries) + goto error; + vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, + GFP_KERNEL); + if (!vp_dev->msix_names) + goto error; + vp_dev->msix_affinity_masks + = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, + GFP_KERNEL); + if (!vp_dev->msix_affinity_masks) + goto error; + for (i = 0; i < nvectors; ++i) + if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], + GFP_KERNEL)) + goto error; + + for (i = 0; i < nvectors; ++i) + vp_dev->msix_entries[i].entry = i; + + err = pci_enable_msix_exact(vp_dev->pci_dev, + vp_dev->msix_entries, nvectors); + if (err) + goto error; + vp_dev->msix_enabled = 1; + + /* Set the vector used for configuration */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-config", name); + err = request_irq(vp_dev->msix_entries[v].vector, + vp_config_changed, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error; + ++vp_dev->msix_used_vectors; + + v = vp_dev->config_vector(vp_dev, v); + /* Verify we had enough resources to assign the vector */ + if (v == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto error; + } + + if (!per_vq_vectors) { + /* Shared vector for all VQs */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-virtqueues", name); + err = request_irq(vp_dev->msix_entries[v].vector, + vp_vring_interrupt, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error; + ++vp_dev->msix_used_vectors; + } + return 0; +error: + vp_free_vectors(vdev); + return err; +} + +static int vp_request_intx(struct virtio_device *vdev) +{ + int err; + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, + IRQF_SHARED, dev_name(&vdev->dev), vp_dev); + if (!err) + vp_dev->intx_enabled = 1; + return err; +} + +static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name, + u16 msix_vec) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); + struct virtqueue *vq; + unsigned long flags; + + /* fill out our structure that represents an active queue */ + if (!info) + return ERR_PTR(-ENOMEM); + + vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec); + if (IS_ERR(vq)) + goto out_info; + + info->vq = vq; + if (callback) { + spin_lock_irqsave(&vp_dev->lock, flags); + list_add(&info->node, &vp_dev->virtqueues); + spin_unlock_irqrestore(&vp_dev->lock, flags); + } else { + INIT_LIST_HEAD(&info->node); + } + + vp_dev->vqs[index] = info; + return vq; + +out_info: + kfree(info); + return vq; +} + +static void vp_del_vq(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; + unsigned long flags; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_del(&info->node); + spin_unlock_irqrestore(&vp_dev->lock, flags); + + vp_dev->del_vq(info); + kfree(info); +} + +/* the config->del_vqs() implementation */ +void vp_del_vqs(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtqueue *vq, *n; + struct virtio_pci_vq_info *info; + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) { + info = vp_dev->vqs[vq->index]; + if (vp_dev->per_vq_vectors && + info->msix_vector != VIRTIO_MSI_NO_VECTOR) + free_irq(vp_dev->msix_entries[info->msix_vector].vector, + vq); + vp_del_vq(vq); + } + vp_dev->per_vq_vectors = false; + + vp_free_vectors(vdev); + kfree(vp_dev->vqs); +} + +static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[], + bool use_msix, + bool per_vq_vectors) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + u16 msix_vec; + int i, err, nvectors, allocated_vectors; + + vp_dev->vqs = kmalloc(nvqs * sizeof *vp_dev->vqs, GFP_KERNEL); + if (!vp_dev->vqs) + return -ENOMEM; + + if (!use_msix) { + /* Old style: one normal interrupt for change and all vqs. */ + err = vp_request_intx(vdev); + if (err) + goto error_find; + } else { + if (per_vq_vectors) { + /* Best option: one for change interrupt, one per vq. */ + nvectors = 1; + for (i = 0; i < nvqs; ++i) + if (callbacks[i]) + ++nvectors; + } else { + /* Second best: one for change, shared for all vqs. */ + nvectors = 2; + } + + err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); + if (err) + goto error_find; + } + + vp_dev->per_vq_vectors = per_vq_vectors; + allocated_vectors = vp_dev->msix_used_vectors; + for (i = 0; i < nvqs; ++i) { + if (!names[i]) { + vqs[i] = NULL; + continue; + } else if (!callbacks[i] || !vp_dev->msix_enabled) + msix_vec = VIRTIO_MSI_NO_VECTOR; + else if (vp_dev->per_vq_vectors) + msix_vec = allocated_vectors++; + else + msix_vec = VP_MSIX_VQ_VECTOR; + vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec); + if (IS_ERR(vqs[i])) { + err = PTR_ERR(vqs[i]); + goto error_find; + } + + if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) + continue; + + /* allocate per-vq irq if available and necessary */ + snprintf(vp_dev->msix_names[msix_vec], + sizeof *vp_dev->msix_names, + "%s-%s", + dev_name(&vp_dev->vdev.dev), names[i]); + err = request_irq(vp_dev->msix_entries[msix_vec].vector, + vring_interrupt, 0, + vp_dev->msix_names[msix_vec], + vqs[i]); + if (err) { + vp_del_vq(vqs[i]); + goto error_find; + } + } + return 0; + +error_find: + vp_del_vqs(vdev); + return err; +} + +/* the config->find_vqs() implementation */ +int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + int err; + + /* Try MSI-X with one vector per queue. */ + err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true); + if (!err) + return 0; + /* Fallback: MSI-X with one vector for config, one shared for queues. */ + err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, + true, false); + if (!err) + return 0; + /* Finally fall back to regular interrupts. */ + return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, + false, false); +} + +const char *vp_bus_name(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + return pci_name(vp_dev->pci_dev); +} + +/* Setup the affinity for a virtqueue: + * - force the affinity for per vq vector + * - OR over all affinities for shared MSI + * - ignore the affinity request if we're using INTX + */ +int vp_set_vq_affinity(struct virtqueue *vq, int cpu) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; + struct cpumask *mask; + unsigned int irq; + + if (!vq->callback) + return -EINVAL; + + if (vp_dev->msix_enabled) { + mask = vp_dev->msix_affinity_masks[info->msix_vector]; + irq = vp_dev->msix_entries[info->msix_vector].vector; + if (cpu == -1) + irq_set_affinity_hint(irq, NULL); + else { + cpumask_set_cpu(cpu, mask); + irq_set_affinity_hint(irq, mask); + } + } + return 0; +} + +void virtio_pci_release_dev(struct device *_d) +{ + /* + * No need for a release method as we allocate/free + * all devices together with the pci devices. + * Provide an empty one to avoid getting a warning from core. + */ +} + +#ifdef CONFIG_PM_SLEEP +static int virtio_pci_freeze(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + int ret; + + ret = virtio_device_freeze(&vp_dev->vdev); + + if (!ret) + pci_disable_device(pci_dev); + return ret; +} + +static int virtio_pci_restore(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + int ret; + + ret = pci_enable_device(pci_dev); + if (ret) + return ret; + + pci_set_master(pci_dev); + return virtio_device_restore(&vp_dev->vdev); +} + +const struct dev_pm_ops virtio_pci_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore) +}; +#endif diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h new file mode 100644 index 000000000000..d840dad4149d --- /dev/null +++ b/drivers/virtio/virtio_pci_common.h @@ -0,0 +1,136 @@ +#ifndef _DRIVERS_VIRTIO_VIRTIO_PCI_COMMON_H +#define _DRIVERS_VIRTIO_VIRTIO_PCI_COMMON_H +/* + * Virtio PCI driver - APIs for common functionality for all device versions + * + * This module allows virtio devices to be used over a virtual PCI device. + * This can be used with QEMU based VMMs like KVM or Xen. + * + * Copyright IBM Corp. 2007 + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Rusty Russell <rusty@rustcorp.com.au> + * Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> +#define VIRTIO_PCI_NO_LEGACY +#include <linux/virtio_pci.h> +#include <linux/highmem.h> +#include <linux/spinlock.h> + +struct virtio_pci_vq_info { + /* the actual virtqueue */ + struct virtqueue *vq; + + /* the number of entries in the queue */ + int num; + + /* the virtual address of the ring queue */ + void *queue; + + /* the list node for the virtqueues list */ + struct list_head node; + + /* MSI-X vector (or none) */ + unsigned msix_vector; +}; + +/* Our device structure */ +struct virtio_pci_device { + struct virtio_device vdev; + struct pci_dev *pci_dev; + + /* the IO mapping for the PCI config space */ + void __iomem *ioaddr; + + /* the IO mapping for ISR operation */ + void __iomem *isr; + + /* a list of queues so we can dispatch IRQs */ + spinlock_t lock; + struct list_head virtqueues; + + /* array of all queues for house-keeping */ + struct virtio_pci_vq_info **vqs; + + /* MSI-X support */ + int msix_enabled; + int intx_enabled; + struct msix_entry *msix_entries; + cpumask_var_t *msix_affinity_masks; + /* Name strings for interrupts. This size should be enough, + * and I'm too lazy to allocate each name separately. */ + char (*msix_names)[256]; + /* Number of available vectors */ + unsigned msix_vectors; + /* Vectors allocated, excluding per-vq vectors if any */ + unsigned msix_used_vectors; + + /* Whether we have vector per vq */ + bool per_vq_vectors; + + struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev, + struct virtio_pci_vq_info *info, + unsigned idx, + void (*callback)(struct virtqueue *vq), + const char *name, + u16 msix_vec); + void (*del_vq)(struct virtio_pci_vq_info *info); + + u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); +}; + +/* Constants for MSI-X */ +/* Use first vector for configuration changes, second and the rest for + * virtqueues Thus, we need at least 2 vectors for MSI. */ +enum { + VP_MSIX_CONFIG_VECTOR = 0, + VP_MSIX_VQ_VECTOR = 1, +}; + +/* Convert a generic virtio device to our structure */ +static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) +{ + return container_of(vdev, struct virtio_pci_device, vdev); +} + +/* wait for pending irq handlers */ +void vp_synchronize_vectors(struct virtio_device *vdev); +/* the notify function used when creating a virt queue */ +bool vp_notify(struct virtqueue *vq); +/* the config->del_vqs() implementation */ +void vp_del_vqs(struct virtio_device *vdev); +/* the config->find_vqs() implementation */ +int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]); +const char *vp_bus_name(struct virtio_device *vdev); + +/* Setup the affinity for a virtqueue: + * - force the affinity for per vq vector + * - OR over all affinities for shared MSI + * - ignore the affinity request if we're using INTX + */ +int vp_set_vq_affinity(struct virtqueue *vq, int cpu); +void virtio_pci_release_dev(struct device *); + +#ifdef CONFIG_PM_SLEEP +extern const struct dev_pm_ops virtio_pci_pm_ops; +#endif + +#endif diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c new file mode 100644 index 000000000000..2588252e5c1c --- /dev/null +++ b/drivers/virtio/virtio_pci_legacy.c @@ -0,0 +1,326 @@ +/* + * Virtio PCI driver - legacy device support + * + * This module allows virtio devices to be used over a virtual PCI device. + * This can be used with QEMU based VMMs like KVM or Xen. + * + * Copyright IBM Corp. 2007 + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Rusty Russell <rusty@rustcorp.com.au> + * Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "virtio_pci_common.h" + +/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ +static const struct pci_device_id virtio_pci_id_table[] = { + { PCI_DEVICE(0x1af4, PCI_ANY_ID) }, + { 0 } +}; + +MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); + +/* virtio config->get_features() implementation */ +static u64 vp_get_features(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + /* When someone needs more than 32 feature bits, we'll need to + * steal a bit to indicate that the rest are somewhere else. */ + return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); +} + +/* virtio config->finalize_features() implementation */ +static int vp_finalize_features(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + + /* Make sure we don't have any features > 32 bits! */ + BUG_ON((u32)vdev->features != vdev->features); + + /* We only support 32 feature bits. */ + iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); + + return 0; +} + +/* virtio config->get() implementation */ +static void vp_get(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + void __iomem *ioaddr = vp_dev->ioaddr + + VIRTIO_PCI_CONFIG(vp_dev) + offset; + u8 *ptr = buf; + int i; + + for (i = 0; i < len; i++) + ptr[i] = ioread8(ioaddr + i); +} + +/* the config->set() implementation. it's symmetric to the config->get() + * implementation */ +static void vp_set(struct virtio_device *vdev, unsigned offset, + const void *buf, unsigned len) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + void __iomem *ioaddr = vp_dev->ioaddr + + VIRTIO_PCI_CONFIG(vp_dev) + offset; + const u8 *ptr = buf; + int i; + + for (i = 0; i < len; i++) + iowrite8(ptr[i], ioaddr + i); +} + +/* config->{get,set}_status() implementations */ +static u8 vp_get_status(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); +} + +static void vp_set_status(struct virtio_device *vdev, u8 status) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + /* We should never be setting status to 0. */ + BUG_ON(status == 0); + iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS); +} + +static void vp_reset(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + /* 0 status means a reset. */ + iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); + /* Flush out the status write, and flush in device writes, + * including MSi-X interrupts, if any. */ + ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); + /* Flush pending VQ/configuration callbacks. */ + vp_synchronize_vectors(vdev); +} + +static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) +{ + /* Setup the vector used for configuration events */ + iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + /* Verify we had enough resources to assign the vector */ + /* Will also flush the write out to device */ + return ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); +} + +static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, + struct virtio_pci_vq_info *info, + unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name, + u16 msix_vec) +{ + struct virtqueue *vq; + unsigned long size; + u16 num; + int err; + + /* Select the queue we're interested in */ + iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + + /* Check if queue is either not available or already active. */ + num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); + if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) + return ERR_PTR(-ENOENT); + + info->num = num; + info->msix_vector = msix_vec; + + size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); + info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); + if (info->queue == NULL) + return ERR_PTR(-ENOMEM); + + /* activate the queue */ + iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, + vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + + /* create the vring */ + vq = vring_new_virtqueue(index, info->num, + VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, + true, info->queue, vp_notify, callback, name); + if (!vq) { + err = -ENOMEM; + goto out_activate_queue; + } + + vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; + + if (msix_vec != VIRTIO_MSI_NO_VECTOR) { + iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + if (msix_vec == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto out_assign; + } + } + + return vq; + +out_assign: + vring_del_virtqueue(vq); +out_activate_queue: + iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + free_pages_exact(info->queue, size); + return ERR_PTR(err); +} + +static void del_vq(struct virtio_pci_vq_info *info) +{ + struct virtqueue *vq = info->vq; + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + unsigned long size; + + iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + + if (vp_dev->msix_enabled) { + iowrite16(VIRTIO_MSI_NO_VECTOR, + vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + /* Flush the write out to device */ + ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); + } + + vring_del_virtqueue(vq); + + /* Select and deactivate the queue */ + iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + + size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); + free_pages_exact(info->queue, size); +} + +static const struct virtio_config_ops virtio_pci_config_ops = { + .get = vp_get, + .set = vp_set, + .get_status = vp_get_status, + .set_status = vp_set_status, + .reset = vp_reset, + .find_vqs = vp_find_vqs, + .del_vqs = vp_del_vqs, + .get_features = vp_get_features, + .finalize_features = vp_finalize_features, + .bus_name = vp_bus_name, + .set_vq_affinity = vp_set_vq_affinity, +}; + +/* the PCI probing function */ +static int virtio_pci_probe(struct pci_dev *pci_dev, + const struct pci_device_id *id) +{ + struct virtio_pci_device *vp_dev; + int err; + + /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ + if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) + return -ENODEV; + + if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) { + printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n", + VIRTIO_PCI_ABI_VERSION, pci_dev->revision); + return -ENODEV; + } + + /* allocate our structure and fill it out */ + vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); + if (vp_dev == NULL) + return -ENOMEM; + + vp_dev->vdev.dev.parent = &pci_dev->dev; + vp_dev->vdev.dev.release = virtio_pci_release_dev; + vp_dev->vdev.config = &virtio_pci_config_ops; + vp_dev->pci_dev = pci_dev; + INIT_LIST_HEAD(&vp_dev->virtqueues); + spin_lock_init(&vp_dev->lock); + + /* Disable MSI/MSIX to bring device to a known good state. */ + pci_msi_off(pci_dev); + + /* enable the device */ + err = pci_enable_device(pci_dev); + if (err) + goto out; + + err = pci_request_regions(pci_dev, "virtio-pci"); + if (err) + goto out_enable_device; + + vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); + if (vp_dev->ioaddr == NULL) { + err = -ENOMEM; + goto out_req_regions; + } + + vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR; + + pci_set_drvdata(pci_dev, vp_dev); + pci_set_master(pci_dev); + + /* we use the subsystem vendor/device id as the virtio vendor/device + * id. this allows us to use the same PCI vendor/device id for all + * virtio devices and to identify the particular virtio driver by + * the subsystem ids */ + vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; + vp_dev->vdev.id.device = pci_dev->subsystem_device; + + vp_dev->config_vector = vp_config_vector; + vp_dev->setup_vq = setup_vq; + vp_dev->del_vq = del_vq; + + /* finally register the virtio device */ + err = register_virtio_device(&vp_dev->vdev); + if (err) + goto out_set_drvdata; + + return 0; + +out_set_drvdata: + pci_iounmap(pci_dev, vp_dev->ioaddr); +out_req_regions: + pci_release_regions(pci_dev); +out_enable_device: + pci_disable_device(pci_dev); +out: + kfree(vp_dev); + return err; +} + +static void virtio_pci_remove(struct pci_dev *pci_dev) +{ + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + + unregister_virtio_device(&vp_dev->vdev); + + vp_del_vqs(&vp_dev->vdev); + pci_iounmap(pci_dev, vp_dev->ioaddr); + pci_release_regions(pci_dev); + pci_disable_device(pci_dev); + kfree(vp_dev); +} + +static struct pci_driver virtio_pci_driver = { + .name = "virtio-pci", + .id_table = virtio_pci_id_table, + .probe = virtio_pci_probe, + .remove = virtio_pci_remove, +#ifdef CONFIG_PM_SLEEP + .driver.pm = &virtio_pci_pm_ops, +#endif +}; + +module_pci_driver(virtio_pci_driver); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 3b1f89b6e743..00ec6b3f96b2 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -99,7 +99,8 @@ struct vring_virtqueue #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) -static struct vring_desc *alloc_indirect(unsigned int total_sg, gfp_t gfp) +static struct vring_desc *alloc_indirect(struct virtqueue *_vq, + unsigned int total_sg, gfp_t gfp) { struct vring_desc *desc; unsigned int i; @@ -116,7 +117,7 @@ static struct vring_desc *alloc_indirect(unsigned int total_sg, gfp_t gfp) return NULL; for (i = 0; i < total_sg; i++) - desc[i].next = i+1; + desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); return desc; } @@ -165,17 +166,17 @@ static inline int virtqueue_add(struct virtqueue *_vq, /* If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. FIXME: tune this threshold */ if (vq->indirect && total_sg > 1 && vq->vq.num_free) - desc = alloc_indirect(total_sg, gfp); + desc = alloc_indirect(_vq, total_sg, gfp); else desc = NULL; if (desc) { /* Use a single buffer which doesn't continue */ - vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT; - vq->vring.desc[head].addr = virt_to_phys(desc); + vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT); + vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, virt_to_phys(desc)); /* avoid kmemleak false positive (hidden by virt_to_phys) */ kmemleak_ignore(desc); - vq->vring.desc[head].len = total_sg * sizeof(struct vring_desc); + vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc)); /* Set up rest to use this indirect table. */ i = 0; @@ -205,28 +206,28 @@ static inline int virtqueue_add(struct virtqueue *_vq, for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - desc[i].flags = VRING_DESC_F_NEXT; - desc[i].addr = sg_phys(sg); - desc[i].len = sg->length; + desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); + desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg)); + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); prev = i; - i = desc[i].next; + i = virtio16_to_cpu(_vq->vdev, desc[i].next); } } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; - desc[i].addr = sg_phys(sg); - desc[i].len = sg->length; + desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); + desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg)); + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); prev = i; - i = desc[i].next; + i = virtio16_to_cpu(_vq->vdev, desc[i].next); } } /* Last one doesn't continue. */ - desc[prev].flags &= ~VRING_DESC_F_NEXT; + desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); /* Update free pointer */ if (indirect) - vq->free_head = vq->vring.desc[head].next; + vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next); else vq->free_head = i; @@ -235,13 +236,13 @@ static inline int virtqueue_add(struct virtqueue *_vq, /* Put entry in available array (but don't update avail->idx until they * do sync). */ - avail = (vq->vring.avail->idx & (vq->vring.num-1)); - vq->vring.avail->ring[avail] = head; + avail = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) & (vq->vring.num - 1); + vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); /* Descriptors and available array need to be set before we expose the * new available array entries. */ virtio_wmb(vq->weak_barriers); - vq->vring.avail->idx++; + vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) + 1); vq->num_added++; /* This is very unlikely, but theoretically possible. Kick @@ -354,8 +355,8 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq) * event. */ virtio_mb(vq->weak_barriers); - old = vq->vring.avail->idx - vq->num_added; - new = vq->vring.avail->idx; + old = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->num_added; + new = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx); vq->num_added = 0; #ifdef DEBUG @@ -367,10 +368,10 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq) #endif if (vq->event) { - needs_kick = vring_need_event(vring_avail_event(&vq->vring), + needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)), new, old); } else { - needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY); + needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY)); } END_USE(vq); return needs_kick; @@ -432,15 +433,15 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) i = head; /* Free the indirect table */ - if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) - kfree(phys_to_virt(vq->vring.desc[i].addr)); + if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)) + kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, vq->vring.desc[i].addr))); - while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { - i = vq->vring.desc[i].next; + while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT)) { + i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next); vq->vq.num_free++; } - vq->vring.desc[i].next = vq->free_head; + vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); vq->free_head = head; /* Plus final descriptor */ vq->vq.num_free++; @@ -448,7 +449,7 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) static inline bool more_used(const struct vring_virtqueue *vq) { - return vq->last_used_idx != vq->vring.used->idx; + return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx); } /** @@ -491,8 +492,8 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) virtio_rmb(vq->weak_barriers); last_used = (vq->last_used_idx & (vq->vring.num - 1)); - i = vq->vring.used->ring[last_used].id; - *len = vq->vring.used->ring[last_used].len; + i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id); + *len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len); if (unlikely(i >= vq->vring.num)) { BAD_RING(vq, "id %u out of range\n", i); @@ -510,8 +511,8 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) /* If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before * the read in the next get_buf call. */ - if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { - vring_used_event(&vq->vring) = vq->last_used_idx; + if (!(vq->vring.avail->flags & cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT))) { + vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx); virtio_mb(vq->weak_barriers); } @@ -537,7 +538,7 @@ void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; + vq->vring.avail->flags |= cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT); } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); @@ -565,8 +566,8 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to * either clear the flags bit or point the event index at the next * entry. Always do both to keep code simple. */ - vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; - vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx; + vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT); + vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx); END_USE(vq); return last_used_idx; } @@ -586,7 +587,7 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) struct vring_virtqueue *vq = to_vvq(_vq); virtio_mb(vq->weak_barriers); - return (u16)last_used_idx != vq->vring.used->idx; + return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx); } EXPORT_SYMBOL_GPL(virtqueue_poll); @@ -633,12 +634,12 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to * either clear the flags bit or point the event index at the next * entry. Always do both to keep code simple. */ - vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT); /* TODO: tune this threshold */ - bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; - vring_used_event(&vq->vring) = vq->last_used_idx + bufs; + bufs = (u16)(virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->last_used_idx) * 3 / 4; + vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs); virtio_mb(vq->weak_barriers); - if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { + if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) { END_USE(vq); return false; } @@ -670,7 +671,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) /* detach_buf clears data, so grab it now. */ buf = vq->data[i]; detach_buf(vq, i); - vq->vring.avail->idx--; + vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - 1); END_USE(vq); return buf; } @@ -747,12 +748,12 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, /* No callback? Tell other side not to bother us. */ if (!callback) - vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; + vq->vring.avail->flags |= cpu_to_virtio16(vdev, VRING_AVAIL_F_NO_INTERRUPT); /* Put everything in free lists. */ vq->free_head = 0; for (i = 0; i < num-1; i++) { - vq->vring.desc[i].next = i+1; + vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); vq->data[i] = NULL; } vq->data[i] = NULL; @@ -779,9 +780,11 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_RING_F_EVENT_IDX: break; + case VIRTIO_F_VERSION_1: + break; default: /* We don't understand this bit. */ - clear_bit(i, vdev->features); + __virtio_clear_bit(vdev, i); } } } @@ -826,4 +829,20 @@ void virtio_break_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(virtio_break_device); +void *virtqueue_get_avail(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + return vq->vring.avail; +} +EXPORT_SYMBOL_GPL(virtqueue_get_avail); + +void *virtqueue_get_used(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + return vq->vring.used; +} +EXPORT_SYMBOL_GPL(virtqueue_get_used); + MODULE_LICENSE("GPL"); |