diff options
Diffstat (limited to 'net/vmw_vsock')
-rw-r--r-- | net/vmw_vsock/af_vsock.c | 104 | ||||
-rw-r--r-- | net/vmw_vsock/virtio_transport.c | 12 | ||||
-rw-r--r-- | net/vmw_vsock/virtio_transport_common.c | 62 | ||||
-rw-r--r-- | net/vmw_vsock/vmci_transport.c | 4 | ||||
-rw-r--r-- | net/vmw_vsock/vsock_bpf.c | 9 |
5 files changed, 151 insertions, 40 deletions
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b52b798aa4c2..ef519b55a3d9 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -337,7 +337,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket); void vsock_remove_sock(struct vsock_sock *vsk) { - vsock_remove_bound(vsk); + /* Transport reassignment must not remove the binding. */ + if (sock_flag(sk_vsock(vsk), SOCK_DEAD)) + vsock_remove_bound(vsk); + vsock_remove_connected(vsk); } EXPORT_SYMBOL_GPL(vsock_remove_sock); @@ -404,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept); static bool vsock_use_local_transport(unsigned int remote_cid) { + lockdep_assert_held(&vsock_register_mutex); + if (!transport_local) return false; @@ -461,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) remote_flags = vsk->remote_addr.svm_flags; + mutex_lock(&vsock_register_mutex); + switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; @@ -476,12 +483,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) new_transport = transport_h2g; break; default: - return -ESOCKTNOSUPPORT; + ret = -ESOCKTNOSUPPORT; + goto err; } if (vsk->transport) { - if (vsk->transport == new_transport) - return 0; + if (vsk->transport == new_transport) { + ret = 0; + goto err; + } /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we @@ -491,13 +501,30 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) */ vsk->transport->release(vsk); vsock_deassign_transport(vsk); + + /* transport's release() and destruct() can touch some socket + * state, since we are reassigning the socket to a new transport + * during vsock_connect(), let's reset these fields to have a + * clean state. + */ + sock_reset_flag(sk, SOCK_DONE); + sk->sk_state = TCP_CLOSE; + vsk->peer_shutdown = 0; } /* We increase the module refcnt to prevent the transport unloading * while there are open sockets assigned to it. */ - if (!new_transport || !try_module_get(new_transport->module)) - return -ENODEV; + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || @@ -516,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->transport = new_transport; return 0; +err: + mutex_unlock(&vsock_register_mutex); + return ret; } EXPORT_SYMBOL_GPL(vsock_assign_transport); +/* + * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks. + * Otherwise we may race with module removal. Do not use on `vsk->transport`. + */ +static u32 vsock_registered_transport_cid(const struct vsock_transport **transport) +{ + u32 cid = VMADDR_CID_ANY; + + mutex_lock(&vsock_register_mutex); + if (*transport) + cid = (*transport)->get_local_cid(); + mutex_unlock(&vsock_register_mutex); + + return cid; +} + bool vsock_find_cid(unsigned int cid) { - if (transport_g2h && cid == transport_g2h->get_local_cid()) + if (cid == vsock_registered_transport_cid(&transport_g2h)) return true; if (transport_h2g && cid == VMADDR_CID_HOST) @@ -643,7 +689,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, unsigned int i; for (i = 0; i < MAX_PORT_RETRIES; i++) { - if (port <= LAST_RESERVED_PORT) + if (port == VMADDR_PORT_ANY || + port <= LAST_RESERVED_PORT) port = LAST_RESERVED_PORT + 1; new_addr.svm_port = port++; @@ -812,6 +859,13 @@ static void __vsock_release(struct sock *sk, int level) */ lock_sock_nested(sk, level); + /* Indicate to vsock_remove_sock() that the socket is being released and + * can be removed from the bound_table. Unlike transport reassignment + * case, where the socket must remain bound despite vsock_remove_sock() + * being called from the transport release() callback. + */ + sock_set_flag(sk, SOCK_DEAD); + if (vsk->transport) vsk->transport->release(vsk); else if (sock_type_connectible(sk->sk_type)) @@ -870,6 +924,9 @@ EXPORT_SYMBOL_GPL(vsock_create_connected); s64 vsock_stream_has_data(struct vsock_sock *vsk) { + if (WARN_ON(!vsk->transport)) + return 0; + return vsk->transport->stream_has_data(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_data); @@ -878,6 +935,9 @@ s64 vsock_connectible_has_data(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); + if (WARN_ON(!vsk->transport)) + return 0; + if (sk->sk_type == SOCK_SEQPACKET) return vsk->transport->seqpacket_has_data(vsk); else @@ -887,6 +947,9 @@ EXPORT_SYMBOL_GPL(vsock_connectible_has_data); s64 vsock_stream_has_space(struct vsock_sock *vsk) { + if (WARN_ON(!vsk->transport)) + return 0; + return vsk->transport->stream_has_space(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_space); @@ -1161,6 +1224,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor) { struct vsock_sock *vsk = vsock_sk(sk); + if (WARN_ON_ONCE(!vsk->transport)) + return -ENODEV; + return vsk->transport->read_skb(vsk, read_actor); } @@ -1501,6 +1567,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, if (err < 0) goto out; + /* sk_err might have been set as a result of an earlier + * (failed) connect attempt. + */ + sk->sk_err = 0; + /* Mark sock as connecting and set the error code to in * progress in case this is a non-blocking connect. */ @@ -1515,7 +1586,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, timeout = vsk->connect_timeout; prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) { + /* If the socket is already closing or it is in an error state, there + * is no point in waiting. + */ + while (sk->sk_state != TCP_ESTABLISHED && + sk->sk_state != TCP_CLOSING && sk->sk_err == 0) { if (flags & O_NONBLOCK) { /* If we're not going to block, we schedule a timeout * function to generate a timeout on the connection @@ -2462,18 +2537,19 @@ static long vsock_dev_do_ioctl(struct file *filp, unsigned int cmd, void __user *ptr) { u32 __user *p = ptr; - u32 cid = VMADDR_CID_ANY; int retval = 0; + u32 cid; switch (cmd) { case IOCTL_VM_SOCKETS_GET_LOCAL_CID: /* To be compatible with the VMCI behavior, we prioritize the * guest CID instead of well-know host CID (VMADDR_CID_HOST). */ - if (transport_g2h) - cid = transport_g2h->get_local_cid(); - else if (transport_h2g) - cid = transport_h2g->get_local_cid(); + cid = vsock_registered_transport_cid(&transport_g2h); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_h2g); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_local); if (put_user(cid, p) != 0) retval = -EFAULT; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index b58c3818f284..f01f9e878106 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -307,7 +307,7 @@ out_rcu: static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) { - int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM; + int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; struct scatterlist pkt, *p; struct virtqueue *vq; struct sk_buff *skb; @@ -670,6 +670,13 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) }; int ret; + mutex_lock(&vsock->rx_lock); + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + mutex_unlock(&vsock->rx_lock); + + atomic_set(&vsock->queued_replies, 0); + ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL); if (ret < 0) return ret; @@ -779,9 +786,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - atomic_set(&vsock->queued_replies, 0); mutex_init(&vsock->tx_lock); mutex_init(&vsock->rx_lock); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 9acc13ab3f82..2c9b1011cdcc 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -26,6 +26,9 @@ /* Threshold for detecting small packets to copy */ #define GOOD_COPY_LEN 128 +static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, + bool cancel_timeout); + static const struct virtio_transport * virtio_transport_get_ops(struct vsock_sock *vsk) { @@ -438,18 +441,20 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len) { - if (vvs->rx_bytes + len > vvs->buf_alloc) + if (vvs->buf_used + len > vvs->buf_alloc) return false; vvs->rx_bytes += len; + vvs->buf_used += len; return true; } static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, - u32 len) + u32 bytes_read, u32 bytes_dequeued) { - vvs->rx_bytes -= len; - vvs->fwd_cnt += len; + vvs->rx_bytes -= bytes_read; + vvs->buf_used -= bytes_dequeued; + vvs->fwd_cnt += bytes_dequeued; } void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb) @@ -578,11 +583,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, size_t len) { struct virtio_vsock_sock *vvs = vsk->trans; - size_t bytes, total = 0; struct sk_buff *skb; u32 fwd_cnt_delta; bool low_rx_bytes; int err = -EFAULT; + size_t total = 0; u32 free_space; spin_lock_bh(&vvs->rx_lock); @@ -594,6 +599,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, } while (total < len && !skb_queue_empty(&vvs->rx_queue)) { + size_t bytes, dequeued = 0; + skb = skb_peek(&vvs->rx_queue); bytes = min_t(size_t, len - total, @@ -617,12 +624,12 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes; if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) { - u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); - - virtio_transport_dec_rx_pkt(vvs, pkt_len); + dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len); __skb_unlink(skb, &vvs->rx_queue); consume_skb(skb); } + + virtio_transport_dec_rx_pkt(vvs, bytes, dequeued); } fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt; @@ -778,7 +785,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, msg->msg_flags |= MSG_EOR; } - virtio_transport_dec_rx_pkt(vvs, pkt_len); + virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len); kfree_skb(skb); } @@ -1109,6 +1116,8 @@ void virtio_transport_destruct(struct vsock_sock *vsk) { struct virtio_vsock_sock *vvs = vsk->trans; + virtio_transport_cancel_close_work(vsk, true); + kfree(vvs); vsk->trans = NULL; } @@ -1204,17 +1213,11 @@ static void virtio_transport_wait_close(struct sock *sk, long timeout) } } -static void virtio_transport_do_close(struct vsock_sock *vsk, - bool cancel_timeout) +static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, + bool cancel_timeout) { struct sock *sk = sk_vsock(vsk); - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = TCP_CLOSING; - sk->sk_state_change(sk); - if (vsk->close_work_scheduled && (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { vsk->close_work_scheduled = false; @@ -1226,6 +1229,20 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, } } +static void virtio_transport_do_close(struct vsock_sock *vsk, + bool cancel_timeout) +{ + struct sock *sk = sk_vsock(vsk); + + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = TCP_CLOSING; + sk->sk_state_change(sk); + + virtio_transport_cancel_close_work(vsk, cancel_timeout); +} + static void virtio_transport_close_timeout(struct work_struct *work) { struct vsock_sock *vsk = @@ -1628,8 +1645,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, lock_sock(sk); - /* Check if sk has been closed before lock_sock */ - if (sock_flag(sk, SOCK_DONE)) { + /* Check if sk has been closed or assigned to another transport before + * lock_sock (note: listener sockets are not assigned to any transport) + */ + if (sock_flag(sk, SOCK_DONE) || + (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) { (void)virtio_transport_reset_no_sock(t, skb); release_sock(sk); sock_put(sk); @@ -1719,6 +1739,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto struct sock *sk = sk_vsock(vsk); struct virtio_vsock_hdr *hdr; struct sk_buff *skb; + u32 pkt_len; int off = 0; int err; @@ -1736,7 +1757,8 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) vvs->msg_count--; - virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len)); + pkt_len = le32_to_cpu(hdr->len); + virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len); spin_unlock_bh(&vvs->rx_lock); virtio_transport_send_credit_update(vsk); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index b370070194fa..7eccd6708d66 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, u16 proto, struct vmci_handle handle) { + memset(pkt, 0, sizeof(*pkt)); + /* We register the stream control handler as an any cid handle so we * must always send from a source address of VMADDR_CID_ANY */ @@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, pkt->type = type; pkt->src_port = src->svm_port; pkt->dst_port = dst->svm_port; - memset(&pkt->proto, 0, sizeof(pkt->proto)); - memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); switch (pkt->type) { case VMCI_TRANSPORT_PACKET_TYPE_INVALID: diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index 4aa6e74ec295..07b96d56f3a5 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -77,6 +77,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct sk_psock *psock; + struct vsock_sock *vsk; int copied; psock = sk_psock_get(sk); @@ -84,6 +85,13 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, return __vsock_recvmsg(sk, msg, len, flags); lock_sock(sk); + vsk = vsock_sk(sk); + + if (WARN_ON_ONCE(!vsk->transport)) { + copied = -ENODEV; + goto out; + } + if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) { release_sock(sk); sk_psock_put(sk, psock); @@ -108,6 +116,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, copied = sk_msg_recvmsg(sk, psock, msg, len, flags); } +out: release_sock(sk); sk_psock_put(sk, psock); |