summaryrefslogtreecommitdiff
path: root/net/vmw_vsock
diff options
context:
space:
mode:
Diffstat (limited to 'net/vmw_vsock')
-rw-r--r--net/vmw_vsock/af_vsock.c104
-rw-r--r--net/vmw_vsock/virtio_transport.c12
-rw-r--r--net/vmw_vsock/virtio_transport_common.c62
-rw-r--r--net/vmw_vsock/vmci_transport.c4
-rw-r--r--net/vmw_vsock/vsock_bpf.c9
5 files changed, 151 insertions, 40 deletions
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b52b798aa4c2..ef519b55a3d9 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -337,7 +337,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
void vsock_remove_sock(struct vsock_sock *vsk)
{
- vsock_remove_bound(vsk);
+ /* Transport reassignment must not remove the binding. */
+ if (sock_flag(sk_vsock(vsk), SOCK_DEAD))
+ vsock_remove_bound(vsk);
+
vsock_remove_connected(vsk);
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
@@ -404,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
static bool vsock_use_local_transport(unsigned int remote_cid)
{
+ lockdep_assert_held(&vsock_register_mutex);
+
if (!transport_local)
return false;
@@ -461,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
remote_flags = vsk->remote_addr.svm_flags;
+ mutex_lock(&vsock_register_mutex);
+
switch (sk->sk_type) {
case SOCK_DGRAM:
new_transport = transport_dgram;
@@ -476,12 +483,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
new_transport = transport_h2g;
break;
default:
- return -ESOCKTNOSUPPORT;
+ ret = -ESOCKTNOSUPPORT;
+ goto err;
}
if (vsk->transport) {
- if (vsk->transport == new_transport)
- return 0;
+ if (vsk->transport == new_transport) {
+ ret = 0;
+ goto err;
+ }
/* transport->release() must be called with sock lock acquired.
* This path can only be taken during vsock_connect(), where we
@@ -491,13 +501,30 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
*/
vsk->transport->release(vsk);
vsock_deassign_transport(vsk);
+
+ /* transport's release() and destruct() can touch some socket
+ * state, since we are reassigning the socket to a new transport
+ * during vsock_connect(), let's reset these fields to have a
+ * clean state.
+ */
+ sock_reset_flag(sk, SOCK_DONE);
+ sk->sk_state = TCP_CLOSE;
+ vsk->peer_shutdown = 0;
}
/* We increase the module refcnt to prevent the transport unloading
* while there are open sockets assigned to it.
*/
- if (!new_transport || !try_module_get(new_transport->module))
- return -ENODEV;
+ if (!new_transport || !try_module_get(new_transport->module)) {
+ ret = -ENODEV;
+ goto err;
+ }
+
+ /* It's safe to release the mutex after a successful try_module_get().
+ * Whichever transport `new_transport` points at, it won't go away until
+ * the last module_put() below or in vsock_deassign_transport().
+ */
+ mutex_unlock(&vsock_register_mutex);
if (sk->sk_type == SOCK_SEQPACKET) {
if (!new_transport->seqpacket_allow ||
@@ -516,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
vsk->transport = new_transport;
return 0;
+err:
+ mutex_unlock(&vsock_register_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(vsock_assign_transport);
+/*
+ * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks.
+ * Otherwise we may race with module removal. Do not use on `vsk->transport`.
+ */
+static u32 vsock_registered_transport_cid(const struct vsock_transport **transport)
+{
+ u32 cid = VMADDR_CID_ANY;
+
+ mutex_lock(&vsock_register_mutex);
+ if (*transport)
+ cid = (*transport)->get_local_cid();
+ mutex_unlock(&vsock_register_mutex);
+
+ return cid;
+}
+
bool vsock_find_cid(unsigned int cid)
{
- if (transport_g2h && cid == transport_g2h->get_local_cid())
+ if (cid == vsock_registered_transport_cid(&transport_g2h))
return true;
if (transport_h2g && cid == VMADDR_CID_HOST)
@@ -643,7 +689,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
unsigned int i;
for (i = 0; i < MAX_PORT_RETRIES; i++) {
- if (port <= LAST_RESERVED_PORT)
+ if (port == VMADDR_PORT_ANY ||
+ port <= LAST_RESERVED_PORT)
port = LAST_RESERVED_PORT + 1;
new_addr.svm_port = port++;
@@ -812,6 +859,13 @@ static void __vsock_release(struct sock *sk, int level)
*/
lock_sock_nested(sk, level);
+ /* Indicate to vsock_remove_sock() that the socket is being released and
+ * can be removed from the bound_table. Unlike transport reassignment
+ * case, where the socket must remain bound despite vsock_remove_sock()
+ * being called from the transport release() callback.
+ */
+ sock_set_flag(sk, SOCK_DEAD);
+
if (vsk->transport)
vsk->transport->release(vsk);
else if (sock_type_connectible(sk->sk_type))
@@ -870,6 +924,9 @@ EXPORT_SYMBOL_GPL(vsock_create_connected);
s64 vsock_stream_has_data(struct vsock_sock *vsk)
{
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
return vsk->transport->stream_has_data(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_data);
@@ -878,6 +935,9 @@ s64 vsock_connectible_has_data(struct vsock_sock *vsk)
{
struct sock *sk = sk_vsock(vsk);
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
if (sk->sk_type == SOCK_SEQPACKET)
return vsk->transport->seqpacket_has_data(vsk);
else
@@ -887,6 +947,9 @@ EXPORT_SYMBOL_GPL(vsock_connectible_has_data);
s64 vsock_stream_has_space(struct vsock_sock *vsk)
{
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
return vsk->transport->stream_has_space(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_space);
@@ -1161,6 +1224,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor)
{
struct vsock_sock *vsk = vsock_sk(sk);
+ if (WARN_ON_ONCE(!vsk->transport))
+ return -ENODEV;
+
return vsk->transport->read_skb(vsk, read_actor);
}
@@ -1501,6 +1567,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
if (err < 0)
goto out;
+ /* sk_err might have been set as a result of an earlier
+ * (failed) connect attempt.
+ */
+ sk->sk_err = 0;
+
/* Mark sock as connecting and set the error code to in
* progress in case this is a non-blocking connect.
*/
@@ -1515,7 +1586,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
timeout = vsk->connect_timeout;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
+ /* If the socket is already closing or it is in an error state, there
+ * is no point in waiting.
+ */
+ while (sk->sk_state != TCP_ESTABLISHED &&
+ sk->sk_state != TCP_CLOSING && sk->sk_err == 0) {
if (flags & O_NONBLOCK) {
/* If we're not going to block, we schedule a timeout
* function to generate a timeout on the connection
@@ -2462,18 +2537,19 @@ static long vsock_dev_do_ioctl(struct file *filp,
unsigned int cmd, void __user *ptr)
{
u32 __user *p = ptr;
- u32 cid = VMADDR_CID_ANY;
int retval = 0;
+ u32 cid;
switch (cmd) {
case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
/* To be compatible with the VMCI behavior, we prioritize the
* guest CID instead of well-know host CID (VMADDR_CID_HOST).
*/
- if (transport_g2h)
- cid = transport_g2h->get_local_cid();
- else if (transport_h2g)
- cid = transport_h2g->get_local_cid();
+ cid = vsock_registered_transport_cid(&transport_g2h);
+ if (cid == VMADDR_CID_ANY)
+ cid = vsock_registered_transport_cid(&transport_h2g);
+ if (cid == VMADDR_CID_ANY)
+ cid = vsock_registered_transport_cid(&transport_local);
if (put_user(cid, p) != 0)
retval = -EFAULT;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index b58c3818f284..f01f9e878106 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -307,7 +307,7 @@ out_rcu:
static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
{
- int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM;
+ int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
struct scatterlist pkt, *p;
struct virtqueue *vq;
struct sk_buff *skb;
@@ -670,6 +670,13 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
};
int ret;
+ mutex_lock(&vsock->rx_lock);
+ vsock->rx_buf_nr = 0;
+ vsock->rx_buf_max_nr = 0;
+ mutex_unlock(&vsock->rx_lock);
+
+ atomic_set(&vsock->queued_replies, 0);
+
ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL);
if (ret < 0)
return ret;
@@ -779,9 +786,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->vdev = vdev;
- vsock->rx_buf_nr = 0;
- vsock->rx_buf_max_nr = 0;
- atomic_set(&vsock->queued_replies, 0);
mutex_init(&vsock->tx_lock);
mutex_init(&vsock->rx_lock);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 9acc13ab3f82..2c9b1011cdcc 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -26,6 +26,9 @@
/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN 128
+static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
+ bool cancel_timeout);
+
static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
{
@@ -438,18 +441,20 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
u32 len)
{
- if (vvs->rx_bytes + len > vvs->buf_alloc)
+ if (vvs->buf_used + len > vvs->buf_alloc)
return false;
vvs->rx_bytes += len;
+ vvs->buf_used += len;
return true;
}
static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
- u32 len)
+ u32 bytes_read, u32 bytes_dequeued)
{
- vvs->rx_bytes -= len;
- vvs->fwd_cnt += len;
+ vvs->rx_bytes -= bytes_read;
+ vvs->buf_used -= bytes_dequeued;
+ vvs->fwd_cnt += bytes_dequeued;
}
void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
@@ -578,11 +583,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- size_t bytes, total = 0;
struct sk_buff *skb;
u32 fwd_cnt_delta;
bool low_rx_bytes;
int err = -EFAULT;
+ size_t total = 0;
u32 free_space;
spin_lock_bh(&vvs->rx_lock);
@@ -594,6 +599,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
}
while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
+ size_t bytes, dequeued = 0;
+
skb = skb_peek(&vvs->rx_queue);
bytes = min_t(size_t, len - total,
@@ -617,12 +624,12 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;
if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
- u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
-
- virtio_transport_dec_rx_pkt(vvs, pkt_len);
+ dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len);
__skb_unlink(skb, &vvs->rx_queue);
consume_skb(skb);
}
+
+ virtio_transport_dec_rx_pkt(vvs, bytes, dequeued);
}
fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
@@ -778,7 +785,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
msg->msg_flags |= MSG_EOR;
}
- virtio_transport_dec_rx_pkt(vvs, pkt_len);
+ virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
kfree_skb(skb);
}
@@ -1109,6 +1116,8 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
{
struct virtio_vsock_sock *vvs = vsk->trans;
+ virtio_transport_cancel_close_work(vsk, true);
+
kfree(vvs);
vsk->trans = NULL;
}
@@ -1204,17 +1213,11 @@ static void virtio_transport_wait_close(struct sock *sk, long timeout)
}
}
-static void virtio_transport_do_close(struct vsock_sock *vsk,
- bool cancel_timeout)
+static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
+ bool cancel_timeout)
{
struct sock *sk = sk_vsock(vsk);
- sock_set_flag(sk, SOCK_DONE);
- vsk->peer_shutdown = SHUTDOWN_MASK;
- if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = TCP_CLOSING;
- sk->sk_state_change(sk);
-
if (vsk->close_work_scheduled &&
(!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
vsk->close_work_scheduled = false;
@@ -1226,6 +1229,20 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
}
}
+static void virtio_transport_do_close(struct vsock_sock *vsk,
+ bool cancel_timeout)
+{
+ struct sock *sk = sk_vsock(vsk);
+
+ sock_set_flag(sk, SOCK_DONE);
+ vsk->peer_shutdown = SHUTDOWN_MASK;
+ if (vsock_stream_has_data(vsk) <= 0)
+ sk->sk_state = TCP_CLOSING;
+ sk->sk_state_change(sk);
+
+ virtio_transport_cancel_close_work(vsk, cancel_timeout);
+}
+
static void virtio_transport_close_timeout(struct work_struct *work)
{
struct vsock_sock *vsk =
@@ -1628,8 +1645,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
lock_sock(sk);
- /* Check if sk has been closed before lock_sock */
- if (sock_flag(sk, SOCK_DONE)) {
+ /* Check if sk has been closed or assigned to another transport before
+ * lock_sock (note: listener sockets are not assigned to any transport)
+ */
+ if (sock_flag(sk, SOCK_DONE) ||
+ (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
(void)virtio_transport_reset_no_sock(t, skb);
release_sock(sk);
sock_put(sk);
@@ -1719,6 +1739,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
struct sock *sk = sk_vsock(vsk);
struct virtio_vsock_hdr *hdr;
struct sk_buff *skb;
+ u32 pkt_len;
int off = 0;
int err;
@@ -1736,7 +1757,8 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
vvs->msg_count--;
- virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len));
+ pkt_len = le32_to_cpu(hdr->len);
+ virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
spin_unlock_bh(&vvs->rx_lock);
virtio_transport_send_credit_update(vsk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index b370070194fa..7eccd6708d66 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
u16 proto,
struct vmci_handle handle)
{
+ memset(pkt, 0, sizeof(*pkt));
+
/* We register the stream control handler as an any cid handle so we
* must always send from a source address of VMADDR_CID_ANY
*/
@@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
pkt->type = type;
pkt->src_port = src->svm_port;
pkt->dst_port = dst->svm_port;
- memset(&pkt->proto, 0, sizeof(pkt->proto));
- memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
switch (pkt->type) {
case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
index 4aa6e74ec295..07b96d56f3a5 100644
--- a/net/vmw_vsock/vsock_bpf.c
+++ b/net/vmw_vsock/vsock_bpf.c
@@ -77,6 +77,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int *addr_len)
{
struct sk_psock *psock;
+ struct vsock_sock *vsk;
int copied;
psock = sk_psock_get(sk);
@@ -84,6 +85,13 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
return __vsock_recvmsg(sk, msg, len, flags);
lock_sock(sk);
+ vsk = vsock_sk(sk);
+
+ if (WARN_ON_ONCE(!vsk->transport)) {
+ copied = -ENODEV;
+ goto out;
+ }
+
if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) {
release_sock(sk);
sk_psock_put(sk, psock);
@@ -108,6 +116,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
}
+out:
release_sock(sk);
sk_psock_put(sk, psock);