summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-01-18 20:53:41 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2019-01-18 20:53:41 +0300
commite6ec2fda2d464938989ecd770be92e492ace3ae1 (patch)
treec344b3d2b17c524687bd0fccfdd9b9ea592c26c6
parentdc6fef2cc57972d4d64d9cd6d26b81060e1db0e6 (diff)
parent867cefb4cb1012f42cada1c7d1f35ac8dd276071 (diff)
downloadlinux-e6ec2fda2d464938989ecd770be92e492ace3ae1.tar.xz
Merge tag 'for-linus-5.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen fixes from Juergen Gross: - Several fixes for the Xen pvcalls drivers (1 fix for the backend and 8 for the frontend). - A fix for a rather longstanding bug in the Xen sched_clock() interface which led to weird time jumps when migrating the system. - A fix for avoiding accesses to x2apic MSRs in Xen PV guests. * tag 'for-linus-5.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: xen: Fix x86 sched_clock() interface for xen pvcalls-front: fix potential null dereference always clear the X2APIC_ENABLE bit for PV guest pvcalls-front: Avoid get_free_pages(GFP_KERNEL) under spinlock xen/pvcalls: remove set but not used variable 'intf' pvcalls-back: set -ENOTCONN in pvcalls_conn_back_read pvcalls-front: don't return error when the ring is full pvcalls-front: properly allocate sk pvcalls-front: don't try to free unallocated rings pvcalls-front: read all data before closing the connection
-rw-r--r--arch/x86/xen/enlighten_pv.c5
-rw-r--r--arch/x86/xen/time.c12
-rw-r--r--drivers/xen/events/events_base.c2
-rw-r--r--drivers/xen/pvcalls-back.c9
-rw-r--r--drivers/xen/pvcalls-front.c104
5 files changed, 90 insertions, 42 deletions
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 2f6787fc7106..c54a493e139a 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -898,10 +898,7 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
val = native_read_msr_safe(msr, err);
switch (msr) {
case MSR_IA32_APICBASE:
-#ifdef CONFIG_X86_X2APIC
- if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31))))
-#endif
- val &= ~X2APIC_ENABLE;
+ val &= ~X2APIC_ENABLE;
break;
}
return val;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 72bf446c3fee..6e29794573b7 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -361,8 +361,6 @@ void xen_timer_resume(void)
{
int cpu;
- pvclock_resume();
-
if (xen_clockevent != &xen_vcpuop_clockevent)
return;
@@ -379,12 +377,15 @@ static const struct pv_time_ops xen_time_ops __initconst = {
};
static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
+static u64 xen_clock_value_saved;
void xen_save_time_memory_area(void)
{
struct vcpu_register_time_memory_area t;
int ret;
+ xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;
+
if (!xen_clock)
return;
@@ -404,7 +405,7 @@ void xen_restore_time_memory_area(void)
int ret;
if (!xen_clock)
- return;
+ goto out;
t.addr.v = &xen_clock->pvti;
@@ -421,6 +422,11 @@ void xen_restore_time_memory_area(void)
if (ret != 0)
pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
ret);
+
+out:
+ /* Need pvclock_resume() before using xen_clocksource_read(). */
+ pvclock_resume();
+ xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
}
static void xen_setup_vsyscall_time_info(void)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 93194f3e7540..117e76b2f939 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1650,7 +1650,7 @@ void xen_callback_vector(void)
xen_have_vector_callback = 0;
return;
}
- pr_info("Xen HVM callback vector for event delivery is enabled\n");
+ pr_info_once("Xen HVM callback vector for event delivery is enabled\n");
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
xen_hvm_callback_vector);
}
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 2e5d845b5091..7aa64d1b119c 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -160,9 +160,10 @@ static void pvcalls_conn_back_read(void *opaque)
/* write the data, then modify the indexes */
virt_wmb();
- if (ret < 0)
+ if (ret < 0) {
+ atomic_set(&map->read, 0);
intf->in_error = ret;
- else
+ } else
intf->in_prod = prod + ret;
/* update the indexes, then notify the other end */
virt_wmb();
@@ -282,13 +283,11 @@ static int pvcalls_back_socket(struct xenbus_device *dev,
static void pvcalls_sk_state_change(struct sock *sock)
{
struct sock_mapping *map = sock->sk_user_data;
- struct pvcalls_data_intf *intf;
if (map == NULL)
return;
- intf = map->ring;
- intf->in_error = -ENOTCONN;
+ atomic_inc(&map->read);
notify_remote_via_irq(map->irq);
}
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 77224d8f3e6f..8a249c95c193 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -31,6 +31,12 @@
#define PVCALLS_NR_RSP_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
#define PVCALLS_FRONT_MAX_SPIN 5000
+static struct proto pvcalls_proto = {
+ .name = "PVCalls",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct sock),
+};
+
struct pvcalls_bedata {
struct xen_pvcalls_front_ring ring;
grant_ref_t ref;
@@ -335,6 +341,42 @@ int pvcalls_front_socket(struct socket *sock)
return ret;
}
+static void free_active_ring(struct sock_mapping *map)
+{
+ if (!map->active.ring)
+ return;
+
+ free_pages((unsigned long)map->active.data.in,
+ map->active.ring->ring_order);
+ free_page((unsigned long)map->active.ring);
+}
+
+static int alloc_active_ring(struct sock_mapping *map)
+{
+ void *bytes;
+
+ map->active.ring = (struct pvcalls_data_intf *)
+ get_zeroed_page(GFP_KERNEL);
+ if (!map->active.ring)
+ goto out;
+
+ map->active.ring->ring_order = PVCALLS_RING_ORDER;
+ bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ PVCALLS_RING_ORDER);
+ if (!bytes)
+ goto out;
+
+ map->active.data.in = bytes;
+ map->active.data.out = bytes +
+ XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
+
+ return 0;
+
+out:
+ free_active_ring(map);
+ return -ENOMEM;
+}
+
static int create_active(struct sock_mapping *map, int *evtchn)
{
void *bytes;
@@ -343,15 +385,7 @@ static int create_active(struct sock_mapping *map, int *evtchn)
*evtchn = -1;
init_waitqueue_head(&map->active.inflight_conn_req);
- map->active.ring = (struct pvcalls_data_intf *)
- __get_free_page(GFP_KERNEL | __GFP_ZERO);
- if (map->active.ring == NULL)
- goto out_error;
- map->active.ring->ring_order = PVCALLS_RING_ORDER;
- bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- PVCALLS_RING_ORDER);
- if (bytes == NULL)
- goto out_error;
+ bytes = map->active.data.in;
for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++)
map->active.ring->ref[i] = gnttab_grant_foreign_access(
pvcalls_front_dev->otherend_id,
@@ -361,10 +395,6 @@ static int create_active(struct sock_mapping *map, int *evtchn)
pvcalls_front_dev->otherend_id,
pfn_to_gfn(virt_to_pfn((void *)map->active.ring)), 0);
- map->active.data.in = bytes;
- map->active.data.out = bytes +
- XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
-
ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn);
if (ret)
goto out_error;
@@ -385,8 +415,6 @@ static int create_active(struct sock_mapping *map, int *evtchn)
out_error:
if (*evtchn >= 0)
xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
- free_pages((unsigned long)map->active.data.in, PVCALLS_RING_ORDER);
- free_page((unsigned long)map->active.ring);
return ret;
}
@@ -406,17 +434,24 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
return PTR_ERR(map);
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+ ret = alloc_active_ring(map);
+ if (ret < 0) {
+ pvcalls_exit_sock(sock);
+ return ret;
+ }
spin_lock(&bedata->socket_lock);
ret = get_request(bedata, &req_id);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
+ free_active_ring(map);
pvcalls_exit_sock(sock);
return ret;
}
ret = create_active(map, &evtchn);
if (ret < 0) {
spin_unlock(&bedata->socket_lock);
+ free_active_ring(map);
pvcalls_exit_sock(sock);
return ret;
}
@@ -469,8 +504,10 @@ static int __write_ring(struct pvcalls_data_intf *intf,
virt_mb();
size = pvcalls_queued(prod, cons, array_size);
- if (size >= array_size)
+ if (size > array_size)
return -EINVAL;
+ if (size == array_size)
+ return 0;
if (len > array_size - size)
len = array_size - size;
@@ -560,15 +597,13 @@ static int __read_ring(struct pvcalls_data_intf *intf,
error = intf->in_error;
/* get pointers before reading from the ring */
virt_rmb();
- if (error < 0)
- return error;
size = pvcalls_queued(prod, cons, array_size);
masked_prod = pvcalls_mask(prod, array_size);
masked_cons = pvcalls_mask(cons, array_size);
if (size == 0)
- return 0;
+ return error ?: size;
if (len > size)
len = size;
@@ -780,25 +815,36 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
}
}
- spin_lock(&bedata->socket_lock);
- ret = get_request(bedata, &req_id);
- if (ret < 0) {
+ map2 = kzalloc(sizeof(*map2), GFP_KERNEL);
+ if (map2 == NULL) {
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
- spin_unlock(&bedata->socket_lock);
+ pvcalls_exit_sock(sock);
+ return -ENOMEM;
+ }
+ ret = alloc_active_ring(map2);
+ if (ret < 0) {
+ clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
+ (void *)&map->passive.flags);
+ kfree(map2);
pvcalls_exit_sock(sock);
return ret;
}
- map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
- if (map2 == NULL) {
+ spin_lock(&bedata->socket_lock);
+ ret = get_request(bedata, &req_id);
+ if (ret < 0) {
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
spin_unlock(&bedata->socket_lock);
+ free_active_ring(map2);
+ kfree(map2);
pvcalls_exit_sock(sock);
- return -ENOMEM;
+ return ret;
}
+
ret = create_active(map2, &evtchn);
if (ret < 0) {
+ free_active_ring(map2);
kfree(map2);
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags);
@@ -839,7 +885,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
received:
map2->sock = newsock;
- newsock->sk = kzalloc(sizeof(*newsock->sk), GFP_KERNEL);
+ newsock->sk = sk_alloc(sock_net(sock->sk), PF_INET, GFP_KERNEL, &pvcalls_proto, false);
if (!newsock->sk) {
bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
map->passive.inflight_req_id = PVCALLS_INVALID_ID;
@@ -1032,8 +1078,8 @@ int pvcalls_front_release(struct socket *sock)
spin_lock(&bedata->socket_lock);
list_del(&map->list);
spin_unlock(&bedata->socket_lock);
- if (READ_ONCE(map->passive.inflight_req_id) !=
- PVCALLS_INVALID_ID) {
+ if (READ_ONCE(map->passive.inflight_req_id) != PVCALLS_INVALID_ID &&
+ READ_ONCE(map->passive.inflight_req_id) != 0) {
pvcalls_front_free_map(bedata,
map->passive.accept_map);
}