summaryrefslogtreecommitdiff
path: root/drivers/hv
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/hv')
-rw-r--r-- drivers/hv/Makefile | 1
-rw-r--r-- drivers/hv/channel_mgmt.c | 6
-rw-r--r-- drivers/hv/hv.c | 63
-rw-r--r-- drivers/hv/hv_balloon.c | 121
-rw-r--r-- drivers/hv/hv_trace_balloon.h | 48
-rw-r--r-- drivers/hv/hyperv_vmbus.h | 4
6 files changed, 195 insertions, 48 deletions
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 14c22786b519..a1eec7177c2d 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_HYPERV_UTILS) += hv_utils.o
obj-$(CONFIG_HYPERV_BALLOON) += hv_balloon.o
CFLAGS_hv_trace.o = -I$(src)
+CFLAGS_hv_balloon.o = -I$(src)
hv_vmbus-y := vmbus_drv.o \
hv.o connection.o channel.o \
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index c21020b69114..c6d9d19bc04e 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -596,10 +596,8 @@ static int next_numa_node_id;
/*
* Starting with Win8, we can statically distribute the incoming
* channel interrupt load by binding a channel to VCPU.
- * We do this in a hierarchical fashion:
- * First distribute the primary channels across available NUMA nodes
- * and then distribute the subchannels amongst the CPUs in the NUMA
- * node assigned to the primary channel.
+ * We distribute the interrupt loads to one or more NUMA nodes based on
+ * the channel's affinity_policy.
*
* For pre-win8 hosts or non-performance critical channels we assign the
* first CPU in the first NUMA node.
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index fe96aab9e794..b1f6793acf4c 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -27,7 +27,7 @@
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/version.h>
-#include <linux/interrupt.h>
+#include <linux/random.h>
#include <linux/clockchips.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
@@ -38,6 +38,17 @@ struct hv_context hv_context = {
.synic_initialized = false,
};
+/*
+ * If false, we're using the old mechanism for stimer0 interrupts
+ * where it sends a VMbus message when it expires. The old
+ * mechanism is used when running on older versions of Hyper-V
+ * that don't support Direct Mode. While Hyper-V provides
+ * four stimers per CPU, Linux uses only stimer0.
+ */
+static bool direct_mode_enabled;
+static int stimer0_irq;
+static int stimer0_vector;
+
#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
#define HV_MIN_DELTA_TICKS 1
@@ -53,6 +64,8 @@ int hv_init(void)
if (!hv_context.cpu_context)
return -ENOMEM;
+ direct_mode_enabled = ms_hyperv.misc_features &
+ HV_X64_STIMER_DIRECT_MODE_AVAILABLE;
return 0;
}
@@ -91,6 +104,21 @@ int hv_post_message(union hv_connection_id connection_id,
return status & 0xFFFF;
}
+/*
+ * ISR for when stimer0 is operating in Direct Mode. Direct Mode
+ * does not use VMbus or any VMbus messages, so the expiry is handled
+ * here and not in the VMbus driver code.
+ */
+
+static void hv_stimer0_isr(void)
+{
+ struct hv_per_cpu_context *hv_cpu;
+
+ hv_cpu = this_cpu_ptr(hv_context.cpu_context); /* context of the CPU running this ISR */
+ hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt); /* invoke this CPU's clockevent handler */
+ add_interrupt_randomness(stimer0_vector, 0); /* report the interrupt to the random code */
+}
+
static int hv_ce_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
@@ -108,6 +136,8 @@ static int hv_ce_shutdown(struct clock_event_device *evt)
{
hv_init_timer(HV_X64_MSR_STIMER0_COUNT, 0);
hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, 0);
+ if (direct_mode_enabled)
+ hv_disable_stimer0_percpu_irq(stimer0_irq);
return 0;
}
@@ -116,11 +146,26 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
{
union hv_timer_config timer_cfg;
+ timer_cfg.as_uint64 = 0;
timer_cfg.enable = 1;
timer_cfg.auto_enable = 1;
- timer_cfg.sintx = VMBUS_MESSAGE_SINT;
+ if (direct_mode_enabled) {
+ /*
+ * When it expires, the timer will directly interrupt
+ * on the specified hardware vector/IRQ.
+ */
+ timer_cfg.direct_mode = 1;
+ timer_cfg.apic_vector = stimer0_vector;
+ hv_enable_stimer0_percpu_irq(stimer0_irq);
+ } else {
+ /*
+ * When it expires, the timer will generate a VMbus message,
+ * to be handled by the normal VMbus interrupt handler.
+ */
+ timer_cfg.direct_mode = 0;
+ timer_cfg.sintx = VMBUS_MESSAGE_SINT;
+ }
hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
-
return 0;
}
@@ -147,7 +192,7 @@ int hv_synic_alloc(void)
int cpu;
hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids,
- GFP_ATOMIC);
+ GFP_KERNEL);
if (hv_context.hv_numa_map == NULL) {
pr_err("Unable to allocate NUMA map\n");
goto err;
@@ -191,6 +236,11 @@ int hv_synic_alloc(void)
INIT_LIST_HEAD(&hv_cpu->chan_list);
}
+ if (direct_mode_enabled &&
+ hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
+ hv_stimer0_isr))
+ goto err;
+
return 0;
err:
return -ENOMEM;
@@ -217,7 +267,7 @@ void hv_synic_free(void)
}
/*
- * hv_synic_init - Initialize the Synthethic Interrupt Controller.
+ * hv_synic_init - Initialize the Synthetic Interrupt Controller.
*
* If it is already initialized by another entity (ie x2v shim), we need to
* retrieve the initialized message and event pages. Otherwise, we create and
@@ -292,6 +342,9 @@ void hv_synic_clockevents_cleanup(void)
if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
return;
+ if (direct_mode_enabled)
+ hv_remove_stimer0_irq(stimer0_irq);
+
for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index db0e6652d7ef..b3e9f13f8bc3 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -34,6 +34,9 @@
#include <linux/hyperv.h>
+#define CREATE_TRACE_POINTS
+#include "hv_trace_balloon.h"
+
/*
* We begin with definitions supporting the Dynamic Memory protocol
* with the host.
@@ -576,11 +579,65 @@ static struct hv_dynmem_device dm_device;
static void post_status(struct hv_dynmem_device *dm);
#ifdef CONFIG_MEMORY_HOTPLUG
+static inline bool has_pfn_is_backed(struct hv_hotadd_state *has,
+ unsigned long pfn) /* true if pfn is in the covered range of @has and in no gap */
+{
+ struct hv_hotadd_gap *gap; /* cursor over has->gap_list */
+
+ /* Outside [covered_start_pfn, covered_end_pfn): the page is not backed. */
+ if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn))
+ return false;
+
+ /* Check for gaps: a PFN inside [start_pfn, end_pfn) of any gap is not backed. */
+ list_for_each_entry(gap, &has->gap_list, list) {
+ if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn))
+ return false;
+ }
+
+ return true; /* covered and not inside any gap */
+}
+
+static unsigned long hv_page_offline_check(unsigned long start_pfn,
+ unsigned long nr_pages) /* returns the number of backed PFNs in [start_pfn, start_pfn + nr_pages) */
+{
+ unsigned long pfn = start_pfn, count = 0; /* pfn: scan cursor; count: backed PFNs seen so far */
+ struct hv_hotadd_state *has;
+ bool found; /* set when the current pfn fell inside some HAS */
+
+ while (pfn < start_pfn + nr_pages) {
+ /*
+ * Search for a HAS which covers the pfn and, when we find one,
+ * walk the consecutive PFNs it covers, counting the backed ones.
+ */
+ found = false;
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
+ while ((pfn >= has->start_pfn) &&
+ (pfn < has->end_pfn) &&
+ (pfn < start_pfn + nr_pages)) {
+ found = true;
+ if (has_pfn_is_backed(has, pfn))
+ count++;
+ pfn++; /* advance whether or not the PFN was backed */
+ }
+ }
+
+ /*
+ * This PFN is not in any HAS (e.g. we're offlining a region
+ * which was present at boot), no need to account for it. Go
+ * to the next one.
+ */
+ if (!found)
+ pfn++;
+ }
+
+ return count;
+}
+
static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
void *v)
{
struct memory_notify *mem = (struct memory_notify *)v;
- unsigned long flags;
+ unsigned long flags, pfn_count;
switch (val) {
case MEM_ONLINE:
@@ -593,7 +650,19 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
case MEM_OFFLINE:
spin_lock_irqsave(&dm_device.ha_lock, flags);
- dm_device.num_pages_onlined -= mem->nr_pages;
+ pfn_count = hv_page_offline_check(mem->start_pfn,
+ mem->nr_pages);
+ if (pfn_count <= dm_device.num_pages_onlined) {
+ dm_device.num_pages_onlined -= pfn_count;
+ } else {
+ /*
+ * We're offlining more pages than we managed to online.
+ * This is unexpected. In any case don't let
+ * num_pages_onlined wrap around zero.
+ */
+ WARN_ON_ONCE(1);
+ dm_device.num_pages_onlined = 0;
+ }
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
break;
case MEM_GOING_ONLINE:
@@ -612,30 +681,9 @@ static struct notifier_block hv_memory_nb = {
/* Check if the particular page is backed and can be onlined and online it. */
static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
{
- unsigned long cur_start_pgp;
- unsigned long cur_end_pgp;
- struct hv_hotadd_gap *gap;
-
- cur_start_pgp = (unsigned long)pfn_to_page(has->covered_start_pfn);
- cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
-
- /* The page is not backed. */
- if (((unsigned long)pg < cur_start_pgp) ||
- ((unsigned long)pg >= cur_end_pgp))
+ if (!has_pfn_is_backed(has, page_to_pfn(pg)))
return;
- /* Check for gaps. */
- list_for_each_entry(gap, &has->gap_list, list) {
- cur_start_pgp = (unsigned long)
- pfn_to_page(gap->start_pfn);
- cur_end_pgp = (unsigned long)
- pfn_to_page(gap->end_pfn);
- if (((unsigned long)pg >= cur_start_pgp) &&
- ((unsigned long)pg < cur_end_pgp)) {
- return;
- }
- }
-
/* This frame is currently backed; online the page. */
__online_page_set_limits(pg);
__online_page_increment_counters(pg);
@@ -691,7 +739,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
(HA_CHUNK << PAGE_SHIFT));
if (ret) {
- pr_warn("hot_add memory failed error is %d\n", ret);
+ pr_err("hot_add memory failed error is %d\n", ret);
if (ret == -EEXIST) {
/*
* This error indicates that the error
@@ -726,19 +774,13 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
static void hv_online_page(struct page *pg)
{
struct hv_hotadd_state *has;
- unsigned long cur_start_pgp;
- unsigned long cur_end_pgp;
unsigned long flags;
+ unsigned long pfn = page_to_pfn(pg);
spin_lock_irqsave(&dm_device.ha_lock, flags);
list_for_each_entry(has, &dm_device.ha_region_list, list) {
- cur_start_pgp = (unsigned long)
- pfn_to_page(has->start_pfn);
- cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn);
-
/* The page belongs to a different HAS. */
- if (((unsigned long)pg < cur_start_pgp) ||
- ((unsigned long)pg >= cur_end_pgp))
+ if ((pfn < has->start_pfn) || (pfn >= has->end_pfn))
continue;
hv_page_online_one(has, pg);
@@ -1014,7 +1056,7 @@ static void hot_add_req(struct work_struct *dummy)
resp.result = 0;
if (!do_hot_add || (resp.page_count == 0))
- pr_info("Memory hot add failed\n");
+ pr_err("Memory hot add failed\n");
dm->state = DM_INITIALIZED;
resp.hdr.trans_id = atomic_inc_return(&trans_id);
@@ -1041,7 +1083,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
break;
default:
- pr_info("Received Unknown type: %d\n", info_hdr->type);
+ pr_warn("Received Unknown type: %d\n", info_hdr->type);
}
}
@@ -1120,6 +1162,9 @@ static void post_status(struct hv_dynmem_device *dm)
dm->num_pages_added - dm->num_pages_onlined : 0) +
compute_balloon_floor();
+ trace_balloon_status(status.num_avail, status.num_committed,
+ vm_memory_committed(), dm->num_pages_ballooned,
+ dm->num_pages_added, dm->num_pages_onlined);
/*
* If our transaction ID is no longer current, just don't
* send the status. This can happen if we were interrupted
@@ -1290,7 +1335,7 @@ static void balloon_up(struct work_struct *dummy)
/*
* Free up the memory we allocatted.
*/
- pr_info("Balloon response failed\n");
+ pr_err("Balloon response failed\n");
for (i = 0; i < bl_resp->range_count; i++)
free_balloon_pages(&dm_device,
@@ -1421,7 +1466,7 @@ static void cap_resp(struct hv_dynmem_device *dm,
struct dm_capabilities_resp_msg *cap_resp)
{
if (!cap_resp->is_accepted) {
- pr_info("Capabilities not accepted by host\n");
+ pr_err("Capabilities not accepted by host\n");
dm->state = DM_INIT_ERROR;
}
complete(&dm->host_event);
@@ -1508,7 +1553,7 @@ static void balloon_onchannelcallback(void *context)
break;
default:
- pr_err("Unhandled message: type: %d\n", dm_hdr->type);
+ pr_warn("Unhandled message: type: %d\n", dm_hdr->type);
}
}
diff --git a/drivers/hv/hv_trace_balloon.h b/drivers/hv/hv_trace_balloon.h
new file mode 100644
index 000000000000..93082888aec3
--- /dev/null
+++ b/drivers/hv/hv_trace_balloon.h
@@ -0,0 +1,48 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hyperv
+
+#if !defined(_HV_TRACE_BALLOON_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _HV_TRACE_BALLOON_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(balloon_status, /* balloon accounting snapshot; called as trace_balloon_status() from post_status() */
+ TP_PROTO(u64 available, u64 committed,
+ unsigned long vm_memory_committed,
+ unsigned long pages_ballooned,
+ unsigned long pages_added,
+ unsigned long pages_onlined),
+ TP_ARGS(available, committed, vm_memory_committed,
+ pages_ballooned, pages_added, pages_onlined),
+ TP_STRUCT__entry( /* recorded fields mirror the arguments one-to-one */
+ __field(u64, available)
+ __field(u64, committed)
+ __field(unsigned long, vm_memory_committed)
+ __field(unsigned long, pages_ballooned)
+ __field(unsigned long, pages_added)
+ __field(unsigned long, pages_onlined)
+ ),
+ TP_fast_assign( /* plain copies, no conversion */
+ __entry->available = available;
+ __entry->committed = committed;
+ __entry->vm_memory_committed = vm_memory_committed;
+ __entry->pages_ballooned = pages_ballooned;
+ __entry->pages_added = pages_added;
+ __entry->pages_onlined = pages_onlined;
+ ),
+ TP_printk("available %llu, committed %llu; vm_memory_committed %lu;" /* all fields are unsigned: %llu for u64, %lu for unsigned long */
+ " pages_ballooned %lu, pages_added %lu, pages_onlined %lu",
+ __entry->available, __entry->committed,
+ __entry->vm_memory_committed, __entry->pages_ballooned,
+ __entry->pages_added, __entry->pages_onlined
+ )
+ );
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hv_trace_balloon
+#endif /* _HV_TRACE_BALLOON_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 22300ec7b556..36d34fe3ccb3 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -57,7 +57,9 @@ union hv_timer_config {
u64 periodic:1;
u64 lazy:1;
u64 auto_enable:1;
- u64 reserved_z0:12;
+ u64 apic_vector:8;
+ u64 direct_mode:1;
+ u64 reserved_z0:3;
u64 sintx:4;
u64 reserved_z1:44;
};