Diffstat (limited to 'drivers/hv')
 drivers/hv/Makefile           |   1
 drivers/hv/channel_mgmt.c     |   6
 drivers/hv/hv.c               |  63
 drivers/hv/hv_balloon.c       | 121
 drivers/hv/hv_trace_balloon.h |  48
 drivers/hv/hyperv_vmbus.h     |   4
 6 files changed, 195 insertions(+), 48 deletions(-)
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 14c22786b519..a1eec7177c2d 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_HYPERV_UTILS)	+= hv_utils.o
 obj-$(CONFIG_HYPERV_BALLOON)	+= hv_balloon.o
 
 CFLAGS_hv_trace.o = -I$(src)
+CFLAGS_hv_balloon.o = -I$(src)
 
 hv_vmbus-y := vmbus_drv.o \
 		 hv.o connection.o channel.o \
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index c21020b69114..c6d9d19bc04e 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -596,10 +596,8 @@ static int next_numa_node_id;
 /*
  * Starting with Win8, we can statically distribute the incoming
  * channel interrupt load by binding a channel to VCPU.
- * We do this in a hierarchical fashion:
- * First distribute the primary channels across available NUMA nodes
- * and then distribute the subchannels amongst the CPUs in the NUMA
- * node assigned to the primary channel.
+ * We distribute the interrupt loads to one or more NUMA nodes based on
+ * the channel's affinity_policy.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index fe96aab9e794..b1f6793acf4c 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -27,7 +27,7 @@
 #include <linux/vmalloc.h>
 #include <linux/hyperv.h>
 #include <linux/version.h>
-#include <linux/interrupt.h>
+#include <linux/random.h>
 #include <linux/clockchips.h>
 #include <asm/hyperv.h>
 #include <asm/mshyperv.h>
@@ -38,6 +38,17 @@ struct hv_context hv_context = {
 	.synic_initialized	= false,
 };
 
+/*
+ * If false, we're using the old mechanism for stimer0 interrupts
+ * where it sends a VMbus message when it expires. The old
+ * mechanism is used when running on older versions of Hyper-V
+ * that don't support Direct Mode. While Hyper-V provides
+ * four stimers per CPU, Linux uses only stimer0.
+ */
+static bool direct_mode_enabled;
+static int stimer0_irq;
+static int stimer0_vector;
+
 #define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
 #define HV_MAX_MAX_DELTA_TICKS 0xffffffff
 #define HV_MIN_DELTA_TICKS 1
@@ -53,6 +64,8 @@ int hv_init(void)
 	if (!hv_context.cpu_context)
 		return -ENOMEM;
 
+	direct_mode_enabled = ms_hyperv.misc_features &
+		HV_X64_STIMER_DIRECT_MODE_AVAILABLE;
 	return 0;
 }
 
@@ -91,6 +104,21 @@ int hv_post_message(union hv_connection_id connection_id,
 	return status & 0xFFFF;
 }
 
+/*
+ * ISR for when stimer0 is operating in Direct Mode. Direct Mode
+ * does not use VMbus or any VMbus messages, so process here and not
+ * in the VMbus driver code.
+ */
+
+static void hv_stimer0_isr(void)
+{
+	struct hv_per_cpu_context *hv_cpu;
+
+	hv_cpu = this_cpu_ptr(hv_context.cpu_context);
+	hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt);
+	add_interrupt_randomness(stimer0_vector, 0);
+}
+
 static int hv_ce_set_next_event(unsigned long delta,
 				struct clock_event_device *evt)
 {
@@ -108,6 +136,8 @@ static int hv_ce_shutdown(struct clock_event_device *evt)
 {
 	hv_init_timer(HV_X64_MSR_STIMER0_COUNT, 0);
 	hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, 0);
+	if (direct_mode_enabled)
+		hv_disable_stimer0_percpu_irq(stimer0_irq);
 
 	return 0;
 }
@@ -116,11 +146,26 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
 {
 	union hv_timer_config timer_cfg;
 
+	timer_cfg.as_uint64 = 0;
 	timer_cfg.enable = 1;
 	timer_cfg.auto_enable = 1;
-	timer_cfg.sintx = VMBUS_MESSAGE_SINT;
+	if (direct_mode_enabled) {
+		/*
+		 * When it expires, the timer will directly interrupt
+		 * on the specified hardware vector/IRQ.
+		 */
+		timer_cfg.direct_mode = 1;
+		timer_cfg.apic_vector = stimer0_vector;
+		hv_enable_stimer0_percpu_irq(stimer0_irq);
+	} else {
+		/*
+		 * When it expires, the timer will generate a VMbus message,
+		 * to be handled by the normal VMbus interrupt handler.
+		 */
+		timer_cfg.direct_mode = 0;
+		timer_cfg.sintx = VMBUS_MESSAGE_SINT;
+	}
 	hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
-
 	return 0;
 }
 
@@ -147,7 +192,7 @@ int hv_synic_alloc(void)
 	int cpu;
 
 	hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids,
-					 GFP_ATOMIC);
+					 GFP_KERNEL);
 	if (hv_context.hv_numa_map == NULL) {
 		pr_err("Unable to allocate NUMA map\n");
 		goto err;
@@ -191,6 +236,11 @@ int hv_synic_alloc(void)
 		INIT_LIST_HEAD(&hv_cpu->chan_list);
 	}
 
+	if (direct_mode_enabled &&
+	    hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
+				 hv_stimer0_isr))
+		goto err;
+
 	return 0;
 err:
 	return -ENOMEM;
@@ -217,7 +267,7 @@ void hv_synic_free(void)
 }
 
 /*
- * hv_synic_init - Initialize the Synthethic Interrupt Controller.
+ * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (ie x2v shim), we need to
 * retrieve the initialized message and event pages. Otherwise, we create and
@@ -292,6 +342,9 @@ void hv_synic_clockevents_cleanup(void)
 	if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
 		return;
 
+	if (direct_mode_enabled)
+		hv_remove_stimer0_irq(stimer0_irq);
+
 	for_each_present_cpu(cpu) {
 		struct hv_per_cpu_context *hv_cpu
 			= per_cpu_ptr(hv_context.cpu_context, cpu);
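The behavioral core of the hv.c change is hv_ce_set_oneshot(): both delivery modes program the same 64-bit stimer0 configuration register, and only the fields filled in differ. The standalone userspace sketch below is an illustration, not part of the patch: it mirrors the hv_timer_config layout from the hyperv_vmbus.h hunk at the end of this diff (the leading enable bit sits just above that hunk's context and is taken from the kernel code that sets timer_cfg.enable), replaces the MSR write with a printf, and uses made-up vector/SINT values (0xf0 and 2).

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Same bit layout as union hv_timer_config in hyperv_vmbus.h. */
union timer_config {
	uint64_t as_uint64;
	struct {
		uint64_t enable:1;
		uint64_t periodic:1;
		uint64_t lazy:1;
		uint64_t auto_enable:1;
		uint64_t apic_vector:8;
		uint64_t direct_mode:1;
		uint64_t reserved_z0:3;
		uint64_t sintx:4;
		uint64_t reserved_z1:44;
	};
};

static uint64_t stimer0_config(bool direct_mode_enabled,
			       unsigned int vector, unsigned int sint)
{
	union timer_config cfg;

	cfg.as_uint64 = 0;	/* the patch adds this zeroing step */
	cfg.enable = 1;
	cfg.auto_enable = 1;
	if (direct_mode_enabled) {
		/* expiry raises a hardware interrupt on 'vector' */
		cfg.direct_mode = 1;
		cfg.apic_vector = vector;
	} else {
		/* expiry posts a VMbus message on synthetic interrupt 'sint' */
		cfg.direct_mode = 0;
		cfg.sintx = sint;
	}
	return cfg.as_uint64;	/* the kernel writes this value to the MSR */
}

int main(void)
{
	printf("direct mode : 0x%016llx\n",
	       (unsigned long long)stimer0_config(true, 0xf0, 2));
	printf("message mode: 0x%016llx\n",
	       (unsigned long long)stimer0_config(false, 0xf0, 2));
	return 0;
}

Note why the patch adds timer_cfg.as_uint64 = 0: the union lives on the stack, so without the explicit zeroing the reserved fields, and whichever mode-specific field is left unset, would contain garbage.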
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index db0e6652d7ef..b3e9f13f8bc3 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -34,6 +34,9 @@
 
 #include <linux/hyperv.h>
 
+#define CREATE_TRACE_POINTS
+#include "hv_trace_balloon.h"
+
 /*
  * We begin with definitions supporting the Dynamic Memory protocol
  * with the host.
@@ -576,11 +579,65 @@ static struct hv_dynmem_device dm_device;
 static void post_status(struct hv_dynmem_device *dm);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+static inline bool has_pfn_is_backed(struct hv_hotadd_state *has,
+				     unsigned long pfn)
+{
+	struct hv_hotadd_gap *gap;
+
+	/* The page is not backed. */
+	if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn))
+		return false;
+
+	/* Check for gaps. */
+	list_for_each_entry(gap, &has->gap_list, list) {
+		if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn))
+			return false;
+	}
+
+	return true;
+}
+
+static unsigned long hv_page_offline_check(unsigned long start_pfn,
+					   unsigned long nr_pages)
+{
+	unsigned long pfn = start_pfn, count = 0;
+	struct hv_hotadd_state *has;
+	bool found;
+
+	while (pfn < start_pfn + nr_pages) {
+		/*
+		 * Search for HAS which covers the pfn and when we find one
+		 * count how many consecutive PFNs are covered.
+		 */
+		found = false;
+		list_for_each_entry(has, &dm_device.ha_region_list, list) {
+			while ((pfn >= has->start_pfn) &&
+			       (pfn < has->end_pfn) &&
+			       (pfn < start_pfn + nr_pages)) {
+				found = true;
+				if (has_pfn_is_backed(has, pfn))
+					count++;
+				pfn++;
+			}
+		}
+
+		/*
+		 * This PFN is not in any HAS (e.g. we're offlining a region
+		 * which was present at boot), no need to account for it. Go
+		 * to the next one.
+		 */
+		if (!found)
+			pfn++;
+	}
+
+	return count;
+}
+
 static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
 			      void *v)
 {
 	struct memory_notify *mem = (struct memory_notify *)v;
-	unsigned long flags;
+	unsigned long flags, pfn_count;
 
 	switch (val) {
 	case MEM_ONLINE:
@@ -593,7 +650,19 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
 	case MEM_OFFLINE:
 		spin_lock_irqsave(&dm_device.ha_lock, flags);
-		dm_device.num_pages_onlined -= mem->nr_pages;
+		pfn_count = hv_page_offline_check(mem->start_pfn,
+						  mem->nr_pages);
+		if (pfn_count <= dm_device.num_pages_onlined) {
+			dm_device.num_pages_onlined -= pfn_count;
+		} else {
+			/*
+			 * We're offlining more pages than we managed to online.
+			 * This is unexpected. In any case don't let
+			 * num_pages_onlined wrap around zero.
+			 */
+			WARN_ON_ONCE(1);
+			dm_device.num_pages_onlined = 0;
+		}
 		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 		break;
 	case MEM_GOING_ONLINE:
@@ -612,30 +681,9 @@ static struct notifier_block hv_memory_nb = {
 /* Check if the particular page is backed and can be onlined and online it. */
 static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
 {
-	unsigned long cur_start_pgp;
-	unsigned long cur_end_pgp;
-	struct hv_hotadd_gap *gap;
-
-	cur_start_pgp = (unsigned long)pfn_to_page(has->covered_start_pfn);
-	cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
-
-	/* The page is not backed. */
-	if (((unsigned long)pg < cur_start_pgp) ||
-	    ((unsigned long)pg >= cur_end_pgp))
+	if (!has_pfn_is_backed(has, page_to_pfn(pg)))
 		return;
 
-	/* Check for gaps. */
-	list_for_each_entry(gap, &has->gap_list, list) {
-		cur_start_pgp = (unsigned long)
-			pfn_to_page(gap->start_pfn);
-		cur_end_pgp = (unsigned long)
-			pfn_to_page(gap->end_pfn);
-		if (((unsigned long)pg >= cur_start_pgp) &&
-		    ((unsigned long)pg < cur_end_pgp)) {
-			return;
-		}
-	}
-
 	/* This frame is currently backed; online the page. */
 	__online_page_set_limits(pg);
 	__online_page_increment_counters(pg);
@@ -691,7 +739,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 				(HA_CHUNK << PAGE_SHIFT));
 
 		if (ret) {
-			pr_warn("hot_add memory failed error is %d\n", ret);
+			pr_err("hot_add memory failed error is %d\n", ret);
 			if (ret == -EEXIST) {
 				/*
 				 * This error indicates that the error
@@ -726,19 +774,13 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 static void hv_online_page(struct page *pg)
 {
 	struct hv_hotadd_state *has;
-	unsigned long cur_start_pgp;
-	unsigned long cur_end_pgp;
 	unsigned long flags;
+	unsigned long pfn = page_to_pfn(pg);
 
 	spin_lock_irqsave(&dm_device.ha_lock, flags);
 	list_for_each_entry(has, &dm_device.ha_region_list, list) {
-		cur_start_pgp = (unsigned long)
-			pfn_to_page(has->start_pfn);
-		cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn);
-
 		/* The page belongs to a different HAS. */
-		if (((unsigned long)pg < cur_start_pgp) ||
-		    ((unsigned long)pg >= cur_end_pgp))
+		if ((pfn < has->start_pfn) || (pfn >= has->end_pfn))
 			continue;
 
 		hv_page_online_one(has, pg);
@@ -1014,7 +1056,7 @@ static void hot_add_req(struct work_struct *dummy)
 
 	resp.result = 0;
 	if (!do_hot_add || (resp.page_count == 0))
-		pr_info("Memory hot add failed\n");
+		pr_err("Memory hot add failed\n");
 
 	dm->state = DM_INITIALIZED;
 	resp.hdr.trans_id = atomic_inc_return(&trans_id);
@@ -1041,7 +1083,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
 		break;
 
 	default:
-		pr_info("Received Unknown type: %d\n", info_hdr->type);
+		pr_warn("Received Unknown type: %d\n", info_hdr->type);
 	}
 }
 
@@ -1120,6 +1162,9 @@ static void post_status(struct hv_dynmem_device *dm)
 			    dm->num_pages_added - dm->num_pages_onlined : 0) +
 			    compute_balloon_floor();
 
+	trace_balloon_status(status.num_avail, status.num_committed,
+			     vm_memory_committed(), dm->num_pages_ballooned,
+			     dm->num_pages_added, dm->num_pages_onlined);
 	/*
 	 * If our transaction ID is no longer current, just don't
 	 * send the status. This can happen if we were interrupted
@@ -1290,7 +1335,7 @@ static void balloon_up(struct work_struct *dummy)
 		/*
 		 * Free up the memory we allocatted.
 		 */
-		pr_info("Balloon response failed\n");
+		pr_err("Balloon response failed\n");
 
 		for (i = 0; i < bl_resp->range_count; i++)
 			free_balloon_pages(&dm_device,
@@ -1421,7 +1466,7 @@ static void cap_resp(struct hv_dynmem_device *dm,
 			struct dm_capabilities_resp_msg *cap_resp)
 {
 	if (!cap_resp->is_accepted) {
-		pr_info("Capabilities not accepted by host\n");
+		pr_err("Capabilities not accepted by host\n");
 		dm->state = DM_INIT_ERROR;
 	}
 	complete(&dm->host_event);
@@ -1508,7 +1553,7 @@ static void balloon_onchannelcallback(void *context)
 			break;
 
 		default:
-			pr_err("Unhandled message: type: %d\n", dm_hdr->type);
+			pr_warn("Unhandled message: type: %d\n", dm_hdr->type);
 
 		}
 	}
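The hv_balloon.c side replaces page-pointer comparisons with PFN arithmetic and, on MEM_OFFLINE, subtracts only the pages that were actually backed instead of blindly subtracting mem->nr_pages. A simplified userspace model of that accounting follows; it is an illustration only: fixed arrays stand in for the kernel's linked lists of hot-add regions and gaps, a plain per-PFN scan replaces the kernel's nested walk, and all the numbers are invented.

#include <stdio.h>
#include <stdbool.h>

struct gap { unsigned long start_pfn, end_pfn; };

struct hotadd_region {
	unsigned long start_pfn, end_pfn;	/* whole region (HAS) */
	unsigned long covered_start_pfn, covered_end_pfn; /* backed part */
	const struct gap *gaps;			/* unbacked holes */
	int nr_gaps;
};

/* Mirrors has_pfn_is_backed(): inside the covered span, outside all gaps. */
static bool pfn_is_backed(const struct hotadd_region *has, unsigned long pfn)
{
	int i;

	if (pfn < has->covered_start_pfn || pfn >= has->covered_end_pfn)
		return false;
	for (i = 0; i < has->nr_gaps; i++)
		if (pfn >= has->gaps[i].start_pfn && pfn < has->gaps[i].end_pfn)
			return false;
	return true;
}

/* Mirrors hv_page_offline_check(): count backed PFNs in [start, start+nr). */
static unsigned long offline_check(const struct hotadd_region *regions,
				   int nr_regions, unsigned long start_pfn,
				   unsigned long nr_pages)
{
	unsigned long pfn, count = 0;
	int i;

	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
		for (i = 0; i < nr_regions; i++)
			if (pfn >= regions[i].start_pfn &&
			    pfn < regions[i].end_pfn &&
			    pfn_is_backed(&regions[i], pfn))
				count++;
	return count;
}

int main(void)
{
	static const struct gap gaps[] = { { 1100, 1200 } };
	static const struct hotadd_region regions[] = {
		{ .start_pfn = 1000, .end_pfn = 2000,
		  .covered_start_pfn = 1000, .covered_end_pfn = 1500,
		  .gaps = gaps, .nr_gaps = 1 },
	};

	/* covered 1000..1499 (500 PFNs) minus the 100-PFN gap => 400 */
	printf("backed PFNs: %lu\n", offline_check(regions, 1, 1000, 1000));
	return 0;
}

Offlining this 1000-page block must decrement num_pages_onlined by 400, not 1000; subtracting the full mem->nr_pages is what previously let the counter wrap around zero, the condition the new WARN_ON_ONCE() path also guards against.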
diff --git a/drivers/hv/hv_trace_balloon.h b/drivers/hv/hv_trace_balloon.h
new file mode 100644
index 000000000000..93082888aec3
--- /dev/null
+++ b/drivers/hv/hv_trace_balloon.h
@@ -0,0 +1,48 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hyperv
+
+#if !defined(_HV_TRACE_BALLOON_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _HV_TRACE_BALLOON_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(balloon_status,
+	    TP_PROTO(u64 available, u64 committed,
+		     unsigned long vm_memory_committed,
+		     unsigned long pages_ballooned,
+		     unsigned long pages_added,
+		     unsigned long pages_onlined),
+	    TP_ARGS(available, committed, vm_memory_committed,
+		    pages_ballooned, pages_added, pages_onlined),
+	    TP_STRUCT__entry(
+		    __field(u64, available)
+		    __field(u64, committed)
+		    __field(unsigned long, vm_memory_committed)
+		    __field(unsigned long, pages_ballooned)
+		    __field(unsigned long, pages_added)
+		    __field(unsigned long, pages_onlined)
+		    ),
+	    TP_fast_assign(
+		    __entry->available = available;
+		    __entry->committed = committed;
+		    __entry->vm_memory_committed = vm_memory_committed;
+		    __entry->pages_ballooned = pages_ballooned;
+		    __entry->pages_added = pages_added;
+		    __entry->pages_onlined = pages_onlined;
+		    ),
+	    TP_printk("available %lld, committed %lld; vm_memory_committed %ld;"
+		      " pages_ballooned %ld, pages_added %ld, pages_onlined %ld",
+		      __entry->available, __entry->committed,
+		      __entry->vm_memory_committed, __entry->pages_ballooned,
+		      __entry->pages_added, __entry->pages_onlined
+		    )
+	);
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hv_trace_balloon
+#endif /* _HV_TRACE_BALLOON_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 22300ec7b556..36d34fe3ccb3 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -57,7 +57,9 @@ union hv_timer_config {
 		u64 periodic:1;
 		u64 lazy:1;
 		u64 auto_enable:1;
-		u64 reserved_z0:12;
+		u64 apic_vector:8;
+		u64 direct_mode:1;
+		u64 reserved_z0:3;
 		u64 sintx:4;
 		u64 reserved_z1:44;
 	};
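The hyperv_vmbus.h change carves the old 12-bit reserved_z0 field into apic_vector:8 plus direct_mode:1, leaving reserved_z0:3 (8 + 1 + 3 = 12), so the union still maps exactly onto one 64-bit register. A small compile-time sanity check of that arithmetic, again an illustration rather than kernel code, reusing the layout from the sketch above:

#include <stdint.h>
#include <assert.h>

union timer_config {
	uint64_t as_uint64;
	struct {
		uint64_t enable:1;
		uint64_t periodic:1;
		uint64_t lazy:1;
		uint64_t auto_enable:1;
		uint64_t apic_vector:8;	/* new, carved out of reserved_z0 */
		uint64_t direct_mode:1;	/* new, carved out of reserved_z0 */
		uint64_t reserved_z0:3;	/* was 12 bits */
		uint64_t sintx:4;
		uint64_t reserved_z1:44;
	};
};

/* 4 x 1 + 8 + 1 + 3 + 4 + 44 = 64 bits */
static_assert(sizeof(union timer_config) == sizeof(uint64_t),
	      "timer config must stay a single 64-bit register image");

int main(void) { return 0; }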