Diffstat (limited to 'drivers/hv/hv_balloon.c')
-rw-r--r--  drivers/hv/hv_balloon.c  143
1 file changed, 73 insertions, 70 deletions
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index ff169386b2c7..cb5b7dc9797f 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -428,14 +428,13 @@ struct dm_info_msg {
  * currently hot added. We hot add in multiples of 128M
  * chunks; it is possible that we may not be able to bring
  * online all the pages in the region. The range
- * covered_start_pfn : covered_end_pfn defines the pages that can
+ * covered_end_pfn defines the pages that can
  * be brough online.
  */
 struct hv_hotadd_state {
 	struct list_head list;
 	unsigned long start_pfn;
-	unsigned long covered_start_pfn;
 	unsigned long covered_end_pfn;
 	unsigned long ha_end_pfn;
 	unsigned long end_pfn;
@@ -503,6 +502,8 @@ struct hv_dynmem_device {
 	 * Number of pages we have currently ballooned out.
 	 */
 	unsigned int num_pages_ballooned;
+	unsigned int num_pages_onlined;
+	unsigned int num_pages_added;
 
 	/*
 	 * State to manage the ballooning (up) operation.
@@ -534,7 +535,6 @@ struct hv_dynmem_device {
 	struct task_struct *thread;
 
 	struct mutex ha_region_mutex;
-	struct completion waiter_event;
 
 	/*
 	 * A list of hot-add regions.
@@ -554,46 +554,32 @@ static struct hv_dynmem_device dm_device;
 static void post_status(struct hv_dynmem_device *dm);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static void acquire_region_mutex(bool trylock)
-{
-	if (trylock) {
-		reinit_completion(&dm_device.waiter_event);
-		while (!mutex_trylock(&dm_device.ha_region_mutex))
-			wait_for_completion(&dm_device.waiter_event);
-	} else {
-		mutex_lock(&dm_device.ha_region_mutex);
-	}
-}
-
-static void release_region_mutex(bool trylock)
-{
-	if (trylock) {
-		mutex_unlock(&dm_device.ha_region_mutex);
-	} else {
-		mutex_unlock(&dm_device.ha_region_mutex);
-		complete(&dm_device.waiter_event);
-	}
-}
-
 static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
 			      void *v)
 {
+	struct memory_notify *mem = (struct memory_notify *)v;
+
 	switch (val) {
 	case MEM_GOING_ONLINE:
-		acquire_region_mutex(true);
+		mutex_lock(&dm_device.ha_region_mutex);
 		break;
 
 	case MEM_ONLINE:
+		dm_device.num_pages_onlined += mem->nr_pages;
 	case MEM_CANCEL_ONLINE:
-		release_region_mutex(true);
+		mutex_unlock(&dm_device.ha_region_mutex);
 		if (dm_device.ha_waiting) {
 			dm_device.ha_waiting = false;
 			complete(&dm_device.ol_waitevent);
 		}
 		break;
 
-	case MEM_GOING_OFFLINE:
 	case MEM_OFFLINE:
+		mutex_lock(&dm_device.ha_region_mutex);
+		dm_device.num_pages_onlined -= mem->nr_pages;
+		mutex_unlock(&dm_device.ha_region_mutex);
+		break;
+	case MEM_GOING_OFFLINE:
 	case MEM_CANCEL_OFFLINE:
 		break;
 	}
@@ -646,7 +632,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 		init_completion(&dm_device.ol_waitevent);
 		dm_device.ha_waiting = true;
 
-		release_region_mutex(false);
+		mutex_unlock(&dm_device.ha_region_mutex);
 		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
 		ret = add_memory(nid, PFN_PHYS((start_pfn)),
 				(HA_CHUNK << PAGE_SHIFT));
@@ -665,6 +651,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 			}
 			has->ha_end_pfn -= HA_CHUNK;
 			has->covered_end_pfn -=  processed_pfn;
+			mutex_lock(&dm_device.ha_region_mutex);
 			break;
 		}
 
@@ -675,7 +662,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 		 * have not been "onlined" within the allowed time.
 		 */
 		wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
-		acquire_region_mutex(false);
+		mutex_lock(&dm_device.ha_region_mutex);
 		post_status(&dm_device);
 	}
 
@@ -691,8 +678,7 @@ static void hv_online_page(struct page *pg)
 
 	list_for_each(cur, &dm_device.ha_region_list) {
 		has = list_entry(cur, struct hv_hotadd_state, list);
-		cur_start_pgp = (unsigned long)
-			pfn_to_page(has->covered_start_pfn);
+		cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn);
 		cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
 
 		if (((unsigned long)pg >= cur_start_pgp) &&
@@ -704,7 +690,6 @@ static void hv_online_page(struct page *pg)
 			__online_page_set_limits(pg);
 			__online_page_increment_counters(pg);
 			__online_page_free(pg);
-			has->covered_start_pfn++;
 		}
 	}
 }
@@ -748,10 +733,9 @@ static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
 		 * If the current start pfn is not where the covered_end
 		 * is, update it.
 		 */
-		if (has->covered_end_pfn != start_pfn) {
+		if (has->covered_end_pfn != start_pfn)
 			has->covered_end_pfn = start_pfn;
-			has->covered_start_pfn = start_pfn;
-		}
+
 		return true;
 	}
 
@@ -794,9 +778,18 @@ static unsigned long handle_pg_range(unsigned long pg_start,
 			pgs_ol = has->ha_end_pfn - start_pfn;
 			if (pgs_ol > pfn_cnt)
 				pgs_ol = pfn_cnt;
-			hv_bring_pgs_online(start_pfn, pgs_ol);
+
+			/*
+			 * Check if the corresponding memory block is already
+			 * online by checking its last previously backed page.
+			 * In case it is we need to bring rest (which was not
+			 * backed previously) online too.
+			 */
+			if (start_pfn > has->start_pfn &&
+			    !PageReserved(pfn_to_page(start_pfn - 1)))
+				hv_bring_pgs_online(start_pfn, pgs_ol);
+
 			has->covered_end_pfn +=  pgs_ol;
-			has->covered_start_pfn += pgs_ol;
 			pfn_cnt -= pgs_ol;
 		}
 
@@ -857,7 +850,6 @@ static unsigned long process_hot_add(unsigned long pg_start,
 		list_add_tail(&ha_region->list, &dm_device.ha_region_list);
 		ha_region->start_pfn = rg_start;
 		ha_region->ha_end_pfn = rg_start;
-		ha_region->covered_start_pfn = pg_start;
 		ha_region->covered_end_pfn = pg_start;
 		ha_region->end_pfn = rg_start + rg_size;
 	}
@@ -886,7 +878,7 @@ static void hot_add_req(struct work_struct *dummy)
 	resp.hdr.size = sizeof(struct dm_hot_add_response);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-	acquire_region_mutex(false);
+	mutex_lock(&dm_device.ha_region_mutex);
 	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
 	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
 
@@ -918,7 +910,9 @@ static void hot_add_req(struct work_struct *dummy)
 	if (do_hot_add)
 		resp.page_count = process_hot_add(pg_start, pfn_cnt,
 						rg_start, rg_sz);
-	release_region_mutex(false);
+
+	dm->num_pages_added += resp.page_count;
+	mutex_unlock(&dm_device.ha_region_mutex);
 #endif
 	/*
 	 * The result field of the response structure has the
@@ -982,8 +976,8 @@ static unsigned long compute_balloon_floor(void)
 	 *     128        72    (1/2)
 	 *     512       168    (1/4)
 	 *    2048       360    (1/8)
-	 *    8192       768    (1/16)
-	 *   32768      1536    (1/32)
+	 *    8192       744    (1/16)
+	 *   32768      1512    (1/32)
 	 */
 	if (totalram_pages < MB2PAGES(128))
 		min_pages = MB2PAGES(8) + (totalram_pages >> 1);
@@ -992,9 +986,9 @@ static unsigned long compute_balloon_floor(void)
 	else if (totalram_pages < MB2PAGES(2048))
 		min_pages = MB2PAGES(104) + (totalram_pages >> 3);
 	else if (totalram_pages < MB2PAGES(8192))
-		min_pages = MB2PAGES(256) + (totalram_pages >> 4);
+		min_pages = MB2PAGES(232) + (totalram_pages >> 4);
 	else
-		min_pages = MB2PAGES(512) + (totalram_pages >> 5);
+		min_pages = MB2PAGES(488) + (totalram_pages >> 5);
 #undef MB2PAGES
 	return min_pages;
 }
@@ -1031,17 +1025,21 @@ static void post_status(struct hv_dynmem_device *dm)
 	status.hdr.trans_id = atomic_inc_return(&trans_id);
 
 	/*
-	 * The host expects the guest to report free memory.
-	 * Further, the host expects the pressure information to
-	 * include the ballooned out pages.
-	 * For a given amount of memory that we are managing, we
-	 * need to compute a floor below which we should not balloon.
-	 * Compute this and add it to the pressure report.
+	 * The host expects the guest to report free and committed memory.
+	 * Furthermore, the host expects the pressure information to include
+	 * the ballooned out pages. For a given amount of memory that we are
+	 * managing we need to compute a floor below which we should not
	 * balloon. Compute this and add it to the pressure report.
+	 * We also need to report all offline pages (num_pages_added -
+	 * num_pages_onlined) as committed to the host, otherwise it can try
+	 * asking us to balloon them out.
 	 */
 	status.num_avail = val.freeram;
 	status.num_committed = vm_memory_committed() +
-				dm->num_pages_ballooned +
-				compute_balloon_floor();
+			       dm->num_pages_ballooned +
+			       (dm->num_pages_added > dm->num_pages_onlined ?
+				dm->num_pages_added - dm->num_pages_onlined : 0) +
+			       compute_balloon_floor();
 
 	/*
 	 * If our transaction ID is no longer current, just don't
@@ -1083,11 +1081,12 @@ static void free_balloon_pages(struct hv_dynmem_device *dm,
 
 
 
-static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
-			 struct dm_balloon_response *bl_resp, int alloc_unit,
-			 bool *alloc_error)
+static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
+					unsigned int num_pages,
+					struct dm_balloon_response *bl_resp,
+					int alloc_unit)
 {
-	int i = 0;
+	unsigned int i = 0;
 	struct page *pg;
 
 	if (num_pages < alloc_unit)
@@ -1106,11 +1105,8 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
 				__GFP_NOMEMALLOC | __GFP_NOWARN,
 				get_order(alloc_unit << PAGE_SHIFT));
 
-		if (!pg) {
-			*alloc_error = true;
+		if (!pg)
 			return i * alloc_unit;
-		}
-
 
 		dm->num_pages_ballooned += alloc_unit;
 
@@ -1137,14 +1133,15 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
 
 static void balloon_up(struct work_struct *dummy)
 {
-	int num_pages = dm_device.balloon_wrk.num_pages;
-	int num_ballooned = 0;
+	unsigned int num_pages = dm_device.balloon_wrk.num_pages;
+	unsigned int num_ballooned = 0;
 	struct dm_balloon_response *bl_resp;
 	int alloc_unit;
 	int ret;
-	bool alloc_error;
 	bool done = false;
 	int i;
+	struct sysinfo val;
+	unsigned long floor;
 
 	/* The host balloons pages in 2M granularity. */
 	WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0);
@@ -1155,6 +1152,15 @@ static void balloon_up(struct work_struct *dummy)
 	 */
 	alloc_unit = 512;
 
+	si_meminfo(&val);
+	floor = compute_balloon_floor();
+
+	/* Refuse to balloon below the floor, keep the 2M granularity. */
+	if (val.freeram < num_pages || val.freeram - num_pages < floor) {
+		num_pages = val.freeram > floor ? (val.freeram - floor) : 0;
+		num_pages -= num_pages % PAGES_IN_2M;
+	}
+
 	while (!done) {
 		bl_resp = (struct dm_balloon_response *)send_buffer;
 		memset(send_buffer, 0, PAGE_SIZE);
@@ -1164,18 +1170,15 @@ static void balloon_up(struct work_struct *dummy)
 
 		num_pages -= num_ballooned;
 
-		alloc_error = false;
 		num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
-						 bl_resp, alloc_unit,
-						 &alloc_error);
+						    bl_resp, alloc_unit);
 
 		if (alloc_unit != 1 && num_ballooned == 0) {
 			alloc_unit = 1;
 			continue;
 		}
 
-		if ((alloc_unit == 1 && alloc_error) ||
-		    (num_ballooned == num_pages)) {
+		if (num_ballooned == 0 || num_ballooned == num_pages) {
 			bl_resp->more_pages = 0;
 			done = true;
 			dm_device.state = DM_INITIALIZED;
@@ -1414,7 +1417,8 @@ static void balloon_onchannelcallback(void *context)
 static int balloon_probe(struct hv_device *dev,
 			const struct hv_vmbus_device_id *dev_id)
 {
-	int ret, t;
+	int ret;
+	unsigned long t;
 	struct dm_version_request version_req;
 	struct dm_capabilities cap_msg;
 
@@ -1439,7 +1443,6 @@ static int balloon_probe(struct hv_device *dev,
 	dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
 	init_completion(&dm_device.host_event);
 	init_completion(&dm_device.config_event);
-	init_completion(&dm_device.waiter_event);
 	INIT_LIST_HEAD(&dm_device.ha_region_list);
 	mutex_init(&dm_device.ha_region_mutex);
 	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
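
Note on the compute_balloon_floor() constant changes (256 -> 232 and 512 -> 488): they make the piecewise linear floor function continuous at the 2048 MB and 8192 MB breakpoints, which is also why the table rows for 8192 and 32768 change to 744 and 1512. A minimal userspace sketch, not part of the patch, that replays the computation and checks the breakpoints; the MB2PAGES definition and the 512 MB segment constant (40) are taken from the unmodified function, the rest from the hunks above:

	#include <assert.h>
	#include <stdio.h>

	#define MB2PAGES(mb) ((mb) << 8)	/* 4K pages: 1 MB = 256 pages */

	static unsigned long balloon_floor(unsigned long totalram_pages)
	{
		unsigned long min_pages;

		if (totalram_pages < MB2PAGES(128))
			min_pages = MB2PAGES(8) + (totalram_pages >> 1);
		else if (totalram_pages < MB2PAGES(512))
			min_pages = MB2PAGES(40) + (totalram_pages >> 2);
		else if (totalram_pages < MB2PAGES(2048))
			min_pages = MB2PAGES(104) + (totalram_pages >> 3);
		else if (totalram_pages < MB2PAGES(8192))
			min_pages = MB2PAGES(232) + (totalram_pages >> 4);	/* was 256 */
		else
			min_pages = MB2PAGES(488) + (totalram_pages >> 5);	/* was 512 */
		return min_pages;
	}

	int main(void)
	{
		unsigned long mb[] = { 128, 512, 2048, 8192, 32768 };
		unsigned int i;

		/* The old constants jumped at these breakpoints; the new ones meet. */
		assert(MB2PAGES(104) + (MB2PAGES(2048) >> 3) ==
		       MB2PAGES(232) + (MB2PAGES(2048) >> 4));	/* both 360 MB */
		assert(MB2PAGES(232) + (MB2PAGES(8192) >> 4) ==
		       MB2PAGES(488) + (MB2PAGES(8192) >> 5));	/* both 744 MB */

		/* Reproduce the table from the comment (floor in MB). */
		for (i = 0; i < sizeof(mb) / sizeof(mb[0]); i++)
			printf("%8lu MB -> floor %4lu MB\n",
			       mb[i], balloon_floor(MB2PAGES(mb[i])) >> 8);
		return 0;
	}

This prints 72, 168, 360, 744 and 1512 MB for the five table rows; with the old constants the asserts fail, since the floor used to jump by 24 MB at 2048 MB and at 8192 MB.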
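Two of the smaller arithmetic changes are easy to sanity-check in isolation: balloon_up() now clamps the host's request so free memory never drops below the floor while keeping the host's 2M balloon granularity, and post_status() reports hot-added but not-yet-onlined pages as committed, clamped at zero because num_pages_onlined counts all onlined memory and so can exceed num_pages_added. A sketch with hypothetical numbers (the freeram, floor and request values are made up; PAGES_IN_2M is 2M / 4K as in the driver):

	#include <stdio.h>

	#define PAGES_IN_2M 512	/* 2M / 4K */

	/* Mirrors the clamp added to balloon_up(): never balloon free memory
	 * below the floor, and keep the granted amount a multiple of 2M. */
	static unsigned int clamp_balloon_request(unsigned int num_pages,
						  unsigned long freeram,
						  unsigned long floor)
	{
		if (freeram < num_pages || freeram - num_pages < floor) {
			num_pages = freeram > floor ? (freeram - floor) : 0;
			num_pages -= num_pages % PAGES_IN_2M;
		}
		return num_pages;
	}

	/* Mirrors the committed-pages term added to post_status(): pages added
	 * but not onlined count as committed, and the result never goes
	 * negative when onlined exceeds added. */
	static unsigned long offline_pages(unsigned int added, unsigned int onlined)
	{
		return added > onlined ? added - onlined : 0;
	}

	int main(void)
	{
		/* Hypothetical guest: 1000 MB free, 744 MB floor, host asks for 512 MB. */
		unsigned long freeram = 1000UL * 256, floor = 744UL * 256;
		unsigned int req = 512U * 256;

		printf("host request: %u pages, granted: %u pages\n",
		       req, clamp_balloon_request(req, freeram, floor));
		printf("offline (committed) pages: %lu\n",
		       offline_pages(131072, 98304));
		return 0;
	}

Here the 512 MB request would leave only 488 MB free, below the 744 MB floor, so the grant is cut to 65536 pages (256 MB), already a multiple of 2M; granting the full request is the behavior the clamp removes.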