diff options
Diffstat (limited to 'drivers/hv/hv_balloon.c')
-rw-r--r-- | drivers/hv/hv_balloon.c | 254 |
1 files changed, 161 insertions, 93 deletions
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index df35fb7ed5df..fdf8da929cbe 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -430,16 +430,27 @@ struct dm_info_msg { * currently hot added. We hot add in multiples of 128M * chunks; it is possible that we may not be able to bring * online all the pages in the region. The range - * covered_end_pfn defines the pages that can + * covered_start_pfn:covered_end_pfn defines the pages that can * be brough online. */ struct hv_hotadd_state { struct list_head list; unsigned long start_pfn; + unsigned long covered_start_pfn; unsigned long covered_end_pfn; unsigned long ha_end_pfn; unsigned long end_pfn; + /* + * A list of gaps. + */ + struct list_head gap_list; +}; + +struct hv_hotadd_gap { + struct list_head list; + unsigned long start_pfn; + unsigned long end_pfn; }; struct balloon_state { @@ -536,7 +547,11 @@ struct hv_dynmem_device { */ struct task_struct *thread; - struct mutex ha_region_mutex; + /* + * Protects ha_region_list, num_pages_onlined counter and individual + * regions from ha_region_list. + */ + spinlock_t ha_lock; /* * A list of hot-add regions. @@ -560,18 +575,14 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { struct memory_notify *mem = (struct memory_notify *)v; + unsigned long flags; switch (val) { - case MEM_GOING_ONLINE: - mutex_lock(&dm_device.ha_region_mutex); - break; - case MEM_ONLINE: + spin_lock_irqsave(&dm_device.ha_lock, flags); dm_device.num_pages_onlined += mem->nr_pages; + spin_unlock_irqrestore(&dm_device.ha_lock, flags); case MEM_CANCEL_ONLINE: - if (val == MEM_ONLINE || - mutex_is_locked(&dm_device.ha_region_mutex)) - mutex_unlock(&dm_device.ha_region_mutex); if (dm_device.ha_waiting) { dm_device.ha_waiting = false; complete(&dm_device.ol_waitevent); @@ -579,10 +590,11 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val, break; case MEM_OFFLINE: - mutex_lock(&dm_device.ha_region_mutex); + spin_lock_irqsave(&dm_device.ha_lock, flags); dm_device.num_pages_onlined -= mem->nr_pages; - mutex_unlock(&dm_device.ha_region_mutex); + spin_unlock_irqrestore(&dm_device.ha_lock, flags); break; + case MEM_GOING_ONLINE: case MEM_GOING_OFFLINE: case MEM_CANCEL_OFFLINE: break; @@ -595,18 +607,46 @@ static struct notifier_block hv_memory_nb = { .priority = 0 }; +/* Check if the particular page is backed and can be onlined and online it. */ +static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg) +{ + unsigned long cur_start_pgp; + unsigned long cur_end_pgp; + struct hv_hotadd_gap *gap; + + cur_start_pgp = (unsigned long)pfn_to_page(has->covered_start_pfn); + cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn); + + /* The page is not backed. */ + if (((unsigned long)pg < cur_start_pgp) || + ((unsigned long)pg >= cur_end_pgp)) + return; + + /* Check for gaps. */ + list_for_each_entry(gap, &has->gap_list, list) { + cur_start_pgp = (unsigned long) + pfn_to_page(gap->start_pfn); + cur_end_pgp = (unsigned long) + pfn_to_page(gap->end_pfn); + if (((unsigned long)pg >= cur_start_pgp) && + ((unsigned long)pg < cur_end_pgp)) { + return; + } + } + + /* This frame is currently backed; online the page. */ + __online_page_set_limits(pg); + __online_page_increment_counters(pg); + __online_page_free(pg); +} -static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size) +static void hv_bring_pgs_online(struct hv_hotadd_state *has, + unsigned long start_pfn, unsigned long size) { int i; - for (i = 0; i < size; i++) { - struct page *pg; - pg = pfn_to_page(start_pfn + i); - __online_page_set_limits(pg); - __online_page_increment_counters(pg); - __online_page_free(pg); - } + for (i = 0; i < size; i++) + hv_page_online_one(has, pfn_to_page(start_pfn + i)); } static void hv_mem_hot_add(unsigned long start, unsigned long size, @@ -618,9 +658,12 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, unsigned long start_pfn; unsigned long processed_pfn; unsigned long total_pfn = pfn_count; + unsigned long flags; for (i = 0; i < (size/HA_CHUNK); i++) { start_pfn = start + (i * HA_CHUNK); + + spin_lock_irqsave(&dm_device.ha_lock, flags); has->ha_end_pfn += HA_CHUNK; if (total_pfn > HA_CHUNK) { @@ -632,11 +675,11 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, } has->covered_end_pfn += processed_pfn; + spin_unlock_irqrestore(&dm_device.ha_lock, flags); init_completion(&dm_device.ol_waitevent); - dm_device.ha_waiting = true; + dm_device.ha_waiting = !memhp_auto_online; - mutex_unlock(&dm_device.ha_region_mutex); nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn)); ret = add_memory(nid, PFN_PHYS((start_pfn)), (HA_CHUNK << PAGE_SHIFT)); @@ -653,20 +696,23 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, */ do_hot_add = false; } + spin_lock_irqsave(&dm_device.ha_lock, flags); has->ha_end_pfn -= HA_CHUNK; has->covered_end_pfn -= processed_pfn; - mutex_lock(&dm_device.ha_region_mutex); + spin_unlock_irqrestore(&dm_device.ha_lock, flags); break; } /* - * Wait for the memory block to be onlined. - * Since the hot add has succeeded, it is ok to - * proceed even if the pages in the hot added region - * have not been "onlined" within the allowed time. + * Wait for the memory block to be onlined when memory onlining + * is done outside of kernel (memhp_auto_online). Since the hot + * add has succeeded, it is ok to proceed even if the pages in + * the hot added region have not been "onlined" within the + * allowed time. */ - wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); - mutex_lock(&dm_device.ha_region_mutex); + if (dm_device.ha_waiting) + wait_for_completion_timeout(&dm_device.ol_waitevent, + 5*HZ); post_status(&dm_device); } @@ -675,47 +721,64 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, static void hv_online_page(struct page *pg) { - struct list_head *cur; struct hv_hotadd_state *has; unsigned long cur_start_pgp; unsigned long cur_end_pgp; + unsigned long flags; - list_for_each(cur, &dm_device.ha_region_list) { - has = list_entry(cur, struct hv_hotadd_state, list); - cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn); - cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn); + spin_lock_irqsave(&dm_device.ha_lock, flags); + list_for_each_entry(has, &dm_device.ha_region_list, list) { + cur_start_pgp = (unsigned long) + pfn_to_page(has->start_pfn); + cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn); - if (((unsigned long)pg >= cur_start_pgp) && - ((unsigned long)pg < cur_end_pgp)) { - /* - * This frame is currently backed; online the - * page. - */ - __online_page_set_limits(pg); - __online_page_increment_counters(pg); - __online_page_free(pg); - } + /* The page belongs to a different HAS. */ + if (((unsigned long)pg < cur_start_pgp) || + ((unsigned long)pg >= cur_end_pgp)) + continue; + + hv_page_online_one(has, pg); + break; } + spin_unlock_irqrestore(&dm_device.ha_lock, flags); } -static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) +static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) { - struct list_head *cur; struct hv_hotadd_state *has; + struct hv_hotadd_gap *gap; unsigned long residual, new_inc; + int ret = 0; + unsigned long flags; - if (list_empty(&dm_device.ha_region_list)) - return false; - - list_for_each(cur, &dm_device.ha_region_list) { - has = list_entry(cur, struct hv_hotadd_state, list); - + spin_lock_irqsave(&dm_device.ha_lock, flags); + list_for_each_entry(has, &dm_device.ha_region_list, list) { /* * If the pfn range we are dealing with is not in the current * "hot add block", move on. */ if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn) continue; + + /* + * If the current start pfn is not where the covered_end + * is, create a gap and update covered_end_pfn. + */ + if (has->covered_end_pfn != start_pfn) { + gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC); + if (!gap) { + ret = -ENOMEM; + break; + } + + INIT_LIST_HEAD(&gap->list); + gap->start_pfn = has->covered_end_pfn; + gap->end_pfn = start_pfn; + list_add_tail(&gap->list, &has->gap_list); + + has->covered_end_pfn = start_pfn; + } + /* * If the current hot add-request extends beyond * our current limit; extend it. @@ -732,19 +795,12 @@ static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) has->end_pfn += new_inc; } - /* - * If the current start pfn is not where the covered_end - * is, update it. - */ - - if (has->covered_end_pfn != start_pfn) - has->covered_end_pfn = start_pfn; - - return true; - + ret = 1; + break; } + spin_unlock_irqrestore(&dm_device.ha_lock, flags); - return false; + return ret; } static unsigned long handle_pg_range(unsigned long pg_start, @@ -753,17 +809,13 @@ static unsigned long handle_pg_range(unsigned long pg_start, unsigned long start_pfn = pg_start; unsigned long pfn_cnt = pg_count; unsigned long size; - struct list_head *cur; struct hv_hotadd_state *has; unsigned long pgs_ol = 0; unsigned long old_covered_state; + unsigned long res = 0, flags; - if (list_empty(&dm_device.ha_region_list)) - return 0; - - list_for_each(cur, &dm_device.ha_region_list) { - has = list_entry(cur, struct hv_hotadd_state, list); - + spin_lock_irqsave(&dm_device.ha_lock, flags); + list_for_each_entry(has, &dm_device.ha_region_list, list) { /* * If the pfn range we are dealing with is not in the current * "hot add block", move on. @@ -783,6 +835,8 @@ static unsigned long handle_pg_range(unsigned long pg_start, if (pgs_ol > pfn_cnt) pgs_ol = pfn_cnt; + has->covered_end_pfn += pgs_ol; + pfn_cnt -= pgs_ol; /* * Check if the corresponding memory block is already * online by checking its last previously backed page. @@ -791,10 +845,8 @@ static unsigned long handle_pg_range(unsigned long pg_start, */ if (start_pfn > has->start_pfn && !PageReserved(pfn_to_page(start_pfn - 1))) - hv_bring_pgs_online(start_pfn, pgs_ol); + hv_bring_pgs_online(has, start_pfn, pgs_ol); - has->covered_end_pfn += pgs_ol; - pfn_cnt -= pgs_ol; } if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) { @@ -813,17 +865,20 @@ static unsigned long handle_pg_range(unsigned long pg_start, } else { pfn_cnt = size; } + spin_unlock_irqrestore(&dm_device.ha_lock, flags); hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has); + spin_lock_irqsave(&dm_device.ha_lock, flags); } /* * If we managed to online any pages that were given to us, * we declare success. */ - return has->covered_end_pfn - old_covered_state; - + res = has->covered_end_pfn - old_covered_state; + break; } + spin_unlock_irqrestore(&dm_device.ha_lock, flags); - return 0; + return res; } static unsigned long process_hot_add(unsigned long pg_start, @@ -832,13 +887,20 @@ static unsigned long process_hot_add(unsigned long pg_start, unsigned long rg_size) { struct hv_hotadd_state *ha_region = NULL; + int covered; + unsigned long flags; if (pfn_cnt == 0) return 0; - if (!dm_device.host_specified_ha_region) - if (pfn_covered(pg_start, pfn_cnt)) + if (!dm_device.host_specified_ha_region) { + covered = pfn_covered(pg_start, pfn_cnt); + if (covered < 0) + return 0; + + if (covered) goto do_pg_range; + } /* * If the host has specified a hot-add range; deal with it first. @@ -850,12 +912,17 @@ static unsigned long process_hot_add(unsigned long pg_start, return 0; INIT_LIST_HEAD(&ha_region->list); + INIT_LIST_HEAD(&ha_region->gap_list); - list_add_tail(&ha_region->list, &dm_device.ha_region_list); ha_region->start_pfn = rg_start; ha_region->ha_end_pfn = rg_start; + ha_region->covered_start_pfn = pg_start; ha_region->covered_end_pfn = pg_start; ha_region->end_pfn = rg_start + rg_size; + + spin_lock_irqsave(&dm_device.ha_lock, flags); + list_add_tail(&ha_region->list, &dm_device.ha_region_list); + spin_unlock_irqrestore(&dm_device.ha_lock, flags); } do_pg_range: @@ -882,7 +949,6 @@ static void hot_add_req(struct work_struct *dummy) resp.hdr.size = sizeof(struct dm_hot_add_response); #ifdef CONFIG_MEMORY_HOTPLUG - mutex_lock(&dm_device.ha_region_mutex); pg_start = dm->ha_wrk.ha_page_range.finfo.start_page; pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt; @@ -916,7 +982,6 @@ static void hot_add_req(struct work_struct *dummy) rg_start, rg_sz); dm->num_pages_added += resp.page_count; - mutex_unlock(&dm_device.ha_region_mutex); #endif /* * The result field of the response structure has the @@ -1010,7 +1075,6 @@ static unsigned long compute_balloon_floor(void) static void post_status(struct hv_dynmem_device *dm) { struct dm_status status; - struct sysinfo val; unsigned long now = jiffies; unsigned long last_post = last_post_time; @@ -1022,7 +1086,6 @@ static void post_status(struct hv_dynmem_device *dm) if (!time_after(now, (last_post_time + HZ))) return; - si_meminfo(&val); memset(&status, 0, sizeof(struct dm_status)); status.hdr.type = DM_STATUS_REPORT; status.hdr.size = sizeof(struct dm_status); @@ -1038,7 +1101,7 @@ static void post_status(struct hv_dynmem_device *dm) * num_pages_onlined) as committed to the host, otherwise it can try * asking us to balloon them out. */ - status.num_avail = val.freeram; + status.num_avail = si_mem_available(); status.num_committed = vm_memory_committed() + dm->num_pages_ballooned + (dm->num_pages_added > dm->num_pages_onlined ? @@ -1144,7 +1207,7 @@ static void balloon_up(struct work_struct *dummy) int ret; bool done = false; int i; - struct sysinfo val; + long avail_pages; unsigned long floor; /* The host balloons pages in 2M granularity. */ @@ -1156,12 +1219,12 @@ static void balloon_up(struct work_struct *dummy) */ alloc_unit = 512; - si_meminfo(&val); + avail_pages = si_mem_available(); floor = compute_balloon_floor(); /* Refuse to balloon below the floor, keep the 2M granularity. */ - if (val.freeram < num_pages || val.freeram - num_pages < floor) { - num_pages = val.freeram > floor ? (val.freeram - floor) : 0; + if (avail_pages < num_pages || avail_pages - num_pages < floor) { + num_pages = avail_pages > floor ? (avail_pages - floor) : 0; num_pages -= num_pages % PAGES_IN_2M; } @@ -1172,7 +1235,6 @@ static void balloon_up(struct work_struct *dummy) bl_resp->hdr.size = sizeof(struct dm_balloon_response); bl_resp->more_pages = 1; - num_pages -= num_ballooned; num_ballooned = alloc_balloon_pages(&dm_device, num_pages, bl_resp, alloc_unit); @@ -1461,7 +1523,7 @@ static int balloon_probe(struct hv_device *dev, init_completion(&dm_device.host_event); init_completion(&dm_device.config_event); INIT_LIST_HEAD(&dm_device.ha_region_list); - mutex_init(&dm_device.ha_region_mutex); + spin_lock_init(&dm_device.ha_lock); INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up); INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req); dm_device.host_specified_ha_region = false; @@ -1580,8 +1642,9 @@ probe_error0: static int balloon_remove(struct hv_device *dev) { struct hv_dynmem_device *dm = hv_get_drvdata(dev); - struct list_head *cur, *tmp; - struct hv_hotadd_state *has; + struct hv_hotadd_state *has, *tmp; + struct hv_hotadd_gap *gap, *tmp_gap; + unsigned long flags; if (dm->num_pages_ballooned != 0) pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned); @@ -1596,11 +1659,16 @@ static int balloon_remove(struct hv_device *dev) restore_online_page_callback(&hv_online_page); unregister_memory_notifier(&hv_memory_nb); #endif - list_for_each_safe(cur, tmp, &dm->ha_region_list) { - has = list_entry(cur, struct hv_hotadd_state, list); + spin_lock_irqsave(&dm_device.ha_lock, flags); + list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) { + list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) { + list_del(&gap->list); + kfree(gap); + } list_del(&has->list); kfree(has); } + spin_unlock_irqrestore(&dm_device.ha_lock, flags); return 0; } |