Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	123
1 file changed, 91 insertions, 32 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8fb42c6dd74b..5b1ab1f427c5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -331,6 +331,24 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
 	}
 }
 
+static inline long
+hugetlb_resv_map_add(struct resv_map *map, struct file_region *rg, long from,
+		     long to, struct hstate *h, struct hugetlb_cgroup *cg,
+		     long *regions_needed)
+{
+	struct file_region *nrg;
+
+	if (!regions_needed) {
+		nrg = get_file_region_entry_from_cache(map, from, to);
+		record_hugetlb_cgroup_uncharge_info(cg, h, map, nrg);
+		list_add(&nrg->link, rg->link.prev);
+		coalesce_file_region(map, nrg);
+	} else
+		*regions_needed += 1;
+
+	return to - from;
+}
+
 /*
  * Must be called with resv->lock held.
  *
@@ -346,7 +364,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 	long add = 0;
 	struct list_head *head = &resv->regions;
 	long last_accounted_offset = f;
-	struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
+	struct file_region *rg = NULL, *trg = NULL;
 
 	if (regions_needed)
 		*regions_needed = 0;
@@ -369,24 +387,17 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 		/* When we find a region that starts beyond our range, we've
 		 * finished.
 		 */
-		if (rg->from > t)
+		if (rg->from >= t)
 			break;
 
 		/* Add an entry for last_accounted_offset -> rg->from, and
 		 * update last_accounted_offset.
 		 */
-		if (rg->from > last_accounted_offset) {
-			add += rg->from - last_accounted_offset;
-			if (!regions_needed) {
-				nrg = get_file_region_entry_from_cache(
-					resv, last_accounted_offset, rg->from);
-				record_hugetlb_cgroup_uncharge_info(h_cg, h,
-								    resv, nrg);
-				list_add(&nrg->link, rg->link.prev);
-				coalesce_file_region(resv, nrg);
-			} else
-				*regions_needed += 1;
-		}
+		if (rg->from > last_accounted_offset)
+			add += hugetlb_resv_map_add(resv, rg,
+						    last_accounted_offset,
+						    rg->from, h, h_cg,
+						    regions_needed);
 
 		last_accounted_offset = rg->to;
 	}
@@ -394,17 +405,9 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 	/* Handle the case where our range extends beyond
 	 * last_accounted_offset.
 	 */
-	if (last_accounted_offset < t) {
-		add += t - last_accounted_offset;
-		if (!regions_needed) {
-			nrg = get_file_region_entry_from_cache(
-				resv, last_accounted_offset, t);
-			record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
-			list_add(&nrg->link, rg->link.prev);
-			coalesce_file_region(resv, nrg);
-		} else
-			*regions_needed += 1;
-	}
+	if (last_accounted_offset < t)
+		add += hugetlb_resv_map_add(resv, rg, last_accounted_offset,
+					    t, h, h_cg, regions_needed);
 
 	VM_BUG_ON(add < 0);
 	return add;
@@ -3725,21 +3728,32 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
 		return false;
 }
 
+static void
+hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
+		     struct page *new_page)
+{
+	__SetPageUptodate(new_page);
+	set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
+	hugepage_add_new_anon_rmap(new_page, vma, addr);
+	hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
+	ClearHPageRestoreReserve(new_page);
+	SetHPageMigratable(new_page);
+}
+
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			    struct vm_area_struct *vma)
 {
 	pte_t *src_pte, *dst_pte, entry, dst_entry;
 	struct page *ptepage;
 	unsigned long addr;
-	int cow;
+	bool cow = is_cow_mapping(vma->vm_flags);
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
+	unsigned long npages = pages_per_huge_page(h);
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct mmu_notifier_range range;
 	int ret = 0;
 
-	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
-
 	if (cow) {
 		mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src,
 					vma->vm_start,
@@ -3784,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		entry = huge_ptep_get(src_pte);
 		dst_entry = huge_ptep_get(dst_pte);
+again:
 		if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
 			/*
 			 * Skip if src entry none.  Also, skip in the
@@ -3807,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			}
 			set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
 		} else {
+			entry = huge_ptep_get(src_pte);
+			ptepage = pte_page(entry);
+			get_page(ptepage);
+
+			/*
+			 * This is a rare case where we see pinned hugetlb
+			 * pages while they're prone to COW.  We need to do the
+			 * COW earlier during fork.
+			 *
+			 * When pre-allocating the page or copying data, we
+			 * need to be without the pgtable locks since we could
+			 * sleep during the process.
+			 */
+			if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
+				pte_t src_pte_old = entry;
+				struct page *new;
+
+				spin_unlock(src_ptl);
+				spin_unlock(dst_ptl);
+				/* Do not use reserve as it's private owned */
+				new = alloc_huge_page(vma, addr, 1);
+				if (IS_ERR(new)) {
+					put_page(ptepage);
+					ret = PTR_ERR(new);
+					break;
+				}
+				copy_user_huge_page(new, ptepage, addr, vma,
+						    npages);
+				put_page(ptepage);
+
+				/* Install the new huge page if src pte stable */
+				dst_ptl = huge_pte_lock(h, dst, dst_pte);
+				src_ptl = huge_pte_lockptr(h, src, src_pte);
+				spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+				entry = huge_ptep_get(src_pte);
+				if (!pte_same(src_pte_old, entry)) {
+					put_page(new);
+					/* dst_entry won't change as in child */
+					goto again;
+				}
+				hugetlb_install_page(vma, dst_pte, addr, new);
+				spin_unlock(src_ptl);
+				spin_unlock(dst_ptl);
+				continue;
+			}
+
 			if (cow) {
 				/*
 				 * No need to notify as we are downgrading page
@@ -3817,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 				 */
 				huge_ptep_set_wrprotect(src, addr, src_pte);
 			}
-			entry = huge_ptep_get(src_pte);
-			ptepage = pte_page(entry);
-			get_page(ptepage);
+
 			page_dup_rmap(ptepage, true);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
-			hugetlb_count_add(pages_per_huge_page(h), dst);
+			hugetlb_count_add(npages, dst);
 		}
 		spin_unlock(src_ptl);
 		spin_unlock(dst_ptl);
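Note on the add_reservation_in_range() hunks above: the two previously open-coded blocks that either linked a new file_region into the reservation map or, on a counting pass, only bumped *regions_needed are folded into the new hugetlb_resv_map_add() helper, which also returns the number of pages the gap [from, to) covers. The following is a minimal standalone userspace sketch of that dry-run-versus-insert pattern; struct region, region_add() and the demo list are made-up names, not kernel code.

/*
 * Standalone userspace sketch (not kernel code) of the pattern that
 * hugetlb_resv_map_add() captures: one helper that either links a new
 * region entry covering [from, to) or, on a dry run, only counts how
 * many entries would be needed.  All names here are made up.
 */
#include <stdio.h>
#include <stdlib.h>

struct region {
	long from, to;
	struct region *next;
};

/* Link a new entry in at *pos, or just bump the counter on a dry run. */
static long region_add(struct region **pos, long from, long to,
		       long *regions_needed)
{
	if (!regions_needed) {
		struct region *nrg = malloc(sizeof(*nrg));

		nrg->from = from;
		nrg->to = to;
		nrg->next = *pos;
		*pos = nrg;
	} else {
		*regions_needed += 1;
	}
	return to - from;	/* pages accounted for this gap */
}

int main(void)
{
	struct region *head = NULL;
	long needed = 0, added;

	/* Counting pass: how many entries would cover [0, 4) and [10, 12)? */
	region_add(&head, 0, 4, &needed);
	region_add(&head, 10, 12, &needed);
	printf("entries needed: %ld\n", needed);	/* 2 */

	/* Insertion pass: actually link the entries and sum the pages. */
	added = region_add(&head, 0, 4, NULL);
	added += region_add(&head, 10, 12, NULL);
	printf("pages added: %ld\n", added);		/* 6 */

	for (struct region *r = head; r; r = r->next)
		printf("region [%ld, %ld)\n", r->from, r->to);
	return 0;
}

A first pass with a non-NULL counter sizes how many entries must be pre-allocated; a second pass with NULL actually links them, mirroring the two modes visible in the hunks above.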

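Note on the copy_hugetlb_page_range() hunks: when page_needs_cow_for_dma() reports that the source huge page may be pinned, the fork path now performs the copy-on-write early. Because allocating and copying a huge page can sleep, both page-table locks are dropped first; after they are retaken, the copy is only installed via hugetlb_install_page() if pte_same(src_pte_old, entry) still holds, otherwise the new page is released and the loop restarts at the again: label. Below is a minimal userspace sketch of this drop-lock/copy/revalidate/retry pattern; the names and the pthread mutex standing in for the page-table locks are illustrative only, not the kernel implementation.

/*
 * Standalone userspace sketch (not kernel code): snapshot the source under
 * the lock, drop the lock for work that may sleep, retake it and publish
 * the copy only if the source is unchanged, otherwise discard and retry.
 * The mutex stands in for the page-table locks; all names are made up.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static const char *src_val = "original";	/* stands in for the src pte/page */
static char *dst_val;				/* stands in for the dst mapping */

static void copy_with_retry(void)
{
	const char *snapshot;
	char *copy;

again:
	pthread_mutex_lock(&lock);
	snapshot = src_val;		/* value observed under the lock */
	pthread_mutex_unlock(&lock);

	copy = strdup(snapshot);	/* "sleeping" work, done unlocked */

	pthread_mutex_lock(&lock);
	if (src_val != snapshot) {
		/* Source changed while we were unlocked: discard and retry. */
		pthread_mutex_unlock(&lock);
		free(copy);
		goto again;
	}
	dst_val = copy;			/* safe to publish the copy */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	copy_with_retry();
	printf("dst_val = %s\n", dst_val);
	free(dst_val);
	return 0;
}

In the kernel hunk the revalidation is pte_same() on the saved source PTE rather than a pointer comparison, and the allocation goes through alloc_huge_page() while deliberately avoiding the private reserve, per the comment in the hunk.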