From 8b52c7261e04f1325f47e2b173a9d4b980e8f858 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 10 Dec 2024 14:30:57 +0000 Subject: Revert "binder: switch alloc->mutex to spinlock_t" This reverts commit 7710e2cca32e7f3958480e8bd44f50e29d0c2509. In preparation for concurrent page installations, restore the original alloc->mutex which will serialize zap_page_range_single() against page installations in subsequent patches (instead of the mmap_sem). Resolved trivial conflicts with commit 2c10a20f5e84a ("binder_alloc: Fix sleeping function called from invalid context") and commit da0c02516c50 ("mm/list_lru: simplify the list_lru walk callback function"). Cc: Mukesh Ojha Reviewed-by: Suren Baghdasaryan Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20241210143114.661252-2-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 46 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'drivers/android/binder_alloc.c') diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index a738e7745865..52f6aa3232e1 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -169,9 +169,9 @@ struct binder_buffer *binder_alloc_prepare_to_free(struct binder_alloc *alloc, { struct binder_buffer *buffer; - spin_lock(&alloc->lock); + mutex_lock(&alloc->mutex); buffer = binder_alloc_prepare_to_free_locked(alloc, user_ptr); - spin_unlock(&alloc->lock); + mutex_unlock(&alloc->mutex); return buffer; } @@ -597,10 +597,10 @@ struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, if (!next) return ERR_PTR(-ENOMEM); - spin_lock(&alloc->lock); + mutex_lock(&alloc->mutex); buffer = binder_alloc_new_buf_locked(alloc, next, size, is_async); if (IS_ERR(buffer)) { - spin_unlock(&alloc->lock); + mutex_unlock(&alloc->mutex); goto out; } @@ -608,7 +608,7 @@ struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, buffer->offsets_size = offsets_size; buffer->extra_buffers_size = extra_buffers_size; buffer->pid = current->tgid; - spin_unlock(&alloc->lock); + mutex_unlock(&alloc->mutex); ret = binder_install_buffer_pages(alloc, buffer, size); if (ret) { @@ -785,17 +785,17 @@ void binder_alloc_free_buf(struct binder_alloc *alloc, * We could eliminate the call to binder_alloc_clear_buf() * from binder_alloc_deferred_release() by moving this to * binder_free_buf_locked(). However, that could - * increase contention for the alloc->lock if clear_on_free - * is used frequently for large buffers. This lock is not + * increase contention for the alloc mutex if clear_on_free + * is used frequently for large buffers. The mutex is not * needed for correctness here. */ if (buffer->clear_on_free) { binder_alloc_clear_buf(alloc, buffer); buffer->clear_on_free = false; } - spin_lock(&alloc->lock); + mutex_lock(&alloc->mutex); binder_free_buf_locked(alloc, buffer); - spin_unlock(&alloc->lock); + mutex_unlock(&alloc->mutex); } /** @@ -893,7 +893,7 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) struct binder_buffer *buffer; buffers = 0; - spin_lock(&alloc->lock); + mutex_lock(&alloc->mutex); BUG_ON(alloc->vma); while ((n = rb_first(&alloc->allocated_buffers))) { @@ -940,7 +940,7 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) page_count++; } } - spin_unlock(&alloc->lock); + mutex_unlock(&alloc->mutex); kvfree(alloc->pages); if (alloc->mm) mmdrop(alloc->mm); @@ -964,7 +964,7 @@ void binder_alloc_print_allocated(struct seq_file *m, struct binder_buffer *buffer; struct rb_node *n; - spin_lock(&alloc->lock); + mutex_lock(&alloc->mutex); for (n = rb_first(&alloc->allocated_buffers); n; n = rb_next(n)) { buffer = rb_entry(n, struct binder_buffer, rb_node); seq_printf(m, " buffer %d: %lx size %zd:%zd:%zd %s\n", @@ -974,7 +974,7 @@ void binder_alloc_print_allocated(struct seq_file *m, buffer->extra_buffers_size, buffer->transaction ? "active" : "delivered"); } - spin_unlock(&alloc->lock); + mutex_unlock(&alloc->mutex); } /** @@ -991,7 +991,7 @@ void binder_alloc_print_pages(struct seq_file *m, int lru = 0; int free = 0; - spin_lock(&alloc->lock); + mutex_lock(&alloc->mutex); /* * Make sure the binder_alloc is fully initialized, otherwise we might * read inconsistent state. @@ -1007,7 +1007,7 @@ void binder_alloc_print_pages(struct seq_file *m, lru++; } } - spin_unlock(&alloc->lock); + mutex_unlock(&alloc->mutex); seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); } @@ -1023,10 +1023,10 @@ int binder_alloc_get_allocated_count(struct binder_alloc *alloc) struct rb_node *n; int count = 0; - spin_lock(&alloc->lock); + mutex_lock(&alloc->mutex); for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) count++; - spin_unlock(&alloc->lock); + mutex_unlock(&alloc->mutex); return count; } @@ -1070,8 +1070,8 @@ enum lru_status binder_alloc_free_page(struct list_head *item, goto err_mmget; if (!mmap_read_trylock(mm)) goto err_mmap_read_lock_failed; - if (!spin_trylock(&alloc->lock)) - goto err_get_alloc_lock_failed; + if (!mutex_trylock(&alloc->mutex)) + goto err_get_alloc_mutex_failed; if (!page->page_ptr) goto err_page_already_freed; @@ -1090,7 +1090,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_kernel_end(alloc, index); list_lru_isolate(lru, item); - spin_unlock(&alloc->lock); + mutex_unlock(&alloc->mutex); spin_unlock(&lru->lock); if (vma) { @@ -1109,8 +1109,8 @@ enum lru_status binder_alloc_free_page(struct list_head *item, err_invalid_vma: err_page_already_freed: - spin_unlock(&alloc->lock); -err_get_alloc_lock_failed: + mutex_unlock(&alloc->mutex); +err_get_alloc_mutex_failed: mmap_read_unlock(mm); err_mmap_read_lock_failed: mmput_async(mm); @@ -1145,7 +1145,7 @@ void binder_alloc_init(struct binder_alloc *alloc) alloc->pid = current->group_leader->pid; alloc->mm = current->mm; mmgrab(alloc->mm); - spin_lock_init(&alloc->lock); + mutex_init(&alloc->mutex); INIT_LIST_HEAD(&alloc->buffers); } -- cgit v1.2.3 From d1716b4b78fb392a5514c8863e8ba287cc4580c2 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 10 Dec 2024 14:30:58 +0000 Subject: binder: concurrent page installation Allow multiple callers to install pages simultaneously by switching the mmap_sem from write-mode to read-mode. Races to the same PTE are handled using get_user_pages_remote() to retrieve the already installed page. This method significantly reduces contention in the mmap semaphore. To ensure safety, vma_lookup() is used (instead of alloc->vma) to avoid operating on an isolated VMA. In addition, zap_page_range_single() is called under the alloc->mutex to avoid racing with the shrinker. Many thanks to Barry Song who posted a similar approach [1]. Link: https://lore.kernel.org/all/20240902225009.34576-1-21cnbao@gmail.com/ [1] Cc: David Hildenbrand Cc: Barry Song Cc: Suren Baghdasaryan Cc: Liam R. Howlett Reviewed-by: Suren Baghdasaryan Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20241210143114.661252-3-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 65 ++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 24 deletions(-) (limited to 'drivers/android/binder_alloc.c') diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 52f6aa3232e1..f26283c2c768 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -221,26 +221,14 @@ static int binder_install_single_page(struct binder_alloc *alloc, struct binder_lru_page *lru_page, unsigned long addr) { + struct vm_area_struct *vma; struct page *page; - int ret = 0; + long npages; + int ret; if (!mmget_not_zero(alloc->mm)) return -ESRCH; - /* - * Protected with mmap_sem in write mode as multiple tasks - * might race to install the same page. - */ - mmap_write_lock(alloc->mm); - if (binder_get_installed_page(lru_page)) - goto out; - - if (!alloc->vma) { - pr_err("%d: %s failed, no vma\n", alloc->pid, __func__); - ret = -ESRCH; - goto out; - } - page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); if (!page) { pr_err("%d: failed to allocate page\n", alloc->pid); @@ -248,19 +236,48 @@ static int binder_install_single_page(struct binder_alloc *alloc, goto out; } - ret = vm_insert_page(alloc->vma, addr, page); - if (ret) { + mmap_read_lock(alloc->mm); + vma = vma_lookup(alloc->mm, addr); + if (!vma || vma != alloc->vma) { + __free_page(page); + pr_err("%d: %s failed, no vma\n", alloc->pid, __func__); + ret = -ESRCH; + goto unlock; + } + + ret = vm_insert_page(vma, addr, page); + switch (ret) { + case -EBUSY: + /* + * EBUSY is ok. Someone installed the pte first but the + * lru_page->page_ptr has not been updated yet. Discard + * our page and look up the one already installed. + */ + ret = 0; + __free_page(page); + npages = get_user_pages_remote(alloc->mm, addr, 1, + FOLL_NOFAULT, &page, NULL); + if (npages <= 0) { + pr_err("%d: failed to find page at offset %lx\n", + alloc->pid, addr - alloc->buffer); + ret = -ESRCH; + break; + } + fallthrough; + case 0: + /* Mark page installation complete and safe to use */ + binder_set_installed_page(lru_page, page); + break; + default: + __free_page(page); pr_err("%d: %s failed to insert page at offset %lx with %d\n", alloc->pid, __func__, addr - alloc->buffer, ret); - __free_page(page); ret = -ENOMEM; - goto out; + break; } - - /* Mark page installation complete and safe to use */ - binder_set_installed_page(lru_page, page); +unlock: + mmap_read_unlock(alloc->mm); out: - mmap_write_unlock(alloc->mm); mmput_async(alloc->mm); return ret; } @@ -1090,7 +1107,6 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_kernel_end(alloc, index); list_lru_isolate(lru, item); - mutex_unlock(&alloc->mutex); spin_unlock(&lru->lock); if (vma) { @@ -1101,6 +1117,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_user_end(alloc, index); } + mutex_unlock(&alloc->mutex); mmap_read_unlock(mm); mmput_async(mm); __free_page(page_to_free); -- cgit v1.2.3 From 49d2562c804fc4f43342b3254fe6fb87365c9046 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 10 Dec 2024 14:30:59 +0000 Subject: binder: select correct nid for pages in LRU The numa node id for binder pages is currently being derived from the lru entry under struct binder_lru_page. However, this object doesn't reflect the node id of the struct page items allocated separately. Instead, select the correct node id from the page itself. This was made possible since commit 0a97c01cd20b ("list_lru: allow explicit memcg and NUMA node selection"). Cc: Nhat Pham Cc: Johannes Weiner Cc: Suren Baghdasaryan Reviewed-by: Suren Baghdasaryan Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20241210143114.661252-4-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/android/binder_alloc.c') diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index f26283c2c768..1f02bec78451 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -210,7 +210,10 @@ static void binder_lru_freelist_add(struct binder_alloc *alloc, trace_binder_free_lru_start(alloc, index); - ret = list_lru_add_obj(&binder_freelist, &page->lru); + ret = list_lru_add(&binder_freelist, + &page->lru, + page_to_nid(page->page_ptr), + NULL); WARN_ON(!ret); trace_binder_free_lru_end(alloc, index); @@ -334,7 +337,10 @@ static void binder_lru_freelist_del(struct binder_alloc *alloc, if (page->page_ptr) { trace_binder_alloc_lru_start(alloc, index); - on_lru = list_lru_del_obj(&binder_freelist, &page->lru); + on_lru = list_lru_del(&binder_freelist, + &page->lru, + page_to_nid(page->page_ptr), + NULL); WARN_ON(!on_lru); trace_binder_alloc_lru_end(alloc, index); @@ -947,8 +953,10 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) if (!alloc->pages[i].page_ptr) continue; - on_lru = list_lru_del_obj(&binder_freelist, - &alloc->pages[i].lru); + on_lru = list_lru_del(&binder_freelist, + &alloc->pages[i].lru, + page_to_nid(alloc->pages[i].page_ptr), + NULL); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%s: %d: page %d %s\n", __func__, alloc->pid, i, -- cgit v1.2.3 From f909f0308267dc49fbf122f60e1ec7ddcd1b92c7 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 10 Dec 2024 14:31:00 +0000 Subject: binder: store shrinker metadata under page->private Instead of pre-allocating an entire array of struct binder_lru_page in alloc->pages, install the shrinker metadata under page->private. This ensures the memory is allocated and released as needed alongside pages. By converting the alloc->pages[] into an array of struct page pointers, we can access these pages directly and only reference the shrinker metadata where it's being used (e.g. inside the shrinker's callback). Rename struct binder_lru_page to struct binder_shrinker_mdata to better reflect its purpose. Add convenience functions that wrap the allocation and freeing of pages along with their shrinker metadata. Note I've reworked this patch to avoid using page->lru and page->index directly, as Matthew pointed out that these are being removed [1]. Link: https://lore.kernel.org/all/ZzziucEm3np6e7a0@casper.infradead.org/ [1] Cc: Matthew Wilcox Cc: Liam R. Howlett Reviewed-by: Suren Baghdasaryan Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20241210143114.661252-5-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 130 ++++++++++++++++++-------------- drivers/android/binder_alloc.h | 25 ++++-- drivers/android/binder_alloc_selftest.c | 14 ++-- 3 files changed, 99 insertions(+), 70 deletions(-) (limited to 'drivers/android/binder_alloc.c') diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 1f02bec78451..3e30ac5b4861 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -176,25 +176,26 @@ struct binder_buffer *binder_alloc_prepare_to_free(struct binder_alloc *alloc, } static inline void -binder_set_installed_page(struct binder_lru_page *lru_page, +binder_set_installed_page(struct binder_alloc *alloc, + unsigned long index, struct page *page) { /* Pairs with acquire in binder_get_installed_page() */ - smp_store_release(&lru_page->page_ptr, page); + smp_store_release(&alloc->pages[index], page); } static inline struct page * -binder_get_installed_page(struct binder_lru_page *lru_page) +binder_get_installed_page(struct binder_alloc *alloc, unsigned long index) { /* Pairs with release in binder_set_installed_page() */ - return smp_load_acquire(&lru_page->page_ptr); + return smp_load_acquire(&alloc->pages[index]); } static void binder_lru_freelist_add(struct binder_alloc *alloc, unsigned long start, unsigned long end) { - struct binder_lru_page *page; unsigned long page_addr; + struct page *page; trace_binder_update_page_range(alloc, false, start, end); @@ -203,16 +204,15 @@ static void binder_lru_freelist_add(struct binder_alloc *alloc, int ret; index = (page_addr - alloc->buffer) / PAGE_SIZE; - page = &alloc->pages[index]; - - if (!binder_get_installed_page(page)) + page = binder_get_installed_page(alloc, index); + if (!page) continue; trace_binder_free_lru_start(alloc, index); ret = list_lru_add(&binder_freelist, - &page->lru, - page_to_nid(page->page_ptr), + page_to_lru(page), + page_to_nid(page), NULL); WARN_ON(!ret); @@ -220,8 +220,39 @@ static void binder_lru_freelist_add(struct binder_alloc *alloc, } } +static struct page *binder_page_alloc(struct binder_alloc *alloc, + unsigned long index) +{ + struct binder_shrinker_mdata *mdata; + struct page *page; + + page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + if (!page) + return NULL; + + /* allocate and install shrinker metadata under page->private */ + mdata = kzalloc(sizeof(*mdata), GFP_KERNEL); + if (!mdata) { + __free_page(page); + return NULL; + } + + mdata->alloc = alloc; + mdata->page_index = index; + INIT_LIST_HEAD(&mdata->lru); + set_page_private(page, (unsigned long)mdata); + + return page; +} + +static void binder_free_page(struct page *page) +{ + kfree((struct binder_shrinker_mdata *)page_private(page)); + __free_page(page); +} + static int binder_install_single_page(struct binder_alloc *alloc, - struct binder_lru_page *lru_page, + unsigned long index, unsigned long addr) { struct vm_area_struct *vma; @@ -232,9 +263,8 @@ static int binder_install_single_page(struct binder_alloc *alloc, if (!mmget_not_zero(alloc->mm)) return -ESRCH; - page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + page = binder_page_alloc(alloc, index); if (!page) { - pr_err("%d: failed to allocate page\n", alloc->pid); ret = -ENOMEM; goto out; } @@ -242,7 +272,7 @@ static int binder_install_single_page(struct binder_alloc *alloc, mmap_read_lock(alloc->mm); vma = vma_lookup(alloc->mm, addr); if (!vma || vma != alloc->vma) { - __free_page(page); + binder_free_page(page); pr_err("%d: %s failed, no vma\n", alloc->pid, __func__); ret = -ESRCH; goto unlock; @@ -253,11 +283,11 @@ static int binder_install_single_page(struct binder_alloc *alloc, case -EBUSY: /* * EBUSY is ok. Someone installed the pte first but the - * lru_page->page_ptr has not been updated yet. Discard + * alloc->pages[index] has not been updated yet. Discard * our page and look up the one already installed. */ ret = 0; - __free_page(page); + binder_free_page(page); npages = get_user_pages_remote(alloc->mm, addr, 1, FOLL_NOFAULT, &page, NULL); if (npages <= 0) { @@ -269,10 +299,10 @@ static int binder_install_single_page(struct binder_alloc *alloc, fallthrough; case 0: /* Mark page installation complete and safe to use */ - binder_set_installed_page(lru_page, page); + binder_set_installed_page(alloc, index, page); break; default: - __free_page(page); + binder_free_page(page); pr_err("%d: %s failed to insert page at offset %lx with %d\n", alloc->pid, __func__, addr - alloc->buffer, ret); ret = -ENOMEM; @@ -289,7 +319,6 @@ static int binder_install_buffer_pages(struct binder_alloc *alloc, struct binder_buffer *buffer, size_t size) { - struct binder_lru_page *page; unsigned long start, final; unsigned long page_addr; @@ -301,14 +330,12 @@ static int binder_install_buffer_pages(struct binder_alloc *alloc, int ret; index = (page_addr - alloc->buffer) / PAGE_SIZE; - page = &alloc->pages[index]; - - if (binder_get_installed_page(page)) + if (binder_get_installed_page(alloc, index)) continue; trace_binder_alloc_page_start(alloc, index); - ret = binder_install_single_page(alloc, page, page_addr); + ret = binder_install_single_page(alloc, index, page_addr); if (ret) return ret; @@ -322,8 +349,8 @@ static int binder_install_buffer_pages(struct binder_alloc *alloc, static void binder_lru_freelist_del(struct binder_alloc *alloc, unsigned long start, unsigned long end) { - struct binder_lru_page *page; unsigned long page_addr; + struct page *page; trace_binder_update_page_range(alloc, true, start, end); @@ -332,14 +359,14 @@ static void binder_lru_freelist_del(struct binder_alloc *alloc, bool on_lru; index = (page_addr - alloc->buffer) / PAGE_SIZE; - page = &alloc->pages[index]; + page = binder_get_installed_page(alloc, index); - if (page->page_ptr) { + if (page) { trace_binder_alloc_lru_start(alloc, index); on_lru = list_lru_del(&binder_freelist, - &page->lru, - page_to_nid(page->page_ptr), + page_to_lru(page), + page_to_nid(page), NULL); WARN_ON(!on_lru); @@ -760,11 +787,10 @@ static struct page *binder_alloc_get_page(struct binder_alloc *alloc, (buffer->user_data - alloc->buffer); pgoff_t pgoff = buffer_space_offset & ~PAGE_MASK; size_t index = buffer_space_offset >> PAGE_SHIFT; - struct binder_lru_page *lru_page; - lru_page = &alloc->pages[index]; *pgoffp = pgoff; - return lru_page->page_ptr; + + return alloc->pages[index]; } /** @@ -839,7 +865,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, { struct binder_buffer *buffer; const char *failure_string; - int ret, i; + int ret; if (unlikely(vma->vm_mm != alloc->mm)) { ret = -EINVAL; @@ -862,17 +888,12 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, alloc->pages = kvcalloc(alloc->buffer_size / PAGE_SIZE, sizeof(alloc->pages[0]), GFP_KERNEL); - if (alloc->pages == NULL) { + if (!alloc->pages) { ret = -ENOMEM; failure_string = "alloc page array"; goto err_alloc_pages_failed; } - for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { - alloc->pages[i].alloc = alloc; - INIT_LIST_HEAD(&alloc->pages[i].lru); - } - buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); if (!buffer) { ret = -ENOMEM; @@ -948,20 +969,22 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) int i; for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { + struct page *page; bool on_lru; - if (!alloc->pages[i].page_ptr) + page = binder_get_installed_page(alloc, i); + if (!page) continue; on_lru = list_lru_del(&binder_freelist, - &alloc->pages[i].lru, - page_to_nid(alloc->pages[i].page_ptr), + page_to_lru(page), + page_to_nid(page), NULL); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%s: %d: page %d %s\n", __func__, alloc->pid, i, on_lru ? "on lru" : "active"); - __free_page(alloc->pages[i].page_ptr); + binder_free_page(page); page_count++; } } @@ -1010,7 +1033,7 @@ void binder_alloc_print_allocated(struct seq_file *m, void binder_alloc_print_pages(struct seq_file *m, struct binder_alloc *alloc) { - struct binder_lru_page *page; + struct page *page; int i; int active = 0; int lru = 0; @@ -1023,10 +1046,10 @@ void binder_alloc_print_pages(struct seq_file *m, */ if (binder_alloc_get_vma(alloc) != NULL) { for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { - page = &alloc->pages[i]; - if (!page->page_ptr) + page = binder_get_installed_page(alloc, i); + if (!page) free++; - else if (list_empty(&page->lru)) + else if (list_empty(page_to_lru(page))) active++; else lru++; @@ -1083,8 +1106,8 @@ enum lru_status binder_alloc_free_page(struct list_head *item, void *cb_arg) __must_hold(&lru->lock) { - struct binder_lru_page *page = container_of(item, typeof(*page), lru); - struct binder_alloc *alloc = page->alloc; + struct binder_shrinker_mdata *mdata = container_of(item, typeof(*mdata), lru); + struct binder_alloc *alloc = mdata->alloc; struct mm_struct *mm = alloc->mm; struct vm_area_struct *vma; struct page *page_to_free; @@ -1097,10 +1120,8 @@ enum lru_status binder_alloc_free_page(struct list_head *item, goto err_mmap_read_lock_failed; if (!mutex_trylock(&alloc->mutex)) goto err_get_alloc_mutex_failed; - if (!page->page_ptr) - goto err_page_already_freed; - index = page - alloc->pages; + index = mdata->page_index; page_addr = alloc->buffer + index * PAGE_SIZE; vma = vma_lookup(mm, page_addr); @@ -1109,8 +1130,8 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_kernel_start(alloc, index); - page_to_free = page->page_ptr; - page->page_ptr = NULL; + page_to_free = alloc->pages[index]; + binder_set_installed_page(alloc, index, NULL); trace_binder_unmap_kernel_end(alloc, index); @@ -1128,12 +1149,11 @@ enum lru_status binder_alloc_free_page(struct list_head *item, mutex_unlock(&alloc->mutex); mmap_read_unlock(mm); mmput_async(mm); - __free_page(page_to_free); + binder_free_page(page_to_free); return LRU_REMOVED_RETRY; err_invalid_vma: -err_page_already_freed: mutex_unlock(&alloc->mutex); err_get_alloc_mutex_failed: mmap_read_unlock(mm); diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 33c5f971c0a5..d71f99189ef5 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -59,17 +59,26 @@ struct binder_buffer { }; /** - * struct binder_lru_page - page object used for binder shrinker - * @page_ptr: pointer to physical page in mmap'd space - * @lru: entry in binder_freelist - * @alloc: binder_alloc for a proc + * struct binder_shrinker_mdata - binder metadata used to reclaim pages + * @lru: LRU entry in binder_freelist + * @alloc: binder_alloc owning the page to reclaim + * @page_index: offset in @alloc->pages[] into the page to reclaim */ -struct binder_lru_page { +struct binder_shrinker_mdata { struct list_head lru; - struct page *page_ptr; struct binder_alloc *alloc; + unsigned long page_index; }; +static inline struct list_head *page_to_lru(struct page *p) +{ + struct binder_shrinker_mdata *mdata; + + mdata = (struct binder_shrinker_mdata *)page_private(p); + + return &mdata->lru; +} + /** * struct binder_alloc - per-binder proc state for binder allocator * @mutex: protects binder_alloc fields @@ -83,7 +92,7 @@ struct binder_lru_page { * @allocated_buffers: rb tree of allocated buffers sorted by address * @free_async_space: VA space available for async buffers. This is * initialized at mmap time to 1/2 the full VA space - * @pages: array of binder_lru_page + * @pages: array of struct page * * @buffer_size: size of address space specified via mmap * @pid: pid for associated binder_proc (invariant after init) * @pages_high: high watermark of offset in @pages @@ -104,7 +113,7 @@ struct binder_alloc { struct rb_root free_buffers; struct rb_root allocated_buffers; size_t free_async_space; - struct binder_lru_page *pages; + struct page **pages; size_t buffer_size; int pid; size_t pages_high; diff --git a/drivers/android/binder_alloc_selftest.c b/drivers/android/binder_alloc_selftest.c index 81442fe20a69..a4c650843bee 100644 --- a/drivers/android/binder_alloc_selftest.c +++ b/drivers/android/binder_alloc_selftest.c @@ -105,10 +105,10 @@ static bool check_buffer_pages_allocated(struct binder_alloc *alloc, page_addr = buffer->user_data; for (; page_addr < end; page_addr += PAGE_SIZE) { page_index = (page_addr - alloc->buffer) / PAGE_SIZE; - if (!alloc->pages[page_index].page_ptr || - !list_empty(&alloc->pages[page_index].lru)) { + if (!alloc->pages[page_index] || + !list_empty(page_to_lru(alloc->pages[page_index]))) { pr_err("expect alloc but is %s at page index %d\n", - alloc->pages[page_index].page_ptr ? + alloc->pages[page_index] ? "lru" : "free", page_index); return false; } @@ -148,10 +148,10 @@ static void binder_selftest_free_buf(struct binder_alloc *alloc, * if binder shrinker ran during binder_alloc_free_buf * calls above. */ - if (list_empty(&alloc->pages[i].lru)) { + if (list_empty(page_to_lru(alloc->pages[i]))) { pr_err_size_seq(sizes, seq); pr_err("expect lru but is %s at page index %d\n", - alloc->pages[i].page_ptr ? "alloc" : "free", i); + alloc->pages[i] ? "alloc" : "free", i); binder_selftest_failures++; } } @@ -168,9 +168,9 @@ static void binder_selftest_free_page(struct binder_alloc *alloc) } for (i = 0; i < (alloc->buffer_size / PAGE_SIZE); i++) { - if (alloc->pages[i].page_ptr) { + if (alloc->pages[i]) { pr_err("expect free but is %s at page index %d\n", - list_empty(&alloc->pages[i].lru) ? + list_empty(page_to_lru(alloc->pages[i])) ? "alloc" : "lru", i); binder_selftest_failures++; } -- cgit v1.2.3 From 072010abc3ad98bc20198dbe60ef13233a0a357c Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 10 Dec 2024 14:31:01 +0000 Subject: binder: replace alloc->vma with alloc->mapped It is unsafe to use alloc->vma outside of the mmap_sem. Instead, add a new boolean alloc->mapped to save the vma state (mapped or unmmaped) and use this as a replacement for alloc->vma to validate several paths. Using the alloc->vma caused several performance and security issues in the past. Now that it has been replaced with either vm_lookup() or the alloc->mapped state, we can finally remove it. Cc: Minchan Kim Cc: Liam R. Howlett Cc: Matthew Wilcox Cc: Suren Baghdasaryan Reviewed-by: Suren Baghdasaryan Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20241210143114.661252-6-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 48 ++++++++++++++++++--------------- drivers/android/binder_alloc.h | 6 ++--- drivers/android/binder_alloc_selftest.c | 2 +- 3 files changed, 30 insertions(+), 26 deletions(-) (limited to 'drivers/android/binder_alloc.c') diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 3e30ac5b4861..ed79d7c146c8 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -220,6 +220,19 @@ static void binder_lru_freelist_add(struct binder_alloc *alloc, } } +static inline +void binder_alloc_set_mapped(struct binder_alloc *alloc, bool state) +{ + /* pairs with smp_load_acquire in binder_alloc_is_mapped() */ + smp_store_release(&alloc->mapped, state); +} + +static inline bool binder_alloc_is_mapped(struct binder_alloc *alloc) +{ + /* pairs with smp_store_release in binder_alloc_set_mapped() */ + return smp_load_acquire(&alloc->mapped); +} + static struct page *binder_page_alloc(struct binder_alloc *alloc, unsigned long index) { @@ -271,7 +284,7 @@ static int binder_install_single_page(struct binder_alloc *alloc, mmap_read_lock(alloc->mm); vma = vma_lookup(alloc->mm, addr); - if (!vma || vma != alloc->vma) { + if (!vma || !binder_alloc_is_mapped(alloc)) { binder_free_page(page); pr_err("%d: %s failed, no vma\n", alloc->pid, __func__); ret = -ESRCH; @@ -379,20 +392,6 @@ static void binder_lru_freelist_del(struct binder_alloc *alloc, } } -static inline void binder_alloc_set_vma(struct binder_alloc *alloc, - struct vm_area_struct *vma) -{ - /* pairs with smp_load_acquire in binder_alloc_get_vma() */ - smp_store_release(&alloc->vma, vma); -} - -static inline struct vm_area_struct *binder_alloc_get_vma( - struct binder_alloc *alloc) -{ - /* pairs with smp_store_release in binder_alloc_set_vma() */ - return smp_load_acquire(&alloc->vma); -} - static void debug_no_space_locked(struct binder_alloc *alloc) { size_t largest_alloc_size = 0; @@ -626,7 +625,7 @@ struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, int ret; /* Check binder_alloc is fully initialized */ - if (!binder_alloc_get_vma(alloc)) { + if (!binder_alloc_is_mapped(alloc)) { binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%d: binder_alloc_buf, no vma\n", alloc->pid); @@ -908,7 +907,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, alloc->free_async_space = alloc->buffer_size / 2; /* Signal binder_alloc is fully initialized */ - binder_alloc_set_vma(alloc, vma); + binder_alloc_set_mapped(alloc, true); return 0; @@ -938,7 +937,7 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) buffers = 0; mutex_lock(&alloc->mutex); - BUG_ON(alloc->vma); + BUG_ON(alloc->mapped); while ((n = rb_first(&alloc->allocated_buffers))) { buffer = rb_entry(n, struct binder_buffer, rb_node); @@ -1044,7 +1043,7 @@ void binder_alloc_print_pages(struct seq_file *m, * Make sure the binder_alloc is fully initialized, otherwise we might * read inconsistent state. */ - if (binder_alloc_get_vma(alloc) != NULL) { + if (binder_alloc_is_mapped(alloc)) { for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { page = binder_get_installed_page(alloc, i); if (!page) @@ -1084,12 +1083,12 @@ int binder_alloc_get_allocated_count(struct binder_alloc *alloc) * @alloc: binder_alloc for this proc * * Called from binder_vma_close() when releasing address space. - * Clears alloc->vma to prevent new incoming transactions from + * Clears alloc->mapped to prevent new incoming transactions from * allocating more buffers. */ void binder_alloc_vma_close(struct binder_alloc *alloc) { - binder_alloc_set_vma(alloc, NULL); + binder_alloc_set_mapped(alloc, false); } /** @@ -1125,7 +1124,12 @@ enum lru_status binder_alloc_free_page(struct list_head *item, page_addr = alloc->buffer + index * PAGE_SIZE; vma = vma_lookup(mm, page_addr); - if (vma && vma != binder_alloc_get_vma(alloc)) + /* + * Since a binder_alloc can only be mapped once, we ensure + * the vma corresponds to this mapping by checking whether + * the binder_alloc is still mapped. + */ + if (vma && !binder_alloc_is_mapped(alloc)) goto err_invalid_vma; trace_binder_unmap_kernel_start(alloc, index); diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index d71f99189ef5..3ebb12afd4de 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -82,8 +82,6 @@ static inline struct list_head *page_to_lru(struct page *p) /** * struct binder_alloc - per-binder proc state for binder allocator * @mutex: protects binder_alloc fields - * @vma: vm_area_struct passed to mmap_handler - * (invariant after mmap) * @mm: copy of task->mm (invariant after open) * @buffer: base of per-proc address space mapped via mmap * @buffers: list of all buffers for this proc @@ -96,6 +94,8 @@ static inline struct list_head *page_to_lru(struct page *p) * @buffer_size: size of address space specified via mmap * @pid: pid for associated binder_proc (invariant after init) * @pages_high: high watermark of offset in @pages + * @mapped: whether the vm area is mapped, each binder instance is + * allowed a single mapping throughout its lifetime * @oneway_spam_detected: %true if oneway spam detection fired, clear that * flag once the async buffer has returned to a healthy state * @@ -106,7 +106,6 @@ static inline struct list_head *page_to_lru(struct page *p) */ struct binder_alloc { struct mutex mutex; - struct vm_area_struct *vma; struct mm_struct *mm; unsigned long buffer; struct list_head buffers; @@ -117,6 +116,7 @@ struct binder_alloc { size_t buffer_size; int pid; size_t pages_high; + bool mapped; bool oneway_spam_detected; }; diff --git a/drivers/android/binder_alloc_selftest.c b/drivers/android/binder_alloc_selftest.c index a4c650843bee..6a64847a8555 100644 --- a/drivers/android/binder_alloc_selftest.c +++ b/drivers/android/binder_alloc_selftest.c @@ -291,7 +291,7 @@ void binder_selftest_alloc(struct binder_alloc *alloc) if (!binder_selftest_run) return; mutex_lock(&binder_selftest_lock); - if (!binder_selftest_run || !alloc->vma) + if (!binder_selftest_run || !alloc->mapped) goto done; pr_info("STARTED\n"); binder_selftest_alloc_offset(alloc, end_offset, 0); -- cgit v1.2.3 From 0a7bf6866d416e4f8f452419410359b6a82639d1 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 10 Dec 2024 14:31:02 +0000 Subject: binder: rename alloc->buffer to vm_start The alloc->buffer field in struct binder_alloc stores the starting address of the mapped vma, rename this field to alloc->vm_start to better reflect its purpose. It also avoids confusion with the binder buffer concept, e.g. transaction->buffer. No functional changes in this patch. Reviewed-by: Suren Baghdasaryan Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20241210143114.661252-7-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 +- drivers/android/binder_alloc.c | 28 ++++++++++++++-------------- drivers/android/binder_alloc.h | 4 ++-- drivers/android/binder_alloc_selftest.c | 2 +- drivers/android/binder_trace.h | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/android/binder_alloc.c') diff --git a/drivers/android/binder.c b/drivers/android/binder.c index ef353ca13c35..9962c606cabd 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6374,7 +6374,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m, seq_printf(m, " node %d", buffer->target_node->debug_id); seq_printf(m, " size %zd:%zd offset %lx\n", buffer->data_size, buffer->offsets_size, - proc->alloc.buffer - buffer->user_data); + proc->alloc.vm_start - buffer->user_data); } static void print_binder_work_ilocked(struct seq_file *m, diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index ed79d7c146c8..9cb47e1bc6be 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -61,7 +61,7 @@ static size_t binder_alloc_buffer_size(struct binder_alloc *alloc, struct binder_buffer *buffer) { if (list_is_last(&buffer->entry, &alloc->buffers)) - return alloc->buffer + alloc->buffer_size - buffer->user_data; + return alloc->vm_start + alloc->buffer_size - buffer->user_data; return binder_buffer_next(buffer)->user_data - buffer->user_data; } @@ -203,7 +203,7 @@ static void binder_lru_freelist_add(struct binder_alloc *alloc, size_t index; int ret; - index = (page_addr - alloc->buffer) / PAGE_SIZE; + index = (page_addr - alloc->vm_start) / PAGE_SIZE; page = binder_get_installed_page(alloc, index); if (!page) continue; @@ -305,7 +305,7 @@ static int binder_install_single_page(struct binder_alloc *alloc, FOLL_NOFAULT, &page, NULL); if (npages <= 0) { pr_err("%d: failed to find page at offset %lx\n", - alloc->pid, addr - alloc->buffer); + alloc->pid, addr - alloc->vm_start); ret = -ESRCH; break; } @@ -317,7 +317,7 @@ static int binder_install_single_page(struct binder_alloc *alloc, default: binder_free_page(page); pr_err("%d: %s failed to insert page at offset %lx with %d\n", - alloc->pid, __func__, addr - alloc->buffer, ret); + alloc->pid, __func__, addr - alloc->vm_start, ret); ret = -ENOMEM; break; } @@ -342,7 +342,7 @@ static int binder_install_buffer_pages(struct binder_alloc *alloc, unsigned long index; int ret; - index = (page_addr - alloc->buffer) / PAGE_SIZE; + index = (page_addr - alloc->vm_start) / PAGE_SIZE; if (binder_get_installed_page(alloc, index)) continue; @@ -371,7 +371,7 @@ static void binder_lru_freelist_del(struct binder_alloc *alloc, unsigned long index; bool on_lru; - index = (page_addr - alloc->buffer) / PAGE_SIZE; + index = (page_addr - alloc->vm_start) / PAGE_SIZE; page = binder_get_installed_page(alloc, index); if (page) { @@ -723,8 +723,8 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, BUG_ON(buffer->free); BUG_ON(size > buffer_size); BUG_ON(buffer->transaction != NULL); - BUG_ON(buffer->user_data < alloc->buffer); - BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size); + BUG_ON(buffer->user_data < alloc->vm_start); + BUG_ON(buffer->user_data > alloc->vm_start + alloc->buffer_size); if (buffer->async_transaction) { alloc->free_async_space += buffer_size; @@ -783,7 +783,7 @@ static struct page *binder_alloc_get_page(struct binder_alloc *alloc, pgoff_t *pgoffp) { binder_size_t buffer_space_offset = buffer_offset + - (buffer->user_data - alloc->buffer); + (buffer->user_data - alloc->vm_start); pgoff_t pgoff = buffer_space_offset & ~PAGE_MASK; size_t index = buffer_space_offset >> PAGE_SHIFT; @@ -882,7 +882,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, SZ_4M); mutex_unlock(&binder_alloc_mmap_lock); - alloc->buffer = vma->vm_start; + alloc->vm_start = vma->vm_start; alloc->pages = kvcalloc(alloc->buffer_size / PAGE_SIZE, sizeof(alloc->pages[0]), @@ -900,7 +900,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, goto err_alloc_buf_struct_failed; } - buffer->user_data = alloc->buffer; + buffer->user_data = alloc->vm_start; list_add(&buffer->entry, &alloc->buffers); buffer->free = 1; binder_insert_free_buffer(alloc, buffer); @@ -915,7 +915,7 @@ err_alloc_buf_struct_failed: kvfree(alloc->pages); alloc->pages = NULL; err_alloc_pages_failed: - alloc->buffer = 0; + alloc->vm_start = 0; mutex_lock(&binder_alloc_mmap_lock); alloc->buffer_size = 0; err_already_mapped: @@ -1016,7 +1016,7 @@ void binder_alloc_print_allocated(struct seq_file *m, buffer = rb_entry(n, struct binder_buffer, rb_node); seq_printf(m, " buffer %d: %lx size %zd:%zd:%zd %s\n", buffer->debug_id, - buffer->user_data - alloc->buffer, + buffer->user_data - alloc->vm_start, buffer->data_size, buffer->offsets_size, buffer->extra_buffers_size, buffer->transaction ? "active" : "delivered"); @@ -1121,7 +1121,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, goto err_get_alloc_mutex_failed; index = mdata->page_index; - page_addr = alloc->buffer + index * PAGE_SIZE; + page_addr = alloc->vm_start + index * PAGE_SIZE; vma = vma_lookup(mm, page_addr); /* diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 3ebb12afd4de..feecd7414241 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -83,7 +83,7 @@ static inline struct list_head *page_to_lru(struct page *p) * struct binder_alloc - per-binder proc state for binder allocator * @mutex: protects binder_alloc fields * @mm: copy of task->mm (invariant after open) - * @buffer: base of per-proc address space mapped via mmap + * @vm_start: base of per-proc address space mapped via mmap * @buffers: list of all buffers for this proc * @free_buffers: rb tree of buffers available for allocation * sorted by size @@ -107,7 +107,7 @@ static inline struct list_head *page_to_lru(struct page *p) struct binder_alloc { struct mutex mutex; struct mm_struct *mm; - unsigned long buffer; + unsigned long vm_start; struct list_head buffers; struct rb_root free_buffers; struct rb_root allocated_buffers; diff --git a/drivers/android/binder_alloc_selftest.c b/drivers/android/binder_alloc_selftest.c index 6a64847a8555..c88735c54848 100644 --- a/drivers/android/binder_alloc_selftest.c +++ b/drivers/android/binder_alloc_selftest.c @@ -104,7 +104,7 @@ static bool check_buffer_pages_allocated(struct binder_alloc *alloc, end = PAGE_ALIGN(buffer->user_data + size); page_addr = buffer->user_data; for (; page_addr < end; page_addr += PAGE_SIZE) { - page_index = (page_addr - alloc->buffer) / PAGE_SIZE; + page_index = (page_addr - alloc->vm_start) / PAGE_SIZE; if (!alloc->pages[page_index] || !list_empty(page_to_lru(alloc->pages[page_index]))) { pr_err("expect alloc but is %s at page index %d\n", diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h index fe38c6fc65d0..16de1b9e72f7 100644 --- a/drivers/android/binder_trace.h +++ b/drivers/android/binder_trace.h @@ -328,7 +328,7 @@ TRACE_EVENT(binder_update_page_range, TP_fast_assign( __entry->proc = alloc->pid; __entry->allocate = allocate; - __entry->offset = start - alloc->buffer; + __entry->offset = start - alloc->vm_start; __entry->size = end - start; ), TP_printk("proc=%d allocate=%d offset=%zu size=%zu", -- cgit v1.2.3 From 9e2aa76549b9fd2b8f7b81260417a4ec853910e6 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 10 Dec 2024 14:31:03 +0000 Subject: binder: use per-vma lock in page installation Use per-vma locking for concurrent page installations, this minimizes contention with unrelated vmas improving performance. The mmap_lock is still acquired when needed though, e.g. before get_user_pages_remote(). Many thanks to Barry Song who posted a similar approach [1]. Link: https://lore.kernel.org/all/20240902225009.34576-1-21cnbao@gmail.com/ [1] Cc: Nhat Pham Cc: Johannes Weiner Cc: Barry Song Cc: Suren Baghdasaryan Cc: Hillf Danton Cc: Lorenzo Stoakes Reviewed-by: Suren Baghdasaryan Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20241210143114.661252-8-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 67 +++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 17 deletions(-) (limited to 'drivers/android/binder_alloc.c') diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 9cb47e1bc6be..f86bd6ded4f4 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -233,6 +233,53 @@ static inline bool binder_alloc_is_mapped(struct binder_alloc *alloc) return smp_load_acquire(&alloc->mapped); } +static struct page *binder_page_lookup(struct binder_alloc *alloc, + unsigned long addr) +{ + struct mm_struct *mm = alloc->mm; + struct page *page; + long npages = 0; + + /* + * Find an existing page in the remote mm. If missing, + * don't attempt to fault-in just propagate an error. + */ + mmap_read_lock(mm); + if (binder_alloc_is_mapped(alloc)) + npages = get_user_pages_remote(mm, addr, 1, FOLL_NOFAULT, + &page, NULL); + mmap_read_unlock(mm); + + return npages > 0 ? page : NULL; +} + +static int binder_page_insert(struct binder_alloc *alloc, + unsigned long addr, + struct page *page) +{ + struct mm_struct *mm = alloc->mm; + struct vm_area_struct *vma; + int ret = -ESRCH; + + /* attempt per-vma lock first */ + vma = lock_vma_under_rcu(mm, addr); + if (vma) { + if (binder_alloc_is_mapped(alloc)) + ret = vm_insert_page(vma, addr, page); + vma_end_read(vma); + return ret; + } + + /* fall back to mmap_lock */ + mmap_read_lock(mm); + vma = vma_lookup(mm, addr); + if (vma && binder_alloc_is_mapped(alloc)) + ret = vm_insert_page(vma, addr, page); + mmap_read_unlock(mm); + + return ret; +} + static struct page *binder_page_alloc(struct binder_alloc *alloc, unsigned long index) { @@ -268,9 +315,7 @@ static int binder_install_single_page(struct binder_alloc *alloc, unsigned long index, unsigned long addr) { - struct vm_area_struct *vma; struct page *page; - long npages; int ret; if (!mmget_not_zero(alloc->mm)) @@ -282,16 +327,7 @@ static int binder_install_single_page(struct binder_alloc *alloc, goto out; } - mmap_read_lock(alloc->mm); - vma = vma_lookup(alloc->mm, addr); - if (!vma || !binder_alloc_is_mapped(alloc)) { - binder_free_page(page); - pr_err("%d: %s failed, no vma\n", alloc->pid, __func__); - ret = -ESRCH; - goto unlock; - } - - ret = vm_insert_page(vma, addr, page); + ret = binder_page_insert(alloc, addr, page); switch (ret) { case -EBUSY: /* @@ -301,9 +337,8 @@ static int binder_install_single_page(struct binder_alloc *alloc, */ ret = 0; binder_free_page(page); - npages = get_user_pages_remote(alloc->mm, addr, 1, - FOLL_NOFAULT, &page, NULL); - if (npages <= 0) { + page = binder_page_lookup(alloc, addr); + if (!page) { pr_err("%d: failed to find page at offset %lx\n", alloc->pid, addr - alloc->vm_start); ret = -ESRCH; @@ -321,8 +356,6 @@ static int binder_install_single_page(struct binder_alloc *alloc, ret = -ENOMEM; break; } -unlock: - mmap_read_unlock(alloc->mm); out: mmput_async(alloc->mm); return ret; -- cgit v1.2.3 From 978ce3ed703db86344e1df718ea0f56ec7d4dae1 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 10 Dec 2024 14:31:04 +0000 Subject: binder: propagate vm_insert_page() errors Instead of always overriding errors with -ENOMEM, propagate the specific error code returned by vm_insert_page(). This allows for more accurate error logs and handling. Cc: Suren Baghdasaryan Reviewed-by: Suren Baghdasaryan Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20241210143114.661252-9-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/android/binder_alloc.c') diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index f86bd6ded4f4..b2b97ff19ba2 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -353,7 +353,6 @@ static int binder_install_single_page(struct binder_alloc *alloc, binder_free_page(page); pr_err("%d: %s failed to insert page at offset %lx with %d\n", alloc->pid, __func__, addr - alloc->vm_start, ret); - ret = -ENOMEM; break; } out: -- cgit v1.2.3 From 95bc2d4a9020efcd7858c91e68e9f4e842e3e8c8 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 10 Dec 2024 14:31:05 +0000 Subject: binder: use per-vma lock in page reclaiming Use per-vma locking in the shrinker's callback when reclaiming pages, similar to the page installation logic. This minimizes contention with unrelated vmas improving performance. The mmap_sem is still acquired if the per-vma lock cannot be obtained. Cc: Suren Baghdasaryan Suggested-by: Liam R. Howlett Reviewed-by: Suren Baghdasaryan Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20241210143114.661252-10-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'drivers/android/binder_alloc.c') diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index b2b97ff19ba2..fcfaf1b899c8 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -1143,19 +1143,28 @@ enum lru_status binder_alloc_free_page(struct list_head *item, struct vm_area_struct *vma; struct page *page_to_free; unsigned long page_addr; + int mm_locked = 0; size_t index; if (!mmget_not_zero(mm)) goto err_mmget; - if (!mmap_read_trylock(mm)) - goto err_mmap_read_lock_failed; - if (!mutex_trylock(&alloc->mutex)) - goto err_get_alloc_mutex_failed; index = mdata->page_index; page_addr = alloc->vm_start + index * PAGE_SIZE; - vma = vma_lookup(mm, page_addr); + /* attempt per-vma lock first */ + vma = lock_vma_under_rcu(mm, page_addr); + if (!vma) { + /* fall back to mmap_lock */ + if (!mmap_read_trylock(mm)) + goto err_mmap_read_lock_failed; + mm_locked = 1; + vma = vma_lookup(mm, page_addr); + } + + if (!mutex_trylock(&alloc->mutex)) + goto err_get_alloc_mutex_failed; + /* * Since a binder_alloc can only be mapped once, we ensure * the vma corresponds to this mapping by checking whether @@ -1183,7 +1192,10 @@ enum lru_status binder_alloc_free_page(struct list_head *item, } mutex_unlock(&alloc->mutex); - mmap_read_unlock(mm); + if (mm_locked) + mmap_read_unlock(mm); + else + vma_end_read(vma); mmput_async(mm); binder_free_page(page_to_free); @@ -1192,7 +1204,10 @@ enum lru_status binder_alloc_free_page(struct list_head *item, err_invalid_vma: mutex_unlock(&alloc->mutex); err_get_alloc_mutex_failed: - mmap_read_unlock(mm); + if (mm_locked) + mmap_read_unlock(mm); + else + vma_end_read(vma); err_mmap_read_lock_failed: mmput_async(mm); err_mmget: -- cgit v1.2.3