Diffstat (limited to 'drivers/md/bcache/super.c')
-rw-r--r-- | drivers/md/bcache/super.c | 80 |
1 file changed, 59 insertions, 21 deletions
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 1a2ce1a4b456..1492c8552255 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -168,14 +168,14 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
 {
 	const char *err;
 	struct cache_sb_disk *s;
-	struct page *page;
+	struct folio *folio;
 	unsigned int i;
 
-	page = read_cache_page_gfp(bdev->bd_mapping,
-				   SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
-	if (IS_ERR(page))
+	folio = mapping_read_folio_gfp(bdev->bd_mapping,
+				       SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
+	if (IS_ERR(folio))
 		return "IO error";
-	s = page_address(page) + offset_in_page(SB_OFFSET);
+	s = folio_address(folio) + offset_in_folio(folio, SB_OFFSET);
 
 	sb->offset = le64_to_cpu(s->offset);
 	sb->version = le64_to_cpu(s->version);
@@ -272,7 +272,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
 	*res = s;
 	return NULL;
 err:
-	put_page(page);
+	folio_put(folio);
 	return err;
 }
 
@@ -1366,7 +1366,7 @@ static CLOSURE_CALLBACK(cached_dev_free)
 	mutex_unlock(&bch_register_lock);
 
 	if (dc->sb_disk)
-		put_page(virt_to_page(dc->sb_disk));
+		folio_put(virt_to_folio(dc->sb_disk));
 
 	if (dc->bdev_file)
 		fput(dc->bdev_file);
@@ -1733,7 +1733,12 @@ static CLOSURE_CALLBACK(cache_set_flush)
 		mutex_unlock(&b->write_lock);
 	}
 
-	if (ca->alloc_thread)
+	/*
+	 * If the register_cache_set() call to bch_cache_set_alloc() failed,
+	 * ca was never assigned and an error was returned, so we must
+	 * check that ca is not NULL during bch_cache_set_unregister().
+	 */
+	if (ca && ca->alloc_thread)
 		kthread_stop(ca->alloc_thread);
 
 	if (c->journal.cur) {
@@ -1907,7 +1912,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	INIT_LIST_HEAD(&c->btree_cache_freed);
 	INIT_LIST_HEAD(&c->data_buckets);
 
-	iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
+	iter_size = sizeof(struct btree_iter) +
+		    ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
 		sizeof(struct btree_iter_set);
 
 	c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
@@ -2210,7 +2216,7 @@ void bch_cache_release(struct kobject *kobj)
 		free_fifo(&ca->free[i]);
 
 	if (ca->sb_disk)
-		put_page(virt_to_page(ca->sb_disk));
+		folio_put(virt_to_folio(ca->sb_disk));
 
 	if (ca->bdev_file)
 		fput(ca->bdev_file);
@@ -2233,15 +2239,47 @@ static int cache_alloc(struct cache *ca)
 	bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0);
 
 	/*
-	 * when ca->sb.njournal_buckets is not zero, journal exists,
-	 * and in bch_journal_replay(), tree node may split,
-	 * so bucket of RESERVE_BTREE type is needed,
-	 * the worst situation is all journal buckets are valid journal,
-	 * and all the keys need to replay,
-	 * so the number of RESERVE_BTREE type buckets should be as much
-	 * as journal buckets
+	 * When the cache disk is first registered, ca->sb.njournal_buckets
+	 * is zero, and it is assigned in run_cache_set().
+	 *
+	 * When ca->sb.njournal_buckets is not zero, a journal exists, and
+	 * in bch_journal_replay() a btree node may split. The worst case
+	 * is that all journal buckets hold valid journal entries and all
+	 * the keys need to be replayed, so the number of RESERVE_BTREE
+	 * buckets should be as large as the number of journal buckets.
+	 *
+	 * If there are too few RESERVE_BTREE buckets,
+	 * bch_allocator_thread() may hang, unable to allocate a bucket.
+	 * The situation is roughly as follows:
+	 *
+	 * 1. In bch_data_insert_keys(), if the operation is not op->replace,
+	 *    it calls bch_journal(), which increments the journal_ref
+	 *    counter. This counter is only decremented after
+	 *    bch_btree_insert() completes.
+	 *
+	 * 2. When bch_btree_insert() is called and the btree needs to
+	 *    split, btree_split() and btree_check_reserve() check whether
+	 *    there are enough reserved buckets in the RESERVE_BTREE slot;
+	 *    if not, bcache_btree_root() retries repeatedly.
+	 *
+	 * 3. Normally, bch_allocator_thread() is responsible for filling
+	 *    the reservation slots from the free_inc bucket list. When the
+	 *    free_inc list is exhausted, bch_allocator_thread() calls
+	 *    invalidate_buckets() until free_inc is refilled. Then
+	 *    bch_allocator_thread() calls bch_prio_write() once, and
+	 *    bch_prio_write() calls bch_journal_meta() and waits for the
+	 *    journal write to complete.
+	 *
+	 * 4. During the journal write, journal_write_unlocked() is called.
+	 *    If the journal is full, journal_reclaim() and btree_flush_write()
+	 *    are called in turn, and then the journal write is retried.
+	 *
+	 * 5. When 2 and 4 occur together, IO hangs and cannot recover.
+	 *
+	 * Therefore, reserve more RESERVE_BTREE buckets.
 	 */
-	btree_buckets = ca->sb.njournal_buckets ?: 8;
+	btree_buckets = clamp_t(size_t, ca->sb.nbuckets >> 7,
+				32, SB_JOURNAL_BUCKETS);
 	free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 	if (!free) {
 		ret = -EPERM;
@@ -2555,7 +2593,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 	if (!holder) {
 		ret = -ENOMEM;
 		err = "cannot allocate memory";
-		goto out_put_sb_page;
+		goto out_put_sb_folio;
 	}
 
 	/* Now reopen in exclusive mode with proper holder */
@@ -2629,8 +2667,8 @@ async_done:
 
 out_free_holder:
 	kfree(holder);
-out_put_sb_page:
-	put_page(virt_to_page(sb_disk));
+out_put_sb_folio:
+	folio_put(virt_to_folio(sb_disk));
out_blkdev_put:
 	if (bdev_file)
 		fput(bdev_file);
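
Note on the struct page -> struct folio conversion above: the calls map
one-to-one (read_cache_page_gfp -> mapping_read_folio_gfp, page_address ->
folio_address, offset_in_page -> offset_in_folio, put_page -> folio_put,
virt_to_page -> virt_to_folio). The sketch below is a hedged illustration
of the same pattern, not the upstream read_super() itself; my_read_sb() is
a hypothetical name, and it assumes a recent kernel where block devices
expose bd_mapping and <linux/pagemap.h> provides mapping_read_folio_gfp().

#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/err.h>

/* SB_OFFSET is bcache's superblock byte offset, from its headers. */
static void *my_read_sb(struct block_device *bdev, struct folio **res)
{
	struct folio *folio;

	/*
	 * Read the folio covering the superblock through the block
	 * device's page cache; GFP_KERNEL means this may sleep.
	 */
	folio = mapping_read_folio_gfp(bdev->bd_mapping,
				       SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
	if (IS_ERR(folio))
		return ERR_CAST(folio);

	*res = folio;	/* caller drops the reference with folio_put() */

	/*
	 * offset_in_folio() replaces offset_in_page(): with large folios
	 * the superblock may live past the first PAGE_SIZE bytes.
	 */
	return folio_address(folio) + offset_in_folio(folio, SB_OFFSET);
}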
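
The iter_size hunk fixes an undersized allocation: the old formula counted
only the btree_iter_set entries and omitted the struct btree_iter header
that precedes the flexible array. A minimal userspace illustration of that
bug class follows; the struct names are stand-ins, not the bcache
definitions.

#include <stdio.h>
#include <stdlib.h>

struct set { void *k, *end; };

struct iter {
	size_t used, size;
	struct set data[];	/* flexible array member */
};

int main(void)
{
	size_t nsets = 64;
	/* Buggy: counts only the array, not the header before it. */
	size_t buggy = nsets * sizeof(struct set);
	/* Fixed: header plus array, mirroring the patch. */
	size_t fixed = sizeof(struct iter) + nsets * sizeof(struct set);
	struct iter *it;

	printf("buggy=%zu fixed=%zu\n", buggy, fixed);
	it = malloc(fixed);	/* malloc(buggy) would overflow on use */
	free(it);
	return 0;
}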
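
The new RESERVE_BTREE sizing no longer depends on njournal_buckets (which
is zero when a cache disk is first registered) but scales with the bucket
count, clamped to [32, SB_JOURNAL_BUCKETS]. A quick userspace check of the
formula, assuming SB_JOURNAL_BUCKETS is 256 as in bcache's on-disk format
header:

#include <stdio.h>

#define SB_JOURNAL_BUCKETS 256u

static size_t clamp_size(size_t v, size_t lo, size_t hi)
{
	/* Same behaviour as the kernel's clamp_t(size_t, v, lo, hi). */
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	size_t nbuckets[] = { 1024, 4096, 100000, 1 << 20 };
	size_t i;

	for (i = 0; i < sizeof(nbuckets) / sizeof(nbuckets[0]); i++) {
		/* Old policy was njournal_buckets ?: 8; new policy: */
		size_t reserve = clamp_size(nbuckets[i] >> 7, 32,
					    SB_JOURNAL_BUCKETS);
		printf("nbuckets=%zu -> RESERVE_BTREE=%zu\n",
		       nbuckets[i], reserve);
	}
	return 0;
}

For example, a 1024-bucket cache gets the 32-bucket floor, while a
million-bucket cache (8192 before clamping) is capped at 256.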