diff options
Diffstat (limited to 'fs/bcachefs/btree_cache.c')
-rw-r--r-- | fs/bcachefs/btree_cache.c | 388 |
1 files changed, 219 insertions, 169 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 6e4afb2b5441..91e0aa796e6b 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -17,14 +17,11 @@ #include <linux/sched/mm.h> #include <linux/swap.h> -#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \ -do { \ - if (shrinker_counter) \ - bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_##counter]++; \ -} while (0) - const char * const bch2_btree_node_flags[] = { -#define x(f) #f, + "typebit", + "typebit", + "typebit", +#define x(f) [BTREE_NODE_##f] = #f, BTREE_FLAGS() #undef x NULL @@ -59,16 +56,38 @@ static inline size_t btree_cache_can_free(struct btree_cache_list *list) static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b) { + BUG_ON(!list_empty(&b->list)); + if (b->c.lock.readers) - list_move(&b->list, &bc->freed_pcpu); + list_add(&b->list, &bc->freed_pcpu); else - list_move(&b->list, &bc->freed_nonpcpu); + list_add(&b->list, &bc->freed_nonpcpu); +} + +static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b) +{ + BUG_ON(!list_empty(&b->list)); + BUG_ON(!b->data); + + bc->nr_freeable++; + list_add(&b->list, &bc->freeable); } -static void btree_node_data_free(struct bch_fs *c, struct btree *b) +void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) { struct btree_cache *bc = &c->btree_cache; + mutex_lock(&bc->lock); + __bch2_btree_node_to_freelist(bc, b); + mutex_unlock(&bc->lock); + + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); +} + +static void __btree_node_data_free(struct btree_cache *bc, struct btree *b) +{ + BUG_ON(!list_empty(&b->list)); BUG_ON(btree_node_hashed(b)); /* @@ -94,11 +113,17 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b) #endif b->aux_data = NULL; - bc->nr_freeable--; - btree_node_to_freedlist(bc, b); } +static void btree_node_data_free(struct btree_cache *bc, struct btree *b) +{ + BUG_ON(list_empty(&b->list)); + list_del_init(&b->list); + --bc->nr_freeable; + __btree_node_data_free(bc, b); +} + static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, const void *obj) { @@ -124,7 +149,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) b->data = kvmalloc(btree_buf_bytes(b), gfp); if (!b->data) - return -BCH_ERR_ENOMEM_btree_node_mem_alloc; + return bch_err_throw(c, ENOMEM_btree_node_mem_alloc); #ifdef __KERNEL__ b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp); #else @@ -137,7 +162,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) if (!b->aux_data) { kvfree(b->data); b->data = NULL; - return -BCH_ERR_ENOMEM_btree_node_mem_alloc; + return bch_err_throw(c, ENOMEM_btree_node_mem_alloc); } return 0; @@ -172,23 +197,12 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) return NULL; } - bch2_btree_lock_init(&b->c, 0); + bch2_btree_lock_init(&b->c, 0, GFP_KERNEL); - bc->nr_freeable++; - list_add(&b->list, &bc->freeable); + __bch2_btree_node_to_freelist(bc, b); return b; } -void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) -{ - mutex_lock(&c->btree_cache.lock); - list_move(&b->list, &c->btree_cache.freeable); - mutex_unlock(&c->btree_cache.lock); - - six_unlock_write(&b->c.lock); - six_unlock_intent(&b->c.lock); -} - static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b) { struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p); @@ -205,7 +219,6 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b) struct btree_cache *bc = &c->btree_cache; mutex_lock(&bc->lock); - BUG_ON(!__btree_node_pinned(bc, b)); if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { set_btree_node_pinned(b); list_move(&b->list, &bc->live[1].list); @@ -236,11 +249,11 @@ void bch2_btree_cache_unpin(struct bch_fs *c) /* Btree in memory cache - hash table */ -void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) +void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) { lockdep_assert_held(&bc->lock); - int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); + int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); BUG_ON(ret); /* Cause future lookups for this node to fail: */ @@ -248,17 +261,22 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) if (b->c.btree_id < BTREE_ID_NR) --bc->nr_by_btree[b->c.btree_id]; + --bc->live[btree_node_pinned(b)].nr; + list_del_init(&b->list); +} - bc->live[btree_node_pinned(b)].nr--; - bc->nr_freeable++; - list_move(&b->list, &bc->freeable); +void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) +{ + __bch2_btree_node_hash_remove(bc, b); + __bch2_btree_node_to_freelist(bc, b); } int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) { + BUG_ON(!list_empty(&b->list)); BUG_ON(b->hash_val); - b->hash_val = btree_ptr_hash_val(&b->key); + b->hash_val = btree_ptr_hash_val(&b->key); int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash, bch_btree_cache_params); if (ret) @@ -270,10 +288,8 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) bool p = __btree_node_pinned(bc, b); mod_bit(BTREE_NODE_pinned, &b->flags, p); - list_move_tail(&b->list, &bc->live[p].list); + list_add_tail(&b->list, &bc->live[p].list); bc->live[p].nr++; - - bc->nr_freeable--; return 0; } @@ -306,7 +322,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans, if (!IS_ERR_OR_NULL(b)) { mutex_lock(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, b); + __bch2_btree_node_hash_remove(&c->btree_cache, b); bkey_copy(&b->key, new); ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); @@ -328,115 +344,118 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc, return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params); } -/* - * this version is for btree nodes that have already been freed (we're not - * reaping a real btree node) - */ -static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter) +static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b, + bool flush, bool locked) { struct btree_cache *bc = &c->btree_cache; - int ret = 0; lockdep_assert_held(&bc->lock); -wait_on_io: - if (b->flags & ((1U << BTREE_NODE_dirty)| - (1U << BTREE_NODE_read_in_flight)| + + if (btree_node_noevict(b)) { + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_noevict]++; + return bch_err_throw(c, ENOMEM_btree_node_reclaim); + } + if (btree_node_write_blocked(b)) { + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_blocked]++; + return bch_err_throw(c, ENOMEM_btree_node_reclaim); + } + if (btree_node_will_make_reachable(b)) { + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_will_make_reachable]++; + return bch_err_throw(c, ENOMEM_btree_node_reclaim); + } + + if (btree_node_dirty(b)) { + if (!flush) { + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_dirty]++; + return bch_err_throw(c, ENOMEM_btree_node_reclaim); + } + + if (locked) { + /* + * Using the underscore version because we don't want to compact + * bsets after the write, since this node is about to be evicted + * - unless btree verify mode is enabled, since it runs out of + * the post write cleanup: + */ + if (static_branch_unlikely(&bch2_verify_btree_ondisk)) + bch2_btree_node_write(c, b, SIX_LOCK_intent, + BTREE_WRITE_cache_reclaim); + else + __bch2_btree_node_write(c, b, + BTREE_WRITE_cache_reclaim); + } + } + + if (b->flags & ((1U << BTREE_NODE_read_in_flight)| (1U << BTREE_NODE_write_in_flight))) { if (!flush) { - if (btree_node_dirty(b)) - BTREE_CACHE_NOT_FREED_INCREMENT(dirty); - else if (btree_node_read_in_flight(b)) - BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight); + if (btree_node_read_in_flight(b)) + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_read_in_flight]++; else if (btree_node_write_in_flight(b)) - BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight); - return -BCH_ERR_ENOMEM_btree_node_reclaim; + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_in_flight]++; + return bch_err_throw(c, ENOMEM_btree_node_reclaim); } + if (locked) + return -EINTR; + /* XXX: waiting on IO with btree cache lock held */ bch2_btree_node_wait_on_read(b); bch2_btree_node_wait_on_write(b); } + return 0; +} + +/* + * this version is for btree nodes that have already been freed (we're not + * reaping a real btree node) + */ +static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) +{ + struct btree_cache *bc = &c->btree_cache; + int ret = 0; + + lockdep_assert_held(&bc->lock); +retry_unlocked: + ret = __btree_node_reclaim_checks(c, b, flush, false); + if (ret) + return ret; + if (!six_trylock_intent(&b->c.lock)) { - BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent); - return -BCH_ERR_ENOMEM_btree_node_reclaim; + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_intent]++; + return bch_err_throw(c, ENOMEM_btree_node_reclaim); } if (!six_trylock_write(&b->c.lock)) { - BTREE_CACHE_NOT_FREED_INCREMENT(lock_write); - goto out_unlock_intent; + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_write]++; + six_unlock_intent(&b->c.lock); + return bch_err_throw(c, ENOMEM_btree_node_reclaim); } /* recheck under lock */ - if (b->flags & ((1U << BTREE_NODE_read_in_flight)| - (1U << BTREE_NODE_write_in_flight))) { - if (!flush) { - if (btree_node_read_in_flight(b)) - BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight); - else if (btree_node_write_in_flight(b)) - BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight); - goto out_unlock; - } + ret = __btree_node_reclaim_checks(c, b, flush, true); + if (ret) { six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); - goto wait_on_io; - } - - if (btree_node_noevict(b)) { - BTREE_CACHE_NOT_FREED_INCREMENT(noevict); - goto out_unlock; - } - if (btree_node_write_blocked(b)) { - BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked); - goto out_unlock; - } - if (btree_node_will_make_reachable(b)) { - BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable); - goto out_unlock; + if (ret == -EINTR) + goto retry_unlocked; + return ret; } - if (btree_node_dirty(b)) { - if (!flush) { - BTREE_CACHE_NOT_FREED_INCREMENT(dirty); - goto out_unlock; - } - /* - * Using the underscore version because we don't want to compact - * bsets after the write, since this node is about to be evicted - * - unless btree verify mode is enabled, since it runs out of - * the post write cleanup: - */ - if (bch2_verify_btree_ondisk) - bch2_btree_node_write(c, b, SIX_LOCK_intent, - BTREE_WRITE_cache_reclaim); - else - __bch2_btree_node_write(c, b, - BTREE_WRITE_cache_reclaim); - - six_unlock_write(&b->c.lock); - six_unlock_intent(&b->c.lock); - goto wait_on_io; - } -out: if (b->hash_val && !ret) trace_and_count(c, btree_cache_reap, c, b); - return ret; -out_unlock: - six_unlock_write(&b->c.lock); -out_unlock_intent: - six_unlock_intent(&b->c.lock); - ret = -BCH_ERR_ENOMEM_btree_node_reclaim; - goto out; + return 0; } -static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter) +static int btree_node_reclaim(struct bch_fs *c, struct btree *b) { - return __btree_node_reclaim(c, b, false, shrinker_counter); + return __btree_node_reclaim(c, b, false); } static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b) { - return __btree_node_reclaim(c, b, true, false); + return __btree_node_reclaim(c, b, true); } static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, @@ -454,7 +473,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, unsigned long ret = SHRINK_STOP; bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >= list->nr * 3 / 4; - if (bch2_btree_shrinker_disabled) + if (static_branch_unlikely(&bch2_btree_shrinker_disabled)) return SHRINK_STOP; mutex_lock(&bc->lock); @@ -468,7 +487,10 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, * IO can always make forward progress: */ can_free = btree_cache_can_free(list); - nr = min_t(unsigned long, nr, can_free); + if (nr > can_free) { + bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_cache_reserve] += nr - can_free; + nr = can_free; + } i = 0; list_for_each_entry_safe(b, t, &bc->freeable, list) { @@ -484,8 +506,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, if (touched >= nr) goto out; - if (!btree_node_reclaim(c, b, true)) { - btree_node_data_free(c, b); + if (!btree_node_reclaim(c, b)) { + btree_node_data_free(bc, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); freed++; @@ -500,11 +522,11 @@ restart: clear_btree_node_accessed(b); bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++; --touched;; - } else if (!btree_node_reclaim(c, b, true)) { - bch2_btree_node_hash_remove(bc, b); + } else if (!btree_node_reclaim(c, b)) { + __bch2_btree_node_hash_remove(bc, b); + __btree_node_data_free(bc, b); freed++; - btree_node_data_free(c, b); bc->nr_freed++; six_unlock_write(&b->c.lock); @@ -547,7 +569,7 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink, { struct btree_cache_list *list = shrink->private_data; - if (bch2_btree_shrinker_disabled) + if (static_branch_unlikely(&bch2_btree_shrinker_disabled)) return 0; return btree_cache_can_free(list); @@ -587,7 +609,8 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c) BUG_ON(btree_node_read_in_flight(b) || btree_node_write_in_flight(b)); - btree_node_data_free(c, b); + btree_node_data_free(bc, b); + cond_resched(); } BUG_ON(!bch2_journal_error(&c->journal) && @@ -659,7 +682,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) return 0; err: - return -BCH_ERR_ENOMEM_fs_btree_cache_init; + return bch_err_throw(c, ENOMEM_fs_btree_cache_init); } void bch2_fs_btree_cache_init_early(struct btree_cache *bc) @@ -704,7 +727,7 @@ int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure if (!cl) { trace_and_count(c, btree_cache_cannibalize_lock_fail, trans); - return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock; + return bch_err_throw(c, ENOMEM_btree_cache_cannibalize_lock); } closure_wait(&bc->alloc_wait, cl); @@ -718,7 +741,7 @@ int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure } trace_and_count(c, btree_cache_cannibalize_lock_fail, trans); - return -BCH_ERR_btree_cache_cannibalize_lock_blocked; + return bch_err_throw(c, btree_cache_cannibalize_lock_blocked); success: trace_and_count(c, btree_cache_cannibalize_lock, trans); @@ -732,7 +755,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c) for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) list_for_each_entry_reverse(b, &bc->live[i].list, list) - if (!btree_node_reclaim(c, b, false)) + if (!btree_node_reclaim(c, b)) return b; while (1) { @@ -767,36 +790,42 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea * disk node. Check the freed list before allocating a new one: */ list_for_each_entry(b, freed, list) - if (!btree_node_reclaim(c, b, false)) { + if (!btree_node_reclaim(c, b)) { list_del_init(&b->list); goto got_node; } b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN); - if (!b) { + if (b) { + bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_NOWAIT); + } else { mutex_unlock(&bc->lock); bch2_trans_unlock(trans); b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) goto err; + bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL); mutex_lock(&bc->lock); } - bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0); - BUG_ON(!six_trylock_intent(&b->c.lock)); BUG_ON(!six_trylock_write(&b->c.lock)); -got_node: +got_node: /* * btree_free() doesn't free memory; it sticks the node on the end of * the list. Check if there's any freed nodes there: */ list_for_each_entry(b2, &bc->freeable, list) - if (!btree_node_reclaim(c, b2, false)) { + if (!btree_node_reclaim(c, b2)) { swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); + + list_del_init(&b2->list); + --bc->nr_freeable; btree_node_to_freedlist(bc, b2); + mutex_unlock(&bc->lock); + six_unlock_write(&b2->c.lock); six_unlock_intent(&b2->c.lock); goto got_mem; @@ -810,11 +839,8 @@ got_node: goto err; } - mutex_lock(&bc->lock); - bc->nr_freeable++; got_mem: - mutex_unlock(&bc->lock); - + BUG_ON(!list_empty(&b->list)); BUG_ON(btree_node_hashed(b)); BUG_ON(btree_node_dirty(b)); BUG_ON(btree_node_write_in_flight(b)); @@ -826,7 +852,6 @@ out: b->sib_u64s[1] = 0; b->whiteout_u64s = 0; bch2_btree_keys_init(b); - set_btree_node_accessed(b); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], start_time); @@ -845,7 +870,7 @@ err: if (bc->alloc_lock == current) { b2 = btree_node_cannibalize(c); clear_btree_node_just_written(b2); - bch2_btree_node_hash_remove(bc, b2); + __bch2_btree_node_hash_remove(bc, b2); if (b) { swap(b->data, b2->data); @@ -855,9 +880,9 @@ err: six_unlock_intent(&b2->c.lock); } else { b = b2; - list_del_init(&b->list); } + BUG_ON(!list_empty(&b->list)); mutex_unlock(&bc->lock); trace_and_count(c, btree_cache_cannibalize, trans); @@ -936,7 +961,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, b->hash_val = 0; mutex_lock(&bc->lock); - list_add(&b->list, &bc->freeable); + __bch2_btree_node_to_freelist(bc, b); mutex_unlock(&bc->lock); six_unlock_write(&b->c.lock); @@ -952,7 +977,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, /* Unlock before doing IO: */ six_unlock_intent(&b->c.lock); - bch2_trans_unlock_noassert(trans); + bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, sync); @@ -978,20 +1003,18 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) { struct printbuf buf = PRINTBUF; - if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) + if (c->recovery.pass_done < BCH_RECOVERY_PASS_check_allocations) return; prt_printf(&buf, - "btree node header doesn't match ptr\n" - "btree %s level %u\n" - "ptr: ", - bch2_btree_id_str(b->c.btree_id), b->c.level); + "btree node header doesn't match ptr: "); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_str(&buf, "\nptr: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_printf(&buf, "\nheader: btree %s level %llu\n" - "min ", - bch2_btree_id_str(BTREE_NODE_ID(b->data)), - BTREE_NODE_LEVEL(b->data)); + prt_str(&buf, "\nheader: "); + bch2_btree_id_level_to_text(&buf, BTREE_NODE_ID(b->data), BTREE_NODE_LEVEL(b->data)); + prt_str(&buf, "\nmin "); bch2_bpos_to_text(&buf, b->data->min_key); prt_printf(&buf, "\nmax "); @@ -1111,7 +1134,7 @@ retry: if (unlikely(btree_node_read_error(b))) { six_unlock_type(&b->c.lock, lock_type); - return ERR_PTR(-BCH_ERR_btree_node_read_error); + return ERR_PTR(-BCH_ERR_btree_node_read_err_cached); } EBUG_ON(b->c.btree_id != path->btree_id); @@ -1201,7 +1224,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path * if (unlikely(btree_node_read_error(b))) { six_unlock_type(&b->c.lock, lock_type); - return ERR_PTR(-BCH_ERR_btree_node_read_error); + return ERR_PTR(-BCH_ERR_btree_node_read_err_cached); } EBUG_ON(b->c.btree_id != path->btree_id); @@ -1262,6 +1285,10 @@ lock_node: six_unlock_read(&b->c.lock); goto retry; } + + /* avoid atomic set bit if it's not needed: */ + if (!btree_node_accessed(b)) + set_btree_node_accessed(b); } /* XXX: waiting on IO with btree locks held: */ @@ -1277,13 +1304,9 @@ lock_node: prefetch(p + L1_CACHE_BYTES * 2); } - /* avoid atomic set bit if it's not needed: */ - if (!btree_node_accessed(b)) - set_btree_node_accessed(b); - if (unlikely(btree_node_read_error(b))) { six_unlock_read(&b->c.lock); - b = ERR_PTR(-BCH_ERR_btree_node_read_error); + b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached); goto out; } @@ -1312,9 +1335,12 @@ int bch2_btree_node_prefetch(struct btree_trans *trans, b = bch2_btree_node_fill(trans, path, k, btree_id, level, SIX_LOCK_read, false); - if (!IS_ERR_OR_NULL(b)) + int ret = PTR_ERR_OR_ZERO(b); + if (ret) + return ret; + if (b) six_unlock_read(&b->c.lock); - return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b); + return 0; } void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) @@ -1353,7 +1379,7 @@ wait_on_io: mutex_lock(&bc->lock); bch2_btree_node_hash_remove(bc, b); - btree_node_data_free(c, b); + btree_node_data_free(bc, b); mutex_unlock(&bc->lock); out: six_unlock_write(&b->c.lock); @@ -1373,13 +1399,31 @@ void bch2_btree_id_to_text(struct printbuf *out, enum btree_id btree) prt_printf(out, "(unknown btree %u)", btree); } +void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsigned level) +{ + prt_str(out, "btree="); + bch2_btree_id_to_text(out, btree); + prt_printf(out, " level=%u", level); +} + +void __bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, + enum btree_id btree, unsigned level, struct bkey_s_c k) +{ + bch2_btree_id_to_text(out, btree); + prt_printf(out, " level %u/", level); + struct btree_root *r = bch2_btree_id_root(c, btree); + if (r) + prt_printf(out, "%u", r->level); + else + prt_printf(out, "(unknown)"); + prt_newline(out); + + bch2_bkey_val_to_text(out, c, k); +} + void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) { - prt_printf(out, "%s level %u/%u\n ", - bch2_btree_id_str(b->c.btree_id), - b->c.level, - bch2_btree_id_root(c, b->c.btree_id)->level); - bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); + __bch2_btree_pos_to_text(out, c, b->c.btree_id, b->c.level, bkey_i_to_s_c(&b->key)); } void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) @@ -1448,15 +1492,21 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc prt_btree_cache_line(out, c, "live:", bc->live[0].nr); prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr); - prt_btree_cache_line(out, c, "freeable:", bc->nr_freeable); + prt_btree_cache_line(out, c, "reserve:", bc->nr_reserve); + prt_btree_cache_line(out, c, "freed:", bc->nr_freeable); prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty)); - prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock); + prt_printf(out, "cannibalize lock:\t%s\n", bc->alloc_lock ? "held" : "not held"); prt_newline(out); - for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) - prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->nr_by_btree[i]); + for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) { + bch2_btree_id_to_text(out, i); + prt_printf(out, "\t"); + prt_human_readable_u64(out, bc->nr_by_btree[i] * c->opts.btree_node_size); + prt_printf(out, " (%zu)\n", bc->nr_by_btree[i]); + } prt_newline(out); + prt_printf(out, "counters since mount:\n"); prt_printf(out, "freed:\t%zu\n", bc->nr_freed); prt_printf(out, "not freed:\n"); |