diff options
Diffstat (limited to 'fs/bcachefs/btree_iter.c')
-rw-r--r-- | fs/bcachefs/btree_iter.c | 626 |
1 files changed, 407 insertions, 219 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index eef9b89c561d..e32fce4fd258 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -270,8 +270,10 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) BUG_ON(!(iter->flags & BTREE_ITER_all_snapshots) && iter->pos.snapshot != iter->snapshot); - BUG_ON(bkey_lt(iter->pos, bkey_start_pos(&iter->k)) || - bkey_gt(iter->pos, iter->k.p)); + BUG_ON(iter->flags & BTREE_ITER_all_snapshots ? !bpos_eq(iter->pos, iter->k.p) : + !(iter->flags & BTREE_ITER_is_extents) ? !bkey_eq(iter->pos, iter->k.p) : + (bkey_lt(iter->pos, bkey_start_pos(&iter->k)) || + bkey_gt(iter->pos, iter->k.p))); } static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) @@ -327,7 +329,7 @@ out: void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, struct bpos pos) { - bch2_trans_verify_not_unlocked(trans); + bch2_trans_verify_not_unlocked_or_in_restart(trans); struct btree_path *path; struct trans_for_each_path_inorder_iter iter; @@ -697,6 +699,19 @@ void bch2_trans_node_add(struct btree_trans *trans, bch2_trans_revalidate_updates_in_node(trans, b); } +void bch2_trans_node_drop(struct btree_trans *trans, + struct btree *b) +{ + struct btree_path *path; + unsigned i, level = b->c.level; + + trans_for_each_path(trans, path, i) + if (path->l[level].b == b) { + btree_node_unlock(trans, path, level); + path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); + } +} + /* * A btree node has been modified in such a way as to invalidate iterators - fix * them: @@ -720,7 +735,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans, unsigned long trace_ip) { struct bch_fs *c = trans->c; - struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b; + struct btree_root *r = bch2_btree_id_root(c, path->btree_id); enum six_lock_type lock_type; unsigned i; int ret; @@ -728,7 +743,12 @@ static inline int btree_path_lock_root(struct btree_trans *trans, EBUG_ON(path->nodes_locked); while (1) { - b = READ_ONCE(*rootp); + struct btree *b = READ_ONCE(r->b); + if (unlikely(!b)) { + BUG_ON(!r->error); + return r->error; + } + path->level = READ_ONCE(b->c.level); if (unlikely(path->level < depth_want)) { @@ -748,14 +768,12 @@ static inline int btree_path_lock_root(struct btree_trans *trans, ret = btree_node_lock(trans, path, &b->c, path->level, lock_type, trace_ip); if (unlikely(ret)) { - if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed)) - continue; if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; BUG(); } - if (likely(b == READ_ONCE(*rootp) && + if (likely(b == READ_ONCE(r->b) && b->c.level == path->level && !race_fault())) { for (i = 0; i < path->level; i++) @@ -825,6 +843,8 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p bch2_bkey_buf_init(&tmp); + jiter->fail_if_too_many_whiteouts = true; + while (nr-- && !ret) { if (!bch2_btree_node_relock(trans, path, path->level)) break; @@ -1000,7 +1020,7 @@ retry_all: bch2_trans_unlock(trans); cond_resched(); - trans_set_locked(trans); + trans_set_locked(trans, false); if (unlikely(trans->memory_allocation_failure)) { struct closure cl; @@ -1267,7 +1287,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, { int cmp = bpos_cmp(new_pos, trans->paths[path_idx].pos); - bch2_trans_verify_not_in_restart(trans); + bch2_trans_verify_not_unlocked_or_in_restart(trans); EBUG_ON(!trans->paths[path_idx].ref); trace_btree_path_set_pos(trans, trans->paths + path_idx, &new_pos); @@ -1427,17 +1447,31 @@ void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_ (void *) trans->last_begin_ip); } -void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) +static void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) { +#ifdef CONFIG_BCACHEFS_DEBUG + struct printbuf buf = PRINTBUF; + bch2_prt_backtrace(&buf, &trans->last_restarted_trace); + panic("in transaction restart: %s, last restarted by\n%s", + bch2_err_str(trans->restarted), + buf.buf); +#else panic("in transaction restart: %s, last restarted by %pS\n", bch2_err_str(trans->restarted), (void *) trans->last_restarted_ip); +#endif } -void __noreturn bch2_trans_unlocked_error(struct btree_trans *trans) +void __noreturn bch2_trans_unlocked_or_in_restart_error(struct btree_trans *trans) { - panic("trans should be locked, unlocked by %pS\n", - (void *) trans->last_unlock_ip); + if (trans->restarted) + bch2_trans_in_restart_error(trans); + + if (!trans->locked) + panic("trans should be locked, unlocked by %pS\n", + (void *) trans->last_unlock_ip); + + BUG(); } noinline __cold @@ -1450,10 +1484,11 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) trans_for_each_update(trans, i) { struct bkey_s_c old = { &i->old_k, i->old_v }; - prt_printf(buf, "update: btree=%s cached=%u %pS\n", - bch2_btree_id_str(i->btree_id), - i->cached, - (void *) i->ip_allocated); + prt_str(buf, "update: btree="); + bch2_btree_id_to_text(buf, i->btree_id); + prt_printf(buf, " cached=%u %pS\n", + i->cached, + (void *) i->ip_allocated); prt_printf(buf, " old "); bch2_bkey_val_to_text(buf, trans->c, old); @@ -1486,13 +1521,13 @@ static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_tra { struct btree_path *path = trans->paths + path_idx; - prt_printf(out, "path: idx %3u ref %u:%u %c %c %c btree=%s l=%u pos ", + prt_printf(out, "path: idx %3u ref %u:%u %c %c %c ", path_idx, path->ref, path->intent_ref, path->preserve ? 'P' : ' ', path->should_be_locked ? 'S' : ' ', - path->cached ? 'C' : 'B', - bch2_btree_id_str(path->btree_id), - path->level); + path->cached ? 'C' : 'B'); + bch2_btree_id_level_to_text(out, path->btree_id, path->level); + prt_str(out, " pos "); bch2_bpos_to_text(out, path->pos); if (!path->cached && btree_node_locked(path, path->level)) { @@ -1717,8 +1752,7 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, struct trans_for_each_path_inorder_iter iter; btree_path_idx_t path_pos = 0, path_idx; - bch2_trans_verify_not_unlocked(trans); - bch2_trans_verify_not_in_restart(trans); + bch2_trans_verify_not_unlocked_or_in_restart(trans); bch2_trans_verify_locks(trans); btree_trans_sort_paths(trans); @@ -1833,7 +1867,7 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * !bkey_eq(path->pos, ck->key.pos)); *u = ck->k->k; - k = bkey_i_to_s_c(ck->k); + k = (struct bkey_s_c) { u, &ck->k->v }; } return k; @@ -1843,7 +1877,6 @@ hole: return (struct bkey_s_c) { u, NULL }; } - void bch2_set_btree_iter_dontneed(struct btree_iter *iter) { struct btree_trans *trans = iter->trans; @@ -1870,7 +1903,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter) struct btree_trans *trans = iter->trans; int ret; - bch2_trans_verify_not_unlocked(trans); + bch2_trans_verify_not_unlocked_or_in_restart(trans); iter->path = bch2_btree_path_set_pos(trans, iter->path, btree_iter_search_key(iter), @@ -1945,7 +1978,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) int ret; EBUG_ON(trans->paths[iter->path].cached); - bch2_trans_verify_not_in_restart(trans); + bch2_trans_verify_not_unlocked_or_in_restart(trans); bch2_btree_iter_verify(iter); ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); @@ -2101,7 +2134,7 @@ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, { struct btree_path *path = btree_iter_path(trans, iter); - return bch2_journal_keys_peek_upto(trans->c, iter->btree_id, + return bch2_journal_keys_peek_max(trans->c, iter->btree_id, path->level, path->pos, end_pos, @@ -2124,21 +2157,47 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, } static noinline -struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) +void btree_trans_peek_journal(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); struct bkey_i *next_journal = bch2_btree_journal_peek(trans, iter, - k.k ? k.k->p : path_l(path)->b->key.k.p); - + k->k ? k->k->p : path_l(path)->b->key.k.p); if (next_journal) { iter->k = next_journal->k; - k = bkey_i_to_s_c(next_journal); + *k = bkey_i_to_s_c(next_journal); } +} - return k; +static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos end_pos) +{ + struct btree_path *path = btree_iter_path(trans, iter); + + return bch2_journal_keys_peek_prev_min(trans->c, iter->btree_id, + path->level, + path->pos, + end_pos, + &iter->journal_idx); +} + +static noinline +void btree_trans_peek_prev_journal(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c *k) +{ + struct btree_path *path = btree_iter_path(trans, iter); + struct bkey_i *next_journal = + bch2_btree_journal_peek_prev(trans, iter, + k->k ? k->k->p : path_l(path)->b->key.k.p); + + if (next_journal) { + iter->k = next_journal->k; + *k = bkey_i_to_s_c(next_journal); + } } /* @@ -2154,8 +2213,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos struct bkey_s_c k; int ret; - bch2_trans_verify_not_in_restart(trans); - bch2_trans_verify_not_unlocked(trans); + bch2_trans_verify_not_unlocked_or_in_restart(trans); if ((iter->flags & BTREE_ITER_key_cache_fill) && bpos_eq(iter->pos, pos)) @@ -2181,13 +2239,17 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos if (unlikely(ret)) return bkey_s_c_err(ret); - btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path); - k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u); - if (k.k && !bkey_err(k)) { - iter->k = u; - k.k = &iter->k; - } + if (!k.k) + return k; + + if ((iter->flags & BTREE_ITER_all_snapshots) && + !bpos_eq(pos, k.k->p)) + return bkey_s_c_null; + + iter->k = u; + k.k = &iter->k; + btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path); return k; } @@ -2201,8 +2263,6 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp bch2_btree_iter_verify(iter); while (1) { - struct btree_path_level *l; - iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, iter->flags & BTREE_ITER_intent, btree_iter_ip_allocated(iter)); @@ -2212,17 +2272,17 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp /* ensure that iter->k is consistent with iter->pos: */ bch2_btree_iter_set_pos(iter, iter->pos); k = bkey_s_c_err(ret); - goto out; + break; } struct btree_path *path = btree_iter_path(trans, iter); - l = path_l(path); + struct btree_path_level *l = path_l(path); if (unlikely(!l->b)) { /* No btree nodes at requested level: */ bch2_btree_iter_set_pos(iter, SPOS_MAX); k = bkey_s_c_null; - goto out; + break; } btree_path_set_should_be_locked(trans, path); @@ -2233,15 +2293,14 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp k.k && (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { k = k2; - ret = bkey_err(k); - if (ret) { + if (bkey_err(k)) { bch2_btree_iter_set_pos(iter, iter->pos); - goto out; + break; } } if (unlikely(iter->flags & BTREE_ITER_with_journal)) - k = btree_trans_peek_journal(trans, iter, k); + btree_trans_peek_journal(trans, iter, &k); if (unlikely((iter->flags & BTREE_ITER_with_updates) && trans->nr_updates)) @@ -2270,42 +2329,46 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp /* End of btree: */ bch2_btree_iter_set_pos(iter, SPOS_MAX); k = bkey_s_c_null; - goto out; + break; } } -out: - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(iter); return k; } /** - * bch2_btree_iter_peek_upto() - returns first key greater than or equal to + * bch2_btree_iter_peek_max() - returns first key greater than or equal to * iterator's current position * @iter: iterator to peek from * @end: search limit: returns keys less than or equal to @end * * Returns: key if found, or an error extractable with bkey_err(). */ -struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end) +struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos end) { struct btree_trans *trans = iter->trans; struct bpos search_key = btree_iter_search_key(iter); struct bkey_s_c k; - struct bpos iter_pos; + struct bpos iter_pos = iter->pos; int ret; - bch2_trans_verify_not_unlocked(trans); + bch2_trans_verify_not_unlocked_or_in_restart(trans); + bch2_btree_iter_verify_entry_exit(iter); EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX)); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + if (iter->update_path) { bch2_path_put_nokeep(trans, iter->update_path, iter->flags & BTREE_ITER_intent); iter->update_path = 0; } - bch2_btree_iter_verify_entry_exit(iter); - while (1) { k = __bch2_btree_iter_peek(iter, search_key); if (unlikely(!k.k)) @@ -2313,75 +2376,75 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e if (unlikely(bkey_err(k))) goto out_no_locked; - /* - * We need to check against @end before FILTER_SNAPSHOTS because - * if we get to a different inode that requested we might be - * seeing keys for a different snapshot tree that will all be - * filtered out. - * - * But we can't do the full check here, because bkey_start_pos() - * isn't monotonically increasing before FILTER_SNAPSHOTS, and - * that's what we check against in extents mode: - */ - if (unlikely(!(iter->flags & BTREE_ITER_is_extents) - ? bkey_gt(k.k->p, end) - : k.k->p.inode > end.inode)) - goto end; + if (iter->flags & BTREE_ITER_filter_snapshots) { + /* + * We need to check against @end before FILTER_SNAPSHOTS because + * if we get to a different inode that requested we might be + * seeing keys for a different snapshot tree that will all be + * filtered out. + * + * But we can't do the full check here, because bkey_start_pos() + * isn't monotonically increasing before FILTER_SNAPSHOTS, and + * that's what we check against in extents mode: + */ + if (unlikely(!(iter->flags & BTREE_ITER_is_extents) + ? bkey_gt(k.k->p, end) + : k.k->p.inode > end.inode)) + goto end; + + if (iter->update_path && + !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) { + bch2_path_put_nokeep(trans, iter->update_path, + iter->flags & BTREE_ITER_intent); + iter->update_path = 0; + } - if (iter->update_path && - !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) { - bch2_path_put_nokeep(trans, iter->update_path, - iter->flags & BTREE_ITER_intent); - iter->update_path = 0; - } + if ((iter->flags & BTREE_ITER_intent) && + !(iter->flags & BTREE_ITER_is_extents) && + !iter->update_path) { + struct bpos pos = k.k->p; - if ((iter->flags & BTREE_ITER_filter_snapshots) && - (iter->flags & BTREE_ITER_intent) && - !(iter->flags & BTREE_ITER_is_extents) && - !iter->update_path) { - struct bpos pos = k.k->p; + if (pos.snapshot < iter->snapshot) { + search_key = bpos_successor(k.k->p); + continue; + } - if (pos.snapshot < iter->snapshot) { - search_key = bpos_successor(k.k->p); - continue; - } + pos.snapshot = iter->snapshot; - pos.snapshot = iter->snapshot; + /* + * advance, same as on exit for iter->path, but only up + * to snapshot + */ + __btree_path_get(trans, trans->paths + iter->path, iter->flags & BTREE_ITER_intent); + iter->update_path = iter->path; + + iter->update_path = bch2_btree_path_set_pos(trans, + iter->update_path, pos, + iter->flags & BTREE_ITER_intent, + _THIS_IP_); + ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + } /* - * advance, same as on exit for iter->path, but only up - * to snapshot + * We can never have a key in a leaf node at POS_MAX, so + * we don't have to check these successor() calls: */ - __btree_path_get(trans, trans->paths + iter->path, iter->flags & BTREE_ITER_intent); - iter->update_path = iter->path; - - iter->update_path = bch2_btree_path_set_pos(trans, - iter->update_path, pos, - iter->flags & BTREE_ITER_intent, - _THIS_IP_); - ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); - if (unlikely(ret)) { - k = bkey_s_c_err(ret); - goto out_no_locked; + if (!bch2_snapshot_is_ancestor(trans->c, + iter->snapshot, + k.k->p.snapshot)) { + search_key = bpos_successor(k.k->p); + continue; } - } - - /* - * We can never have a key in a leaf node at POS_MAX, so - * we don't have to check these successor() calls: - */ - if ((iter->flags & BTREE_ITER_filter_snapshots) && - !bch2_snapshot_is_ancestor(trans->c, - iter->snapshot, - k.k->p.snapshot)) { - search_key = bpos_successor(k.k->p); - continue; - } - if (bkey_whiteout(k.k) && - !(iter->flags & BTREE_ITER_all_snapshots)) { - search_key = bkey_successor(iter, k.k->p); - continue; + if (bkey_whiteout(k.k) && + !(iter->flags & BTREE_ITER_key_cache_fill)) { + search_key = bkey_successor(iter, k.k->p); + continue; + } } /* @@ -2451,127 +2514,210 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) return bch2_btree_iter_peek(iter); } -/** - * bch2_btree_iter_peek_prev() - returns first key less than or equal to - * iterator's current position - * @iter: iterator to peek from - * - * Returns: key if found, or an error extractable with bkey_err(). - */ -struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) +static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, struct bpos search_key) { struct btree_trans *trans = iter->trans; - struct bpos search_key = iter->pos; - struct bkey_s_c k; - struct bkey saved_k; - const struct bch_val *saved_v; - btree_path_idx_t saved_path = 0; - int ret; - - bch2_trans_verify_not_unlocked(trans); - EBUG_ON(btree_iter_path(trans, iter)->cached || - btree_iter_path(trans, iter)->level); - - if (iter->flags & BTREE_ITER_with_journal) - return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported); + struct bkey_s_c k, k2; bch2_btree_iter_verify(iter); - bch2_btree_iter_verify_entry_exit(iter); - - if (iter->flags & BTREE_ITER_filter_snapshots) - search_key.snapshot = U32_MAX; while (1) { iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, - iter->flags & BTREE_ITER_intent, - btree_iter_ip_allocated(iter)); + iter->flags & BTREE_ITER_intent, + btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { /* ensure that iter->k is consistent with iter->pos: */ bch2_btree_iter_set_pos(iter, iter->pos); k = bkey_s_c_err(ret); - goto out_no_locked; + break; } struct btree_path *path = btree_iter_path(trans, iter); + struct btree_path_level *l = path_l(path); + + if (unlikely(!l->b)) { + /* No btree nodes at requested level: */ + bch2_btree_iter_set_pos(iter, SPOS_MAX); + k = bkey_s_c_null; + break; + } + + btree_path_set_should_be_locked(trans, path); + + k = btree_path_level_peek_all(trans->c, l, &iter->k); + if (!k.k || bpos_gt(k.k->p, search_key)) { + k = btree_path_level_prev(trans, path, l, &iter->k); - k = btree_path_level_peek(trans, path, &path->l[0], &iter->k); - if (!k.k || - ((iter->flags & BTREE_ITER_is_extents) - ? bpos_ge(bkey_start_pos(k.k), search_key) - : bpos_gt(k.k->p, search_key))) - k = btree_path_level_prev(trans, path, &path->l[0], &iter->k); + BUG_ON(k.k && bpos_gt(k.k->p, search_key)); + } + + if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && + k.k && + (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { + k = k2; + if (bkey_err(k2)) { + bch2_btree_iter_set_pos(iter, iter->pos); + break; + } + } + + if (unlikely(iter->flags & BTREE_ITER_with_journal)) + btree_trans_peek_prev_journal(trans, iter, &k); if (unlikely((iter->flags & BTREE_ITER_with_updates) && trans->nr_updates)) bch2_btree_trans_peek_prev_updates(trans, iter, &k); - if (likely(k.k)) { - if (iter->flags & BTREE_ITER_filter_snapshots) { - if (k.k->p.snapshot == iter->snapshot) - goto got_key; + if (likely(k.k && !bkey_deleted(k.k))) { + break; + } else if (k.k) { + search_key = bpos_predecessor(k.k->p); + } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) { + /* Advance to previous leaf node: */ + search_key = bpos_predecessor(path->l[0].b->data->min_key); + } else { + /* Start of btree: */ + bch2_btree_iter_set_pos(iter, POS_MIN); + k = bkey_s_c_null; + break; + } + } + bch2_btree_iter_verify(iter); + return k; +} + +/** + * bch2_btree_iter_peek_prev_min() - returns first key less than or equal to + * iterator's current position + * @iter: iterator to peek from + * @end: search limit: returns keys greater than or equal to @end + * + * Returns: key if found, or an error extractable with bkey_err(). + */ +struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bpos end) +{ + if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) && + !bkey_eq(iter->pos, POS_MAX)) { + /* + * bkey_start_pos(), for extents, is not monotonically + * increasing until after filtering for snapshots: + * + * Thus, for extents we need to search forward until we find a + * real visible extents - easiest to just use peek_slot() (which + * internally uses peek() for extents) + */ + struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); + if (bkey_err(k)) + return k; + + if (!bkey_deleted(k.k) && + (!(iter->flags & BTREE_ITER_is_extents) || + bkey_lt(bkey_start_pos(k.k), iter->pos))) + return k; + } + + struct btree_trans *trans = iter->trans; + struct bpos search_key = iter->pos; + struct bkey_s_c k; + btree_path_idx_t saved_path = 0; + + bch2_trans_verify_not_unlocked_or_in_restart(trans); + bch2_btree_iter_verify_entry_exit(iter); + EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); + + int ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + + while (1) { + k = __bch2_btree_iter_peek_prev(iter, search_key); + if (unlikely(!k.k)) + goto end; + if (unlikely(bkey_err(k))) + goto out_no_locked; + + if (iter->flags & BTREE_ITER_filter_snapshots) { + struct btree_path *s = saved_path ? trans->paths + saved_path : NULL; + if (s && bpos_lt(k.k->p, SPOS(s->pos.inode, s->pos.offset, iter->snapshot))) { + /* + * If we have a saved candidate, and we're past + * the last possible snapshot overwrite, return + * it: + */ + bch2_path_put_nokeep(trans, iter->path, + iter->flags & BTREE_ITER_intent); + iter->path = saved_path; + saved_path = 0; + k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k); + break; + } + + /* + * We need to check against @end before FILTER_SNAPSHOTS because + * if we get to a different inode that requested we might be + * seeing keys for a different snapshot tree that will all be + * filtered out. + */ + if (unlikely(bkey_lt(k.k->p, end))) + goto end; + + if (!bch2_snapshot_is_ancestor(trans->c, iter->snapshot, k.k->p.snapshot)) { + search_key = bpos_predecessor(k.k->p); + continue; + } + + if (k.k->p.snapshot != iter->snapshot) { /* - * If we have a saved candidate, and we're no - * longer at the same _key_ (not pos), return - * that candidate + * Have a key visible in iter->snapshot, but + * might have overwrites: - save it and keep + * searching. Unless it's a whiteout - then drop + * our previous saved candidate: */ - if (saved_path && !bkey_eq(k.k->p, saved_k.p)) { - bch2_path_put_nokeep(trans, iter->path, - iter->flags & BTREE_ITER_intent); - iter->path = saved_path; + if (saved_path) { + bch2_path_put_nokeep(trans, saved_path, + iter->flags & BTREE_ITER_intent); saved_path = 0; - iter->k = saved_k; - k.v = saved_v; - goto got_key; } - if (bch2_snapshot_is_ancestor(trans->c, - iter->snapshot, - k.k->p.snapshot)) { - if (saved_path) - bch2_path_put_nokeep(trans, saved_path, - iter->flags & BTREE_ITER_intent); + if (!bkey_whiteout(k.k)) { saved_path = btree_path_clone(trans, iter->path, iter->flags & BTREE_ITER_intent, _THIS_IP_); - path = btree_iter_path(trans, iter); - trace_btree_path_save_pos(trans, path, trans->paths + saved_path); - saved_k = *k.k; - saved_v = k.v; + trace_btree_path_save_pos(trans, + trans->paths + iter->path, + trans->paths + saved_path); } search_key = bpos_predecessor(k.k->p); continue; } -got_key: - if (bkey_whiteout(k.k) && - !(iter->flags & BTREE_ITER_all_snapshots)) { + + if (bkey_whiteout(k.k)) { search_key = bkey_predecessor(iter, k.k->p); - if (iter->flags & BTREE_ITER_filter_snapshots) - search_key.snapshot = U32_MAX; + search_key.snapshot = U32_MAX; continue; } - - btree_path_set_should_be_locked(trans, path); - break; - } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) { - /* Advance to previous leaf node: */ - search_key = bpos_predecessor(path->l[0].b->data->min_key); - } else { - /* Start of btree: */ - bch2_btree_iter_set_pos(iter, POS_MIN); - k = bkey_s_c_null; - goto out_no_locked; } - } - EBUG_ON(bkey_gt(bkey_start_pos(k.k), iter->pos)); + EBUG_ON(iter->flags & BTREE_ITER_all_snapshots ? bpos_gt(k.k->p, iter->pos) : + iter->flags & BTREE_ITER_is_extents ? bkey_ge(bkey_start_pos(k.k), iter->pos) : + bkey_gt(k.k->p, iter->pos)); + + if (unlikely(iter->flags & BTREE_ITER_all_snapshots ? bpos_lt(k.k->p, end) : + iter->flags & BTREE_ITER_is_extents ? bkey_le(k.k->p, end) : + bkey_lt(k.k->p, end))) + goto end; + + break; + } /* Extents can straddle iter->pos: */ - if (bkey_lt(k.k->p, iter->pos)) - iter->pos = k.k->p; + iter->pos = bpos_min(iter->pos, k.k->p);; if (iter->flags & BTREE_ITER_filter_snapshots) iter->pos.snapshot = iter->snapshot; @@ -2581,8 +2727,11 @@ out_no_locked: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); - return k; +end: + bch2_btree_iter_set_pos(iter, end); + k = bkey_s_c_null; + goto out_no_locked; } /** @@ -2607,11 +2756,17 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) struct bkey_s_c k; int ret; - bch2_trans_verify_not_unlocked(trans); + bch2_trans_verify_not_unlocked_or_in_restart(trans); bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache)); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + /* extents can't span inode numbers: */ if ((iter->flags & BTREE_ITER_is_extents) && unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { @@ -2632,6 +2787,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) goto out_no_locked; } + struct btree_path *path = btree_iter_path(trans, iter); + if (unlikely(!btree_path_node(path, path->level))) + return bkey_s_c_null; + if ((iter->flags & BTREE_ITER_cached) || !(iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots))) { k = bkey_s_c_null; @@ -2658,6 +2817,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k); if (unlikely(!k.k)) goto out_no_locked; + + if (unlikely(k.k->type == KEY_TYPE_whiteout && + (iter->flags & BTREE_ITER_filter_snapshots) && + !(iter->flags & BTREE_ITER_key_cache_fill))) + iter->k.type = KEY_TYPE_deleted; } else { struct bpos next; struct bpos end = iter->pos; @@ -2671,7 +2835,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) struct btree_iter iter2; bch2_trans_copy_iter(&iter2, iter); - k = bch2_btree_iter_peek_upto(&iter2, end); + k = bch2_btree_iter_peek_max(&iter2, end); if (k.k && !bkey_err(k)) { swap(iter->key_cache_path, iter2.key_cache_path); @@ -2682,7 +2846,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } else { struct bpos pos = iter->pos; - k = bch2_btree_iter_peek_upto(iter, end); + k = bch2_btree_iter_peek_max(iter, end); if (unlikely(bkey_err(k))) bch2_btree_iter_set_pos(iter, pos); else @@ -2902,7 +3066,7 @@ void bch2_trans_iter_init_outlined(struct btree_trans *trans, unsigned flags) { bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0, - bch2_btree_iter_flags(trans, btree_id, flags), + bch2_btree_iter_flags(trans, btree_id, 0, flags), _RET_IP_); } @@ -2918,8 +3082,11 @@ void bch2_trans_node_iter_init(struct btree_trans *trans, flags |= BTREE_ITER_snapshot_field; flags |= BTREE_ITER_all_snapshots; + if (!depth && btree_id_cached(trans->c, btree_id)) + flags |= BTREE_ITER_with_key_cache; + bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth, - __bch2_btree_iter_flags(trans, btree_id, flags), + bch2_btree_iter_flags(trans, btree_id, depth, flags), _RET_IP_); iter->min_depth = depth; @@ -2957,6 +3124,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (ret) + return ERR_PTR(ret); + struct btree_transaction_stats *s = btree_trans_stats(trans); s->max_mem = max(s->max_mem, new_bytes); @@ -3014,7 +3185,8 @@ out_new_mem: if (old_bytes) { trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); - return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced)); + return ERR_PTR(btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); } out_change_top: p = trans->mem + trans->mem_top; @@ -3122,14 +3294,22 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->last_begin_ip = _RET_IP_; - trans_set_locked(trans); +#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS + if (trans->restarted) { + trans->restart_count_this_trans++; + } else { + trans->restart_count_this_trans = 0; + } +#endif + + trans_set_locked(trans, false); if (trans->restarted) { bch2_btree_path_traverse_all(trans); trans->notrace_relock_fail = false; } - bch2_trans_verify_not_unlocked(trans); + bch2_trans_verify_not_unlocked_or_in_restart(trans); return trans->restart_count; } @@ -3228,7 +3408,7 @@ got_trans: trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_lock_time = jiffies; trans->srcu_held = true; - trans_set_locked(trans); + trans_set_locked(trans, false); closure_init_stack_release(&trans->ref); return trans; @@ -3262,6 +3442,9 @@ void bch2_trans_put(struct btree_trans *trans) { struct bch_fs *c = trans->c; + if (trans->restarted) + bch2_trans_in_restart_error(trans); + bch2_trans_unlock(trans); trans_for_each_update(trans, i) @@ -3285,6 +3468,10 @@ void bch2_trans_put(struct btree_trans *trans) closure_return_sync(&trans->ref); trans->locking_wait.task = NULL; +#ifdef CONFIG_BCACHEFS_DEBUG + darray_exit(&trans->last_restarted_trace); +#endif + unsigned long *paths_allocated = trans->paths_allocated; trans->paths_allocated = NULL; trans->paths = NULL; @@ -3338,8 +3525,9 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, pid = owner ? owner->pid : 0; rcu_read_unlock(); - prt_printf(out, "\t%px %c l=%u %s:", b, b->cached ? 'c' : 'b', - b->level, bch2_btree_id_str(b->btree_id)); + prt_printf(out, "\t%px %c ", b, b->cached ? 'c' : 'b'); + bch2_btree_id_to_text(out, b->btree_id); + prt_printf(out, " l=%u:", b->level); bch2_bpos_to_text(out, btree_node_pos(b)); prt_printf(out, "\t locks %u:%u:%u held by pid %u", @@ -3378,11 +3566,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) if (!path->nodes_locked) continue; - prt_printf(out, " path %u %c l=%u %s:", - idx, - path->cached ? 'c' : 'b', - path->level, - bch2_btree_id_str(path->btree_id)); + prt_printf(out, " path %u %c ", + idx, + path->cached ? 'c' : 'b'); + bch2_btree_id_to_text(out, path->btree_id); + prt_printf(out, " l=%u:", path->level); bch2_bpos_to_text(out, path->pos); prt_newline(out); @@ -3488,7 +3676,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c) #ifdef CONFIG_LOCKDEP fs_reclaim_acquire(GFP_KERNEL); struct btree_trans *trans = bch2_trans_get(c); - trans_set_locked(trans); + trans_set_locked(trans, false); bch2_trans_put(trans); fs_reclaim_release(GFP_KERNEL); #endif |