diff options
Diffstat (limited to 'fs/bcachefs/inode.c')
-rw-r--r-- | fs/bcachefs/inode.c | 315 |
1 files changed, 210 insertions, 105 deletions
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 039cb7a22244..845efd429d13 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -14,6 +14,8 @@ #include "extent_update.h" #include "fs.h" #include "inode.h" +#include "namei.h" +#include "opts.h" #include "str_hash.h" #include "snapshot.h" #include "subvolume.h" @@ -36,6 +38,7 @@ static const char * const bch2_inode_flag_strs[] = { #undef x static int delete_ancestor_snapshot_inodes(struct btree_trans *, struct bpos); +static int may_delete_deleted_inum(struct btree_trans *, subvol_inum); static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; @@ -47,10 +50,10 @@ static int inode_decode_field(const u8 *in, const u8 *end, u8 *p; if (in >= end) - return -1; + return -BCH_ERR_inode_unpack_error; if (!*in) - return -1; + return -BCH_ERR_inode_unpack_error; /* * position of highest set bit indicates number of bytes: @@ -60,7 +63,7 @@ static int inode_decode_field(const u8 *in, const u8 *end, bytes = byte_table[shift - 1]; if (in + bytes > end) - return -1; + return -BCH_ERR_inode_unpack_error; p = (u8 *) be + 16 - bytes; memcpy(p, in, bytes); @@ -176,7 +179,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, return ret; \ \ if (field_bits > sizeof(unpacked->_name) * 8) \ - return -1; \ + return -BCH_ERR_inode_unpack_error; \ \ unpacked->_name = field[1]; \ in += ret; @@ -217,7 +220,7 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked, \ unpacked->_name = v[0]; \ if (v[1] || v[0] != unpacked->_name) \ - return -1; \ + return -BCH_ERR_inode_unpack_error; \ fieldnr++; BCH_INODE_FIELDS_v2() @@ -268,7 +271,7 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k, \ unpacked->_name = v[0]; \ if (v[1] || v[0] != unpacked->_name) \ - return -1; \ + return -BCH_ERR_inode_unpack_error; \ fieldnr++; BCH_INODE_FIELDS_v3() @@ -428,7 +431,7 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) } static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { struct bch_inode_unpacked unpacked; int ret = 0; @@ -468,7 +471,7 @@ fsck_err: } int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); int ret = 0; @@ -478,13 +481,13 @@ int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, "invalid str hash type (%llu >= %u)", INODEv1_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_validate(c, k, flags); + ret = __bch2_inode_validate(c, k, from); fsck_err: return ret; } int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); int ret = 0; @@ -494,13 +497,13 @@ int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, "invalid str hash type (%llu >= %u)", INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_validate(c, k, flags); + ret = __bch2_inode_validate(c, k, from); fsck_err: return ret; } int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); int ret = 0; @@ -518,7 +521,7 @@ int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, "invalid str hash type (%llu >= %u)", INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_validate(c, k, flags); + ret = __bch2_inode_validate(c, k, from); fsck_err: return ret; } @@ -617,7 +620,7 @@ bch2_bkey_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter struct bkey_s_c k; int ret = 0; - for_each_btree_key_upto_norestart(trans, *iter, btree, + for_each_btree_key_max_norestart(trans, *iter, btree, bpos_successor(pos), SPOS(pos.inode, pos.offset, U32_MAX), flags|BTREE_ITER_all_snapshots, k, ret) @@ -652,7 +655,7 @@ int __bch2_inode_has_child_snapshots(struct btree_trans *trans, struct bpos pos) struct bkey_s_c k; int ret = 0; - for_each_btree_key_upto_norestart(trans, iter, + for_each_btree_key_max_norestart(trans, iter, BTREE_ID_inodes, POS(0, pos.offset), bpos_predecessor(pos), BTREE_ITER_all_snapshots| BTREE_ITER_with_updates, k, ret) @@ -730,10 +733,9 @@ int bch2_trigger_inode(struct btree_trans *trans, bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq); } - s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k); - if ((flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) && nr) { - struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_nr_inodes }; - int ret = bch2_disk_accounting_mod(trans, &acc, &nr, 1, flags & BTREE_TRIGGER_gc); + s64 nr[1] = { bkey_is_inode(new.k) - bkey_is_inode(old.k) }; + if ((flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) && nr[0]) { + int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, nr, nr_inodes); if (ret) return ret; } @@ -779,7 +781,7 @@ int bch2_trigger_inode(struct btree_trans *trans, } int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { int ret = 0; @@ -798,6 +800,28 @@ void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation)); } +int bch2_inode_alloc_cursor_validate(struct bch_fs *c, struct bkey_s_c k, + struct bkey_validate_context from) +{ + int ret = 0; + + bkey_fsck_err_on(k.k->p.inode != LOGGED_OPS_INUM_inode_cursors, + c, inode_alloc_cursor_inode_bad, + "k.p.inode bad"); +fsck_err: + return ret; +} + +void bch2_inode_alloc_cursor_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ + struct bkey_s_c_inode_alloc_cursor i = bkey_s_c_to_inode_alloc_cursor(k); + + prt_printf(out, "idx %llu generation %llu", + le64_to_cpu(i.v->idx), + le64_to_cpu(i.v->gen)); +} + void bch2_inode_init_early(struct bch_fs *c, struct bch_inode_unpacked *inode_u) { @@ -845,17 +869,54 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, uid, gid, mode, rdev, parent); } -static inline u32 bkey_generation(struct bkey_s_c k) +static struct bkey_i_inode_alloc_cursor * +bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max) { - switch (k.k->type) { - case KEY_TYPE_inode: - case KEY_TYPE_inode_v2: - BUG(); - case KEY_TYPE_inode_generation: - return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); - default: - return 0; + struct bch_fs *c = trans->c; + + u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1; + + cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits); + + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, + BTREE_ID_logged_ops, + POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx), + BTREE_ITER_cached); + int ret = bkey_err(k); + if (ret) + return ERR_PTR(ret); + + struct bkey_i_inode_alloc_cursor *cursor = + k.k->type == KEY_TYPE_inode_alloc_cursor + ? bch2_bkey_make_mut_typed(trans, &iter, &k, 0, inode_alloc_cursor) + : bch2_bkey_alloc(trans, &iter, 0, inode_alloc_cursor); + ret = PTR_ERR_OR_ZERO(cursor); + if (ret) + goto err; + + if (c->opts.inodes_32bit) { + *min = BLOCKDEV_INODE_MAX; + *max = INT_MAX; + } else { + cursor->v.bits = c->opts.shard_inode_numbers_bits; + + unsigned bits = 63 - c->opts.shard_inode_numbers_bits; + + *min = max(cpu << bits, (u64) INT_MAX + 1); + *max = (cpu << bits) | ~(ULLONG_MAX << bits); + } + + if (le64_to_cpu(cursor->v.idx) < *min) + cursor->v.idx = cpu_to_le64(*min); + + if (le64_to_cpu(cursor->v.idx) >= *max) { + cursor->v.idx = cpu_to_le64(*min); + le32_add_cpu(&cursor->v.gen, 1); } +err: + bch2_trans_iter_exit(trans, &iter); + return ret ? ERR_PTR(ret) : cursor; } /* @@ -866,37 +927,22 @@ int bch2_inode_create(struct btree_trans *trans, struct bch_inode_unpacked *inode_u, u32 snapshot, u64 cpu) { - struct bch_fs *c = trans->c; - struct bkey_s_c k; - u64 min, max, start, pos, *hint; - int ret = 0; - unsigned bits = (c->opts.inodes_32bit ? 31 : 63); - - if (c->opts.shard_inode_numbers) { - bits -= c->inode_shard_bits; - - min = (cpu << bits); - max = (cpu << bits) | ~(ULLONG_MAX << bits); - - min = max_t(u64, min, BLOCKDEV_INODE_MAX); - hint = c->unused_inode_hints + cpu; - } else { - min = BLOCKDEV_INODE_MAX; - max = ~(ULLONG_MAX << bits); - hint = c->unused_inode_hints; - } - - start = READ_ONCE(*hint); + u64 min, max; + struct bkey_i_inode_alloc_cursor *cursor = + bch2_inode_alloc_cursor_get(trans, cpu, &min, &max); + int ret = PTR_ERR_OR_ZERO(cursor); + if (ret) + return ret; - if (start >= max || start < min) - start = min; + u64 start = le64_to_cpu(cursor->v.idx); + u64 pos = start; - pos = start; bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos), BTREE_ITER_all_snapshots| BTREE_ITER_intent); + struct bkey_s_c k; again: - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(trans, iter)).k && !(ret = bkey_err(k)) && bkey_lt(k.k->p, POS(0, max))) { if (pos < iter->pos.offset) @@ -907,7 +953,7 @@ again: * we've found just one: */ pos = iter->pos.offset + 1; - bch2_btree_iter_set_pos(iter, POS(0, pos)); + bch2_btree_iter_set_pos(trans, iter, POS(0, pos)); } if (!ret && pos < max) @@ -923,20 +969,21 @@ again: /* Retry from start */ pos = start = min; - bch2_btree_iter_set_pos(iter, POS(0, pos)); + bch2_btree_iter_set_pos(trans, iter, POS(0, pos)); + le32_add_cpu(&cursor->v.gen, 1); goto again; found_slot: - bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot)); - k = bch2_btree_iter_peek_slot(iter); + bch2_btree_iter_set_pos(trans, iter, SPOS(0, pos, snapshot)); + k = bch2_btree_iter_peek_slot(trans, iter); ret = bkey_err(k); if (ret) { bch2_trans_iter_exit(trans, iter); return ret; } - *hint = k.k->p.offset; inode_u->bi_inum = k.k->p.offset; - inode_u->bi_generation = bkey_generation(k); + inode_u->bi_generation = le64_to_cpu(cursor->v.gen); + cursor->v.idx = cpu_to_le64(k.k->p.offset + 1); return 0; } @@ -964,9 +1011,9 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, if (ret) goto err; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - k = bch2_btree_iter_peek_upto(&iter, end); + k = bch2_btree_iter_peek_max(trans, &iter, end); ret = bkey_err(k); if (ret) goto err; @@ -997,26 +1044,28 @@ err: int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; - struct bkey_i_inode_generation delete; - struct bch_inode_unpacked inode_u; + struct btree_iter iter = {}; struct bkey_s_c k; u32 snapshot; int ret; + ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum)); + if (ret) + goto err2; + /* * If this was a directory, there shouldn't be any real dirents left - * but there could be whiteouts (from hash collisions) that we should * delete: * - * XXX: the dirent could ideally would delete whiteouts when they're no + * XXX: the dirent code ideally would delete whiteouts when they're no * longer needed */ ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?: bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?: bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents); if (ret) - goto err; + goto err2; retry: bch2_trans_begin(trans); @@ -1035,17 +1084,11 @@ retry: bch2_fs_inconsistent(c, "inode %llu:%u not found when deleting", inum.inum, snapshot); - ret = -EIO; + ret = -BCH_ERR_ENOENT_inode; goto err; } - bch2_inode_unpack(k, &inode_u); - - bkey_inode_generation_init(&delete.k_i); - delete.k.p = iter.pos; - delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); - - ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: + ret = bch2_btree_delete_at(trans, &iter, 0) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); err: @@ -1141,12 +1184,18 @@ struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode) void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, struct bch_inode_unpacked *inode) { -#define x(_name, _bits) opts->_name = inode_opt_get(c, inode, _name); +#define x(_name, _bits) \ + if ((inode)->bi_##_name) { \ + opts->_name = inode->bi_##_name - 1; \ + opts->_name##_from_inode = true; \ + } else { \ + opts->_name = c->opts._name; \ + opts->_name##_from_inode = false; \ + } BCH_INODE_OPTS() #undef x - if (opts->nocow) - opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0; + bch2_io_opts_fixups(opts); } int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts) @@ -1161,10 +1210,45 @@ int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_i return 0; } +int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, + struct bch_inode_unpacked *bi, unsigned v) +{ + struct bch_fs *c = trans->c; + +#ifdef CONFIG_UNICODE + int ret = 0; + /* Not supported on individual files. */ + if (!S_ISDIR(bi->bi_mode)) + return -EOPNOTSUPP; + + /* + * Make sure the dir is empty, as otherwise we'd need to + * rehash everything and update the dirent keys. + */ + ret = bch2_empty_dir_trans(trans, inum); + if (ret < 0) + return ret; + + ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); + if (ret) + return ret; + + bch2_check_set_feature(c, BCH_FEATURE_casefolding); + + bi->bi_casefold = v + 1; + bi->bi_fields_set |= BIT(Inode_opt_casefold); + + return 0; +#else + bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE"); + return -EOPNOTSUPP; +#endif +} + static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { struct bch_fs *c = trans->c; - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bkey_i_inode_generation delete; struct bch_inode_unpacked inode_u; struct bkey_s_c k; @@ -1199,7 +1283,7 @@ retry: bch2_fs_inconsistent(c, "inode %llu:%u not found when deleting", inum, snapshot); - ret = -EIO; + ret = -BCH_ERR_ENOENT_inode; goto err; } @@ -1263,10 +1347,8 @@ int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)); } -static int may_delete_deleted_inode(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos pos, - bool *need_another_pass) +static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, + bool from_deleted_inodes) { struct bch_fs *c = trans->c; struct btree_iter inode_iter; @@ -1281,11 +1363,13 @@ static int may_delete_deleted_inode(struct btree_trans *trans, return ret; ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode; - if (fsck_err_on(!bkey_is_inode(k.k), + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_missing, "nonexistent inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) + goto out; ret = bch2_inode_unpack(k, &inode); if (ret) @@ -1293,7 +1377,8 @@ static int may_delete_deleted_inode(struct btree_trans *trans, if (S_ISDIR(inode.bi_mode)) { ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot); - if (fsck_err_on(bch2_err_matches(ret, ENOTEMPTY), + if (fsck_err_on(from_deleted_inodes && + bch2_err_matches(ret, ENOTEMPTY), trans, deleted_inode_is_dir, "non empty directory %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) @@ -1302,17 +1387,25 @@ static int may_delete_deleted_inode(struct btree_trans *trans, goto out; } - if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), + ret = inode.bi_flags & BCH_INODE_unlinked ? 0 : -BCH_ERR_inode_not_unlinked; + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_not_unlinked, "non-deleted inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) + goto out; + + ret = !(inode.bi_flags & BCH_INODE_has_child_snapshot) + ? 0 : -BCH_ERR_inode_has_child_snapshot; - if (fsck_err_on(inode.bi_flags & BCH_INODE_has_child_snapshot, + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_has_child_snapshots, "inode with child snapshots %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) + goto out; ret = bch2_inode_has_child_snapshots(trans, k.k->p); if (ret < 0) @@ -1329,19 +1422,28 @@ static int may_delete_deleted_inode(struct btree_trans *trans, if (ret) goto out; } + + if (!from_deleted_inodes) { + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: + -BCH_ERR_inode_has_child_snapshot; + goto out; + } + goto delete; } - if (test_bit(BCH_FS_clean_recovery, &c->flags) && - !fsck_err(trans, deleted_inode_but_clean, - "filesystem marked as clean but have deleted inode %llu:%u", - pos.offset, pos.snapshot)) { - ret = 0; - goto out; - } + if (from_deleted_inodes) { + if (test_bit(BCH_FS_clean_recovery, &c->flags) && + !fsck_err(trans, deleted_inode_but_clean, + "filesystem marked as clean but have deleted inode %llu:%u", + pos.offset, pos.snapshot)) { + ret = 0; + goto out; + } - ret = 1; + ret = 1; + } out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); @@ -1352,12 +1454,19 @@ delete: goto out; } +static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum) +{ + u32 snapshot; + + return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?: + may_delete_deleted_inode(trans, SPOS(0, inum.inum, snapshot), false); +} + int bch2_delete_dead_inodes(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); - bool need_another_pass; int ret; -again: + /* * if we ran check_inodes() unlinked inodes will have already been * cleaned up but the write buffer will be out of sync; therefore we @@ -1367,8 +1476,6 @@ again: if (ret) goto err; - need_another_pass = false; - /* * Weird transaction restart handling here because on successful delete, * bch2_inode_rm_snapshot() will return a nested transaction restart, @@ -1378,9 +1485,10 @@ again: ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); + ret = may_delete_deleted_inode(trans, k.k->p, true); if (ret > 0) { - bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); + bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", + k.k->p.offset, k.k->p.snapshot); ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); /* @@ -1398,9 +1506,6 @@ again: ret; })); - - if (!ret && need_another_pass) - goto again; err: bch2_trans_put(trans); return ret; |