diff options
Diffstat (limited to 'fs')
75 files changed, 1219 insertions, 852 deletions
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index c08c2c7d6fbb..fddc7be58022 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -33,6 +33,18 @@ config BCACHEFS_QUOTA depends on BCACHEFS_FS select QUOTACTL +config BCACHEFS_ERASURE_CODING + bool "bcachefs erasure coding (RAID5/6) support (EXPERIMENTAL)" + depends on BCACHEFS_FS + select QUOTACTL + help + This enables the "erasure_code" filesysystem and inode option, which + organizes data into reed-solomon stripes instead of ordinary + replication. + + WARNING: this feature is still undergoing on disk format changes, and + should only be enabled for testing purposes. + config BCACHEFS_POSIX_ACL bool "bcachefs POSIX ACL support" depends on BCACHEFS_FS diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index b85c7765272f..1ba0eeb7552a 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1297,6 +1297,30 @@ out: return wp; } +static noinline void +deallocate_extra_replicas(struct bch_fs *c, + struct open_buckets *ptrs, + struct open_buckets *ptrs_no_use, + unsigned extra_replicas) +{ + struct open_buckets ptrs2 = { 0 }; + struct open_bucket *ob; + unsigned i; + + open_bucket_for_each(c, ptrs, ob, i) { + unsigned d = bch_dev_bkey_exists(c, ob->dev)->mi.durability; + + if (d && d <= extra_replicas) { + extra_replicas -= d; + ob_push(c, ptrs_no_use, ob); + } else { + ob_push(c, &ptrs2, ob); + } + } + + *ptrs = ptrs2; +} + /* * Get us an open_bucket we can allocate from, return with it locked: */ @@ -1321,6 +1345,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, int ret; int i; + if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING)) + erasure_code = false; + BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS); BUG_ON(!nr_replicas || !nr_replicas_required); @@ -1382,6 +1409,9 @@ alloc_done: if (ret) goto err; + if (nr_effective > nr_replicas) + deallocate_extra_replicas(c, &ptrs, &wp->ptrs, nr_effective - nr_replicas); + /* Free buckets we didn't use: */ open_bucket_for_each(c, &wp->ptrs, ob, i) open_bucket_free_unused(c, ob); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 403aa3389fcc..dfa22f9d9a1d 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -638,6 +638,8 @@ struct journal_keys { size_t gap; size_t nr; size_t size; + atomic_t ref; + bool initial_ref_held; }; struct btree_trans_buf { @@ -929,7 +931,7 @@ struct bch_fs { mempool_t compression_bounce[2]; mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR]; mempool_t decompress_workspace; - ZSTD_parameters zstd_params; + size_t zstd_workspace_size; struct crypto_shash *sha256; struct crypto_sync_skcipher *chacha20; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 0a750953ff92..1ab1f08d763b 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -151,7 +151,11 @@ struct bpos { #else #error edit for your odd byteorder. #endif -} __packed __aligned(4); +} __packed +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +__aligned(4) +#endif +; #define KEY_INODE_MAX ((__u64)~0ULL) #define KEY_OFFSET_MAX ((__u64)~0ULL) @@ -1528,7 +1532,7 @@ struct bch_sb_field_disk_groups { x(move_extent_write, 36) \ x(move_extent_finish, 37) \ x(move_extent_fail, 38) \ - x(move_extent_alloc_mem_fail, 39) \ + x(move_extent_start_fail, 39) \ x(copygc, 40) \ x(copygc_wait, 41) \ x(gc_gens_end, 42) \ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 0b5d09c8475d..30ab78a24517 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1541,8 +1541,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) rcu_assign_pointer(ca->buckets_gc, buckets); } - for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ ca = bch_dev_bkey_exists(c, k.k->p.inode); g = gc_bucket(ca, k.k->p.offset); @@ -1561,8 +1561,9 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) g->stripe = a->stripe; g->stripe_redundancy = a->stripe_redundancy; } - } - bch2_trans_iter_exit(trans, &iter); + + 0; + })); err: bch2_trans_put(trans); if (ret) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 37d896edb06e..57c20390e10e 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1358,10 +1358,9 @@ static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void * return offset; } -static void btree_node_read_all_replicas_done(struct closure *cl) +static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) { - struct btree_node_read_all *ra = - container_of(cl, struct btree_node_read_all, cl); + closure_type(ra, struct btree_node_read_all, cl); struct bch_fs *c = ra->c; struct btree *b = ra->b; struct printbuf buf = PRINTBUF; @@ -1567,7 +1566,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool if (sync) { closure_sync(&ra->cl); - btree_node_read_all_replicas_done(&ra->cl); + btree_node_read_all_replicas_done(&ra->cl.work); } else { continue_at(&ra->cl, btree_node_read_all_replicas_done, c->io_complete_wq); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 6fa90bcd7016..8e0fe65f6101 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2981,7 +2981,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->fn_idx = fn_idx; trans->locking_wait.task = current; trans->journal_replay_not_finished = - !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags); + unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) && + atomic_inc_not_zero(&c->journal_keys.ref); closure_init_stack(&trans->ref); s = btree_trans_stats(trans); @@ -3098,6 +3099,9 @@ void bch2_trans_put(struct btree_trans *trans) kfree(trans->fs_usage_deltas); } + if (unlikely(trans->journal_replay_not_finished)) + bch2_journal_keys_put(c); + if (trans->mem_bytes == BTREE_TRANS_MEM_MAX) mempool_free(trans->mem, &c->btree_trans_mem_pool); else diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 58a981bcf3aa..ec52f50d249d 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -80,6 +80,8 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree struct journal_keys *keys = &c->journal_keys; unsigned iters = 0; struct journal_key *k; + + BUG_ON(*idx > keys->nr); search: if (!*idx) *idx = __bch2_journal_key_search(keys, btree_id, level, pos); @@ -189,10 +191,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, /* Since @keys was full, there was no gap: */ memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr); kvfree(keys->d); - *keys = new_keys; + keys->d = new_keys.d; + keys->nr = new_keys.nr; + keys->size = new_keys.size; /* And now the gap is at the end: */ - keys->gap = keys->nr; + keys->gap = keys->nr; } journal_iters_move_gap(c, keys->gap, idx); @@ -415,10 +419,16 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) cmp_int(l->journal_offset, r->journal_offset); } -void bch2_journal_keys_free(struct journal_keys *keys) +void bch2_journal_keys_put(struct bch_fs *c) { + struct journal_keys *keys = &c->journal_keys; struct journal_key *i; + BUG_ON(atomic_read(&keys->ref) <= 0); + + if (!atomic_dec_and_test(&keys->ref)) + return; + move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); keys->gap = keys->nr; @@ -429,6 +439,8 @@ void bch2_journal_keys_free(struct journal_keys *keys) kvfree(keys->d); keys->d = NULL; keys->nr = keys->gap = keys->size = 0; + + bch2_journal_entries_free(c); } static void __journal_keys_sort(struct journal_keys *keys) diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h index 5d64e7e22f26..8ca4c100b2e3 100644 --- a/fs/bcachefs/btree_journal_iter.h +++ b/fs/bcachefs/btree_journal_iter.h @@ -49,7 +49,15 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, struct bch_fs *, struct btree *); -void bch2_journal_keys_free(struct journal_keys *); +void bch2_journal_keys_put(struct bch_fs *); + +static inline void bch2_journal_keys_put_initial(struct bch_fs *c) +{ + if (c->journal_keys.initial_ref_held) + bch2_journal_keys_put(c); + c->journal_keys.initial_ref_held = false; +} + void bch2_journal_entries_free(struct bch_fs *); int bch2_journal_keys_sort(struct bch_fs *); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 76f27bc9fa24..6697417273aa 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -778,9 +778,9 @@ static void btree_interior_update_work(struct work_struct *work) } } -static void btree_update_set_nodes_written(struct closure *cl) +static CLOSURE_CALLBACK(btree_update_set_nodes_written) { - struct btree_update *as = container_of(cl, struct btree_update, cl); + closure_type(as, struct btree_update, cl); struct bch_fs *c = as->c; mutex_lock(&c->btree_interior_update_lock); @@ -1071,8 +1071,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, break; } + /* + * Always check for space for two keys, even if we won't have to + * split at prior level - it might have been a merge instead: + */ if (bch2_btree_node_insert_fits(c, path->l[update_level].b, - BKEY_BTREE_PTR_U64s_MAX * (1 + split))) + BKEY_BTREE_PTR_U64s_MAX * 2)) break; split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c); @@ -2266,6 +2270,10 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, BUG_ON(!btree_node_hashed(b)); + struct bch_extent_ptr *ptr; + bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr, + !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev)); + ret = bch2_btree_node_update_key(trans, &iter, b, new_key, commit_flags, skip_triggers); out: diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 58d8c6ffd955..5a91d3189fcf 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -854,8 +854,12 @@ static int __mark_pointer(struct btree_trans *trans, return ret; *dst_sectors += sectors; - *bucket_data_type = *dirty_sectors || *cached_sectors - ? ptr_data_type : 0; + + if (!*dirty_sectors && !*cached_sectors) + *bucket_data_type = 0; + else if (*bucket_data_type != BCH_DATA_stripe) + *bucket_data_type = ptr_data_type; + return 0; } @@ -2091,8 +2095,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) bucket_gens->first_bucket = ca->mi.first_bucket; bucket_gens->nbuckets = nbuckets; - bch2_copygc_stop(c); - if (resize) { down_write(&c->gc_lock); down_write(&ca->bucket_lock); diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index a8b148ec2a2b..51af8ea230ed 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -354,8 +354,7 @@ static int attempt_compress(struct bch_fs *c, */ unsigned level = min((compression.level * 3) / 2, zstd_max_clevel()); ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max); - ZSTD_CCtx *ctx = zstd_init_cctx(workspace, - zstd_cctx_workspace_bound(¶ms.cParams)); + ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size); /* * ZSTD requires that when we decompress we pass in the exact @@ -371,7 +370,7 @@ static int attempt_compress(struct bch_fs *c, size_t len = zstd_compress_cctx(ctx, dst + 4, dst_len - 4 - 7, src, src_len, - &c->zstd_params); + ¶ms); if (zstd_is_error(len)) return 0; @@ -572,6 +571,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) size_t decompress_workspace_size = 0; ZSTD_parameters params = zstd_get_params(zstd_max_clevel(), c->opts.encoded_extent_max); + + /* + * ZSTD is lying: if we allocate the size of the workspace it says it + * requires, it returns memory allocation errors + */ + c->zstd_workspace_size = zstd_cctx_workspace_bound(¶ms.cParams); + struct { unsigned feature; enum bch_compression_type type; @@ -585,13 +591,11 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), zlib_inflate_workspacesize(), }, { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd, - zstd_cctx_workspace_bound(¶ms.cParams), + c->zstd_workspace_size, zstd_dctx_workspace_bound() }, }, *i; bool have_compressed = false; - c->zstd_params = params; - for (i = compression_types; i < compression_types + ARRAY_SIZE(compression_types); i++) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 5ed66202c226..71aa5e59787b 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -356,7 +356,7 @@ void bch2_data_update_exit(struct data_update *update) bch2_bio_free_pages_pool(c, &update->op.wbio.bio); } -void bch2_update_unwritten_extent(struct btree_trans *trans, +static void bch2_update_unwritten_extent(struct btree_trans *trans, struct data_update *update) { struct bch_fs *c = update->op.c; @@ -436,7 +436,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans, } } +int bch2_extent_drop_ptrs(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + struct data_update_opts data_opts) +{ + struct bch_fs *c = trans->c; + struct bkey_i *n; + int ret; + + n = bch2_bkey_make_mut_noupdate(trans, k); + ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; + + while (data_opts.kill_ptrs) { + unsigned i = 0, drop = __fls(data_opts.kill_ptrs); + struct bch_extent_ptr *ptr; + + bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop); + data_opts.kill_ptrs ^= 1U << drop; + } + + /* + * If the new extent no longer has any pointers, bch2_extent_normalize() + * will do the appropriate thing with it (turning it into a + * KEY_TYPE_error key, or just a discard if it was a cached extent) + */ + bch2_extent_normalize(c, bkey_i_to_s(n)); + + /* + * Since we're not inserting through an extent iterator + * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators), + * we aren't using the extent overwrite path to delete, we're + * just using the normal key deletion path: + */ + if (bkey_deleted(&n->k)) + n->k.size = 0; + + return bch2_trans_relock(trans) ?: + bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); +} + int bch2_data_update_init(struct btree_trans *trans, + struct btree_iter *iter, struct moving_context *ctxt, struct data_update *m, struct write_point_specifier wp, @@ -452,7 +496,7 @@ int bch2_data_update_init(struct btree_trans *trans, const struct bch_extent_ptr *ptr; unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; unsigned ptrs_locked = 0; - int ret; + int ret = 0; bch2_bkey_buf_init(&m->k); bch2_bkey_buf_reassemble(&m->k, c, k); @@ -478,6 +522,8 @@ int bch2_data_update_init(struct btree_trans *trans, bkey_for_each_ptr(ptrs, ptr) percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref); + unsigned durability_have = 0, durability_removing = 0; + i = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { bool locked; @@ -489,8 +535,11 @@ int bch2_data_update_init(struct btree_trans *trans, reserve_sectors += k.k->size; m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); - } else if (!p.ptr.cached) { + durability_removing += bch2_extent_ptr_desired_durability(c, &p); + } else if (!p.ptr.cached && + !((1U << i) & m->data_opts.kill_ptrs)) { bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); + durability_have += bch2_extent_ptr_durability(c, &p); } /* @@ -529,6 +578,29 @@ int bch2_data_update_init(struct btree_trans *trans, i++; } + /* + * If current extent durability is less than io_opts.data_replicas, + * we're not trying to rereplicate the extent up to data_replicas here - + * unless extra_replicas was specified + * + * Increasing replication is an explicit operation triggered by + * rereplicate, currently, so that users don't get an unexpected -ENOSPC + */ + if (durability_have >= io_opts.data_replicas) { + m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; + m->data_opts.rewrite_ptrs = 0; + /* if iter == NULL, it's just a promote */ + if (iter) + ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts); + goto done; + } + + m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) + + m->data_opts.extra_replicas; + m->op.nr_replicas_required = m->op.nr_replicas; + + BUG_ON(!m->op.nr_replicas); + if (reserve_sectors) { ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors, m->data_opts.extra_replicas @@ -538,14 +610,11 @@ int bch2_data_update_init(struct btree_trans *trans, goto err; } - m->op.nr_replicas += m->data_opts.extra_replicas; - m->op.nr_replicas_required = m->op.nr_replicas; - - BUG_ON(!m->op.nr_replicas); + if (bkey_extent_is_unwritten(k)) { + bch2_update_unwritten_extent(trans, m); + goto done; + } - /* Special handling required: */ - if (bkey_extent_is_unwritten(k)) - return -BCH_ERR_unwritten_extent_update; return 0; err: i = 0; @@ -560,6 +629,9 @@ err: bch2_bkey_buf_exit(&m->k, c); bch2_bio_free_pages_pool(c, &m->op.wbio.bio); return ret; +done: + bch2_data_update_exit(m); + return ret ?: -BCH_ERR_data_update_done; } void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts) diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 9dc17b9d8379..991095bbd469 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *); void bch2_data_update_read_done(struct data_update *, struct bch_extent_crc_unpacked); +int bch2_extent_drop_ptrs(struct btree_trans *, + struct btree_iter *, + struct bkey_s_c, + struct data_update_opts); + void bch2_data_update_exit(struct data_update *); -void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *); -int bch2_data_update_init(struct btree_trans *, struct moving_context *, +int bch2_data_update_init(struct btree_trans *, struct btree_iter *, + struct moving_context *, struct data_update *, struct write_point_specifier, struct bch_io_opts, struct data_update_opts, diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 68a1a96bb7ca..ae7910bf2228 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -162,7 +162,7 @@ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ x(BCH_ERR_fsck, fsck_repair_impossible) \ x(0, restart_recovery) \ - x(0, unwritten_extent_update) \ + x(0, data_update_done) \ x(EINVAL, device_state_not_allowed) \ x(EINVAL, member_info_missing) \ x(EINVAL, mismatched_block_size) \ @@ -210,6 +210,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_members) \ x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \ x(BCH_ERR_invalid_sb, invalid_sb_replicas) \ + x(BCH_ERR_invalid_sb, invalid_replicas_entry) \ x(BCH_ERR_invalid_sb, invalid_sb_journal) \ x(BCH_ERR_invalid_sb, invalid_sb_journal_seq_blacklist) \ x(BCH_ERR_invalid_sb, invalid_sb_crypt) \ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index a864de231b69..f6c92df55270 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -649,37 +649,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) return replicas; } -unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p) +static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p) { - struct bch_dev *ca; - if (p->ptr.cached) return 0; - ca = bch_dev_bkey_exists(c, p->ptr.dev); - - return ca->mi.durability + - (p->has_ec - ? p->ec.redundancy - : 0); + return p->has_ec + ? p->ec.redundancy + 1 + : ca->mi.durability; } -unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p) +unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p) { - struct bch_dev *ca; + struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev); - if (p->ptr.cached) - return 0; + return __extent_ptr_durability(ca, p); +} - ca = bch_dev_bkey_exists(c, p->ptr.dev); +unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p) +{ + struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev); if (ca->mi.state == BCH_MEMBER_STATE_failed) return 0; - return ca->mi.durability + - (p->has_ec - ? p->ec.redundancy - : 0); + return __extent_ptr_durability(ca, p); } unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 5b42a76c4796..9a479e4de6b3 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -35,9 +35,9 @@ static void bio_check_or_release(struct bio *bio, bool check_dirty) } } -static void bch2_dio_read_complete(struct closure *cl) +static CLOSURE_CALLBACK(bch2_dio_read_complete) { - struct dio_read *dio = container_of(cl, struct dio_read, cl); + closure_type(dio, struct dio_read, cl); dio->req->ki_complete(dio->req, dio->ret); bio_check_or_release(&dio->rbio.bio, dio->should_dirty); @@ -325,9 +325,9 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) return 0; } -static void bch2_dio_write_flush_done(struct closure *cl) +static CLOSURE_CALLBACK(bch2_dio_write_flush_done) { - struct dio_write *dio = container_of(cl, struct dio_write, op.cl); + closure_type(dio, struct dio_write, op.cl); struct bch_fs *c = dio->op.c; closure_debug_destroy(cl); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 8ef817304e4a..4d51be813509 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1667,8 +1667,7 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root) if (!first) seq_putc(seq, ':'); first = false; - seq_puts(seq, "/dev/"); - seq_puts(seq, ca->name); + seq_puts(seq, ca->disk_sb.sb_name); } return 0; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index a56ed553dc15..36763865facd 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, bio = &op->write.op.wbio.bio; bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0); - ret = bch2_data_update_init(trans, NULL, &op->write, + ret = bch2_data_update_init(trans, NULL, NULL, &op->write, writepoint_hashed((unsigned long) current), opts, (struct data_update_opts) { diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d704a8f829c8..8ede46b1e354 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -580,9 +580,9 @@ static inline void wp_update_state(struct write_point *wp, bool running) __wp_update_state(wp, state); } -static void bch2_write_index(struct closure *cl) +static CLOSURE_CALLBACK(bch2_write_index) { - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + closure_type(op, struct bch_write_op, cl); struct write_point *wp = op->wp; struct workqueue_struct *wq = index_update_wq(op); unsigned long flags; @@ -1208,9 +1208,9 @@ static void __bch2_nocow_write_done(struct bch_write_op *op) bch2_nocow_write_convert_unwritten(op); } -static void bch2_nocow_write_done(struct closure *cl) +static CLOSURE_CALLBACK(bch2_nocow_write_done) { - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + closure_type(op, struct bch_write_op, cl); __bch2_nocow_write_done(op); bch2_write_done(cl); @@ -1363,7 +1363,7 @@ err: op->insert_keys.top = op->insert_keys.keys; } else if (op->flags & BCH_WRITE_SYNC) { closure_sync(&op->cl); - bch2_nocow_write_done(&op->cl); + bch2_nocow_write_done(&op->cl.work); } else { /* * XXX @@ -1566,9 +1566,9 @@ err: * If op->discard is true, instead of inserting the data it invalidates the * region of the cache represented by op->bio and op->inode. */ -void bch2_write(struct closure *cl) +CLOSURE_CALLBACK(bch2_write) { - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + closure_type(op, struct bch_write_op, cl); struct bio *bio = &op->wbio.bio; struct bch_fs *c = op->c; unsigned data_len; diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h index 9323167229ee..6c276a48f95d 100644 --- a/fs/bcachefs/io_write.h +++ b/fs/bcachefs/io_write.h @@ -90,8 +90,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, op->devs_need_flush = NULL; } -void bch2_write(struct closure *); - +CLOSURE_CALLBACK(bch2_write); void bch2_write_point_do_index_updates(struct work_struct *); static inline struct bch_write_bio *wbio_init(struct bio *bio) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 23a9b7845d11..489b34046e78 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -321,6 +321,8 @@ static int journal_entry_open(struct journal *j) atomic64_inc(&j->seq); journal_pin_list_init(fifo_push_ref(&j->pin), 1); + BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq)); + BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf); bkey_extent_init(&buf->key); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index c85d01cf4948..4c513fca5ef2 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -136,9 +136,7 @@ static inline u64 journal_last_seq(struct journal *j) static inline u64 journal_cur_seq(struct journal *j) { - EBUG_ON(j->pin.back - 1 != atomic64_read(&j->seq)); - - return j->pin.back - 1; + return atomic64_read(&j->seq); } static inline u64 journal_last_unwritten_seq(struct journal *j) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 786a09285509..0f17fc5f8d68 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -547,6 +547,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, struct jset_entry_data_usage *u = container_of(entry, struct jset_entry_data_usage, entry); unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); + struct printbuf err = PRINTBUF; int ret = 0; if (journal_entry_err_on(bytes < sizeof(*u) || @@ -555,10 +556,19 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, journal_entry_data_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); - return ret; + goto out; } + if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err), + c, version, jset, entry, + journal_entry_data_usage_bad_size, + "invalid journal entry usage: %s", err.buf)) { + journal_entry_null_range(entry, vstruct_next(entry)); + goto out; + } +out: fsck_err: + printbuf_exit(&err); return ret; } @@ -1025,10 +1035,9 @@ next_block: return 0; } -static void bch2_journal_read_device(struct closure *cl) +static CLOSURE_CALLBACK(bch2_journal_read_device) { - struct journal_device *ja = - container_of(cl, struct journal_device, read); + closure_type(ja, struct journal_device, read); struct bch_dev *ca = container_of(ja, struct bch_dev, journal); struct bch_fs *c = ca->fs; struct journal_list *jlist = @@ -1520,9 +1529,9 @@ static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j) return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK); } -static void journal_write_done(struct closure *cl) +static CLOSURE_CALLBACK(journal_write_done) { - struct journal *j = container_of(cl, struct journal, io); + closure_type(j, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_buf *w = journal_last_unwritten_buf(j); struct bch_replicas_padded replicas; @@ -1638,9 +1647,9 @@ static void journal_write_endio(struct bio *bio) percpu_ref_put(&ca->io_ref); } -static void do_journal_write(struct closure *cl) +static CLOSURE_CALLBACK(do_journal_write) { - struct journal *j = container_of(cl, struct journal, io); + closure_type(j, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_dev *ca; struct journal_buf *w = journal_last_unwritten_buf(j); @@ -1850,9 +1859,9 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf * return 0; } -void bch2_journal_write(struct closure *cl) +CLOSURE_CALLBACK(bch2_journal_write) { - struct journal *j = container_of(cl, struct journal, io); + closure_type(j, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_dev *ca; struct journal_buf *w = journal_last_unwritten_buf(j); diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h index a88d097b13f1..c035e7c108e1 100644 --- a/fs/bcachefs/journal_io.h +++ b/fs/bcachefs/journal_io.h @@ -60,6 +60,6 @@ void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *, int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *); -void bch2_journal_write(struct closure *); +CLOSURE_CALLBACK(bch2_journal_write); #endif /* _BCACHEFS_JOURNAL_IO_H */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index ab749bf2fcbc..54830ee0ed88 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -49,17 +49,6 @@ static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k) } } -static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k) -{ - if (trace_move_extent_alloc_mem_fail_enabled()) { - struct printbuf buf = PRINTBUF; - - bch2_bkey_val_to_text(&buf, c, k); - trace_move_extent_alloc_mem_fail(c, buf.buf); - printbuf_exit(&buf); - } -} - struct moving_io { struct list_head read_list; struct list_head io_list; @@ -163,12 +152,18 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) atomic_read(&ctxt->write_sectors) != sectors_pending); } +static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt) +{ + move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); + bch2_trans_unlock_long(ctxt->trans); + closure_sync(&ctxt->cl); +} + void bch2_moving_ctxt_exit(struct moving_context *ctxt) { struct bch_fs *c = ctxt->trans->c; - move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); - closure_sync(&ctxt->cl); + bch2_moving_ctxt_flush_all(ctxt); EBUG_ON(atomic_read(&ctxt->write_sectors)); EBUG_ON(atomic_read(&ctxt->write_ios)); @@ -223,49 +218,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, char *name) scnprintf(stats->name, sizeof(stats->name), "%s", name); } -static int bch2_extent_drop_ptrs(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k, - struct data_update_opts data_opts) -{ - struct bch_fs *c = trans->c; - struct bkey_i *n; - int ret; - - n = bch2_bkey_make_mut_noupdate(trans, k); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - return ret; - - while (data_opts.kill_ptrs) { - unsigned i = 0, drop = __fls(data_opts.kill_ptrs); - struct bch_extent_ptr *ptr; - - bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop); - data_opts.kill_ptrs ^= 1U << drop; - } - - /* - * If the new extent no longer has any pointers, bch2_extent_normalize() - * will do the appropriate thing with it (turning it into a - * KEY_TYPE_error key, or just a discard if it was a cached extent) - */ - bch2_extent_normalize(c, bkey_i_to_s(n)); - - /* - * Since we're not inserting through an extent iterator - * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators), - * we aren't using the extent overwrite path to delete, we're - * just using the normal key deletion path: - */ - if (bkey_deleted(&n->k)) - n->k.size = 0; - - return bch2_trans_relock(trans) ?: - bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: - bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); -} - int bch2_move_extent(struct moving_context *ctxt, struct move_bucket_in_flight *bucket_in_flight, struct btree_iter *iter, @@ -335,19 +287,11 @@ int bch2_move_extent(struct moving_context *ctxt, io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); io->rbio.bio.bi_end_io = move_read_endio; - ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp, + ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, io_opts, data_opts, iter->btree_id, k); - if (ret && ret != -BCH_ERR_unwritten_extent_update) + if (ret) goto err_free_pages; - if (ret == -BCH_ERR_unwritten_extent_update) { - bch2_update_unwritten_extent(trans, &io->write); - move_free(io); - return 0; - } - - BUG_ON(ret); - io->write.op.end_io = move_write_done; if (ctxt->rate) @@ -391,8 +335,23 @@ err_free_pages: err_free: kfree(io); err: - this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]); - trace_move_extent_alloc_mem_fail2(c, k); + if (ret == -BCH_ERR_data_update_done) + return 0; + + if (bch2_err_matches(ret, EROFS) || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; + + this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]); + if (trace_move_extent_start_fail_enabled()) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, ": "); + prt_str(&buf, bch2_err_str(ret)); + trace_move_extent_start_fail(c, buf.buf); + printbuf_exit(&buf); + } return ret; } @@ -482,37 +441,30 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans, int bch2_move_ratelimit(struct moving_context *ctxt) { struct bch_fs *c = ctxt->trans->c; + bool is_kthread = current->flags & PF_KTHREAD; u64 delay; - if (ctxt->wait_on_copygc && !c->copygc_running) { - bch2_trans_unlock_long(ctxt->trans); + if (ctxt->wait_on_copygc && c->copygc_running) { + bch2_moving_ctxt_flush_all(ctxt); wait_event_killable(c->copygc_running_wq, !c->copygc_running || - kthread_should_stop()); + (is_kthread && kthread_should_stop())); } do { delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0; - - if (delay) { - if (delay > HZ / 10) - bch2_trans_unlock_long(ctxt->trans); - else - bch2_trans_unlock(ctxt->trans); - set_current_state(TASK_INTERRUPTIBLE); - } - - if ((current->flags & PF_KTHREAD) && kthread_should_stop()) { - __set_current_state(TASK_RUNNING); + if (is_kthread && kthread_should_stop()) return 1; - } if (delay) - schedule_timeout(delay); + move_ctxt_wait_event_timeout(ctxt, + freezing(current) || + (is_kthread && kthread_should_stop()), + delay); if (unlikely(freezing(current))) { - move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); + bch2_moving_ctxt_flush_all(ctxt); try_to_freeze(); } } while (delay); @@ -683,6 +635,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; + bool is_kthread = current->flags & PF_KTHREAD; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct btree_iter iter; struct bkey_buf sk; @@ -728,6 +681,9 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, } while (!(ret = bch2_move_ratelimit(ctxt))) { + if (is_kthread && kthread_should_stop()) + break; + bch2_trans_begin(trans); ret = bch2_get_next_backpointer(trans, bucket, gen, diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 07cf9d42643b..0906aa2d1de2 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -38,6 +38,25 @@ struct moving_context { wait_queue_head_t wait; }; +#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout) \ +({ \ + int _ret = 0; \ + while (true) { \ + bool cond_finished = false; \ + bch2_moving_ctxt_do_pending_writes(_ctxt); \ + \ + if (_cond) \ + break; \ + bch2_trans_unlock_long((_ctxt)->trans); \ + _ret = __wait_event_timeout((_ctxt)->wait, \ + bch2_moving_ctxt_next_pending_write(_ctxt) || \ + (cond_finished = (_cond)), _timeout); \ + if (_ret || ( cond_finished)) \ + break; \ + } \ + _ret; \ +}) + #define move_ctxt_wait_event(_ctxt, _cond) \ do { \ bool cond_finished = false; \ diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 0a0576326c5b..a84e79f79e5e 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -207,7 +207,7 @@ static int bch2_copygc(struct moving_context *ctxt, goto err; darray_for_each(buckets, i) { - if (unlikely(freezing(current))) + if (kthread_should_stop() || freezing(current)) break; f = move_bucket_in_flight_add(buckets_in_flight, *i); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 9c30500ce920..770ced1c6285 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -167,6 +167,8 @@ static int bch2_journal_replay(struct bch_fs *c) goto err; } + BUG_ON(!atomic_read(&keys->ref)); + for (i = 0; i < keys->nr; i++) { k = keys_sorted[i]; @@ -188,6 +190,9 @@ static int bch2_journal_replay(struct bch_fs *c) } } + if (!c->opts.keep_journal) + bch2_journal_keys_put_initial(c); + replay_now_at(j, j->replay_journal_seq_end); j->replay_journal_seq = 0; @@ -909,10 +914,8 @@ out: bch2_flush_fsck_errs(c); if (!c->opts.keep_journal && - test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) { - bch2_journal_keys_free(&c->journal_keys); - bch2_journal_entries_free(c); - } + test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) + bch2_journal_keys_put_initial(c); kfree(clean); if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) { diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 1c3ae13bfced..2008fe8bf706 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -68,6 +68,33 @@ void bch2_replicas_entry_to_text(struct printbuf *out, prt_printf(out, "]"); } +int bch2_replicas_entry_validate(struct bch_replicas_entry *r, + struct bch_sb *sb, + struct printbuf *err) +{ + if (!r->nr_devs) { + prt_printf(err, "no devices in entry "); + goto bad; + } + + if (r->nr_required > 1 && + r->nr_required >= r->nr_devs) { + prt_printf(err, "bad nr_required in entry "); + goto bad; + } + + for (unsigned i = 0; i < r->nr_devs; i++) + if (!bch2_dev_exists(sb, r->devs[i])) { + prt_printf(err, "invalid device %u in entry ", r->devs[i]); + goto bad; + } + + return 0; +bad: + bch2_replicas_entry_to_text(err, r); + return -BCH_ERR_invalid_replicas_entry; +} + void bch2_cpu_replicas_to_text(struct printbuf *out, struct bch_replicas_cpu *r) { @@ -163,7 +190,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry *e, } static struct bch_replicas_cpu -cpu_replicas_add_entry(struct bch_replicas_cpu *old, +cpu_replicas_add_entry(struct bch_fs *c, + struct bch_replicas_cpu *old, struct bch_replicas_entry *new_entry) { unsigned i; @@ -173,6 +201,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old, replicas_entry_bytes(new_entry)), }; + for (i = 0; i < new_entry->nr_devs; i++) + BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i])); + BUG_ON(!new_entry->data_type); verify_replicas_entry(new_entry); @@ -382,7 +413,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, if (c->replicas_gc.entries && !__replicas_has_entry(&c->replicas_gc, new_entry)) { - new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry); + new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry); if (!new_gc.entries) { ret = -BCH_ERR_ENOMEM_cpu_replicas; goto err; @@ -390,7 +421,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, } if (!__replicas_has_entry(&c->replicas, new_entry)) { - new_r = cpu_replicas_add_entry(&c->replicas, new_entry); + new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry); if (!new_r.entries) { ret = -BCH_ERR_ENOMEM_cpu_replicas; goto err; @@ -598,7 +629,7 @@ int bch2_replicas_set_usage(struct bch_fs *c, if (idx < 0) { struct bch_replicas_cpu n; - n = cpu_replicas_add_entry(&c->replicas, r); + n = cpu_replicas_add_entry(c, &c->replicas, r); if (!n.entries) return -BCH_ERR_ENOMEM_cpu_replicas; @@ -797,7 +828,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, struct bch_sb *sb, struct printbuf *err) { - unsigned i, j; + unsigned i; sort_cmp_size(cpu_r->entries, cpu_r->nr, @@ -808,31 +839,9 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, struct bch_replicas_entry *e = cpu_replicas_entry(cpu_r, i); - if (e->data_type >= BCH_DATA_NR) { - prt_printf(err, "invalid data type in entry "); - bch2_replicas_entry_to_text(err, e); - return -BCH_ERR_invalid_sb_replicas; - } - - if (!e->nr_devs) { - prt_printf(err, "no devices in entry "); - bch2_replicas_entry_to_text(err, e); - return -BCH_ERR_invalid_sb_replicas; - } - - if (e->nr_required > 1 && - e->nr_required >= e->nr_devs) { - prt_printf(err, "bad nr_required in entry "); - bch2_replicas_entry_to_text(err, e); - return -BCH_ERR_invalid_sb_replicas; - } - - for (j = 0; j < e->nr_devs; j++) - if (!bch2_dev_exists(sb, e->devs[j])) { - prt_printf(err, "invalid device %u in entry ", e->devs[j]); - bch2_replicas_entry_to_text(err, e); - return -BCH_ERR_invalid_sb_replicas; - } + int ret = bch2_replicas_entry_validate(e, sb, err); + if (ret) + return ret; if (i + 1 < cpu_r->nr) { struct bch_replicas_entry *n = diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index 4887675a86f0..f70a642775d1 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -9,6 +9,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry *); void bch2_replicas_entry_to_text(struct printbuf *, struct bch_replicas_entry *); +int bch2_replicas_entry_validate(struct bch_replicas_entry *, + struct bch_sb *, struct printbuf *); void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); static inline struct bch_replicas_entry * diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index e9af77b384c7..5dac038f0851 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -959,7 +959,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) parent_id, id)) goto err; - parent->v.children[i] = le32_to_cpu(child_id); + parent->v.children[i] = cpu_to_le32(child_id); normalize_snapshot_child_pointers(&parent->v); } diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index f4cad903f4d6..f3e12f7979d5 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -166,6 +166,7 @@ void bch2_free_super(struct bch_sb_handle *sb) if (!IS_ERR_OR_NULL(sb->bdev)) blkdev_put(sb->bdev, sb->holder); kfree(sb->holder); + kfree(sb->sb_name); kfree(sb->sb); memset(sb, 0, sizeof(*sb)); @@ -675,6 +676,10 @@ retry: if (!sb->holder) return -ENOMEM; + sb->sb_name = kstrdup(path, GFP_KERNEL); + if (!sb->sb_name) + return -ENOMEM; + #ifndef __KERNEL__ if (opt_get(*opts, direct_io) == false) sb->mode |= BLK_OPEN_BUFFERED; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 24672bb31cbe..f63474c5c5a2 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -423,6 +423,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); + set_bit(BCH_FS_RW, &c->flags); + set_bit(BCH_FS_WAS_RW, &c->flags); + +#ifndef BCH_WRITE_REF_DEBUG + percpu_ref_reinit(&c->writes); +#else + for (i = 0; i < BCH_WRITE_REF_NR; i++) { + BUG_ON(atomic_long_read(&c->writes[i])); + atomic_long_inc(&c->writes[i]); + } +#endif + ret = bch2_gc_thread_start(c); if (ret) { bch_err(c, "error starting gc thread"); @@ -439,24 +451,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) goto err; } -#ifndef BCH_WRITE_REF_DEBUG - percpu_ref_reinit(&c->writes); -#else - for (i = 0; i < BCH_WRITE_REF_NR; i++) { - BUG_ON(atomic_long_read(&c->writes[i])); - atomic_long_inc(&c->writes[i]); - } -#endif - set_bit(BCH_FS_RW, &c->flags); - set_bit(BCH_FS_WAS_RW, &c->flags); - bch2_do_discards(c); bch2_do_invalidates(c); bch2_do_stripe_deletes(c); bch2_do_pending_node_rewrites(c); return 0; err: - __bch2_fs_read_only(c); + if (test_bit(BCH_FS_RW, &c->flags)) + bch2_fs_read_only(c); + else + __bch2_fs_read_only(c); return ret; } @@ -504,8 +508,8 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_io_clock_exit(&c->io_clock[WRITE]); bch2_io_clock_exit(&c->io_clock[READ]); bch2_fs_compress_exit(c); - bch2_journal_keys_free(&c->journal_keys); - bch2_journal_entries_free(c); + bch2_journal_keys_put_initial(c); + BUG_ON(atomic_read(&c->journal_keys.ref)); bch2_fs_btree_write_buffer_exit(c); percpu_free_rwsem(&c->mark_lock); free_percpu(c->online_reserved); @@ -702,6 +706,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) init_rwsem(&c->gc_lock); mutex_init(&c->gc_gens_lock); + atomic_set(&c->journal_keys.ref, 1); + c->journal_keys.initial_ref_held = true; for (i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_init(&c->times[i]); diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h index 7dda4985b99f..9c1fd4ca2b10 100644 --- a/fs/bcachefs/super_types.h +++ b/fs/bcachefs/super_types.h @@ -5,6 +5,7 @@ struct bch_sb_handle { struct bch_sb *sb; struct block_device *bdev; + char *sb_name; struct bio *bio; void *holder; size_t buffer_size; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 7857671159b4..fd49b63562c3 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -754,9 +754,9 @@ TRACE_EVENT(move_extent_fail, TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg)) ); -DEFINE_EVENT(bkey, move_extent_alloc_mem_fail, - TP_PROTO(struct bch_fs *c, const char *k), - TP_ARGS(c, k) +DEFINE_EVENT(bkey, move_extent_start_fail, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) ); TRACE_EVENT(move_data, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 401ea09ae4b8..bbcc3df77646 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3213,6 +3213,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_alloc; } + btrfs_info(fs_info, "first mount of filesystem %pU", disk_super->fsid); /* * Verify the type first, if that or the checksum value are * corrupted, we'll find out diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 03cef28d9e37..e6230a6ffa98 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -674,8 +674,8 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio) * the array will be skipped * * Return: 0 if all pages were able to be allocated; - * -ENOMEM otherwise, and the caller is responsible for freeing all - * non-null page pointers in the array. + * -ENOMEM otherwise, the partially allocated pages would be freed and + * the array slots zeroed */ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array) { @@ -694,8 +694,13 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array) * though alloc_pages_bulk_array() falls back to alloc_page() * if it could not bulk-allocate. So we must be out of memory. */ - if (allocated == last) + if (allocated == last) { + for (int i = 0; i < allocated; i++) { + __free_page(page_array[i]); + page_array[i] = NULL; + } return -ENOMEM; + } memalloc_retry_wait(GFP_NOFS); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index dfe257e1845b..4e50b62db2a8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4356,6 +4356,7 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat arg->clone_sources = compat_ptr(args32.clone_sources); arg->parent_root = args32.parent_root; arg->flags = args32.flags; + arg->version = args32.version; memcpy(arg->reserved, args32.reserved, sizeof(args32.reserved)); #else diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 1f62976bee82..6486f0d7e993 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -794,6 +794,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, dump_ref_action(fs_info, ra); kfree(ref); kfree(ra); + kfree(re); goto out_unlock; } else if (be->num_refs == 0) { btrfs_err(fs_info, @@ -803,6 +804,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, dump_ref_action(fs_info, ra); kfree(ref); kfree(ra); + kfree(re); goto out_unlock; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 3b929f0e8f04..4e36550618e5 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -8158,7 +8158,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) } sctx->send_filp = fget(arg->send_fd); - if (!sctx->send_filp) { + if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) { ret = -EBADF; goto out; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f638dc339693..ef256b944c72 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -80,7 +80,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data); static void btrfs_put_super(struct super_block *sb) { - close_ctree(btrfs_sb(sb)); + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + + btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid); + close_ctree(fs_info); } enum { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6e63816dddcb..bfc0eb5e3b7c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1774,7 +1774,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_release_path(path); ret = btrfs_create_qgroup(trans, objectid); - if (ret) { + if (ret && ret != -EEXIST) { btrfs_abort_transaction(trans, ret); goto fail; } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index a416cbea75d1..50fdc69fdddf 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -31,6 +31,7 @@ #include "inode-item.h" #include "dir-item.h" #include "raid-stripe-tree.h" +#include "extent-tree.h" /* * Error message should follow the following format: @@ -1276,6 +1277,8 @@ static int check_extent_item(struct extent_buffer *leaf, unsigned long ptr; /* Current pointer inside inline refs */ unsigned long end; /* Extent item end */ const u32 item_size = btrfs_item_size(leaf, slot); + u8 last_type = 0; + u64 last_seq = U64_MAX; u64 flags; u64 generation; u64 total_refs; /* Total refs in btrfs_extent_item */ @@ -1322,6 +1325,18 @@ static int check_extent_item(struct extent_buffer *leaf, * 2.2) Ref type specific data * Either using btrfs_extent_inline_ref::offset, or specific * data structure. + * + * All above inline items should follow the order: + * + * - All btrfs_extent_inline_ref::type should be in an ascending + * order + * + * - Within the same type, the items should follow a descending + * order by their sequence number. The sequence number is + * determined by: + * * btrfs_extent_inline_ref::offset for all types other than + * EXTENT_DATA_REF + * * hash_extent_data_ref() for EXTENT_DATA_REF */ if (unlikely(item_size < sizeof(*ei))) { extent_err(leaf, slot, @@ -1403,6 +1418,7 @@ static int check_extent_item(struct extent_buffer *leaf, struct btrfs_extent_inline_ref *iref; struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; + u64 seq; u64 dref_offset; u64 inline_offset; u8 inline_type; @@ -1416,6 +1432,7 @@ static int check_extent_item(struct extent_buffer *leaf, iref = (struct btrfs_extent_inline_ref *)ptr; inline_type = btrfs_extent_inline_ref_type(leaf, iref); inline_offset = btrfs_extent_inline_ref_offset(leaf, iref); + seq = inline_offset; if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) { extent_err(leaf, slot, "inline ref item overflows extent item, ptr %lu iref size %u end %lu", @@ -1446,6 +1463,10 @@ static int check_extent_item(struct extent_buffer *leaf, case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); dref_offset = btrfs_extent_data_ref_offset(leaf, dref); + seq = hash_extent_data_ref( + btrfs_extent_data_ref_root(leaf, dref), + btrfs_extent_data_ref_objectid(leaf, dref), + btrfs_extent_data_ref_offset(leaf, dref)); if (unlikely(!IS_ALIGNED(dref_offset, fs_info->sectorsize))) { extent_err(leaf, slot, @@ -1475,6 +1496,24 @@ static int check_extent_item(struct extent_buffer *leaf, inline_type); return -EUCLEAN; } + if (inline_type < last_type) { + extent_err(leaf, slot, + "inline ref out-of-order: has type %u, prev type %u", + inline_type, last_type); + return -EUCLEAN; + } + /* Type changed, allow the sequence starts from U64_MAX again. */ + if (inline_type > last_type) + last_seq = U64_MAX; + if (seq > last_seq) { + extent_err(leaf, slot, +"inline ref out-of-order: has type %u offset %llu seq 0x%llx, prev type %u seq 0x%llx", + inline_type, inline_offset, seq, + last_type, last_seq); + return -EUCLEAN; + } + last_type = inline_type; + last_seq = seq; ptr += btrfs_extent_inline_ref_size(inline_type); } /* No padding is allowed */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c6f16625af51..f627674b37db 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3006,15 +3006,16 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, read_unlock(&em_tree->lock); if (!em) { - btrfs_crit(fs_info, "unable to find logical %llu length %llu", + btrfs_crit(fs_info, + "unable to find chunk map for logical %llu length %llu", logical, length); return ERR_PTR(-EINVAL); } - if (em->start > logical || em->start + em->len < logical) { + if (em->start > logical || em->start + em->len <= logical) { btrfs_crit(fs_info, - "found a bad mapping, wanted %llu-%llu, found %llu-%llu", - logical, length, em->start, em->start + em->len); + "found a bad chunk map, wanted %llu-%llu, found %llu-%llu", + logical, logical + length, em->start, em->start + em->len); free_extent_map(em); return ERR_PTR(-EINVAL); } diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index c45e8c2d62e1..a5ade8c16375 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -84,6 +84,14 @@ int debugfs_file_get(struct dentry *dentry) struct debugfs_fsdata *fsd; void *d_fsd; + /* + * This could only happen if some debugfs user erroneously calls + * debugfs_file_get() on a dentry that isn't even a file, let + * them know about it. + */ + if (WARN_ON(!d_is_reg(dentry))) + return -EINVAL; + d_fsd = READ_ONCE(dentry->d_fsdata); if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) { fsd = d_fsd; @@ -100,6 +108,14 @@ int debugfs_file_get(struct dentry *dentry) kfree(fsd); fsd = READ_ONCE(dentry->d_fsdata); } +#ifdef CONFIG_LOCKDEP + fsd->lock_name = kasprintf(GFP_KERNEL, "debugfs:%pd", dentry); + lockdep_register_key(&fsd->key); + lockdep_init_map(&fsd->lockdep_map, fsd->lock_name ?: "debugfs", + &fsd->key, 0); +#endif + INIT_LIST_HEAD(&fsd->cancellations); + mutex_init(&fsd->cancellations_mtx); } /* @@ -116,6 +132,8 @@ int debugfs_file_get(struct dentry *dentry) if (!refcount_inc_not_zero(&fsd->active_users)) return -EIO; + lock_map_acquire_read(&fsd->lockdep_map); + return 0; } EXPORT_SYMBOL_GPL(debugfs_file_get); @@ -133,11 +151,93 @@ void debugfs_file_put(struct dentry *dentry) { struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata); + lock_map_release(&fsd->lockdep_map); + if (refcount_dec_and_test(&fsd->active_users)) complete(&fsd->active_users_drained); } EXPORT_SYMBOL_GPL(debugfs_file_put); +/** + * debugfs_enter_cancellation - enter a debugfs cancellation + * @file: the file being accessed + * @cancellation: the cancellation object, the cancel callback + * inside of it must be initialized + * + * When a debugfs file is removed it needs to wait for all active + * operations to complete. However, the operation itself may need + * to wait for hardware or completion of some asynchronous process + * or similar. As such, it may need to be cancelled to avoid long + * waits or even deadlocks. + * + * This function can be used inside a debugfs handler that may + * need to be cancelled. As soon as this function is called, the + * cancellation's 'cancel' callback may be called, at which point + * the caller should proceed to call debugfs_leave_cancellation() + * and leave the debugfs handler function as soon as possible. + * Note that the 'cancel' callback is only ever called in the + * context of some kind of debugfs_remove(). + * + * This function must be paired with debugfs_leave_cancellation(). + */ +void debugfs_enter_cancellation(struct file *file, + struct debugfs_cancellation *cancellation) +{ + struct debugfs_fsdata *fsd; + struct dentry *dentry = F_DENTRY(file); + + INIT_LIST_HEAD(&cancellation->list); + + if (WARN_ON(!d_is_reg(dentry))) + return; + + if (WARN_ON(!cancellation->cancel)) + return; + + fsd = READ_ONCE(dentry->d_fsdata); + if (WARN_ON(!fsd || + ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) + return; + + mutex_lock(&fsd->cancellations_mtx); + list_add(&cancellation->list, &fsd->cancellations); + mutex_unlock(&fsd->cancellations_mtx); + + /* if we're already removing wake it up to cancel */ + if (d_unlinked(dentry)) + complete(&fsd->active_users_drained); +} +EXPORT_SYMBOL_GPL(debugfs_enter_cancellation); + +/** + * debugfs_leave_cancellation - leave cancellation section + * @file: the file being accessed + * @cancellation: the cancellation previously registered with + * debugfs_enter_cancellation() + * + * See the documentation of debugfs_enter_cancellation(). + */ +void debugfs_leave_cancellation(struct file *file, + struct debugfs_cancellation *cancellation) +{ + struct debugfs_fsdata *fsd; + struct dentry *dentry = F_DENTRY(file); + + if (WARN_ON(!d_is_reg(dentry))) + return; + + fsd = READ_ONCE(dentry->d_fsdata); + if (WARN_ON(!fsd || + ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) + return; + + mutex_lock(&fsd->cancellations_mtx); + if (!list_empty(&cancellation->list)) + list_del(&cancellation->list); + mutex_unlock(&fsd->cancellations_mtx); +} +EXPORT_SYMBOL_GPL(debugfs_leave_cancellation); + /* * Only permit access to world-readable files when the kernel is locked down. * We also need to exclude any file that has ways to write or alter it as root diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 5d41765e0c77..e4e7fe1bd9fb 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -236,17 +236,29 @@ static const struct super_operations debugfs_super_operations = { static void debugfs_release_dentry(struct dentry *dentry) { - void *fsd = dentry->d_fsdata; + struct debugfs_fsdata *fsd = dentry->d_fsdata; - if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) - kfree(dentry->d_fsdata); + if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) + return; + + /* check it wasn't a dir (no fsdata) or automount (no real_fops) */ + if (fsd && fsd->real_fops) { +#ifdef CONFIG_LOCKDEP + lockdep_unregister_key(&fsd->key); + kfree(fsd->lock_name); +#endif + WARN_ON(!list_empty(&fsd->cancellations)); + mutex_destroy(&fsd->cancellations_mtx); + } + + kfree(fsd); } static struct vfsmount *debugfs_automount(struct path *path) { - debugfs_automount_t f; - f = (debugfs_automount_t)path->dentry->d_fsdata; - return f(path->dentry, d_inode(path->dentry)->i_private); + struct debugfs_fsdata *fsd = path->dentry->d_fsdata; + + return fsd->automount(path->dentry, d_inode(path->dentry)->i_private); } static const struct dentry_operations debugfs_dops = { @@ -634,13 +646,23 @@ struct dentry *debugfs_create_automount(const char *name, void *data) { struct dentry *dentry = start_creating(name, parent); + struct debugfs_fsdata *fsd; struct inode *inode; if (IS_ERR(dentry)) return dentry; + fsd = kzalloc(sizeof(*fsd), GFP_KERNEL); + if (!fsd) { + failed_creating(dentry); + return ERR_PTR(-ENOMEM); + } + + fsd->automount = f; + if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); + kfree(fsd); return ERR_PTR(-EPERM); } @@ -648,13 +670,14 @@ struct dentry *debugfs_create_automount(const char *name, if (unlikely(!inode)) { pr_err("out of free dentries, can not create automount '%s'\n", name); + kfree(fsd); return failed_creating(dentry); } make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; inode->i_private = data; - dentry->d_fsdata = (void *)f; + dentry->d_fsdata = fsd; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); @@ -731,8 +754,40 @@ static void __debugfs_file_removed(struct dentry *dentry) fsd = READ_ONCE(dentry->d_fsdata); if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) return; - if (!refcount_dec_and_test(&fsd->active_users)) + + lock_map_acquire(&fsd->lockdep_map); + lock_map_release(&fsd->lockdep_map); + + /* if we hit zero, just wait for all to finish */ + if (!refcount_dec_and_test(&fsd->active_users)) { + wait_for_completion(&fsd->active_users_drained); + return; + } + + /* if we didn't hit zero, try to cancel any we can */ + while (refcount_read(&fsd->active_users)) { + struct debugfs_cancellation *c; + + /* + * Lock the cancellations. Note that the cancellations + * structs are meant to be on the stack, so we need to + * ensure we either use them here or don't touch them, + * and debugfs_leave_cancellation() will wait for this + * to be finished processing before exiting one. It may + * of course win and remove the cancellation, but then + * chances are we never even got into this bit, we only + * do if the refcount isn't zero already. + */ + mutex_lock(&fsd->cancellations_mtx); + while ((c = list_first_entry_or_null(&fsd->cancellations, + typeof(*c), list))) { + list_del_init(&c->list); + c->cancel(dentry, c->cancel_data); + } + mutex_unlock(&fsd->cancellations_mtx); + wait_for_completion(&fsd->active_users_drained); + } } static void remove_one(struct dentry *victim) diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h index 92af8ae31313..0c4c68cf161f 100644 --- a/fs/debugfs/internal.h +++ b/fs/debugfs/internal.h @@ -7,6 +7,8 @@ #ifndef _DEBUGFS_INTERNAL_H_ #define _DEBUGFS_INTERNAL_H_ +#include <linux/lockdep.h> +#include <linux/list.h> struct file_operations; @@ -17,8 +19,23 @@ extern const struct file_operations debugfs_full_proxy_file_operations; struct debugfs_fsdata { const struct file_operations *real_fops; - refcount_t active_users; - struct completion active_users_drained; + union { + /* automount_fn is used when real_fops is NULL */ + debugfs_automount_t automount; + struct { + refcount_t active_users; + struct completion active_users_drained; +#ifdef CONFIG_LOCKDEP + struct lockdep_map lockdep_map; + struct lock_class_key key; + char *lock_name; +#endif + + /* protect cancellations */ + struct mutex cancellations_mtx; + struct list_head cancellations; + }; + }; }; /* diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 1039e5bf90af..4ddc36f4dbd4 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -258,7 +258,6 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out_unlock; } - iocb->ki_pos += status; ret += status; endbyte = pos + status - 1; ret2 = filemap_write_and_wait_range(inode->i_mapping, pos, diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 6ffbd81bd109..7558167f603c 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -191,7 +191,13 @@ struct cifs_open_info_data { bool reparse_point; bool symlink; }; - __u32 reparse_tag; + struct { + __u32 tag; + union { + struct reparse_data_buffer *buf; + struct reparse_posix_data *posix; + }; + } reparse; char *symlink_target; union { struct smb2_file_all_info fi; @@ -395,8 +401,7 @@ struct smb_version_operations { struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - char **target_path, - struct kvec *rsp_iov); + char **target_path); /* open a file for non-posix mounts */ int (*open)(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf); @@ -551,6 +556,9 @@ struct smb_version_operations { bool (*is_status_io_timeout)(char *buf); /* Check for STATUS_NETWORK_NAME_DELETED */ bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); + int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb, + struct kvec *rsp_iov, + struct cifs_open_info_data *data); }; struct smb_version_values { diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index a75220db5c1e..c0513fbb8a59 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -882,11 +882,13 @@ typedef struct smb_com_open_rsp { __u8 OplockLevel; __u16 Fid; __le32 CreateAction; - __le64 CreationTime; - __le64 LastAccessTime; - __le64 LastWriteTime; - __le64 ChangeTime; - __le32 FileAttributes; + struct_group(common_attributes, + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le32 FileAttributes; + ); __le64 AllocationSize; __le64 EndOfFile; __le16 FileType; @@ -1356,7 +1358,7 @@ typedef struct smb_com_transaction_ioctl_rsp { __le32 DataDisplacement; __u8 SetupCount; /* 1 */ __le16 ReturnedDataLen; - __u16 ByteCount; + __le16 ByteCount; } __attribute__((packed)) TRANSACT_IOCTL_RSP; #define CIFS_ACL_OWNER 1 @@ -1509,7 +1511,7 @@ struct reparse_posix_data { __le16 ReparseDataLength; __u16 Reserved; __le64 InodeType; /* LNK, FIFO, CHR etc. */ - char PathBuffer[]; + __u8 DataBuffer[]; } __attribute__((packed)); struct cifs_quota_data { @@ -2264,11 +2266,13 @@ typedef struct { /* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */ /******************************************************************************/ typedef struct { /* data block encoding of response to level 263 QPathInfo */ - __le64 CreationTime; - __le64 LastAccessTime; - __le64 LastWriteTime; - __le64 ChangeTime; - __le32 Attributes; + struct_group(common_attributes, + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le32 Attributes; + ); __u32 Pad1; __le64 AllocationSize; __le64 EndOfFile; /* size ie offset to first free byte in file */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index d87e2c26cce2..46feaa0880bd 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -210,7 +210,7 @@ int cifs_get_inode_info(struct inode **inode, const char *full_path, const struct cifs_fid *fid); bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, - u32 tag); + struct cifs_open_info_data *data); extern int smb311_posix_get_inode_info(struct inode **pinode, const char *search_path, struct super_block *sb, unsigned int xid); extern int cifs_get_inode_info_unix(struct inode **pinode, @@ -458,6 +458,12 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **syminfo, const struct nls_table *nls_codepage, int remap); +extern int cifs_query_reparse_point(const unsigned int xid, + struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + const char *full_path, + u32 *tag, struct kvec *rsp, + int *rsp_buftype); extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, char **symlinkinfo, const struct nls_table *nls_codepage); @@ -659,6 +665,12 @@ void cifs_put_tcp_super(struct super_block *sb); int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix); char *extract_hostname(const char *unc); char *extract_sharename(const char *unc); +int parse_reparse_point(struct reparse_data_buffer *buf, + u32 plen, struct cifs_sb_info *cifs_sb, + bool unicode, struct cifs_open_info_data *data); +int cifs_sfu_make_node(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev); #ifdef CONFIG_CIFS_DFS_UPCALL static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 25503f1a4fd2..9ee348e6d106 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1244,8 +1244,10 @@ openRetry: *oplock |= CIFS_CREATE_ACTION; if (buf) { - /* copy from CreationTime to Attributes */ - memcpy((char *)buf, (char *)&rsp->CreationTime, 36); + /* copy commonly used attributes */ + memcpy(&buf->common_attributes, + &rsp->common_attributes, + sizeof(buf->common_attributes)); /* the file_info buf is endian converted by caller */ buf->AllocationSize = rsp->AllocationSize; buf->EndOfFile = rsp->EndOfFile; @@ -2690,136 +2692,97 @@ querySymLinkRetry: return rc; } -/* - * Recent Windows versions now create symlinks more frequently - * and they use the "reparse point" mechanism below. We can of course - * do symlinks nicely to Samba and other servers which support the - * CIFS Unix Extensions and we can also do SFU symlinks and "client only" - * "MF" symlinks optionally, but for recent Windows we really need to - * reenable the code below and fix the cifs_symlink callers to handle this. - * In the interim this code has been moved to its own config option so - * it is not compiled in by default until callers fixed up and more tested. - */ -int -CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, - __u16 fid, char **symlinkinfo, - const struct nls_table *nls_codepage) +int cifs_query_reparse_point(const unsigned int xid, + struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + const char *full_path, + u32 *tag, struct kvec *rsp, + int *rsp_buftype) { - int rc = 0; - int bytes_returned; - struct smb_com_transaction_ioctl_req *pSMB; - struct smb_com_transaction_ioctl_rsp *pSMBr; - bool is_unicode; - unsigned int sub_len; - char *sub_start; - struct reparse_symlink_data *reparse_buf; - struct reparse_posix_data *posix_buf; + struct cifs_open_parms oparms; + TRANSACT_IOCTL_REQ *io_req = NULL; + TRANSACT_IOCTL_RSP *io_rsp = NULL; + struct cifs_fid fid; __u32 data_offset, data_count; - char *end_of_smb; + __u8 *start, *end; + int io_rsp_len; + int oplock = 0; + int rc; - cifs_dbg(FYI, "In Windows reparse style QueryLink for fid %u\n", fid); - rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, - (void **) &pSMBr); + cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path); + + if (cap_unix(tcon->ses)) + return -EOPNOTSUPP; + + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, + .desired_access = FILE_READ_ATTRIBUTES, + .create_options = cifs_create_options(cifs_sb, + OPEN_REPARSE_POINT), + .disposition = FILE_OPEN, + .path = full_path, + .fid = &fid, + }; + + rc = CIFS_open(xid, &oparms, &oplock, NULL); if (rc) return rc; - pSMB->TotalParameterCount = 0 ; - pSMB->TotalDataCount = 0; - pSMB->MaxParameterCount = cpu_to_le32(2); - /* BB find exact data count max from sess structure BB */ - pSMB->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); - pSMB->MaxSetupCount = 4; - pSMB->Reserved = 0; - pSMB->ParameterOffset = 0; - pSMB->DataCount = 0; - pSMB->DataOffset = 0; - pSMB->SetupCount = 4; - pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); - pSMB->ParameterCount = pSMB->TotalParameterCount; - pSMB->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT); - pSMB->IsFsctl = 1; /* FSCTL */ - pSMB->IsRootFlag = 0; - pSMB->Fid = fid; /* file handle always le */ - pSMB->ByteCount = 0; + rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, + (void **)&io_req, (void **)&io_rsp); + if (rc) + goto error; - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 0); - if (rc) { - cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc); - goto qreparse_out; - } + io_req->TotalParameterCount = 0; + io_req->TotalDataCount = 0; + io_req->MaxParameterCount = cpu_to_le32(2); + /* BB find exact data count max from sess structure BB */ + io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); + io_req->MaxSetupCount = 4; + io_req->Reserved = 0; + io_req->ParameterOffset = 0; + io_req->DataCount = 0; + io_req->DataOffset = 0; + io_req->SetupCount = 4; + io_req->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); + io_req->ParameterCount = io_req->TotalParameterCount; + io_req->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT); + io_req->IsFsctl = 1; + io_req->IsRootFlag = 0; + io_req->Fid = fid.netfid; + io_req->ByteCount = 0; + + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *)io_req, + (struct smb_hdr *)io_rsp, &io_rsp_len, 0); + if (rc) + goto error; - data_offset = le32_to_cpu(pSMBr->DataOffset); - data_count = le32_to_cpu(pSMBr->DataCount); - if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) { - /* BB also check enough total bytes returned */ - rc = -EIO; /* bad smb */ - goto qreparse_out; - } - if (!data_count || (data_count > 2048)) { + data_offset = le32_to_cpu(io_rsp->DataOffset); + data_count = le32_to_cpu(io_rsp->DataCount); + if (get_bcc(&io_rsp->hdr) < 2 || data_offset > 512 || + !data_count || data_count > 2048) { rc = -EIO; - cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n"); - goto qreparse_out; - } - end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount; - reparse_buf = (struct reparse_symlink_data *) - ((char *)&pSMBr->hdr.Protocol + data_offset); - if ((char *)reparse_buf >= end_of_smb) { - rc = -EIO; - goto qreparse_out; - } - if (reparse_buf->ReparseTag == cpu_to_le32(IO_REPARSE_TAG_NFS)) { - cifs_dbg(FYI, "NFS style reparse tag\n"); - posix_buf = (struct reparse_posix_data *)reparse_buf; - - if (posix_buf->InodeType != cpu_to_le64(NFS_SPECFILE_LNK)) { - cifs_dbg(FYI, "unsupported file type 0x%llx\n", - le64_to_cpu(posix_buf->InodeType)); - rc = -EOPNOTSUPP; - goto qreparse_out; - } - is_unicode = true; - sub_len = le16_to_cpu(reparse_buf->ReparseDataLength); - if (posix_buf->PathBuffer + sub_len > end_of_smb) { - cifs_dbg(FYI, "reparse buf beyond SMB\n"); - rc = -EIO; - goto qreparse_out; - } - *symlinkinfo = cifs_strndup_from_utf16(posix_buf->PathBuffer, - sub_len, is_unicode, nls_codepage); - goto qreparse_out; - } else if (reparse_buf->ReparseTag != - cpu_to_le32(IO_REPARSE_TAG_SYMLINK)) { - rc = -EOPNOTSUPP; - goto qreparse_out; + goto error; } - /* Reparse tag is NTFS symlink */ - sub_start = le16_to_cpu(reparse_buf->SubstituteNameOffset) + - reparse_buf->PathBuffer; - sub_len = le16_to_cpu(reparse_buf->SubstituteNameLength); - if (sub_start + sub_len > end_of_smb) { - cifs_dbg(FYI, "reparse buf beyond SMB\n"); + end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount; + start = (__u8 *)&io_rsp->hdr.Protocol + data_offset; + if (start >= end) { rc = -EIO; - goto qreparse_out; + goto error; } - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) - is_unicode = true; - else - is_unicode = false; - - /* BB FIXME investigate remapping reserved chars here */ - *symlinkinfo = cifs_strndup_from_utf16(sub_start, sub_len, is_unicode, - nls_codepage); - if (!*symlinkinfo) - rc = -ENOMEM; -qreparse_out: - cifs_buf_release(pSMB); - /* - * Note: On -EAGAIN error only caller can retry on handle based calls - * since file handle passed in no longer valid. - */ + *tag = le32_to_cpu(((struct reparse_data_buffer *)start)->ReparseTag); + rsp->iov_base = io_rsp; + rsp->iov_len = io_rsp_len; + *rsp_buftype = CIFS_LARGE_BUFFER; + CIFSSMBClose(xid, tcon, fid.netfid); + return 0; + +error: + cifs_buf_release(io_req); + CIFSSMBClose(xid, tcon, fid.netfid); return rc; } diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 86fbd3f847d6..09c5c0f5c96e 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -459,8 +459,7 @@ static int cifs_get_unix_fattr(const unsigned char *full_path, return -EOPNOTSUPP; rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, - &fattr->cf_symlink_target, - NULL); + &fattr->cf_symlink_target); cifs_dbg(FYI, "%s: query_symlink: %d\n", __func__, rc); } return rc; @@ -722,10 +721,51 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink); } +static inline dev_t nfs_mkdev(struct reparse_posix_data *buf) +{ + u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer); + + return MKDEV(v >> 32, v & 0xffffffff); +} + bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, - u32 tag) + struct cifs_open_info_data *data) { + struct reparse_posix_data *buf = data->reparse.posix; + u32 tag = data->reparse.tag; + + if (tag == IO_REPARSE_TAG_NFS && buf) { + switch (le64_to_cpu(buf->InodeType)) { + case NFS_SPECFILE_CHR: + fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_CHR; + fattr->cf_rdev = nfs_mkdev(buf); + break; + case NFS_SPECFILE_BLK: + fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_BLK; + fattr->cf_rdev = nfs_mkdev(buf); + break; + case NFS_SPECFILE_FIFO: + fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_FIFO; + break; + case NFS_SPECFILE_SOCK: + fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_SOCK; + break; + case NFS_SPECFILE_LNK: + fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_LNK; + break; + default: + WARN_ON_ONCE(1); + return false; + } + return true; + } + switch (tag) { case IO_REPARSE_TAG_LX_SYMLINK: fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode; @@ -750,7 +790,7 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, case 0: /* SMB1 symlink */ case IO_REPARSE_TAG_SYMLINK: case IO_REPARSE_TAG_NFS: - fattr->cf_mode = S_IFLNK; + fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_LNK; break; default: @@ -791,7 +831,7 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr, fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); if (cifs_open_data_reparse(data) && - cifs_reparse_point_to_fattr(cifs_sb, fattr, data->reparse_tag)) + cifs_reparse_point_to_fattr(cifs_sb, fattr, data)) goto out_reparse; if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { @@ -825,6 +865,8 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr, out_reparse: if (S_ISLNK(fattr->cf_mode)) { + if (likely(data->symlink_target)) + fattr->cf_eof = strnlen(data->symlink_target, PATH_MAX); fattr->cf_symlink_target = data->symlink_target; data->symlink_target = NULL; } @@ -856,7 +898,7 @@ cifs_get_file_info(struct file *filp) data.adjust_tz = false; if (data.symlink_target) { data.symlink = true; - data.reparse_tag = IO_REPARSE_TAG_SYMLINK; + data.reparse.tag = IO_REPARSE_TAG_SYMLINK; } cifs_open_info_to_fattr(&fattr, &data, inode->i_sb); break; @@ -1025,7 +1067,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct kvec rsp_iov, *iov = NULL; int rsp_buftype = CIFS_NO_BUFFER; - u32 tag = data->reparse_tag; + u32 tag = data->reparse.tag; int rc = 0; if (!tag && server->ops->query_reparse_point) { @@ -1035,22 +1077,28 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, if (!rc) iov = &rsp_iov; } - switch ((data->reparse_tag = tag)) { + + rc = -EOPNOTSUPP; + switch ((data->reparse.tag = tag)) { case 0: /* SMB1 symlink */ - iov = NULL; - fallthrough; - case IO_REPARSE_TAG_NFS: - case IO_REPARSE_TAG_SYMLINK: - if (!data->symlink_target && server->ops->query_symlink) { + if (server->ops->query_symlink) { rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, - &data->symlink_target, - iov); + &data->symlink_target); } break; case IO_REPARSE_TAG_MOUNT_POINT: cifs_create_junction_fattr(fattr, sb); + rc = 0; goto out; + default: + if (data->symlink_target) { + rc = 0; + } else if (server->ops->parse_reparse_point) { + rc = server->ops->parse_reparse_point(cifs_sb, + iov, data); + } + break; } cifs_open_info_to_fattr(fattr, data, sb); diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 47fc22de8d20..d30ea2005eb3 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -153,6 +153,10 @@ static bool reparse_file_needs_reval(const struct cifs_fattr *fattr) static void cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { + struct cifs_open_info_data data = { + .reparse = { .tag = fattr->cf_cifstag, }, + }; + fattr->cf_uid = cifs_sb->ctx->linux_uid; fattr->cf_gid = cifs_sb->ctx->linux_gid; @@ -165,7 +169,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) * reasonably map some of them to directories vs. files vs. symlinks */ if ((fattr->cf_cifsattrs & ATTR_REPARSE) && - cifs_reparse_point_to_fattr(cifs_sb, fattr, fattr->cf_cifstag)) + cifs_reparse_point_to_fattr(cifs_sb, fattr, &data)) goto out_reparse; if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 8b2d7c1ca428..816e01c5589b 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -332,10 +332,10 @@ cifs_disable_secondary_channels(struct cifs_ses *ses) if (iface) { spin_lock(&ses->iface_lock); - kref_put(&iface->refcount, release_iface); iface->num_channels--; if (iface->weight_fulfilled) iface->weight_fulfilled--; + kref_put(&iface->refcount, release_iface); spin_unlock(&ses->iface_lock); } diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 9bf8735cdd1e..a9eaba8083b0 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -976,64 +976,37 @@ static int cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - char **target_path, - struct kvec *rsp_iov) + char **target_path) { int rc; - int oplock = 0; - bool is_reparse_point = !!rsp_iov; - struct cifs_fid fid; - struct cifs_open_parms oparms; - cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path); - if (is_reparse_point) { - cifs_dbg(VFS, "reparse points not handled for SMB1 symlinks\n"); + if (!cap_unix(tcon->ses)) return -EOPNOTSUPP; - } - - /* Check for unix extensions */ - if (cap_unix(tcon->ses)) { - rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path, - cifs_sb->local_nls, - cifs_remap(cifs_sb)); - if (rc == -EREMOTE) - rc = cifs_unix_dfs_readlink(xid, tcon, full_path, - target_path, - cifs_sb->local_nls); - - goto out; - } - - oparms = (struct cifs_open_parms) { - .tcon = tcon, - .cifs_sb = cifs_sb, - .desired_access = FILE_READ_ATTRIBUTES, - .create_options = cifs_create_options(cifs_sb, - OPEN_REPARSE_POINT), - .disposition = FILE_OPEN, - .path = full_path, - .fid = &fid, - }; - - rc = CIFS_open(xid, &oparms, &oplock, NULL); - if (rc) - goto out; - - rc = CIFSSMBQuerySymLink(xid, tcon, fid.netfid, target_path, - cifs_sb->local_nls); - if (rc) - goto out_close; - convert_delimiter(*target_path, '/'); -out_close: - CIFSSMBClose(xid, tcon, fid.netfid); -out: - if (!rc) - cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path, + cifs_sb->local_nls, cifs_remap(cifs_sb)); + if (rc == -EREMOTE) + rc = cifs_unix_dfs_readlink(xid, tcon, full_path, + target_path, cifs_sb->local_nls); return rc; } +static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, + struct kvec *rsp_iov, + struct cifs_open_info_data *data) +{ + struct reparse_data_buffer *buf; + TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base; + bool unicode = !!(io->hdr.Flags2 & SMBFLG2_UNICODE); + u32 plen = le16_to_cpu(io->ByteCount); + + buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + + le32_to_cpu(io->DataOffset)); + return parse_reparse_point(buf, plen, cifs_sb, unicode, data); +} + static bool cifs_is_read_op(__u32 oplock) { @@ -1068,15 +1041,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; - int rc = -EPERM; - struct cifs_open_info_data buf = {}; - struct cifs_io_parms io_parms; - __u32 oplock = 0; - struct cifs_fid fid; - struct cifs_open_parms oparms; - unsigned int bytes_written; - struct win_dev *pdev; - struct kvec iov[2]; + int rc; if (tcon->unix_ext) { /* @@ -1110,74 +1075,18 @@ cifs_make_node(unsigned int xid, struct inode *inode, d_instantiate(dentry, newinode); return rc; } - /* - * SMB1 SFU emulation: should work with all servers, but only - * support block and char device (no socket & fifo) + * Check if mounted with mount parm 'sfu' mount parm. + * SFU emulation should work with all servers, but only + * supports block and char device (no socket & fifo), + * and was used by default in earlier versions of Windows */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) - return rc; - - if (!S_ISCHR(mode) && !S_ISBLK(mode)) - return rc; - - cifs_dbg(FYI, "sfu compat create special file\n"); - - oparms = (struct cifs_open_parms) { - .tcon = tcon, - .cifs_sb = cifs_sb, - .desired_access = GENERIC_WRITE, - .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | - CREATE_OPTION_SPECIAL), - .disposition = FILE_CREATE, - .path = full_path, - .fid = &fid, - }; - - if (tcon->ses->server->oplocks) - oplock = REQ_OPLOCK; - else - oplock = 0; - rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf); - if (rc) - return rc; - - /* - * BB Do not bother to decode buf since no local inode yet to put - * timestamps in, but we can reuse it safely. - */ - - pdev = (struct win_dev *)&buf.fi; - io_parms.pid = current->tgid; - io_parms.tcon = tcon; - io_parms.offset = 0; - io_parms.length = sizeof(struct win_dev); - iov[1].iov_base = &buf.fi; - iov[1].iov_len = sizeof(struct win_dev); - if (S_ISCHR(mode)) { - memcpy(pdev->type, "IntxCHR", 8); - pdev->major = cpu_to_le64(MAJOR(dev)); - pdev->minor = cpu_to_le64(MINOR(dev)); - rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, - &bytes_written, iov, 1); - } else if (S_ISBLK(mode)) { - memcpy(pdev->type, "IntxBLK", 8); - pdev->major = cpu_to_le64(MAJOR(dev)); - pdev->minor = cpu_to_le64(MINOR(dev)); - rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, - &bytes_written, iov, 1); - } - tcon->ses->server->ops->close(xid, tcon, &fid); - d_drop(dentry); - - /* FIXME: add code here to set EAs */ - - cifs_free_open_info(&buf); - return rc; + return -EPERM; + return cifs_sfu_make_node(xid, inode, dentry, tcon, + full_path, mode, dev); } - - struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -1214,6 +1123,7 @@ struct smb_version_operations smb1_operations = { .is_path_accessible = cifs_is_path_accessible, .can_echo = cifs_can_echo, .query_path_info = cifs_query_path_info, + .query_reparse_point = cifs_query_reparse_point, .query_file_info = cifs_query_file_info, .get_srv_inum = cifs_get_srv_inum, .set_path_size = CIFSSMBSetEOF, @@ -1229,6 +1139,7 @@ struct smb_version_operations smb1_operations = { .rename = CIFSSMBRename, .create_hardlink = CIFSCreateHardLink, .query_symlink = cifs_query_symlink, + .parse_reparse_point = cifs_parse_reparse_point, .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 0b89f7008ac0..c94940af5d4b 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -555,7 +555,7 @@ static int parse_create_response(struct cifs_open_info_data *data, break; } data->reparse_point = reparse_point; - data->reparse_tag = tag; + data->reparse.tag = tag; return rc; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index a959ed2c9b22..45931115f475 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2866,115 +2866,119 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, return rc; } -static int -parse_reparse_posix(struct reparse_posix_data *symlink_buf, - u32 plen, char **target_path, - struct cifs_sb_info *cifs_sb) +/* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */ +static int parse_reparse_posix(struct reparse_posix_data *buf, + struct cifs_sb_info *cifs_sb, + struct cifs_open_info_data *data) { unsigned int len; - - /* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */ - len = le16_to_cpu(symlink_buf->ReparseDataLength); - - if (le64_to_cpu(symlink_buf->InodeType) != NFS_SPECFILE_LNK) { - cifs_dbg(VFS, "%lld not a supported symlink type\n", - le64_to_cpu(symlink_buf->InodeType)); + u64 type; + + switch ((type = le64_to_cpu(buf->InodeType))) { + case NFS_SPECFILE_LNK: + len = le16_to_cpu(buf->ReparseDataLength); + data->symlink_target = cifs_strndup_from_utf16(buf->DataBuffer, + len, true, + cifs_sb->local_nls); + if (!data->symlink_target) + return -ENOMEM; + convert_delimiter(data->symlink_target, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", + __func__, data->symlink_target); + break; + case NFS_SPECFILE_CHR: + case NFS_SPECFILE_BLK: + case NFS_SPECFILE_FIFO: + case NFS_SPECFILE_SOCK: + break; + default: + cifs_dbg(VFS, "%s: unhandled inode type: 0x%llx\n", + __func__, type); return -EOPNOTSUPP; } - - *target_path = cifs_strndup_from_utf16( - symlink_buf->PathBuffer, - len, true, cifs_sb->local_nls); - if (!(*target_path)) - return -ENOMEM; - - convert_delimiter(*target_path, '/'); - cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); - return 0; } -static int -parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf, - u32 plen, char **target_path, - struct cifs_sb_info *cifs_sb) +static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym, + u32 plen, bool unicode, + struct cifs_sb_info *cifs_sb, + struct cifs_open_info_data *data) { - unsigned int sub_len; - unsigned int sub_offset; + unsigned int len; + unsigned int offs; /* We handle Symbolic Link reparse tag here. See: MS-FSCC 2.1.2.4 */ - sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset); - sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength); - if (sub_offset + 20 > plen || - sub_offset + sub_len + 20 > plen) { + offs = le16_to_cpu(sym->SubstituteNameOffset); + len = le16_to_cpu(sym->SubstituteNameLength); + if (offs + 20 > plen || offs + len + 20 > plen) { cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); return -EIO; } - *target_path = cifs_strndup_from_utf16( - symlink_buf->PathBuffer + sub_offset, - sub_len, true, cifs_sb->local_nls); - if (!(*target_path)) + data->symlink_target = cifs_strndup_from_utf16(sym->PathBuffer + offs, + len, unicode, + cifs_sb->local_nls); + if (!data->symlink_target) return -ENOMEM; - convert_delimiter(*target_path, '/'); - cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + convert_delimiter(data->symlink_target, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, data->symlink_target); return 0; } -static int -parse_reparse_point(struct reparse_data_buffer *buf, - u32 plen, char **target_path, - struct cifs_sb_info *cifs_sb) +int parse_reparse_point(struct reparse_data_buffer *buf, + u32 plen, struct cifs_sb_info *cifs_sb, + bool unicode, struct cifs_open_info_data *data) { - if (plen < sizeof(struct reparse_data_buffer)) { - cifs_dbg(VFS, "reparse buffer is too small. Must be at least 8 bytes but was %d\n", - plen); + if (plen < sizeof(*buf)) { + cifs_dbg(VFS, "%s: reparse buffer is too small. Must be at least 8 bytes but was %d\n", + __func__, plen); return -EIO; } - if (plen < le16_to_cpu(buf->ReparseDataLength) + - sizeof(struct reparse_data_buffer)) { - cifs_dbg(VFS, "srv returned invalid reparse buf length: %d\n", - plen); + if (plen < le16_to_cpu(buf->ReparseDataLength) + sizeof(*buf)) { + cifs_dbg(VFS, "%s: invalid reparse buf length: %d\n", + __func__, plen); return -EIO; } + data->reparse.buf = buf; + /* See MS-FSCC 2.1.2 */ switch (le32_to_cpu(buf->ReparseTag)) { case IO_REPARSE_TAG_NFS: - return parse_reparse_posix( - (struct reparse_posix_data *)buf, - plen, target_path, cifs_sb); + return parse_reparse_posix((struct reparse_posix_data *)buf, + cifs_sb, data); case IO_REPARSE_TAG_SYMLINK: return parse_reparse_symlink( (struct reparse_symlink_data_buffer *)buf, - plen, target_path, cifs_sb); + plen, unicode, cifs_sb, data); + case IO_REPARSE_TAG_LX_SYMLINK: + case IO_REPARSE_TAG_AF_UNIX: + case IO_REPARSE_TAG_LX_FIFO: + case IO_REPARSE_TAG_LX_CHR: + case IO_REPARSE_TAG_LX_BLK: + return 0; default: - cifs_dbg(VFS, "srv returned unknown symlink buffer tag:0x%08x\n", - le32_to_cpu(buf->ReparseTag)); + cifs_dbg(VFS, "%s: unhandled reparse tag: 0x%08x\n", + __func__, le32_to_cpu(buf->ReparseTag)); return -EOPNOTSUPP; } } -static int smb2_query_symlink(const unsigned int xid, - struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, - const char *full_path, - char **target_path, - struct kvec *rsp_iov) +static int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, + struct kvec *rsp_iov, + struct cifs_open_info_data *data) { struct reparse_data_buffer *buf; struct smb2_ioctl_rsp *io = rsp_iov->iov_base; u32 plen = le32_to_cpu(io->OutputCount); - cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); - buf = (struct reparse_data_buffer *)((u8 *)io + le32_to_cpu(io->OutputOffset)); - return parse_reparse_point(buf, plen, target_path, cifs_sb); + return parse_reparse_point(buf, plen, cifs_sb, true, data); } static int smb2_query_reparse_point(const unsigned int xid, @@ -3307,6 +3311,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, struct inode *inode = file_inode(file); struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsFileInfo *cfile = file->private_data; + unsigned long long new_size; long rc; unsigned int xid; __le64 eof; @@ -3337,10 +3342,15 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, /* * do we also need to change the size of the file? */ - if (keep_size == false && i_size_read(inode) < offset + len) { - eof = cpu_to_le64(offset + len); + new_size = offset + len; + if (keep_size == false && (unsigned long long)i_size_read(inode) < new_size) { + eof = cpu_to_le64(new_size); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof); + if (rc >= 0) { + truncate_setsize(inode, new_size); + fscache_resize_cookie(cifs_inode_cookie(inode), new_size); + } } zero_range_exit: @@ -3735,6 +3745,9 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, if (rc < 0) goto out_2; + truncate_setsize(inode, old_eof + len); + fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode)); + rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len); if (rc < 0) goto out_2; @@ -5064,41 +5077,24 @@ smb2_next_header(char *buf) return le32_to_cpu(hdr->NextCommand); } -static int -smb2_make_node(unsigned int xid, struct inode *inode, - struct dentry *dentry, struct cifs_tcon *tcon, - const char *full_path, umode_t mode, dev_t dev) +int cifs_sfu_make_node(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - int rc = -EPERM; struct cifs_open_info_data buf = {}; - struct cifs_io_parms io_parms = {0}; - __u32 oplock = 0; - struct cifs_fid fid; + struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + struct cifs_io_parms io_parms = {}; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_fid fid; unsigned int bytes_written; struct win_dev *pdev; struct kvec iov[2]; - - /* - * Check if mounted with mount parm 'sfu' mount parm. - * SFU emulation should work with all servers, but only - * supports block and char device (no socket & fifo), - * and was used by default in earlier versions of Windows - */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) - return rc; - - /* - * TODO: Add ability to create instead via reparse point. Windows (e.g. - * their current NFS server) uses this approach to expose special files - * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions - */ + __u32 oplock = server->oplocks ? REQ_OPLOCK : 0; + int rc; if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode)) - return rc; - - cifs_dbg(FYI, "sfu compat create special file\n"); + return -EPERM; oparms = (struct cifs_open_parms) { .tcon = tcon, @@ -5111,11 +5107,7 @@ smb2_make_node(unsigned int xid, struct inode *inode, .fid = &fid, }; - if (tcon->ses->server->oplocks) - oplock = REQ_OPLOCK; - else - oplock = 0; - rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf); + rc = server->ops->open(xid, &oparms, &oplock, &buf); if (rc) return rc; @@ -5123,42 +5115,56 @@ smb2_make_node(unsigned int xid, struct inode *inode, * BB Do not bother to decode buf since no local inode yet to put * timestamps in, but we can reuse it safely. */ - pdev = (struct win_dev *)&buf.fi; io_parms.pid = current->tgid; io_parms.tcon = tcon; - io_parms.offset = 0; - io_parms.length = sizeof(struct win_dev); - iov[1].iov_base = &buf.fi; - iov[1].iov_len = sizeof(struct win_dev); + io_parms.length = sizeof(*pdev); + iov[1].iov_base = pdev; + iov[1].iov_len = sizeof(*pdev); if (S_ISCHR(mode)) { memcpy(pdev->type, "IntxCHR", 8); pdev->major = cpu_to_le64(MAJOR(dev)); pdev->minor = cpu_to_le64(MINOR(dev)); - rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, - &bytes_written, iov, 1); } else if (S_ISBLK(mode)) { memcpy(pdev->type, "IntxBLK", 8); pdev->major = cpu_to_le64(MAJOR(dev)); pdev->minor = cpu_to_le64(MINOR(dev)); - rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, - &bytes_written, iov, 1); } else if (S_ISFIFO(mode)) { memcpy(pdev->type, "LnxFIFO", 8); - pdev->major = 0; - pdev->minor = 0; - rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, - &bytes_written, iov, 1); } - tcon->ses->server->ops->close(xid, tcon, &fid); - d_drop(dentry); + rc = server->ops->sync_write(xid, &fid, &io_parms, + &bytes_written, iov, 1); + server->ops->close(xid, tcon, &fid); + d_drop(dentry); /* FIXME: add code here to set EAs */ - cifs_free_open_info(&buf); return rc; } +static int smb2_make_node(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + + /* + * Check if mounted with mount parm 'sfu' mount parm. + * SFU emulation should work with all servers, but only + * supports block and char device (no socket & fifo), + * and was used by default in earlier versions of Windows + */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) + return -EPERM; + /* + * TODO: Add ability to create instead via reparse point. Windows (e.g. + * their current NFS server) uses this approach to expose special files + * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions + */ + return cifs_sfu_make_node(xid, inode, dentry, tcon, + full_path, mode, dev); +} + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, @@ -5209,7 +5215,7 @@ struct smb_version_operations smb20_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, @@ -5311,7 +5317,7 @@ struct smb_version_operations smb21_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, @@ -5416,7 +5422,7 @@ struct smb_version_operations smb30_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, @@ -5530,7 +5536,7 @@ struct smb_version_operations smb311_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 2eb29fa278c3..395e1230ddbc 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3472,12 +3472,10 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, } else { trace_smb3_close_done(xid, persistent_fid, tcon->tid, ses->Suid); - /* - * Note that have to subtract 4 since struct network_open_info - * has a final 4 byte pad that close response does not have - */ if (pbuf) - memcpy(pbuf, (char *)&rsp->CreationTime, sizeof(*pbuf) - 4); + memcpy(&pbuf->network_open_info, + &rsp->network_open_info, + sizeof(pbuf->network_open_info)); } atomic_dec(&tcon->num_remote_opens); diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 220994d0a0f7..db08194484e0 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -319,13 +319,15 @@ struct smb2_file_reparse_point_info { } __packed; struct smb2_file_network_open_info { - __le64 CreationTime; - __le64 LastAccessTime; - __le64 LastWriteTime; - __le64 ChangeTime; - __le64 AllocationSize; - __le64 EndOfFile; - __le32 Attributes; + struct_group(network_open_info, + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le64 AllocationSize; + __le64 EndOfFile; + __le32 Attributes; + ); __le32 Reserved; } __packed; /* level 34 Query also similar returned in close rsp and open rsp */ diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 8983f45f8430..9fbaaa387dcc 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -702,13 +702,16 @@ struct smb2_close_rsp { __le16 StructureSize; /* 60 */ __le16 Flags; __le32 Reserved; - __le64 CreationTime; - __le64 LastAccessTime; - __le64 LastWriteTime; - __le64 ChangeTime; - __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */ - __le64 EndOfFile; - __le32 Attributes; + struct_group(network_open_info, + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + /* Beginning of FILE_STANDARD_INFO equivalent */ + __le64 AllocationSize; + __le64 EndOfFile; + __le32 Attributes; + ); } __packed; diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c index a2ed441e837a..d7c676c151e2 100644 --- a/fs/smb/server/ksmbd_work.c +++ b/fs/smb/server/ksmbd_work.c @@ -56,6 +56,9 @@ void ksmbd_free_work_struct(struct ksmbd_work *work) kfree(work->tr_buf); kvfree(work->request_buf); kfree(work->iov); + if (!list_empty(&work->interim_entry)) + list_del(&work->interim_entry); + if (work->async_id) ksmbd_release_id(&work->conn->async_ida, work->async_id); kmem_cache_free(work_cache, work); @@ -106,7 +109,7 @@ static inline void __ksmbd_iov_pin(struct ksmbd_work *work, void *ib, static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len, void *aux_buf, unsigned int aux_size) { - struct aux_read *ar; + struct aux_read *ar = NULL; int need_iov_cnt = 1; if (aux_size) { @@ -123,8 +126,11 @@ static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len, new = krealloc(work->iov, sizeof(struct kvec) * work->iov_alloc_cnt, GFP_KERNEL | __GFP_ZERO); - if (!new) + if (!new) { + kfree(ar); + work->iov_alloc_cnt -= 4; return -ENOMEM; + } work->iov = new; } diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 9bc0103720f5..50c68beb71d6 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -833,7 +833,8 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo) interim_entry); setup_async_work(in_work, NULL, NULL); smb2_send_interim_resp(in_work, STATUS_PENDING); - list_del(&in_work->interim_entry); + list_del_init(&in_work->interim_entry); + release_async_work(in_work); } INIT_WORK(&work->work, __smb2_lease_break_noti); ksmbd_queue_work(work); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 658209839729..d369b98a6e10 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -657,13 +657,9 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg) { - struct smb2_hdr *rsp_hdr; struct ksmbd_conn *conn = work->conn; int id; - rsp_hdr = ksmbd_resp_buf_next(work); - rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND; - id = ksmbd_acquire_async_msg_id(&conn->async_ida); if (id < 0) { pr_err("Failed to alloc async message id\n"); @@ -671,7 +667,6 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg) } work->asynchronous = true; work->async_id = id; - rsp_hdr->Id.AsyncId = cpu_to_le64(id); ksmbd_debug(SMB, "Send interim Response to inform async request id : %d\n", @@ -723,6 +718,8 @@ void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status) __SMB2_HEADER_STRUCTURE_SIZE); rsp_hdr = smb2_get_msg(in_work->response_buf); + rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND; + rsp_hdr->Id.AsyncId = cpu_to_le64(work->async_id); smb2_set_err_rsp(in_work); rsp_hdr->Status = status; @@ -2380,7 +2377,8 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, rc = 0; } else { rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value, - le16_to_cpu(eabuf->EaValueLength), 0); + le16_to_cpu(eabuf->EaValueLength), + 0, true); if (rc < 0) { ksmbd_debug(SMB, "ksmbd_vfs_setxattr is failed(%d)\n", @@ -2443,7 +2441,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path, return -EBADF; } - rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0); + rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0, false); if (rc < 0) pr_err("Failed to store XATTR stream name :%d\n", rc); return 0; @@ -2518,7 +2516,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path * da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da); + rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, false); if (rc) ksmbd_debug(SMB, "failed to store file attribute into xattr\n"); } @@ -2608,7 +2606,7 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work, sizeof(struct create_sd_buf_req)) return -EINVAL; return set_info_sec(work->conn, work->tcon, path, &sd_buf->ntsd, - le32_to_cpu(sd_buf->ccontext.DataLength), true); + le32_to_cpu(sd_buf->ccontext.DataLength), true, false); } static void ksmbd_acls_fattr(struct smb_fattr *fattr, @@ -2690,7 +2688,7 @@ int smb2_open(struct ksmbd_work *work) *(char *)req->Buffer == '\\') { pr_err("not allow directory name included leading slash\n"); rc = -EINVAL; - goto err_out1; + goto err_out2; } name = smb2_get_name(req->Buffer, @@ -2701,7 +2699,7 @@ int smb2_open(struct ksmbd_work *work) if (rc != -ENOMEM) rc = -ENOENT; name = NULL; - goto err_out1; + goto err_out2; } ksmbd_debug(SMB, "converted name = %s\n", name); @@ -2709,28 +2707,28 @@ int smb2_open(struct ksmbd_work *work) if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_STREAMS)) { rc = -EBADF; - goto err_out1; + goto err_out2; } rc = parse_stream_name(name, &stream_name, &s_type); if (rc < 0) - goto err_out1; + goto err_out2; } rc = ksmbd_validate_filename(name); if (rc < 0) - goto err_out1; + goto err_out2; if (ksmbd_share_veto_filename(share, name)) { rc = -ENOENT; ksmbd_debug(SMB, "Reject open(), vetoed file: %s\n", name); - goto err_out1; + goto err_out2; } } else { name = kstrdup("", GFP_KERNEL); if (!name) { rc = -ENOMEM; - goto err_out1; + goto err_out2; } } @@ -2743,14 +2741,14 @@ int smb2_open(struct ksmbd_work *work) le32_to_cpu(req->ImpersonationLevel)); rc = -EIO; rsp->hdr.Status = STATUS_BAD_IMPERSONATION_LEVEL; - goto err_out1; + goto err_out2; } if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK_LE)) { pr_err("Invalid create options : 0x%x\n", le32_to_cpu(req->CreateOptions)); rc = -EINVAL; - goto err_out1; + goto err_out2; } else { if (req->CreateOptions & FILE_SEQUENTIAL_ONLY_LE && req->CreateOptions & FILE_RANDOM_ACCESS_LE) @@ -2760,13 +2758,13 @@ int smb2_open(struct ksmbd_work *work) (FILE_OPEN_BY_FILE_ID_LE | CREATE_TREE_CONNECTION | FILE_RESERVE_OPFILTER_LE)) { rc = -EOPNOTSUPP; - goto err_out1; + goto err_out2; } if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) { if (req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE) { rc = -EINVAL; - goto err_out1; + goto err_out2; } else if (req->CreateOptions & FILE_NO_COMPRESSION_LE) { req->CreateOptions = ~(FILE_NO_COMPRESSION_LE); } @@ -2778,21 +2776,21 @@ int smb2_open(struct ksmbd_work *work) pr_err("Invalid create disposition : 0x%x\n", le32_to_cpu(req->CreateDisposition)); rc = -EINVAL; - goto err_out1; + goto err_out2; } if (!(req->DesiredAccess & DESIRED_ACCESS_MASK)) { pr_err("Invalid desired access : 0x%x\n", le32_to_cpu(req->DesiredAccess)); rc = -EACCES; - goto err_out1; + goto err_out2; } if (req->FileAttributes && !(req->FileAttributes & FILE_ATTRIBUTE_MASK_LE)) { pr_err("Invalid file attribute : 0x%x\n", le32_to_cpu(req->FileAttributes)); rc = -EINVAL; - goto err_out1; + goto err_out2; } if (req->CreateContextsOffset) { @@ -2800,19 +2798,19 @@ int smb2_open(struct ksmbd_work *work) context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER, 4); if (IS_ERR(context)) { rc = PTR_ERR(context); - goto err_out1; + goto err_out2; } else if (context) { ea_buf = (struct create_ea_buf_req *)context; if (le16_to_cpu(context->DataOffset) + le32_to_cpu(context->DataLength) < sizeof(struct create_ea_buf_req)) { rc = -EINVAL; - goto err_out1; + goto err_out2; } if (req->CreateOptions & FILE_NO_EA_KNOWLEDGE_LE) { rsp->hdr.Status = STATUS_ACCESS_DENIED; rc = -EACCES; - goto err_out1; + goto err_out2; } } @@ -2820,7 +2818,7 @@ int smb2_open(struct ksmbd_work *work) SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST, 4); if (IS_ERR(context)) { rc = PTR_ERR(context); - goto err_out1; + goto err_out2; } else if (context) { ksmbd_debug(SMB, "get query maximal access context\n"); @@ -2831,11 +2829,11 @@ int smb2_open(struct ksmbd_work *work) SMB2_CREATE_TIMEWARP_REQUEST, 4); if (IS_ERR(context)) { rc = PTR_ERR(context); - goto err_out1; + goto err_out2; } else if (context) { ksmbd_debug(SMB, "get timewarp context\n"); rc = -EBADF; - goto err_out1; + goto err_out2; } if (tcon->posix_extensions) { @@ -2843,7 +2841,7 @@ int smb2_open(struct ksmbd_work *work) SMB2_CREATE_TAG_POSIX, 16); if (IS_ERR(context)) { rc = PTR_ERR(context); - goto err_out1; + goto err_out2; } else if (context) { struct create_posix *posix = (struct create_posix *)context; @@ -2851,7 +2849,7 @@ int smb2_open(struct ksmbd_work *work) le32_to_cpu(context->DataLength) < sizeof(struct create_posix) - 4) { rc = -EINVAL; - goto err_out1; + goto err_out2; } ksmbd_debug(SMB, "get posix context\n"); @@ -2863,7 +2861,7 @@ int smb2_open(struct ksmbd_work *work) if (ksmbd_override_fsids(work)) { rc = -ENOMEM; - goto err_out1; + goto err_out2; } rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS, @@ -3038,7 +3036,7 @@ int smb2_open(struct ksmbd_work *work) } } - rc = ksmbd_query_inode_status(d_inode(path.dentry->d_parent)); + rc = ksmbd_query_inode_status(path.dentry->d_parent); if (rc == KSMBD_INODE_STATUS_PENDING_DELETE) { rc = -EBUSY; goto err_out; @@ -3152,7 +3150,8 @@ int smb2_open(struct ksmbd_work *work) idmap, &path, pntsd, - pntsd_size); + pntsd_size, + false); kfree(pntsd); if (rc) pr_err("failed to store ntacl in xattr : %d\n", @@ -3175,11 +3174,6 @@ int smb2_open(struct ksmbd_work *work) fp->attrib_only = !(req->DesiredAccess & ~(FILE_READ_ATTRIBUTES_LE | FILE_WRITE_ATTRIBUTES_LE | FILE_SYNCHRONIZE_LE)); - if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC && - !fp->attrib_only && !stream_name) { - smb_break_all_oplock(work, fp); - need_truncate = 1; - } /* fp should be searchable through ksmbd_inode.m_fp_list * after daccess, saccess, attrib_only, and stream are @@ -3195,13 +3189,39 @@ int smb2_open(struct ksmbd_work *work) goto err_out; } + rc = ksmbd_vfs_getattr(&path, &stat); + if (rc) + goto err_out; + + if (stat.result_mask & STATX_BTIME) + fp->create_time = ksmbd_UnixTimeToNT(stat.btime); + else + fp->create_time = ksmbd_UnixTimeToNT(stat.ctime); + if (req->FileAttributes || fp->f_ci->m_fattr == 0) + fp->f_ci->m_fattr = + cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes))); + + if (!created) + smb2_update_xattrs(tcon, &path, fp); + else + smb2_new_xattrs(tcon, &path, fp); + + if (file_present || created) + ksmbd_vfs_kern_path_unlock(&parent_path, &path); + + if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC && + !fp->attrib_only && !stream_name) { + smb_break_all_oplock(work, fp); + need_truncate = 1; + } + share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp); if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) || (req_op_level == SMB2_OPLOCK_LEVEL_LEASE && !(conn->vals->capabilities & SMB2_GLOBAL_CAP_LEASING))) { if (share_ret < 0 && !S_ISDIR(file_inode(fp->filp)->i_mode)) { rc = share_ret; - goto err_out; + goto err_out1; } } else { if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) { @@ -3211,7 +3231,7 @@ int smb2_open(struct ksmbd_work *work) name, req_op_level, lc->req_state); rc = find_same_lease_key(sess, fp->f_ci, lc); if (rc) - goto err_out; + goto err_out1; } else if (open_flags == O_RDONLY && (req_op_level == SMB2_OPLOCK_LEVEL_BATCH || req_op_level == SMB2_OPLOCK_LEVEL_EXCLUSIVE)) @@ -3222,16 +3242,16 @@ int smb2_open(struct ksmbd_work *work) le32_to_cpu(req->hdr.Id.SyncId.TreeId), lc, share_ret); if (rc < 0) - goto err_out; + goto err_out1; } if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE) ksmbd_fd_set_delete_on_close(fp, file_info); if (need_truncate) { - rc = smb2_create_truncate(&path); + rc = smb2_create_truncate(&fp->filp->f_path); if (rc) - goto err_out; + goto err_out1; } if (req->CreateContextsOffset) { @@ -3241,7 +3261,7 @@ int smb2_open(struct ksmbd_work *work) SMB2_CREATE_ALLOCATION_SIZE, 4); if (IS_ERR(az_req)) { rc = PTR_ERR(az_req); - goto err_out; + goto err_out1; } else if (az_req) { loff_t alloc_size; int err; @@ -3250,7 +3270,7 @@ int smb2_open(struct ksmbd_work *work) le32_to_cpu(az_req->ccontext.DataLength) < sizeof(struct create_alloc_size_req)) { rc = -EINVAL; - goto err_out; + goto err_out1; } alloc_size = le64_to_cpu(az_req->AllocationSize); ksmbd_debug(SMB, @@ -3268,30 +3288,13 @@ int smb2_open(struct ksmbd_work *work) context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID, 4); if (IS_ERR(context)) { rc = PTR_ERR(context); - goto err_out; + goto err_out1; } else if (context) { ksmbd_debug(SMB, "get query on disk id context\n"); query_disk_id = 1; } } - rc = ksmbd_vfs_getattr(&path, &stat); - if (rc) - goto err_out; - - if (stat.result_mask & STATX_BTIME) - fp->create_time = ksmbd_UnixTimeToNT(stat.btime); - else - fp->create_time = ksmbd_UnixTimeToNT(stat.ctime); - if (req->FileAttributes || fp->f_ci->m_fattr == 0) - fp->f_ci->m_fattr = - cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes))); - - if (!created) - smb2_update_xattrs(tcon, &path, fp); - else - smb2_new_xattrs(tcon, &path, fp); - memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE); rsp->StructureSize = cpu_to_le16(89); @@ -3398,13 +3401,13 @@ int smb2_open(struct ksmbd_work *work) } err_out: - if (file_present || created) { - inode_unlock(d_inode(parent_path.dentry)); - path_put(&path); - path_put(&parent_path); - } - ksmbd_revert_fsids(work); + if (rc && (file_present || created)) + ksmbd_vfs_kern_path_unlock(&parent_path, &path); + err_out1: + ksmbd_revert_fsids(work); + +err_out2: if (!rc) { ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED); rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); @@ -5537,7 +5540,7 @@ static int smb2_rename(struct ksmbd_work *work, rc = ksmbd_vfs_setxattr(file_mnt_idmap(fp->filp), &fp->filp->f_path, xattr_stream_name, - NULL, 0, 0); + NULL, 0, 0, true); if (rc < 0) { pr_err("failed to store stream name in xattr: %d\n", rc); @@ -5630,11 +5633,9 @@ static int smb2_create_link(struct ksmbd_work *work, if (rc) rc = -EINVAL; out: - if (file_present) { - inode_unlock(d_inode(parent_path.dentry)); - path_put(&path); - path_put(&parent_path); - } + if (file_present) + ksmbd_vfs_kern_path_unlock(&parent_path, &path); + if (!IS_ERR(link_name)) kfree(link_name); kfree(pathname); @@ -5701,7 +5702,8 @@ static int set_file_basic_info(struct ksmbd_file *fp, da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da); + rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da, + true); if (rc) ksmbd_debug(SMB, "failed to restore file attribute in EA\n"); @@ -6013,7 +6015,7 @@ static int smb2_set_info_sec(struct ksmbd_file *fp, int addition_info, fp->saccess |= FILE_SHARE_DELETE_LE; return set_info_sec(fp->conn, fp->tcon, &fp->filp->f_path, pntsd, - buf_len, false); + buf_len, false, true); } /** @@ -7582,7 +7584,8 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id, da.attr = le32_to_cpu(fp->f_ci->m_fattr); ret = ksmbd_vfs_set_dos_attrib_xattr(idmap, - &fp->filp->f_path, &da); + &fp->filp->f_path, + &da, true); if (ret) fp->f_ci->m_fattr = old_fattr; } @@ -8231,7 +8234,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) return; err_out: - opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); atomic_dec(&opinfo->breaking_cnt); wake_up_interruptible_all(&opinfo->oplock_brk); diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 51b8bfab7481..1164365533f0 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1185,7 +1185,7 @@ pass: pntsd_size += sizeof(struct smb_acl) + nt_size; } - ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size); + ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size, false); kfree(pntsd); } @@ -1377,7 +1377,7 @@ err_out: int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, const struct path *path, struct smb_ntsd *pntsd, int ntsd_len, - bool type_check) + bool type_check, bool get_write) { int rc; struct smb_fattr fattr = {{0}}; @@ -1437,7 +1437,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) { /* Update WinACL in xattr */ ksmbd_vfs_remove_sd_xattrs(idmap, path); - ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len); + ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len, + get_write); } out: diff --git a/fs/smb/server/smbacl.h b/fs/smb/server/smbacl.h index 49a8c292bd2e..2b52861707d8 100644 --- a/fs/smb/server/smbacl.h +++ b/fs/smb/server/smbacl.h @@ -207,7 +207,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, __le32 *pdaccess, int uid); int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, const struct path *path, struct smb_ntsd *pntsd, int ntsd_len, - bool type_check); + bool type_check, bool get_write); void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid); void ksmbd_init_domain(u32 *sub_auth); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index c53dea5598fc..9091dcd7a310 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -97,6 +97,13 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, return -ENOENT; } + err = mnt_want_write(parent_path->mnt); + if (err) { + path_put(parent_path); + putname(filename); + return -ENOENT; + } + inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT); d = lookup_one_qstr_excl(&last, parent_path->dentry, 0); if (IS_ERR(d)) @@ -123,6 +130,7 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, err_out: inode_unlock(d_inode(parent_path->dentry)); + mnt_drop_write(parent_path->mnt); path_put(parent_path); putname(filename); return -ENOENT; @@ -451,7 +459,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, fp->stream.name, (void *)stream_buf, size, - 0); + 0, + true); if (err < 0) goto out; @@ -593,10 +602,6 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path) goto out_err; } - err = mnt_want_write(path->mnt); - if (err) - goto out_err; - idmap = mnt_idmap(path->mnt); if (S_ISDIR(d_inode(path->dentry)->i_mode)) { err = vfs_rmdir(idmap, d_inode(parent), path->dentry); @@ -607,7 +612,6 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path) if (err) ksmbd_debug(VFS, "unlink failed, err %d\n", err); } - mnt_drop_write(path->mnt); out_err: ksmbd_revert_fsids(work); @@ -715,7 +719,7 @@ retry: goto out3; } - parent_fp = ksmbd_lookup_fd_inode(d_inode(old_child->d_parent)); + parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent); if (parent_fp) { if (parent_fp->daccess & FILE_DELETE_LE) { pr_err("parent dir is opened with delete access\n"); @@ -907,18 +911,22 @@ ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap, * @attr_value: xattr value to set * @attr_size: size of xattr value * @flags: destination buffer length + * @get_write: get write access to a mount * * Return: 0 on success, otherwise error */ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, const struct path *path, const char *attr_name, - void *attr_value, size_t attr_size, int flags) + void *attr_value, size_t attr_size, int flags, + bool get_write) { int err; - err = mnt_want_write(path->mnt); - if (err) - return err; + if (get_write == true) { + err = mnt_want_write(path->mnt); + if (err) + return err; + } err = vfs_setxattr(idmap, path->dentry, @@ -928,7 +936,8 @@ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, flags); if (err) ksmbd_debug(VFS, "setxattr failed, err %d\n", err); - mnt_drop_write(path->mnt); + if (get_write == true) + mnt_drop_write(path->mnt); return err; } @@ -1252,6 +1261,13 @@ out1: } if (!err) { + err = mnt_want_write(parent_path->mnt); + if (err) { + path_put(path); + path_put(parent_path); + return err; + } + err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry); if (err) { path_put(path); @@ -1261,6 +1277,14 @@ out1: return err; } +void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path) +{ + inode_unlock(d_inode(parent_path->dentry)); + mnt_drop_write(parent_path->mnt); + path_put(path); + path_put(parent_path); +} + struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, const char *name, unsigned int flags, @@ -1415,7 +1439,8 @@ out: int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, const struct path *path, - struct smb_ntsd *pntsd, int len) + struct smb_ntsd *pntsd, int len, + bool get_write) { int rc; struct ndr sd_ndr = {0}, acl_ndr = {0}; @@ -1475,7 +1500,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, rc = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_SD, sd_ndr.data, - sd_ndr.offset, 0); + sd_ndr.offset, 0, get_write); if (rc < 0) pr_err("Failed to store XATTR ntacl :%d\n", rc); @@ -1564,7 +1589,8 @@ free_n_data: int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, const struct path *path, - struct xattr_dos_attrib *da) + struct xattr_dos_attrib *da, + bool get_write) { struct ndr n; int err; @@ -1574,7 +1600,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, return err; err = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_DOS_ATTRIBUTE, - (void *)n.data, n.offset, 0); + (void *)n.data, n.offset, 0, get_write); if (err) ksmbd_debug(SMB, "failed to store dos attribute in xattr\n"); kfree(n.data); @@ -1846,10 +1872,6 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, } posix_state_to_acl(&acl_state, acls->a_entries); - rc = mnt_want_write(path->mnt); - if (rc) - goto out_err; - rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", @@ -1861,9 +1883,7 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", rc); } - mnt_drop_write(path->mnt); -out_err: free_acl_state(&acl_state); posix_acl_release(acls); return rc; @@ -1893,10 +1913,6 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, } } - rc = mnt_want_write(path->mnt); - if (rc) - goto out_err; - rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", @@ -1908,9 +1924,7 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", rc); } - mnt_drop_write(path->mnt); -out_err: posix_acl_release(acls); return rc; } diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index 00968081856e..cfe1c8092f23 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -109,7 +109,8 @@ ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap, int attr_name_len); int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, const struct path *path, const char *attr_name, - void *attr_value, size_t attr_size, int flags); + void *attr_value, size_t attr_size, int flags, + bool get_write); int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, size_t *xattr_stream_name_size, int s_type); int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, @@ -117,6 +118,7 @@ int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, unsigned int flags, struct path *parent_path, struct path *path, bool caseless); +void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path); struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, const char *name, unsigned int flags, @@ -144,14 +146,16 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path) int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, const struct path *path, - struct smb_ntsd *pntsd, int len); + struct smb_ntsd *pntsd, int len, + bool get_write); int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, struct dentry *dentry, struct smb_ntsd **pntsd); int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, const struct path *path, - struct xattr_dos_attrib *da); + struct xattr_dos_attrib *da, + bool get_write); int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_dos_attrib *da); diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index c91eac6514dd..ddf233994ddb 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -66,14 +66,14 @@ static unsigned long inode_hash(struct super_block *sb, unsigned long hashval) return tmp & inode_hash_mask; } -static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode) +static struct ksmbd_inode *__ksmbd_inode_lookup(struct dentry *de) { struct hlist_head *head = inode_hashtable + - inode_hash(inode->i_sb, inode->i_ino); + inode_hash(d_inode(de)->i_sb, (unsigned long)de); struct ksmbd_inode *ci = NULL, *ret_ci = NULL; hlist_for_each_entry(ci, head, m_hash) { - if (ci->m_inode == inode) { + if (ci->m_de == de) { if (atomic_inc_not_zero(&ci->m_count)) ret_ci = ci; break; @@ -84,26 +84,16 @@ static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode) static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp) { - return __ksmbd_inode_lookup(file_inode(fp->filp)); + return __ksmbd_inode_lookup(fp->filp->f_path.dentry); } -static struct ksmbd_inode *ksmbd_inode_lookup_by_vfsinode(struct inode *inode) -{ - struct ksmbd_inode *ci; - - read_lock(&inode_hash_lock); - ci = __ksmbd_inode_lookup(inode); - read_unlock(&inode_hash_lock); - return ci; -} - -int ksmbd_query_inode_status(struct inode *inode) +int ksmbd_query_inode_status(struct dentry *dentry) { struct ksmbd_inode *ci; int ret = KSMBD_INODE_STATUS_UNKNOWN; read_lock(&inode_hash_lock); - ci = __ksmbd_inode_lookup(inode); + ci = __ksmbd_inode_lookup(dentry); if (ci) { ret = KSMBD_INODE_STATUS_OK; if (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS)) @@ -143,7 +133,7 @@ void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp, static void ksmbd_inode_hash(struct ksmbd_inode *ci) { struct hlist_head *b = inode_hashtable + - inode_hash(ci->m_inode->i_sb, ci->m_inode->i_ino); + inode_hash(d_inode(ci->m_de)->i_sb, (unsigned long)ci->m_de); hlist_add_head(&ci->m_hash, b); } @@ -157,7 +147,6 @@ static void ksmbd_inode_unhash(struct ksmbd_inode *ci) static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp) { - ci->m_inode = file_inode(fp->filp); atomic_set(&ci->m_count, 1); atomic_set(&ci->op_count, 0); atomic_set(&ci->sop_count, 0); @@ -166,6 +155,7 @@ static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp) INIT_LIST_HEAD(&ci->m_fp_list); INIT_LIST_HEAD(&ci->m_op_list); rwlock_init(&ci->m_lock); + ci->m_de = fp->filp->f_path.dentry; return 0; } @@ -488,12 +478,15 @@ struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid) return fp; } -struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode) +struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry) { struct ksmbd_file *lfp; struct ksmbd_inode *ci; + struct inode *inode = d_inode(dentry); - ci = ksmbd_inode_lookup_by_vfsinode(inode); + read_lock(&inode_hash_lock); + ci = __ksmbd_inode_lookup(dentry); + read_unlock(&inode_hash_lock); if (!ci) return NULL; diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 03d0bf941216..8325cf4527c4 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -51,7 +51,7 @@ struct ksmbd_inode { atomic_t op_count; /* opinfo count for streams */ atomic_t sop_count; - struct inode *m_inode; + struct dentry *m_de; unsigned int m_flags; struct hlist_node m_hash; struct list_head m_fp_list; @@ -140,7 +140,7 @@ struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id, void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp); struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id); struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid); -struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode); +struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry); unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp); struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp); void ksmbd_close_tree_conn_fds(struct ksmbd_work *work); @@ -164,7 +164,7 @@ enum KSMBD_INODE_STATUS { KSMBD_INODE_STATUS_PENDING_DELETE, }; -int ksmbd_query_inode_status(struct inode *inode); +int ksmbd_query_inode_status(struct dentry *dentry); bool ksmbd_inode_pending_delete(struct ksmbd_file *fp); void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp); void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp); diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index f8a594a50ae6..0b90869fd805 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -27,16 +27,16 @@ /* * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access * to the ei->dentry must be done under this mutex and after checking - * if ei->is_freed is not set. The ei->dentry is released under the - * mutex at the same time ei->is_freed is set. If ei->is_freed is set - * then the ei->dentry is invalid. + * if ei->is_freed is not set. When ei->is_freed is set, the dentry + * is on its way to being freed after the last dput() is made on it. */ static DEFINE_MUTEX(eventfs_mutex); /* * The eventfs_inode (ei) itself is protected by SRCU. It is released from * its parent's list and will have is_freed set (under eventfs_mutex). - * After the SRCU grace period is over, the ei may be freed. + * After the SRCU grace period is over and the last dput() is called + * the ei is freed. */ DEFINE_STATIC_SRCU(eventfs_srcu); @@ -95,7 +95,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, if (!(dentry->d_inode->i_mode & S_IFDIR)) { if (!ei->entry_attrs) { ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries, - GFP_KERNEL); + GFP_NOFS); if (!ei->entry_attrs) { ret = -ENOMEM; goto out; @@ -326,7 +326,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx, struct eventfs_attr *attr = NULL; struct dentry **e_dentry = &ei->d_children[idx]; struct dentry *dentry; - bool invalidate = false; + + WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); mutex_lock(&eventfs_mutex); if (ei->is_freed) { @@ -348,15 +349,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx, mutex_unlock(&eventfs_mutex); - /* The lookup already has the parent->d_inode locked */ - if (!lookup) - inode_lock(parent->d_inode); - dentry = create_file(name, mode, attr, parent, data, fops); - if (!lookup) - inode_unlock(parent->d_inode); - mutex_lock(&eventfs_mutex); if (IS_ERR_OR_NULL(dentry)) { @@ -365,12 +359,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx, * created the dentry for this e_dentry. In which case * use that one. * - * Note, with the mutex held, the e_dentry cannot have content - * and the ei->is_freed be true at the same time. + * If ei->is_freed is set, the e_dentry is currently on its + * way to being freed, don't return it. If e_dentry is NULL + * it means it was already freed. */ - dentry = *e_dentry; - if (WARN_ON_ONCE(dentry && ei->is_freed)) + if (ei->is_freed) dentry = NULL; + else + dentry = *e_dentry; /* The lookup does not need to up the dentry refcount */ if (dentry && !lookup) dget(dentry); @@ -387,17 +383,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx, * Otherwise it means two dentries exist with the same name. */ WARN_ON_ONCE(!ei->is_freed); - invalidate = true; + dentry = NULL; } mutex_unlock(&eventfs_mutex); - if (invalidate) - d_invalidate(dentry); - - if (lookup || invalidate) + if (lookup) dput(dentry); - return invalidate ? NULL : dentry; + return dentry; } /** @@ -437,9 +430,10 @@ static struct dentry * create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, struct dentry *parent, bool lookup) { - bool invalidate = false; struct dentry *dentry = NULL; + WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); + mutex_lock(&eventfs_mutex); if (pei->is_freed || ei->is_freed) { mutex_unlock(&eventfs_mutex); @@ -456,15 +450,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, } mutex_unlock(&eventfs_mutex); - /* The lookup already has the parent->d_inode locked */ - if (!lookup) - inode_lock(parent->d_inode); - dentry = create_dir(ei, parent); - if (!lookup) - inode_unlock(parent->d_inode); - mutex_lock(&eventfs_mutex); if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { @@ -473,8 +460,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, * created the dentry for this e_dentry. In which case * use that one. * - * Note, with the mutex held, the e_dentry cannot have content - * and the ei->is_freed be true at the same time. + * If ei->is_freed is set, the e_dentry is currently on its + * way to being freed. */ dentry = ei->dentry; if (dentry && !lookup) @@ -493,16 +480,14 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, * Otherwise it means two dentries exist with the same name. */ WARN_ON_ONCE(!ei->is_freed); - invalidate = true; + dentry = NULL; } mutex_unlock(&eventfs_mutex); - if (invalidate) - d_invalidate(dentry); - if (lookup || invalidate) + if (lookup) dput(dentry); - return invalidate ? NULL : dentry; + return dentry; } /** @@ -632,7 +617,7 @@ static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt) { struct dentry **tmp; - tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL); + tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS); if (!tmp) return -1; tmp[cnt] = d; @@ -698,6 +683,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) return -ENOMEM; } + inode_lock(parent->d_inode); list_for_each_entry_srcu(ei_child, &ei->children, list, srcu_read_lock_held(&eventfs_srcu)) { d = create_dir_dentry(ei, ei_child, parent, false); @@ -730,6 +716,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) cnt++; } } + inode_unlock(parent->d_inode); srcu_read_unlock(&eventfs_srcu, idx); ret = dcache_dir_open(inode, file); diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 5b54948514fe..ae648deed019 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -509,20 +509,15 @@ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent) struct dentry *dentry; int error; + /* Must always have a parent. */ + if (WARN_ON_ONCE(!parent)) + return ERR_PTR(-EINVAL); + error = simple_pin_fs(&trace_fs_type, &tracefs_mount, &tracefs_mount_count); if (error) return ERR_PTR(error); - /* - * If the parent is not specified, we create it in the root. - * We need the root dentry to do this, which is in the super - * block. A pointer to that is in the struct vfsmount that we - * have around. - */ - if (!parent) - parent = tracefs_mount->mnt_root; - if (unlikely(IS_DEADDIR(parent->d_inode))) dentry = ERR_PTR(-ENOENT); else diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index ac6ba646624d..a013b87ab8d5 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -562,7 +562,8 @@ xfs_dquot_from_disk( struct xfs_dquot *dqp, struct xfs_buf *bp) { - struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset; + struct xfs_dqblk *dqb = xfs_buf_offset(bp, dqp->q_bufoffset); + struct xfs_disk_dquot *ddqp = &dqb->dd_diskdq; /* * Ensure that we got the type and ID we were looking for. @@ -1250,7 +1251,7 @@ xfs_qm_dqflush( } /* Flush the incore dquot to the ondisk buffer. */ - dqblk = bp->b_addr + dqp->q_bufoffset; + dqblk = xfs_buf_offset(bp, dqp->q_bufoffset); xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp); /* diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c index 8966ba842395..2c2720ce6923 100644 --- a/fs/xfs/xfs_dquot_item_recover.c +++ b/fs/xfs/xfs_dquot_item_recover.c @@ -19,6 +19,7 @@ #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" +#include "xfs_error.h" STATIC void xlog_recover_dquot_ra_pass2( @@ -65,6 +66,7 @@ xlog_recover_dquot_commit_pass2( { struct xfs_mount *mp = log->l_mp; struct xfs_buf *bp; + struct xfs_dqblk *dqb; struct xfs_disk_dquot *ddq, *recddq; struct xfs_dq_logformat *dq_f; xfs_failaddr_t fa; @@ -130,14 +132,14 @@ xlog_recover_dquot_commit_pass2( return error; ASSERT(bp); - ddq = xfs_buf_offset(bp, dq_f->qlf_boffset); + dqb = xfs_buf_offset(bp, dq_f->qlf_boffset); + ddq = &dqb->dd_diskdq; /* * If the dquot has an LSN in it, recover the dquot only if it's less * than the lsn of the transaction we are replaying. */ if (xfs_has_crc(mp)) { - struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq; xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn); if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { @@ -147,10 +149,23 @@ xlog_recover_dquot_commit_pass2( memcpy(ddq, recddq, item->ri_buf[1].i_len); if (xfs_has_crc(mp)) { - xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), + xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } + /* Validate the recovered dquot. */ + fa = xfs_dqblk_verify(log->l_mp, dqb, dq_f->qlf_id); + if (fa) { + XFS_CORRUPTION_ERROR("Bad dquot after recovery", + XFS_ERRLEVEL_LOW, mp, dqb, + sizeof(struct xfs_dqblk)); + xfs_alert(mp, + "Metadata corruption detected at %pS, dquot 0x%x", + fa, dq_f->qlf_id); + error = -EFSCORRUPTED; + goto out_release; + } + ASSERT(dq_f->qlf_size == 2); ASSERT(bp->b_mount == mp); bp->b_flags |= _XBF_LOGRECOVERY; |