summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/afs/dir.c | 25
-rw-r--r-- fs/afs/dir_edit.c | 91
-rw-r--r-- fs/afs/internal.h | 2
-rw-r--r-- fs/autofs/dev-ioctl.c | 5
-rw-r--r-- fs/bcachefs/alloc_background.h | 3
-rw-r--r-- fs/bcachefs/alloc_foreground.c | 19
-rw-r--r-- fs/bcachefs/bcachefs.h | 1
-rw-r--r-- fs/bcachefs/btree_iter.c | 13
-rw-r--r-- fs/bcachefs/data_update.c | 21
-rw-r--r-- fs/bcachefs/data_update.h | 3
-rw-r--r-- fs/bcachefs/ec.c | 4
-rw-r--r-- fs/bcachefs/errcode.h | 2
-rw-r--r-- fs/bcachefs/extents.c | 86
-rw-r--r-- fs/bcachefs/extents.h | 5
-rw-r--r-- fs/bcachefs/fs-io.c | 17
-rw-r--r-- fs/bcachefs/move.c | 2
-rw-r--r-- fs/bcachefs/recovery.c | 14
-rw-r--r-- fs/bcachefs/sb-downgrade.c | 3
-rw-r--r-- fs/bcachefs/super-io.c | 5
-rw-r--r-- fs/btrfs/bio.c | 37
-rw-r--r-- fs/btrfs/bio.h | 3
-rw-r--r-- fs/btrfs/defrag.c | 10
-rw-r--r-- fs/btrfs/extent_map.c | 7
-rw-r--r-- fs/btrfs/volumes.c | 1
-rw-r--r-- fs/dax.c | 45
-rw-r--r-- fs/erofs/super.c | 4
-rw-r--r-- fs/iomap/buffered-io.c | 17
-rw-r--r-- fs/nfs/client.c | 3
-rw-r--r-- fs/nfs/inode.c | 70
-rw-r--r-- fs/nfs/localio.c | 3
-rw-r--r-- fs/nfs/nfs4proc.c | 4
-rw-r--r-- fs/nfs/super.c | 10
-rw-r--r-- fs/nfs_common/nfslocalio.c | 23
-rw-r--r-- fs/nfsd/nfs4proc.c | 8
-rw-r--r-- fs/nilfs2/namei.c | 3
-rw-r--r-- fs/proc/softirqs.c | 2
-rw-r--r-- fs/squashfs/file_direct.c | 9
-rw-r--r-- fs/super.c | 26
-rw-r--r-- fs/tracefs/inode.c | 12
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.c | 2
-rw-r--r-- fs/xfs/xfs_filestream.c | 99
-rw-r--r-- fs/xfs/xfs_inode.c | 2
-rw-r--r-- fs/xfs/xfs_inode.h | 5
-rw-r--r-- fs/xfs/xfs_ioctl.c | 4
-rw-r--r-- fs/xfs/xfs_iomap.c | 2
-rw-r--r-- fs/xfs/xfs_trace.h | 15
46 files changed, 523 insertions, 224 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index f8622ed72e08..ada363af5aab 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -12,6 +12,7 @@
#include <linux/swap.h>
#include <linux/ctype.h>
#include <linux/sched.h>
+#include <linux/iversion.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
#include "afs_fs.h"
@@ -1823,6 +1824,8 @@ error:
static void afs_rename_success(struct afs_operation *op)
{
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry));
+
_enter("op=%08x", op->debug_id);
op->ctime = op->file[0].scb.status.mtime_client;
@@ -1832,6 +1835,22 @@ static void afs_rename_success(struct afs_operation *op)
op->ctime = op->file[1].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[1]);
}
+
+ /* If we're moving a subdir between dirs, we need to update its
+ * data version (DV) counter too, as its ".." entry will be altered.
+ */
+ if (S_ISDIR(vnode->netfs.inode.i_mode) &&
+ op->file[0].vnode != op->file[1].vnode) {
+ u64 new_dv;
+
+ write_seqlock(&vnode->cb_lock);
+
+ new_dv = vnode->status.data_version + 1;
+ vnode->status.data_version = new_dv;
+ inode_set_iversion_raw(&vnode->netfs.inode, new_dv);
+
+ write_sequnlock(&vnode->cb_lock);
+ }
}
static void afs_rename_edit_dir(struct afs_operation *op)
@@ -1873,6 +1892,12 @@ static void afs_rename_edit_dir(struct afs_operation *op)
&vnode->fid, afs_edit_dir_for_rename_2);
}
+ if (S_ISDIR(vnode->netfs.inode.i_mode) &&
+ new_dvnode != orig_dvnode &&
+ test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ afs_edit_dir_update_dotdot(vnode, new_dvnode,
+ afs_edit_dir_for_rename_sub);
+
new_inode = d_inode(new_dentry);
if (new_inode) {
spin_lock(&new_inode->i_lock);
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index a71bff10496b..fe223fb78111 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -127,10 +127,10 @@ static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
/*
* Scan a directory block looking for a dirent of the right name.
*/
-static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name,
+static int afs_dir_scan_block(const union afs_xdr_dir_block *block, const struct qstr *name,
unsigned int blocknum)
{
- union afs_xdr_dirent *de;
+ const union afs_xdr_dirent *de;
u64 bitmap;
int d, len, n;
@@ -492,3 +492,90 @@ error:
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
goto out_unmap;
}
+
+/*
+ * Edit a subdirectory that has been moved between directories to update the
+ * ".." entry.
+ */
+void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode,
+ enum afs_edit_dir_reason why)
+{
+ union afs_xdr_dir_block *block;
+ union afs_xdr_dirent *de;
+ struct folio *folio;
+ unsigned int nr_blocks, b;
+ pgoff_t index;
+ loff_t i_size;
+ int slot;
+
+ _enter("");
+
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (i_size < AFS_DIR_BLOCK_SIZE) {
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ return;
+ }
+ nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
+
+ /* Find the block that contains the ".." dirent. Each folio
+ * contains two or more directory blocks.
+ */
+ for (b = 0; b < nr_blocks; b++) {
+ index = b / AFS_DIR_BLOCKS_PER_PAGE;
+ folio = afs_dir_get_folio(vnode, index);
+ if (!folio)
+ goto error;
+
+ block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
+
+ /* Abandon the edit if we got a callback break. */
+ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ goto invalidated;
+
+ slot = afs_dir_scan_block(block, &dotdot_name, b);
+ if (slot >= 0)
+ goto found_dirent;
+
+ kunmap_local(block);
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+
+ /* Didn't find the dirent to clobber. Download the directory again. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_nodd,
+ 0, 0, 0, 0, "..");
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ goto out;
+
+found_dirent:
+ de = &block->dirents[slot];
+ de->u.vnode = htonl(new_dvnode->fid.vnode);
+ de->u.unique = htonl(new_dvnode->fid.unique);
+
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_dd, b, slot,
+ ntohl(de->u.vnode), ntohl(de->u.unique), "..");
+
+ kunmap_local(block);
+ folio_unlock(folio);
+ folio_put(folio);
+ inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
+
+out:
+ _leave("");
+ return;
+
+invalidated:
+ kunmap_local(block);
+ folio_unlock(folio);
+ folio_put(folio);
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_inval,
+ 0, 0, 0, 0, "..");
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ goto out;
+
+error:
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_error,
+ 0, 0, 0, 0, "..");
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ goto out;
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 52aab09a32a9..c9d620175e80 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1073,6 +1073,8 @@ extern void afs_check_for_remote_deletion(struct afs_operation *);
extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *,
enum afs_edit_dir_reason);
extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
+void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode,
+ enum afs_edit_dir_reason why);
/*
* dir_silly.c
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index f011e026358e..6d57efbb8110 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -110,6 +110,7 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
*/
static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
{
+ unsigned int inr = _IOC_NR(cmd);
int err;
err = check_dev_ioctl_version(cmd, param);
@@ -133,7 +134,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
* check_name() return for AUTOFS_DEV_IOCTL_TIMEOUT_CMD.
*/
err = check_name(param->path);
- if (cmd == AUTOFS_DEV_IOCTL_TIMEOUT_CMD)
+ if (inr == AUTOFS_DEV_IOCTL_TIMEOUT_CMD)
err = err ? 0 : -EINVAL;
if (err) {
pr_warn("invalid path supplied for cmd(0x%08x)\n",
@@ -141,8 +142,6 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
goto out;
}
} else {
- unsigned int inr = _IOC_NR(cmd);
-
if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD ||
inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD ||
inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) {
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index f8e87c6721b1..163a67b97a40 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -168,6 +168,9 @@ static inline bool data_type_movable(enum bch_data_type type)
static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
struct bch_dev *ca)
{
+ if (a.data_type >= BCH_DATA_NR)
+ return 0;
+
if (!data_type_movable(a.data_type) ||
!bch2_bucket_sectors_fragmented(ca, a))
return 0;
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 5836870ab882..372178c8d416 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -162,6 +162,10 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
ARRAY_SIZE(c->open_buckets_partial));
spin_lock(&c->freelist_lock);
+ rcu_read_lock();
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++;
+ rcu_read_unlock();
+
ob->on_partial_list = true;
c->open_buckets_partial[c->open_buckets_partial_nr++] =
ob - c->open_buckets;
@@ -972,7 +976,7 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
u64 avail;
bch2_dev_usage_read_fast(ca, &usage);
- avail = dev_buckets_free(ca, usage, watermark);
+ avail = dev_buckets_free(ca, usage, watermark) + ca->nr_partial_buckets;
if (!avail)
continue;
@@ -981,6 +985,10 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
i);
ob->on_partial_list = false;
+ rcu_read_lock();
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
+ rcu_read_unlock();
+
ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, ob);
@@ -1191,7 +1199,13 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
--c->open_buckets_partial_nr;
swap(c->open_buckets_partial[i],
c->open_buckets_partial[c->open_buckets_partial_nr]);
+
ob->on_partial_list = false;
+
+ rcu_read_lock();
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
+ rcu_read_unlock();
+
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);
@@ -1610,8 +1624,7 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++) {
spin_lock(&ob->lock);
- if (ob->valid && !ob->on_partial_list &&
- (!ca || ob->dev == ca->dev_idx))
+ if (ob->valid && (!ca || ob->dev == ca->dev_idx))
bch2_open_bucket_to_text(out, c, ob);
spin_unlock(&ob->lock);
}
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index f4151ee51b03..e94a83b8113e 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -555,6 +555,7 @@ struct bch_dev {
u64 alloc_cursor[3];
unsigned nr_open_buckets;
+ unsigned nr_partial_buckets;
unsigned nr_btree_reserve;
size_t inc_gen_needs_gc;
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 0883cf6e1a3e..eef9b89c561d 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -882,6 +882,18 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
__bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos);
k = bch2_btree_and_journal_iter_peek(&jiter);
+ if (!k.k) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "node not found at pos ");
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_str(&buf, " at btree ");
+ bch2_btree_pos_to_text(&buf, c, l->b);
+
+ ret = bch2_fs_topology_error(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ goto err;
+ }
bch2_bkey_buf_reassemble(out, c, k);
@@ -889,6 +901,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
c->opts.btree_node_prefetch)
ret = btree_path_prefetch_j(trans, path, &jiter);
+err:
bch2_btree_and_journal_iter_exit(&jiter);
return ret;
}
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index a6ee0beee6b0..8e75a852b358 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -236,7 +236,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
!ptr->cached) {
- bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
+ bch2_extent_ptr_set_cached(c, &m->op.opts,
+ bkey_i_to_s(insert), ptr);
rewrites_found |= 1U << i;
}
i++;
@@ -284,7 +285,8 @@ restart_drop_extra_replicas:
durability - ptr_durability >= m->op.opts.data_replicas) {
durability -= ptr_durability;
- bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
+ bch2_extent_ptr_set_cached(c, &m->op.opts,
+ bkey_i_to_s(insert), &entry->ptr);
goto restart_drop_extra_replicas;
}
}
@@ -295,7 +297,7 @@ restart_drop_extra_replicas:
bch2_extent_ptr_decoded_append(insert, &p);
bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
- bch2_extent_normalize(c, bkey_i_to_s(insert));
+ bch2_extent_normalize_by_opts(c, &m->op.opts, bkey_i_to_s(insert));
ret = bch2_sum_sector_overwrites(trans, &iter, insert,
&should_check_enospc,
@@ -558,7 +560,8 @@ void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
int bch2_extent_drop_ptrs(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
- struct data_update_opts data_opts)
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
struct bch_fs *c = trans->c;
struct bkey_i *n;
@@ -569,11 +572,11 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
if (ret)
return ret;
- while (data_opts.kill_ptrs) {
- unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+ while (data_opts->kill_ptrs) {
+ unsigned i = 0, drop = __fls(data_opts->kill_ptrs);
bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(n), ptr, i++ == drop);
- data_opts.kill_ptrs ^= 1U << drop;
+ data_opts->kill_ptrs ^= 1U << drop;
}
/*
@@ -581,7 +584,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
* will do the appropriate thing with it (turning it into a
* KEY_TYPE_error key, or just a discard if it was a cached extent)
*/
- bch2_extent_normalize(c, bkey_i_to_s(n));
+ bch2_extent_normalize_by_opts(c, io_opts, bkey_i_to_s(n));
/*
* Since we're not inserting through an extent iterator
@@ -720,7 +723,7 @@ int bch2_data_update_init(struct btree_trans *trans,
m->data_opts.rewrite_ptrs = 0;
/* if iter == NULL, it's just a promote */
if (iter)
- ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts);
+ ret = bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &m->data_opts);
goto out;
}
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 8d36365bdea8..e4b50723428e 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -40,7 +40,8 @@ void bch2_data_update_read_done(struct data_update *,
int bch2_extent_drop_ptrs(struct btree_trans *,
struct btree_iter *,
struct bkey_s_c,
- struct data_update_opts);
+ struct bch_io_opts *,
+ struct data_update_opts *);
void bch2_data_update_exit(struct data_update *);
int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index a0aa5bb467d9..749dcf368841 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1870,6 +1870,10 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
}
h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark);
+ if (!h) {
+ h = ERR_PTR(-BCH_ERR_ENOMEM_stripe_head_alloc);
+ goto err;
+ }
found:
if (h->rw_devs_change_count != c->rw_devs_change_count)
ec_stripe_head_devs_update(c, h);
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 649263516ab1..a1bc6c7a8ba0 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -83,6 +83,7 @@
x(ENOMEM, ENOMEM_fs_other_alloc) \
x(ENOMEM, ENOMEM_dev_alloc) \
x(ENOMEM, ENOMEM_disk_accounting) \
+ x(ENOMEM, ENOMEM_stripe_head_alloc) \
x(ENOSPC, ENOSPC_disk_reservation) \
x(ENOSPC, ENOSPC_bucket_alloc) \
x(ENOSPC, ENOSPC_disk_label_add) \
@@ -222,6 +223,7 @@
x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_type) \
x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_nr_superblocks) \
x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_superblocks_overlap) \
+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_sb_max_size_bits) \
x(BCH_ERR_invalid_sb, invalid_sb_members_missing) \
x(BCH_ERR_invalid_sb, invalid_sb_members) \
x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index cc0d22085aef..c4e91d123849 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -978,31 +978,54 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke
return NULL;
}
-void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
+static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts,
+ struct bch_extent_ptr *ptr)
+{
+ if (!opts->promote_target ||
+ !bch2_dev_in_target(c, ptr->dev, opts->promote_target))
+ return false;
+
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
+
+ return ca && bch2_dev_is_readable(ca) && !dev_ptr_stale_rcu(ca, ptr);
+}
+
+void bch2_extent_ptr_set_cached(struct bch_fs *c,
+ struct bch_io_opts *opts,
+ struct bkey_s k,
+ struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry;
- union bch_extent_entry *ec = NULL;
+ struct extent_ptr_decoded p;
- bkey_extent_entry_for_each(ptrs, entry) {
+ rcu_read_lock();
+ if (!want_cached_ptr(c, opts, ptr)) {
+ bch2_bkey_drop_ptr_noerror(k, ptr);
+ goto out;
+ }
+
+ /*
+ * Stripes can't contain cached data, so drop the pointer if it's in one.
+ *
+ * Possibly something we can fix in the future?
+ */
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
if (&entry->ptr == ptr) {
- ptr->cached = true;
- if (ec)
- extent_entry_drop(k, ec);
- return;
+ if (p.has_ec)
+ bch2_bkey_drop_ptr_noerror(k, ptr);
+ else
+ ptr->cached = true;
+ goto out;
}
- if (extent_entry_is_stripe_ptr(entry))
- ec = entry;
- else if (extent_entry_is_ptr(entry))
- ec = NULL;
- }
-
BUG();
+out:
+ rcu_read_unlock();
}
/*
- * bch_extent_normalize - clean up an extent, dropping stale pointers etc.
+ * bch2_extent_normalize - clean up an extent, dropping stale pointers etc.
*
* Returns true if @k should be dropped entirely
*
@@ -1016,8 +1039,39 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
rcu_read_lock();
bch2_bkey_drop_ptrs(k, ptr,
ptr->cached &&
- (ca = bch2_dev_rcu(c, ptr->dev)) &&
- dev_ptr_stale_rcu(ca, ptr) > 0);
+ (!(ca = bch2_dev_rcu(c, ptr->dev)) ||
+ dev_ptr_stale_rcu(ca, ptr) > 0));
+ rcu_read_unlock();
+
+ return bkey_deleted(k.k);
+}
+
+/*
+ * bch2_extent_normalize_by_opts - clean up an extent, dropping stale pointers etc.
+ *
+ * Like bch2_extent_normalize(), but also only keeps a single cached pointer on
+ * the promote target.
+ */
+bool bch2_extent_normalize_by_opts(struct bch_fs *c,
+ struct bch_io_opts *opts,
+ struct bkey_s k)
+{
+ struct bkey_ptrs ptrs;
+ bool have_cached_ptr;
+
+ rcu_read_lock();
+restart_drop_ptrs:
+ ptrs = bch2_bkey_ptrs(k);
+ have_cached_ptr = false;
+
+ bkey_for_each_ptr(ptrs, ptr)
+ if (ptr->cached) {
+ if (have_cached_ptr || !want_cached_ptr(c, opts, ptr)) {
+ bch2_bkey_drop_ptr(k, ptr);
+ goto restart_drop_ptrs;
+ }
+ have_cached_ptr = true;
+ }
rcu_read_unlock();
return bkey_deleted(k.k);
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 923a5f1849a8..bcffcf60aaaf 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -686,9 +686,12 @@ bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
struct bch_extent_ptr *
bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
-void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
+void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_io_opts *,
+ struct bkey_s, struct bch_extent_ptr *);
+bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_io_opts *, struct bkey_s);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 15d3f073b824..2456c41b215e 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -587,7 +587,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
POS(inode->v.i_ino, start_sector),
BTREE_ITER_slots|BTREE_ITER_intent);
- while (!ret && bkey_lt(iter.pos, end_pos)) {
+ while (!ret) {
s64 i_sectors_delta = 0;
struct quota_res quota_res = { 0 };
struct bkey_s_c k;
@@ -598,6 +598,9 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
bch2_trans_begin(trans);
+ if (bkey_ge(iter.pos, end_pos))
+ break;
+
ret = bch2_subvolume_get_snapshot(trans,
inode->ei_inum.subvol, &snapshot);
if (ret)
@@ -634,12 +637,15 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
if (bch2_clamp_data_hole(&inode->v,
&hole_start,
&hole_end,
- opts.data_replicas, true))
+ opts.data_replicas, true)) {
ret = drop_locks_do(trans,
(bch2_clamp_data_hole(&inode->v,
&hole_start,
&hole_end,
opts.data_replicas, false), 0));
+ if (ret)
+ goto bkey_err;
+ }
bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
if (ret)
@@ -667,10 +673,13 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
if (bch2_mark_pagecache_reserved(inode, &hole_start,
- iter.pos.offset, true))
- drop_locks_do(trans,
+ iter.pos.offset, true)) {
+ ret = drop_locks_do(trans,
bch2_mark_pagecache_reserved(inode, &hole_start,
iter.pos.offset, false));
+ if (ret)
+ goto bkey_err;
+ }
bkey_err:
bch2_quota_reservation_put(c, inode, &quota_res);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 8c456d8b8b99..0ef4a86850bb 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -266,7 +266,7 @@ int bch2_move_extent(struct moving_context *ctxt,
if (!data_opts.rewrite_ptrs &&
!data_opts.extra_replicas) {
if (data_opts.kill_ptrs)
- return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+ return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
return 0;
}
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 454b5a32dd7f..32d15aacc069 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -94,11 +94,10 @@ static void bch2_reconstruct_alloc(struct bch_fs *c)
__set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
-
c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
@@ -1002,6 +1001,7 @@ int bch2_fs_initialize(struct bch_fs *c)
struct bch_inode_unpacked root_inode, lostfound_inode;
struct bkey_inode_buf packed_inode;
struct qstr lostfound = QSTR("lost+found");
+ struct bch_member *m;
int ret;
bch_notice(c, "initializing new filesystem");
@@ -1018,6 +1018,14 @@ int bch2_fs_initialize(struct bch_fs *c)
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
bch2_write_super(c);
}
+
+ for_each_member_device(c, ca) {
+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false);
+ ca->mi = bch2_mi_to_cpu(m);
+ }
+
+ bch2_write_super(c);
mutex_unlock(&c->sb_lock);
c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index ae715ff658e8..8767c33c2b51 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -143,6 +143,9 @@ UPGRADE_TABLE()
static int have_stripes(struct bch_fs *c)
{
+ if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b))
+ return 0;
+
return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b);
}
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index ce7410d72089..7c71594f6a8b 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -287,6 +287,11 @@ static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out
return -BCH_ERR_invalid_sb_layout_nr_superblocks;
}
+ if (layout->sb_max_size_bits > BCH_SB_LAYOUT_SIZE_BITS_MAX) {
+ prt_printf(out, "Invalid superblock layout: max_size_bits too high");
+ return -BCH_ERR_invalid_sb_layout_sb_max_size_bits;
+ }
+
max_sectors = 1 << layout->sb_max_size_bits;
prev_offset = le64_to_cpu(layout->sb_offset[0]);
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index fec5c6cde0a7..7e0f9600b80c 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -49,6 +49,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
bbio->end_io = end_io;
bbio->private = private;
atomic_set(&bbio->pending_ios, 1);
+ WRITE_ONCE(bbio->status, BLK_STS_OK);
}
/*
@@ -113,41 +114,29 @@ static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
}
}
-static void btrfs_orig_write_end_io(struct bio *bio);
-
-static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
- struct btrfs_bio *orig_bbio)
-{
- /*
- * For writes we tolerate nr_mirrors - 1 write failures, so we can't
- * just blindly propagate a write failure here. Instead increment the
- * error count in the original I/O context so that it is guaranteed to
- * be larger than the error tolerance.
- */
- if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) {
- struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private;
- struct btrfs_io_context *orig_bioc = orig_stripe->bioc;
-
- atomic_add(orig_bioc->max_errors, &orig_bioc->error);
- } else {
- orig_bbio->bio.bi_status = bbio->bio.bi_status;
- }
-}
-
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
struct btrfs_bio *orig_bbio = bbio->private;
- if (bbio->bio.bi_status)
- btrfs_bbio_propagate_error(bbio, orig_bbio);
btrfs_cleanup_bio(bbio);
bbio = orig_bbio;
}
- if (atomic_dec_and_test(&bbio->pending_ios))
+ /*
+ * At this point, bbio always points to the original btrfs_bio. Save
+ * the first error in it.
+ */
+ if (status != BLK_STS_OK)
+ cmpxchg(&bbio->status, BLK_STS_OK, status);
+
+ if (atomic_dec_and_test(&bbio->pending_ios)) {
+ /* Load split bio's error which might be set above. */
+ if (status == BLK_STS_OK)
+ bbio->bio.bi_status = READ_ONCE(bbio->status);
__btrfs_bio_end_io(bbio);
+ }
}
static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index e48612340745..e2fe16074ad6 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -79,6 +79,9 @@ struct btrfs_bio {
/* File system that this I/O operates on. */
struct btrfs_fs_info *fs_info;
+ /* Save the first error status of split bio. */
+ blk_status_t status;
+
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_bio but relies on bio being last.
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index b95ef44c326b..968dae953948 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -763,12 +763,12 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
* We can get a merged extent, in that case, we need to re-search
* tree to get the original em for defrag.
*
- * If @newer_than is 0 or em::generation < newer_than, we can trust
- * this em, as either we don't care about the generation, or the
- * merged extent map will be rejected anyway.
+ * This is because even if we have adjacent extents that are contiguous
+ * and compatible (same type and flags), we still want to defrag them
+ * so that we use less metadata (extent items in the extent tree and
+ * file extent items in the inode's subvolume tree).
*/
- if (em && (em->flags & EXTENT_FLAG_MERGED) &&
- newer_than && em->generation >= newer_than) {
+ if (em && (em->flags & EXTENT_FLAG_MERGED)) {
free_extent_map(em);
em = NULL;
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 668c617444a5..1d93e1202c33 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -230,7 +230,12 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
if (extent_map_end(prev) != next->start)
return false;
- if (prev->flags != next->flags)
+ /*
+ * The merged flag is not an on-disk flag, it just indicates we had the
+ * extent maps of 2 (or more) adjacent extents merged, so factor it out.
+ */
+ if ((prev->flags & ~EXTENT_FLAG_MERGED) !=
+ (next->flags & ~EXTENT_FLAG_MERGED))
return false;
if (next->disk_bytenr < EXTENT_MAP_LAST_BYTE - 1)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8f340ad1d938..eb51b609190f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1105,6 +1105,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
if (device->bdev) {
fs_devices->open_devices--;
device->bdev = NULL;
+ device->bdev_file = NULL;
}
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
btrfs_destroy_dev_zone_info(device);
diff --git a/fs/dax.c b/fs/dax.c
index c62acd2812f8..21b47402b3dc 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1262,35 +1262,46 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
{
struct iomap *iomap = &iter->iomap;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
- loff_t pos = iter->pos;
- loff_t length = iomap_length(iter);
+ loff_t copy_pos = iter->pos;
+ u64 copy_len = iomap_length(iter);
+ u32 mod;
int id = 0;
s64 ret = 0;
void *daddr = NULL, *saddr = NULL;
- /* don't bother with blocks that are not shared to start with */
- if (!(iomap->flags & IOMAP_F_SHARED))
- return length;
+ if (!iomap_want_unshare_iter(iter))
+ return iomap_length(iter);
+
+ /*
+ * Extend the file range to be aligned to fsblock/pagesize, because
+ * we need to copy entire blocks, not just the byte range specified.
+ * Invalidate the mapping because we're about to CoW.
+ */
+ mod = offset_in_page(copy_pos);
+ if (mod) {
+ copy_len += mod;
+ copy_pos -= mod;
+ }
+
+ mod = offset_in_page(copy_pos + copy_len);
+ if (mod)
+ copy_len += PAGE_SIZE - mod;
+
+ invalidate_inode_pages2_range(iter->inode->i_mapping,
+ copy_pos >> PAGE_SHIFT,
+ (copy_pos + copy_len - 1) >> PAGE_SHIFT);
id = dax_read_lock();
- ret = dax_iomap_direct_access(iomap, pos, length, &daddr, NULL);
+ ret = dax_iomap_direct_access(iomap, copy_pos, copy_len, &daddr, NULL);
if (ret < 0)
goto out_unlock;
- /* zero the distance if srcmap is HOLE or UNWRITTEN */
- if (srcmap->flags & IOMAP_F_SHARED || srcmap->type == IOMAP_UNWRITTEN) {
- memset(daddr, 0, length);
- dax_flush(iomap->dax_dev, daddr, length);
- ret = length;
- goto out_unlock;
- }
-
- ret = dax_iomap_direct_access(srcmap, pos, length, &saddr, NULL);
+ ret = dax_iomap_direct_access(srcmap, copy_pos, copy_len, &saddr, NULL);
if (ret < 0)
goto out_unlock;
- if (copy_mc_to_kernel(daddr, saddr, length) == 0)
- ret = length;
+ if (copy_mc_to_kernel(daddr, saddr, copy_len) == 0)
+ ret = iomap_length(iter);
else
ret = -EIO;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 320d586c3896..bed3dbe5b7cb 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -709,7 +709,9 @@ static int erofs_fc_get_tree(struct fs_context *fc)
if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid)
return get_tree_nodev(fc, erofs_fc_fill_super);
- ret = get_tree_bdev(fc, erofs_fc_fill_super);
+ ret = get_tree_bdev_flags(fc, erofs_fc_fill_super,
+ IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) ?
+ GET_TREE_BDEV_QUIET_LOOKUP : 0);
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
if (ret == -ENOTBLK) {
if (!fc->source)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index aa587b2142e2..ef0b68bccbb6 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1277,22 +1277,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
loff_t length = iomap_length(iter);
loff_t written = 0;
- /* Don't bother with blocks that are not shared to start with. */
- if (!(iomap->flags & IOMAP_F_SHARED))
- return length;
-
- /*
- * Don't bother with delalloc reservations, holes or unwritten extents.
- *
- * Note that we use srcmap directly instead of iomap_iter_srcmap as
- * unsharing requires providing a separate source map, and the presence
- * of one is a good indicator that unsharing is needed, unlike
- * IOMAP_F_SHARED which can be set for any data that goes into the COW
- * fork for XFS.
- */
- if (iter->srcmap.type == IOMAP_HOLE ||
- iter->srcmap.type == IOMAP_DELALLOC ||
- iter->srcmap.type == IOMAP_UNWRITTEN)
+ if (!iomap_want_unshare_iter(iter))
return length;
do {
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 114282398716..03ecc7765615 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -181,8 +181,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
#if IS_ENABLED(CONFIG_NFS_LOCALIO)
seqlock_init(&clp->cl_boot_lock);
ktime_get_real_ts64(&clp->cl_nfssvc_boot);
- clp->cl_uuid.net = NULL;
- clp->cl_uuid.dom = NULL;
+ nfs_uuid_init(&clp->cl_uuid);
spin_lock_init(&clp->cl_localio_lock);
#endif /* CONFIG_NFS_LOCALIO */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 542c7d97b235..596f35170137 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -205,12 +205,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
nfs_fscache_invalidate(inode, 0);
flags &= ~NFS_INO_REVAL_FORCED;
- nfsi->cache_validity |= flags;
+ flags |= nfsi->cache_validity;
+ if (inode->i_mapping->nrpages == 0)
+ flags &= ~NFS_INO_INVALID_DATA;
- if (inode->i_mapping->nrpages == 0) {
- nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
- nfs_ooo_clear(nfsi);
- } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+ /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */
+ smp_store_release(&nfsi->cache_validity, flags);
+
+ if (inode->i_mapping->nrpages == 0 ||
+ nfsi->cache_validity & NFS_INO_INVALID_DATA) {
nfs_ooo_clear(nfsi);
}
trace_nfs_set_cache_invalid(inode, 0);
@@ -628,23 +631,35 @@ nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr)
}
}
+static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid)
+{
+ enum file_time_flags time_flags = 0;
+ unsigned int cache_flags = 0;
+
+ if (ia_valid & ATTR_MTIME) {
+ time_flags |= S_MTIME | S_CTIME;
+ cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ }
+ if (ia_valid & ATTR_ATIME) {
+ time_flags |= S_ATIME;
+ cache_flags |= NFS_INO_INVALID_ATIME;
+ }
+ inode_update_timestamps(inode, time_flags);
+ NFS_I(inode)->cache_validity &= ~cache_flags;
+}
+
void nfs_update_delegated_atime(struct inode *inode)
{
spin_lock(&inode->i_lock);
- if (nfs_have_delegated_atime(inode)) {
- inode_update_timestamps(inode, S_ATIME);
- NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ATIME;
- }
+ if (nfs_have_delegated_atime(inode))
+ nfs_update_timestamps(inode, ATTR_ATIME);
spin_unlock(&inode->i_lock);
}
void nfs_update_delegated_mtime_locked(struct inode *inode)
{
- if (nfs_have_delegated_mtime(inode)) {
- inode_update_timestamps(inode, S_CTIME | S_MTIME);
- NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_CTIME |
- NFS_INO_INVALID_MTIME);
- }
+ if (nfs_have_delegated_mtime(inode))
+ nfs_update_timestamps(inode, ATTR_MTIME);
}
void nfs_update_delegated_mtime(struct inode *inode)
@@ -682,15 +697,16 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
attr->ia_valid &= ~ATTR_SIZE;
}
- if (nfs_have_delegated_mtime(inode)) {
- if (attr->ia_valid & ATTR_MTIME) {
- nfs_update_delegated_mtime(inode);
- attr->ia_valid &= ~ATTR_MTIME;
- }
- if (attr->ia_valid & ATTR_ATIME) {
- nfs_update_delegated_atime(inode);
- attr->ia_valid &= ~ATTR_ATIME;
- }
+ if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) {
+ spin_lock(&inode->i_lock);
+ nfs_update_timestamps(inode, attr->ia_valid);
+ spin_unlock(&inode->i_lock);
+ attr->ia_valid &= ~(ATTR_MTIME | ATTR_ATIME);
+ } else if (nfs_have_delegated_atime(inode) &&
+ attr->ia_valid & ATTR_ATIME &&
+ !(attr->ia_valid & ATTR_MTIME)) {
+ nfs_update_delegated_atime(inode);
+ attr->ia_valid &= ~ATTR_ATIME;
}
/* Optimization: if the end result is no change, don't RPC */
@@ -1408,6 +1424,13 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (ret)
goto out;
+ smp_rmb(); /* pairs with smp_wmb() below */
+ if (test_bit(NFS_INO_INVALIDATING, bitlock))
+ continue;
+ /* pairs with nfs_set_cache_invalid()'s smp_store_release() */
+ if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA))
+ goto out;
+ /* Slow-path that double-checks with spinlock held */
spin_lock(&inode->i_lock);
if (test_bit(NFS_INO_INVALIDATING, bitlock)) {
spin_unlock(&inode->i_lock);
@@ -1633,6 +1656,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
fattr->gencount = nfs_inc_attr_generation_counter();
fattr->owner_name = NULL;
fattr->group_name = NULL;
+ fattr->mdsthreshold = NULL;
}
EXPORT_SYMBOL_GPL(nfs_fattr_init);
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index d0aa680ec816..8f0ce82a677e 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -205,7 +205,8 @@ void nfs_local_probe(struct nfs_client *clp)
nfs_local_disable(clp);
}
- nfs_uuid_begin(&clp->cl_uuid);
+ if (!nfs_uuid_begin(&clp->cl_uuid))
+ return;
if (nfs_server_uuid_is_local(clp))
nfs_local_enable(clp);
nfs_uuid_end(&clp->cl_uuid);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cd2fbde2e6d7..9d40319e063d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3452,6 +3452,10 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
adjust_flags |= NFS_INO_INVALID_MODE;
if (sattr->ia_valid & (ATTR_UID | ATTR_GID))
adjust_flags |= NFS_INO_INVALID_OTHER;
+ if (sattr->ia_valid & ATTR_ATIME)
+ adjust_flags |= NFS_INO_INVALID_ATIME;
+ if (sattr->ia_valid & ATTR_MTIME)
+ adjust_flags |= NFS_INO_INVALID_MTIME;
do {
nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label),
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9723b6c53397..ae5c5e39afa0 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -885,7 +885,15 @@ static int nfs_request_mount(struct fs_context *fc,
* Now ask the mount server to map our export path
* to a file handle.
*/
- status = nfs_mount(&request, ctx->timeo, ctx->retrans);
+ if ((request.protocol == XPRT_TRANSPORT_UDP) ==
+ !(ctx->flags & NFS_MOUNT_TCP))
+ /*
+ * NFS protocol and mount protocol are both UDP or neither UDP,
+ * so timeouts are compatible. Use NFS timeouts for MOUNT.
+ */
+ status = nfs_mount(&request, ctx->timeo, ctx->retrans);
+ else
+ status = nfs_mount(&request, NFS_UNSPEC_TIMEO, NFS_UNSPEC_RETRANS);
if (status != 0) {
dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
request.hostname, status);
diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
index 5c8ce5066c16..09404d142d1a 100644
--- a/fs/nfs_common/nfslocalio.c
+++ b/fs/nfs_common/nfslocalio.c
@@ -5,7 +5,7 @@
*/
#include <linux/module.h>
-#include <linux/rculist.h>
+#include <linux/list.h>
#include <linux/nfslocalio.h>
#include <net/netns/generic.h>
@@ -20,15 +20,27 @@ static DEFINE_SPINLOCK(nfs_uuid_lock);
*/
static LIST_HEAD(nfs_uuids);
-void nfs_uuid_begin(nfs_uuid_t *nfs_uuid)
+void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
{
nfs_uuid->net = NULL;
nfs_uuid->dom = NULL;
- uuid_gen(&nfs_uuid->uuid);
+ INIT_LIST_HEAD(&nfs_uuid->list);
+}
+EXPORT_SYMBOL_GPL(nfs_uuid_init);
+bool nfs_uuid_begin(nfs_uuid_t *nfs_uuid)
+{
spin_lock(&nfs_uuid_lock);
- list_add_tail_rcu(&nfs_uuid->list, &nfs_uuids);
+ /* Is this nfs_uuid already in use? */
+ if (!list_empty(&nfs_uuid->list)) {
+ spin_unlock(&nfs_uuid_lock);
+ return false;
+ }
+ uuid_gen(&nfs_uuid->uuid);
+ list_add_tail(&nfs_uuid->list, &nfs_uuids);
spin_unlock(&nfs_uuid_lock);
+
+ return true;
}
EXPORT_SYMBOL_GPL(nfs_uuid_begin);
@@ -36,7 +48,8 @@ void nfs_uuid_end(nfs_uuid_t *nfs_uuid)
{
if (nfs_uuid->net == NULL) {
spin_lock(&nfs_uuid_lock);
- list_del_init(&nfs_uuid->list);
+ if (nfs_uuid->net == NULL)
+ list_del_init(&nfs_uuid->list);
spin_unlock(&nfs_uuid_lock);
}
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b5a6bf4f459f..d32f2dfd148f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1841,14 +1841,12 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!async_copy)
goto out_err;
async_copy->cp_nn = nn;
+ INIT_LIST_HEAD(&async_copy->copies);
+ refcount_set(&async_copy->refcount, 1);
/* Arbitrary cap on number of pending async copy operations */
if (atomic_inc_return(&nn->pending_async_copies) >
- (int)rqstp->rq_pool->sp_nrthreads) {
- atomic_dec(&nn->pending_async_copies);
+ (int)rqstp->rq_pool->sp_nrthreads)
goto out_err;
- }
- INIT_LIST_HEAD(&async_copy->copies);
- refcount_set(&async_copy->refcount, 1);
async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
if (!async_copy->cp_src)
goto out_err;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 4905063790c5..9b108052d9f7 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -157,6 +157,9 @@ static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
/* slow symlink */
inode->i_op = &nilfs_symlink_inode_operations;
inode_nohighmem(inode);
+ mapping_set_gfp_mask(inode->i_mapping,
+ mapping_gfp_constraint(inode->i_mapping,
+ ~__GFP_FS));
inode->i_mapping->a_ops = &nilfs_aops;
err = page_symlink(inode, symname, l);
if (err)
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index f4616083faef..04bb29721419 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -20,7 +20,7 @@ static int show_softirqs(struct seq_file *p, void *v)
for (i = 0; i < NR_SOFTIRQS; i++) {
seq_printf(p, "%12s:", softirq_to_name[i]);
for_each_possible_cpu(j)
- seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
+ seq_put_decimal_ull_width(p, " ", kstat_softirqs_cpu(i, j), 10);
seq_putc(p, '\n');
}
return 0;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 22251743fadf..d19d4db74af8 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -30,7 +30,8 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
loff_t start_index = folio->index & ~mask;
loff_t end_index = start_index | mask;
- int i, n, pages, bytes, res = -ENOMEM;
+ loff_t index;
+ int i, pages, bytes, res = -ENOMEM;
struct page **page, *last_page;
struct squashfs_page_actor *actor;
void *pageaddr;
@@ -45,9 +46,9 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
return res;
/* Try to grab all the pages covered by the Squashfs block */
- for (i = 0, n = start_index; n <= end_index; n++) {
- page[i] = (n == folio->index) ? target_page :
- grab_cache_page_nowait(target_page->mapping, n);
+ for (i = 0, index = start_index; index <= end_index; index++) {
+ page[i] = (index == folio->index) ? target_page :
+ grab_cache_page_nowait(target_page->mapping, index);
if (page[i] == NULL)
continue;
diff --git a/fs/super.c b/fs/super.c
index 1db230432960..c9c7223bc2a2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1596,13 +1596,14 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
EXPORT_SYMBOL_GPL(setup_bdev_super);
/**
- * get_tree_bdev - Get a superblock based on a single block device
+ * get_tree_bdev_flags - Get a superblock based on a single block device
* @fc: The filesystem context holding the parameters
* @fill_super: Helper to initialise a new superblock
+ * @flags: GET_TREE_BDEV_* flags
*/
-int get_tree_bdev(struct fs_context *fc,
- int (*fill_super)(struct super_block *,
- struct fs_context *))
+int get_tree_bdev_flags(struct fs_context *fc,
+ int (*fill_super)(struct super_block *sb,
+ struct fs_context *fc), unsigned int flags)
{
struct super_block *s;
int error = 0;
@@ -1613,10 +1614,10 @@ int get_tree_bdev(struct fs_context *fc,
error = lookup_bdev(fc->source, &dev);
if (error) {
- errorf(fc, "%s: Can't lookup blockdev", fc->source);
+ if (!(flags & GET_TREE_BDEV_QUIET_LOOKUP))
+ errorf(fc, "%s: Can't lookup blockdev", fc->source);
return error;
}
-
fc->sb_flags |= SB_NOSEC;
s = sget_dev(fc, dev);
if (IS_ERR(s))
@@ -1644,6 +1645,19 @@ int get_tree_bdev(struct fs_context *fc,
fc->root = dget(s->s_root);
return 0;
}
+EXPORT_SYMBOL_GPL(get_tree_bdev_flags);
+
+/**
+ * get_tree_bdev - Get a superblock based on a single block device
+ * @fc: The filesystem context holding the parameters
+ * @fill_super: Helper to initialise a new superblock
+ */
+int get_tree_bdev(struct fs_context *fc,
+ int (*fill_super)(struct super_block *,
+ struct fs_context *))
+{
+ return get_tree_bdev_flags(fc, fill_super, 0);
+}
EXPORT_SYMBOL(get_tree_bdev);
static int test_bdev_super(struct super_block *s, void *data)
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 1748dff58c3b..cfc614c638da 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -392,6 +392,9 @@ static int tracefs_reconfigure(struct fs_context *fc)
struct tracefs_fs_info *sb_opts = sb->s_fs_info;
struct tracefs_fs_info *new_opts = fc->s_fs_info;
+ if (!new_opts)
+ return 0;
+
sync_filesystem(sb);
/* structure copy of new mount options to sb */
*sb_opts = *new_opts;
@@ -478,14 +481,17 @@ static int tracefs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_op = &tracefs_super_operations;
sb->s_d_op = &tracefs_dentry_operations;
- tracefs_apply_options(sb, false);
-
return 0;
}
static int tracefs_get_tree(struct fs_context *fc)
{
- return get_tree_single(fc, tracefs_fill_super);
+ int err = get_tree_single(fc, tracefs_fill_super);
+
+ if (err)
+ return err;
+
+ return tracefs_reconfigure(fc);
}
static void tracefs_free_fc(struct fs_context *fc)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 04f64cf9777e..22bdbb3e9980 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1923,7 +1923,7 @@ restart:
error = -EFSCORRUPTED;
goto error0;
}
- if (flen < bestrlen)
+ if (flen <= bestrlen)
break;
busy = xfs_alloc_compute_aligned(args, fbno, flen,
&rbno, &rlen, &busy_gen);
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index e3aaa0555597..290ba8887d29 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -64,25 +64,31 @@ xfs_filestream_pick_ag(
struct xfs_perag *pag;
struct xfs_perag *max_pag = NULL;
xfs_extlen_t minlen = *longest;
- xfs_extlen_t free = 0, minfree, maxfree = 0;
+ xfs_extlen_t minfree, maxfree = 0;
xfs_agnumber_t agno;
bool first_pass = true;
- int err;
/* 2% of an AG's blocks must be free for it to be chosen. */
minfree = mp->m_sb.sb_agblocks / 50;
restart:
for_each_perag_wrap(mp, start_agno, agno, pag) {
+ int err;
+
trace_xfs_filestream_scan(pag, pino);
+
*longest = 0;
err = xfs_bmap_longest_free_extent(pag, NULL, longest);
if (err) {
- if (err != -EAGAIN)
- break;
- /* Couldn't lock the AGF, skip this AG. */
- err = 0;
- continue;
+ if (err == -EAGAIN) {
+ /* Couldn't lock the AGF, skip this AG. */
+ err = 0;
+ continue;
+ }
+ xfs_perag_rele(pag);
+ if (max_pag)
+ xfs_perag_rele(max_pag);
+ return err;
}
/* Keep track of the AG with the most free blocks. */
@@ -107,8 +113,9 @@ restart:
!(flags & XFS_PICK_USERDATA) ||
(flags & XFS_PICK_LOWSPACE))) {
/* Break out, retaining the reference on the AG. */
- free = pag->pagf_freeblks;
- break;
+ if (max_pag)
+ xfs_perag_rele(max_pag);
+ goto done;
}
}
@@ -116,57 +123,47 @@ restart:
atomic_dec(&pag->pagf_fstrms);
}
- if (err) {
- xfs_perag_rele(pag);
- if (max_pag)
- xfs_perag_rele(max_pag);
- return err;
+ /*
+ * Allow a second pass to give xfs_bmap_longest_free_extent() another
+ * attempt at locking AGFs that it might have skipped over before we
+ * fail.
+ */
+ if (first_pass) {
+ first_pass = false;
+ goto restart;
}
- if (!pag) {
- /*
- * Allow a second pass to give xfs_bmap_longest_free_extent()
- * another attempt at locking AGFs that it might have skipped
- * over before we fail.
- */
- if (first_pass) {
- first_pass = false;
- goto restart;
- }
+ /*
+ * We must be low on data space, so run a final lowspace optimised
+ * selection pass if we haven't already.
+ */
+ if (!(flags & XFS_PICK_LOWSPACE)) {
+ flags |= XFS_PICK_LOWSPACE;
+ goto restart;
+ }
- /*
- * We must be low on data space, so run a final lowspace
- * optimised selection pass if we haven't already.
- */
- if (!(flags & XFS_PICK_LOWSPACE)) {
- flags |= XFS_PICK_LOWSPACE;
- goto restart;
+ /*
+ * No unassociated AGs are available, so select the AG with the most
+ * free space, regardless of whether it's already in use by another
+ * filestream. If none suit, just use whatever AG we can grab.
+ */
+ if (!max_pag) {
+ for_each_perag_wrap(args->mp, 0, start_agno, pag) {
+ max_pag = pag;
+ break;
}
- /*
- * No unassociated AGs are available, so select the AG with the
- * most free space, regardless of whether it's already in use by
- * another filestream. It none suit, just use whatever AG we can
- * grab.
- */
- if (!max_pag) {
- for_each_perag_wrap(args->mp, 0, start_agno, args->pag)
- break;
- atomic_inc(&args->pag->pagf_fstrms);
- *longest = 0;
- } else {
- pag = max_pag;
- free = maxfree;
- atomic_inc(&pag->pagf_fstrms);
- }
- } else if (max_pag) {
- xfs_perag_rele(max_pag);
+ /* Bail if there are no AGs at all to select from. */
+ if (!max_pag)
+ return -ENOSPC;
}
- trace_xfs_filestream_pick(pag, pino, free);
+ pag = max_pag;
+ atomic_inc(&pag->pagf_fstrms);
+done:
+ trace_xfs_filestream_pick(pag, pino);
args->pag = pag;
return 0;
-
}
static struct xfs_inode *
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index bcc277fc0a83..19dcb569a3e7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1409,7 +1409,7 @@ xfs_inactive(
if (S_ISREG(VFS_I(ip)->i_mode) &&
(ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 ||
- ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
+ xfs_inode_has_filedata(ip)))
truncate = 1;
if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 97ed912306fd..03944b6c5fba 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -292,6 +292,11 @@ static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
}
+static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip)
+{
+ return ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0;
+}
+
/*
* Check if an inode has any data in the COW fork. This might be often false
* even for inodes with the reflink flag when there is no pending COW operation.
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index a20d426ef021..2567fd2a0994 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -481,7 +481,7 @@ xfs_ioctl_setattr_xflags(
if (rtflag != XFS_IS_REALTIME_INODE(ip)) {
/* Can't change realtime flag if any extents are allocated. */
- if (ip->i_df.if_nextents || ip->i_delayed_blks)
+ if (xfs_inode_has_filedata(ip))
return -EINVAL;
/*
@@ -602,7 +602,7 @@ xfs_ioctl_setattr_check_extsize(
if (!fa->fsx_valid)
return 0;
- if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
+ if (S_ISREG(VFS_I(ip)->i_mode) && xfs_inode_has_filedata(ip) &&
XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize)
return -EINVAL;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 916531d9f83c..86da16f54be9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -707,7 +707,7 @@ imap_needs_cow(
return false;
/* when zeroing we don't have to COW holes or unwritten extents */
- if (flags & IOMAP_ZERO) {
+ if (flags & (IOMAP_UNSHARE | IOMAP_ZERO)) {
if (!nimaps ||
imap->br_startblock == HOLESTARTBLOCK ||
imap->br_state == XFS_EXT_UNWRITTEN)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ee9f0b1f548d..fcb2bad4f76e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -691,8 +691,8 @@ DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
TRACE_EVENT(xfs_filestream_pick,
- TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free),
- TP_ARGS(pag, ino, free),
+ TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino),
+ TP_ARGS(pag, ino),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
@@ -703,14 +703,9 @@ TRACE_EVENT(xfs_filestream_pick,
TP_fast_assign(
__entry->dev = pag->pag_mount->m_super->s_dev;
__entry->ino = ino;
- if (pag) {
- __entry->agno = pag->pag_agno;
- __entry->streams = atomic_read(&pag->pagf_fstrms);
- } else {
- __entry->agno = NULLAGNUMBER;
- __entry->streams = 0;
- }
- __entry->free = free;
+ __entry->agno = pag->pag_agno;
+ __entry->streams = atomic_read(&pag->pagf_fstrms);
+ __entry->free = pag->pagf_freeblks;
),
TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d",
MAJOR(__entry->dev), MINOR(__entry->dev),