summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_alloc_btree.c
diff options
context:
space:
mode:
author Zizhi Wo <wozizhi@huawei.com> 2024-07-01 09:02:36 +0300
committer Chandan Babu R <chandanbabu@kernel.org> 2024-07-04 10:14:16 +0300
commit 94a0333b9212a114d19096a77903f76d0d5bca26 (patch)
tree 08bb3a1d68e937565f166275b0ec0c4621a36bdf /fs/xfs/libxfs/xfs_alloc_btree.c
parent 4cdbfe457a32cf31c44b8ed7caf1697b0cd51ffc (diff)
download linux-94a0333b9212a114d19096a77903f76d0d5bca26.tar.xz
xfs: Avoid races with cnt_btree lastrec updates
A concurrent file creation and a small write could unexpectedly return an -ENOSPC error, since there is a race window in which the allocator could get the wrong agf->agf_longest. Write file process steps: 1) Find the entry that best meets the conditions, then calculate the start address and length of the remaining part of the entry after allocation. 2) Delete this entry and update the -current- agf->agf_longest. 3) Insert the remaining unused parts of this entry based on the calculations in 1), and update the agf->agf_longest again if necessary. Create file process steps: 1) Check whether there are free inodes in the inode chunk. 2) If there is no free inode, check whether there is space for creating inode chunks, performing the no-lock judgment first. 3) If the judgment succeeds, the judgment is performed again with the agf lock held. Otherwise, an error is returned directly. If the write process is in step 2) but has not gone to 3) yet, and the create file process goes to 2) at this time, it may be mistaken for no space, so that the file creation fails even though the file system still has space. We have sent two different commits to the community in order to fix this problem[1][2]. Unfortunately, both solutions have flaws. In [2], I discussed with Dave and Darrick, and realized that a better solution to this problem requires the "last cnt record tracking" to be ripped out of the generic btree code. And surprisingly, Dave directly provided his fix code. This patch includes appropriate modifications based on his tmp-code to address this issue. The entire fix can be roughly divided into two parts: 1) Delete the code related to lastrec-update in the generic btree code. 2) Move the process of updating the longest freespace with cntbt separately to the end of the cntbt modifications. Move the cursor to the rightmost record first, and update the longest free extent based on that record.
Note that we can not update the longest with xfs_alloc_get_rec() after finding the longest record, as xfs_verify_agbno() may not pass because pag->block_count is updated on the outside. Therefore, use xfs_btree_get_rec() as a replacement. [1] https://lore.kernel.org/all/20240419061848.1032366-2-yebin10@huawei.com [2] https://lore.kernel.org/all/20240604071121.3981686-1-wozizhi@huawei.com Reported-by: Ye Bin <yebin10@huawei.com> Signed-off-by: Zizhi Wo <wozizhi@huawei.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
Diffstat (limited to 'fs/xfs/libxfs/xfs_alloc_btree.c')
-rw-r--r-- fs/xfs/libxfs/xfs_alloc_btree.c 64
1 files changed, 0 insertions, 64 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 6ef5ddd89600..585e98e87ef9 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -115,67 +115,6 @@ xfs_allocbt_free_block(
return 0;
}
-/*
- * Update the longest extent in the AGF
- */
-STATIC void
-xfs_allocbt_update_lastrec(
- struct xfs_btree_cur *cur,
- const struct xfs_btree_block *block,
- const union xfs_btree_rec *rec,
- int ptr,
- int reason)
-{
- struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
- struct xfs_perag *pag;
- __be32 len;
- int numrecs;
-
- ASSERT(!xfs_btree_is_bno(cur->bc_ops));
-
- switch (reason) {
- case LASTREC_UPDATE:
- /*
- * If this is the last leaf block and it's the last record,
- * then update the size of the longest extent in the AG.
- */
- if (ptr != xfs_btree_get_numrecs(block))
- return;
- len = rec->alloc.ar_blockcount;
- break;
- case LASTREC_INSREC:
- if (be32_to_cpu(rec->alloc.ar_blockcount) <=
- be32_to_cpu(agf->agf_longest))
- return;
- len = rec->alloc.ar_blockcount;
- break;
- case LASTREC_DELREC:
- numrecs = xfs_btree_get_numrecs(block);
- if (ptr <= numrecs)
- return;
- ASSERT(ptr == numrecs + 1);
-
- if (numrecs) {
- xfs_alloc_rec_t *rrp;
-
- rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
- len = rrp->ar_blockcount;
- } else {
- len = 0;
- }
-
- break;
- default:
- ASSERT(0);
- return;
- }
-
- agf->agf_longest = len;
- pag = cur->bc_ag.agbp->b_pag;
- pag->pagf_longest = be32_to_cpu(len);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_ag.agbp, XFS_AGF_LONGEST);
-}
-
STATIC int
xfs_allocbt_get_minrecs(
struct xfs_btree_cur *cur,
@@ -493,7 +432,6 @@ const struct xfs_btree_ops xfs_bnobt_ops = {
.set_root = xfs_allocbt_set_root,
.alloc_block = xfs_allocbt_alloc_block,
.free_block = xfs_allocbt_free_block,
- .update_lastrec = xfs_allocbt_update_lastrec,
.get_minrecs = xfs_allocbt_get_minrecs,
.get_maxrecs = xfs_allocbt_get_maxrecs,
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
@@ -511,7 +449,6 @@ const struct xfs_btree_ops xfs_bnobt_ops = {
const struct xfs_btree_ops xfs_cntbt_ops = {
.name = "cnt",
.type = XFS_BTREE_TYPE_AG,
- .geom_flags = XFS_BTGEO_LASTREC_UPDATE,
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
@@ -525,7 +462,6 @@ const struct xfs_btree_ops xfs_cntbt_ops = {
.set_root = xfs_allocbt_set_root,
.alloc_block = xfs_allocbt_alloc_block,
.free_block = xfs_allocbt_free_block,
- .update_lastrec = xfs_allocbt_update_lastrec,
.get_minrecs = xfs_allocbt_get_minrecs,
.get_maxrecs = xfs_allocbt_get_maxrecs,
.init_key_from_rec = xfs_allocbt_init_key_from_rec,