diff options
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- | fs/xfs/xfs_inode.c | 769 |
1 files changed, 558 insertions, 211 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ae667ba74a1c..f643a9295179 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1332,7 +1332,7 @@ xfs_create_tmpfile( if (error) goto out_trans_cancel; - error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip); + error = xfs_dir_ialloc(&tp, dp, mode, 0, 0, prid, &ip); if (error) goto out_trans_cancel; @@ -1754,7 +1754,7 @@ xfs_inactive_ifree( * now remains allocated and sits on the unlinked list until the fs is * repaired. */ - if (unlikely(mp->m_inotbt_nores)) { + if (unlikely(mp->m_finobt_nores)) { error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); @@ -1907,86 +1907,510 @@ xfs_inactive( } /* - * This is called when the inode's link count goes to 0 or we are creating a - * tmpfile via O_TMPFILE. In the case of a tmpfile, @ignore_linkcount will be - * set to true as the link count is dropped to zero by the VFS after we've - * created the file successfully, so we have to add it to the unlinked list - * while the link count is non-zero. + * In-Core Unlinked List Lookups + * ============================= + * + * Every inode is supposed to be reachable from some other piece of metadata + * with the exception of the root directory. Inodes with a connection to a + * file descriptor but not linked from anywhere in the on-disk directory tree + * are collectively known as unlinked inodes, though the filesystem itself + * maintains links to these inodes so that on-disk metadata are consistent. + * + * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI + * header contains a number of buckets that point to an inode, and each inode + * record has a pointer to the next inode in the hash chain. This + * singly-linked list causes scaling problems in the iunlink remove function + * because we must walk that list to find the inode that points to the inode + * being removed from the unlinked hash bucket list. + * + * What if we modelled the unlinked list as a collection of records capturing + * "X.next_unlinked = Y" relations? If we indexed those records on Y, we'd + * have a fast way to look up unlinked list predecessors, which avoids the + * slow list walk. That's exactly what we do here (in-core) with a per-AG + * rhashtable. + * + * Because this is a backref cache, we ignore operational failures since the + * iunlink code can fall back to the slow bucket walk. The only errors that + * should bubble out are for obviously incorrect situations. + * + * All users of the backref cache MUST hold the AGI buffer lock to serialize + * access or have otherwise provided for concurrency control. + */ + +/* Capture a "X.next_unlinked = Y" relationship. */ +struct xfs_iunlink { + struct rhash_head iu_rhash_head; + xfs_agino_t iu_agino; /* X */ + xfs_agino_t iu_next_unlinked; /* Y */ +}; + +/* Unlinked list predecessor lookup hashtable construction */ +static int +xfs_iunlink_obj_cmpfn( + struct rhashtable_compare_arg *arg, + const void *obj) +{ + const xfs_agino_t *key = arg->key; + const struct xfs_iunlink *iu = obj; + + if (iu->iu_next_unlinked != *key) + return 1; + return 0; +} + +static const struct rhashtable_params xfs_iunlink_hash_params = { + .min_size = XFS_AGI_UNLINKED_BUCKETS, + .key_len = sizeof(xfs_agino_t), + .key_offset = offsetof(struct xfs_iunlink, + iu_next_unlinked), + .head_offset = offsetof(struct xfs_iunlink, iu_rhash_head), + .automatic_shrinking = true, + .obj_cmpfn = xfs_iunlink_obj_cmpfn, +}; + +/* + * Return X, where X.next_unlinked == @agino. Returns NULLAGINO if no such + * relation is found. + */ +static xfs_agino_t +xfs_iunlink_lookup_backref( + struct xfs_perag *pag, + xfs_agino_t agino) +{ + struct xfs_iunlink *iu; + + iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino, + xfs_iunlink_hash_params); + return iu ? iu->iu_agino : NULLAGINO; +} + +/* + * Take ownership of an iunlink cache entry and insert it into the hash table. + * If successful, the entry will be owned by the cache; if not, it is freed. + * Either way, the caller does not own @iu after this call. + */ +static int +xfs_iunlink_insert_backref( + struct xfs_perag *pag, + struct xfs_iunlink *iu) +{ + int error; + + error = rhashtable_insert_fast(&pag->pagi_unlinked_hash, + &iu->iu_rhash_head, xfs_iunlink_hash_params); + /* + * Fail loudly if there already was an entry because that's a sign of + * corruption of in-memory data. Also fail loudly if we see an error + * code we didn't anticipate from the rhashtable code. Currently we + * only anticipate ENOMEM. + */ + if (error) { + WARN(error != -ENOMEM, "iunlink cache insert error %d", error); + kmem_free(iu); + } + /* + * Absorb any runtime errors that aren't a result of corruption because + * this is a cache and we can always fall back to bucket list scanning. + */ + if (error != 0 && error != -EEXIST) + error = 0; + return error; +} + +/* Remember that @prev_agino.next_unlinked = @this_agino. */ +static int +xfs_iunlink_add_backref( + struct xfs_perag *pag, + xfs_agino_t prev_agino, + xfs_agino_t this_agino) +{ + struct xfs_iunlink *iu; + + if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK)) + return 0; + + iu = kmem_zalloc(sizeof(*iu), KM_SLEEP | KM_NOFS); + iu->iu_agino = prev_agino; + iu->iu_next_unlinked = this_agino; + + return xfs_iunlink_insert_backref(pag, iu); +} + +/* + * Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked. + * If @next_unlinked is NULLAGINO, we drop the backref and exit. If there + * wasn't any such entry then we don't bother. + */ +static int +xfs_iunlink_change_backref( + struct xfs_perag *pag, + xfs_agino_t agino, + xfs_agino_t next_unlinked) +{ + struct xfs_iunlink *iu; + int error; + + /* Look up the old entry; if there wasn't one then exit. */ + iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino, + xfs_iunlink_hash_params); + if (!iu) + return 0; + + /* + * Remove the entry. This shouldn't ever return an error, but if we + * couldn't remove the old entry we don't want to add it again to the + * hash table, and if the entry disappeared on us then someone's + * violated the locking rules and we need to fail loudly. Either way + * we cannot remove the inode because internal state is or would have + * been corrupt. + */ + error = rhashtable_remove_fast(&pag->pagi_unlinked_hash, + &iu->iu_rhash_head, xfs_iunlink_hash_params); + if (error) + return error; + + /* If there is no new next entry just free our item and return. */ + if (next_unlinked == NULLAGINO) { + kmem_free(iu); + return 0; + } + + /* Update the entry and re-add it to the hash table. */ + iu->iu_next_unlinked = next_unlinked; + return xfs_iunlink_insert_backref(pag, iu); +} + +/* Set up the in-core predecessor structures. */ +int +xfs_iunlink_init( + struct xfs_perag *pag) +{ + return rhashtable_init(&pag->pagi_unlinked_hash, + &xfs_iunlink_hash_params); +} + +/* Free the in-core predecessor structures. */ +static void +xfs_iunlink_free_item( + void *ptr, + void *arg) +{ + struct xfs_iunlink *iu = ptr; + bool *freed_anything = arg; + + *freed_anything = true; + kmem_free(iu); +} + +void +xfs_iunlink_destroy( + struct xfs_perag *pag) +{ + bool freed_anything = false; + + rhashtable_free_and_destroy(&pag->pagi_unlinked_hash, + xfs_iunlink_free_item, &freed_anything); + + ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount)); +} + +/* + * Point the AGI unlinked bucket at an inode and log the results. The caller + * is responsible for validating the old value. + */ +STATIC int +xfs_iunlink_update_bucket( + struct xfs_trans *tp, + xfs_agnumber_t agno, + struct xfs_buf *agibp, + unsigned int bucket_index, + xfs_agino_t new_agino) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); + xfs_agino_t old_value; + int offset; + + ASSERT(xfs_verify_agino_or_null(tp->t_mountp, agno, new_agino)); + + old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); + trace_xfs_iunlink_update_bucket(tp->t_mountp, agno, bucket_index, + old_value, new_agino); + + /* + * We should never find the head of the list already set to the value + * passed in because either we're adding or removing ourselves from the + * head of the list. + */ + if (old_value == new_agino) + return -EFSCORRUPTED; + + agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); + offset = offsetof(struct xfs_agi, agi_unlinked) + + (sizeof(xfs_agino_t) * bucket_index); + xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1); + return 0; +} + +/* Set an on-disk inode's next_unlinked pointer. */ +STATIC void +xfs_iunlink_update_dinode( + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agino_t agino, + struct xfs_buf *ibp, + struct xfs_dinode *dip, + struct xfs_imap *imap, + xfs_agino_t next_agino) +{ + struct xfs_mount *mp = tp->t_mountp; + int offset; + + ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino)); + + trace_xfs_iunlink_update_dinode(mp, agno, agino, + be32_to_cpu(dip->di_next_unlinked), next_agino); + + dip->di_next_unlinked = cpu_to_be32(next_agino); + offset = imap->im_boffset + + offsetof(struct xfs_dinode, di_next_unlinked); + + /* need to recalc the inode CRC if appropriate */ + xfs_dinode_calc_crc(mp, dip); + xfs_trans_inode_buf(tp, ibp); + xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1); + xfs_inobp_check(mp, ibp); +} + +/* Set an in-core inode's unlinked pointer and return the old value. */ +STATIC int +xfs_iunlink_update_inode( + struct xfs_trans *tp, + struct xfs_inode *ip, + xfs_agnumber_t agno, + xfs_agino_t next_agino, + xfs_agino_t *old_next_agino) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_dinode *dip; + struct xfs_buf *ibp; + xfs_agino_t old_value; + int error; + + ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino)); + + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 0, 0); + if (error) + return error; + + /* Make sure the old pointer isn't garbage. */ + old_value = be32_to_cpu(dip->di_next_unlinked); + if (!xfs_verify_agino_or_null(mp, agno, old_value)) { + error = -EFSCORRUPTED; + goto out; + } + + /* + * Since we're updating a linked list, we should never find that the + * current pointer is the same as the new value, unless we're + * terminating the list. + */ + *old_next_agino = old_value; + if (old_value == next_agino) { + if (next_agino != NULLAGINO) + error = -EFSCORRUPTED; + goto out; + } + + /* Ok, update the new pointer. */ + xfs_iunlink_update_dinode(tp, agno, XFS_INO_TO_AGINO(mp, ip->i_ino), + ibp, dip, &ip->i_imap, next_agino); + return 0; +out: + xfs_trans_brelse(tp, ibp); + return error; +} + +/* + * This is called when the inode's link count has gone to 0 or we are creating + * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. * * We place the on-disk inode on a list in the AGI. It will be pulled from this * list when the inode is freed. */ STATIC int xfs_iunlink( - struct xfs_trans *tp, - struct xfs_inode *ip) + struct xfs_trans *tp, + struct xfs_inode *ip) { - xfs_mount_t *mp = tp->t_mountp; - xfs_agi_t *agi; - xfs_dinode_t *dip; - xfs_buf_t *agibp; - xfs_buf_t *ibp; - xfs_agino_t agino; - short bucket_index; - int offset; - int error; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi; + struct xfs_buf *agibp; + xfs_agino_t next_agino; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); + short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; + int error; + ASSERT(VFS_I(ip)->i_nlink == 0); ASSERT(VFS_I(ip)->i_mode != 0); + trace_xfs_iunlink(ip); - /* - * Get the agi buffer first. It ensures lock ordering - * on the list. - */ - error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp); + /* Get the agi buffer first. It ensures lock ordering on the list. */ + error = xfs_read_agi(mp, tp, agno, &agibp); if (error) return error; agi = XFS_BUF_TO_AGI(agibp); /* - * Get the index into the agi hash table for the - * list this inode will go on. + * Get the index into the agi hash table for the list this inode will + * go on. Make sure the pointer isn't garbage and that this inode + * isn't already on the list. */ - agino = XFS_INO_TO_AGINO(mp, ip->i_ino); - ASSERT(agino != 0); - bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; - ASSERT(agi->agi_unlinked[bucket_index]); - ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); + next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); + if (next_agino == agino || + !xfs_verify_agino_or_null(mp, agno, next_agino)) + return -EFSCORRUPTED; + + if (next_agino != NULLAGINO) { + struct xfs_perag *pag; + xfs_agino_t old_agino; + + /* + * There is already another inode in the bucket, so point this + * inode to the current head of the list. + */ + error = xfs_iunlink_update_inode(tp, ip, agno, next_agino, + &old_agino); + if (error) + return error; + ASSERT(old_agino == NULLAGINO); - if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) { /* - * There is already another inode in the bucket we need - * to add ourselves to. Add us at the front of the list. - * Here we put the head pointer into our next pointer, - * and then we fall through to point the head at us. + * agino has been unlinked, add a backref from the next inode + * back to agino. */ - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, - 0, 0); + pag = xfs_perag_get(mp, agno); + error = xfs_iunlink_add_backref(pag, agino, next_agino); + xfs_perag_put(pag); if (error) return error; + } + + /* Point the head of the list to point to this inode. */ + return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino); +} - ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO)); - dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; - offset = ip->i_imap.im_boffset + - offsetof(xfs_dinode_t, di_next_unlinked); +/* Return the imap, dinode pointer, and buffer for an inode. */ +STATIC int +xfs_iunlink_map_ino( + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agino_t agino, + struct xfs_imap *imap, + struct xfs_dinode **dipp, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = tp->t_mountp; + int error; - /* need to recalc the inode CRC if appropriate */ - xfs_dinode_calc_crc(mp, dip); + imap->im_blkno = 0; + error = xfs_imap(mp, tp, XFS_AGINO_TO_INO(mp, agno, agino), imap, 0); + if (error) { + xfs_warn(mp, "%s: xfs_imap returned error %d.", + __func__, error); + return error; + } - xfs_trans_inode_buf(tp, ibp); - xfs_trans_log_buf(tp, ibp, offset, - (offset + sizeof(xfs_agino_t) - 1)); - xfs_inobp_check(mp, ibp); + error = xfs_imap_to_bp(mp, tp, imap, dipp, bpp, 0, 0); + if (error) { + xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.", + __func__, error); + return error; + } + + return 0; +} + +/* + * Walk the unlinked chain from @head_agino until we find the inode that + * points to @target_agino. Return the inode number, map, dinode pointer, + * and inode cluster buffer of that inode as @agino, @imap, @dipp, and @bpp. + * + * @tp, @pag, @head_agino, and @target_agino are input parameters. + * @agino, @imap, @dipp, and @bpp are all output parameters. + * + * Do not call this function if @target_agino is the head of the list. + */ +STATIC int +xfs_iunlink_map_prev( + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agino_t head_agino, + xfs_agino_t target_agino, + xfs_agino_t *agino, + struct xfs_imap *imap, + struct xfs_dinode **dipp, + struct xfs_buf **bpp, + struct xfs_perag *pag) +{ + struct xfs_mount *mp = tp->t_mountp; + xfs_agino_t next_agino; + int error; + + ASSERT(head_agino != target_agino); + *bpp = NULL; + + /* See if our backref cache can find it faster. */ + *agino = xfs_iunlink_lookup_backref(pag, target_agino); + if (*agino != NULLAGINO) { + error = xfs_iunlink_map_ino(tp, agno, *agino, imap, dipp, bpp); + if (error) + return error; + + if (be32_to_cpu((*dipp)->di_next_unlinked) == target_agino) + return 0; + + /* + * If we get here the cache contents were corrupt, so drop the + * buffer and fall back to walking the bucket list. + */ + xfs_trans_brelse(tp, *bpp); + *bpp = NULL; + WARN_ON_ONCE(1); + } + + trace_xfs_iunlink_map_prev_fallback(mp, agno); + + /* Otherwise, walk the entire bucket until we find it. */ + next_agino = head_agino; + while (next_agino != target_agino) { + xfs_agino_t unlinked_agino; + + if (*bpp) + xfs_trans_brelse(tp, *bpp); + + *agino = next_agino; + error = xfs_iunlink_map_ino(tp, agno, next_agino, imap, dipp, + bpp); + if (error) + return error; + + unlinked_agino = be32_to_cpu((*dipp)->di_next_unlinked); + /* + * Make sure this pointer is valid and isn't an obvious + * infinite loop. + */ + if (!xfs_verify_agino(mp, agno, unlinked_agino) || + next_agino == unlinked_agino) { + XFS_CORRUPTION_ERROR(__func__, + XFS_ERRLEVEL_LOW, mp, + *dipp, sizeof(**dipp)); + error = -EFSCORRUPTED; + return error; + } + next_agino = unlinked_agino; } - /* - * Point the bucket head pointer at the inode being inserted. - */ - ASSERT(agino != 0); - agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); - offset = offsetof(xfs_agi_t, agi_unlinked) + - (sizeof(xfs_agino_t) * bucket_index); - xfs_trans_log_buf(tp, agibp, offset, - (offset + sizeof(xfs_agino_t) - 1)); return 0; } @@ -1995,181 +2419,106 @@ xfs_iunlink( */ STATIC int xfs_iunlink_remove( - xfs_trans_t *tp, - xfs_inode_t *ip) + struct xfs_trans *tp, + struct xfs_inode *ip) { - xfs_ino_t next_ino; - xfs_mount_t *mp; - xfs_agi_t *agi; - xfs_dinode_t *dip; - xfs_buf_t *agibp; - xfs_buf_t *ibp; - xfs_agnumber_t agno; - xfs_agino_t agino; - xfs_agino_t next_agino; - xfs_buf_t *last_ibp; - xfs_dinode_t *last_dip = NULL; - short bucket_index; - int offset, last_offset = 0; - int error; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi; + struct xfs_buf *agibp; + struct xfs_buf *last_ibp; + struct xfs_dinode *last_dip = NULL; + struct xfs_perag *pag = NULL; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); + xfs_agino_t next_agino; + xfs_agino_t head_agino; + short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; + int error; - mp = tp->t_mountp; - agno = XFS_INO_TO_AGNO(mp, ip->i_ino); + trace_xfs_iunlink_remove(ip); - /* - * Get the agi buffer first. It ensures lock ordering - * on the list. - */ + /* Get the agi buffer first. It ensures lock ordering on the list. */ error = xfs_read_agi(mp, tp, agno, &agibp); if (error) return error; - agi = XFS_BUF_TO_AGI(agibp); /* - * Get the index into the agi hash table for the - * list this inode will go on. + * Get the index into the agi hash table for the list this inode will + * go on. Make sure the head pointer isn't garbage. */ - agino = XFS_INO_TO_AGINO(mp, ip->i_ino); - if (!xfs_verify_agino(mp, agno, agino)) - return -EFSCORRUPTED; - bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; - if (!xfs_verify_agino(mp, agno, - be32_to_cpu(agi->agi_unlinked[bucket_index]))) { + head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); + if (!xfs_verify_agino(mp, agno, head_agino)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi, sizeof(*agi)); return -EFSCORRUPTED; } - if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { - /* - * We're at the head of the list. Get the inode's on-disk - * buffer to see if there is anyone after us on the list. - * Only modify our next pointer if it is not already NULLAGINO. - * This saves us the overhead of dealing with the buffer when - * there is no need to change it. - */ - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, - 0, 0); - if (error) { - xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.", - __func__, error); - return error; - } - next_agino = be32_to_cpu(dip->di_next_unlinked); - ASSERT(next_agino != 0); - if (next_agino != NULLAGINO) { - dip->di_next_unlinked = cpu_to_be32(NULLAGINO); - offset = ip->i_imap.im_boffset + - offsetof(xfs_dinode_t, di_next_unlinked); - - /* need to recalc the inode CRC if appropriate */ - xfs_dinode_calc_crc(mp, dip); - - xfs_trans_inode_buf(tp, ibp); - xfs_trans_log_buf(tp, ibp, offset, - (offset + sizeof(xfs_agino_t) - 1)); - xfs_inobp_check(mp, ibp); - } else { - xfs_trans_brelse(tp, ibp); - } - /* - * Point the bucket head pointer at the next inode. - */ - ASSERT(next_agino != 0); - ASSERT(next_agino != agino); - agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); - offset = offsetof(xfs_agi_t, agi_unlinked) + - (sizeof(xfs_agino_t) * bucket_index); - xfs_trans_log_buf(tp, agibp, offset, - (offset + sizeof(xfs_agino_t) - 1)); - } else { - /* - * We need to search the list for the inode being freed. - */ - next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); - last_ibp = NULL; - while (next_agino != agino) { - struct xfs_imap imap; + /* + * Set our inode's next_unlinked pointer to NULL and then return + * the old pointer value so that we can update whatever was previous + * to us in the list to point to whatever was next in the list. + */ + error = xfs_iunlink_update_inode(tp, ip, agno, NULLAGINO, &next_agino); + if (error) + return error; - if (last_ibp) - xfs_trans_brelse(tp, last_ibp); + /* + * If there was a backref pointing from the next inode back to this + * one, remove it because we've removed this inode from the list. + * + * Later, if this inode was in the middle of the list we'll update + * this inode's backref to point from the next inode. + */ + if (next_agino != NULLAGINO) { + pag = xfs_perag_get(mp, agno); + error = xfs_iunlink_change_backref(pag, next_agino, + NULLAGINO); + if (error) + goto out; + } - imap.im_blkno = 0; - next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); + if (head_agino == agino) { + /* Point the head of the list to the next unlinked inode. */ + error = xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, + next_agino); + if (error) + goto out; + } else { + struct xfs_imap imap; + xfs_agino_t prev_agino; - error = xfs_imap(mp, tp, next_ino, &imap, 0); - if (error) { - xfs_warn(mp, - "%s: xfs_imap returned error %d.", - __func__, error); - return error; - } + if (!pag) + pag = xfs_perag_get(mp, agno); - error = xfs_imap_to_bp(mp, tp, &imap, &last_dip, - &last_ibp, 0, 0); - if (error) { - xfs_warn(mp, - "%s: xfs_imap_to_bp returned error %d.", - __func__, error); - return error; - } + /* We need to search the list for the inode being freed. */ + error = xfs_iunlink_map_prev(tp, agno, head_agino, agino, + &prev_agino, &imap, &last_dip, &last_ibp, + pag); + if (error) + goto out; - last_offset = imap.im_boffset; - next_agino = be32_to_cpu(last_dip->di_next_unlinked); - if (!xfs_verify_agino(mp, agno, next_agino)) { - XFS_CORRUPTION_ERROR(__func__, - XFS_ERRLEVEL_LOW, mp, - last_dip, sizeof(*last_dip)); - return -EFSCORRUPTED; - } - } + /* Point the previous inode on the list to the next inode. */ + xfs_iunlink_update_dinode(tp, agno, prev_agino, last_ibp, + last_dip, &imap, next_agino); /* - * Now last_ibp points to the buffer previous to us on the - * unlinked list. Pull us from the list. + * Now we deal with the backref for this inode. If this inode + * pointed at a real inode, change the backref that pointed to + * us to point to our old next. If this inode was the end of + * the list, delete the backref that pointed to us. Note that + * change_backref takes care of deleting the backref if + * next_agino is NULLAGINO. */ - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, - 0, 0); - if (error) { - xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.", - __func__, error); - return error; - } - next_agino = be32_to_cpu(dip->di_next_unlinked); - ASSERT(next_agino != 0); - ASSERT(next_agino != agino); - if (next_agino != NULLAGINO) { - dip->di_next_unlinked = cpu_to_be32(NULLAGINO); - offset = ip->i_imap.im_boffset + - offsetof(xfs_dinode_t, di_next_unlinked); - - /* need to recalc the inode CRC if appropriate */ - xfs_dinode_calc_crc(mp, dip); - - xfs_trans_inode_buf(tp, ibp); - xfs_trans_log_buf(tp, ibp, offset, - (offset + sizeof(xfs_agino_t) - 1)); - xfs_inobp_check(mp, ibp); - } else { - xfs_trans_brelse(tp, ibp); - } - /* - * Point the previous inode on the list to the next inode. - */ - last_dip->di_next_unlinked = cpu_to_be32(next_agino); - ASSERT(next_agino != 0); - offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); - - /* need to recalc the inode CRC if appropriate */ - xfs_dinode_calc_crc(mp, last_dip); - - xfs_trans_inode_buf(tp, last_ibp); - xfs_trans_log_buf(tp, last_ibp, offset, - (offset + sizeof(xfs_agino_t) - 1)); - xfs_inobp_check(mp, last_ibp); + error = xfs_iunlink_change_backref(pag, agino, next_agino); + if (error) + goto out; } - return 0; + +out: + if (pag) + xfs_perag_put(pag); + return error; } /* @@ -2833,11 +3182,9 @@ xfs_rename_alloc_whiteout( /* * Prepare the tmpfile inode as if it were created through the VFS. - * Otherwise, the link increment paths will complain about nlink 0->1. - * Drop the link count as done by d_tmpfile(), complete the inode setup - * and flag it as linkable. + * Complete the inode setup and flag it as linkable. nlink is already + * zero, so we can skip the drop_nlink. */ - drop_nlink(VFS_I(tmpfile)); xfs_setup_iops(tmpfile); xfs_finish_inode_setup(tmpfile); VFS_I(tmpfile)->i_state |= I_LINKABLE; |