summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c280
1 files changed, 1 insertions, 279 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5848f7b36cc5..ac5826ce5fec 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1739,39 +1739,6 @@ out:
}
/*
- * In-Core Unlinked List Lookups
- * =============================
- *
- * Every inode is supposed to be reachable from some other piece of metadata
- * with the exception of the root directory. Inodes with a connection to a
- * file descriptor but not linked from anywhere in the on-disk directory tree
- * are collectively known as unlinked inodes, though the filesystem itself
- * maintains links to these inodes so that on-disk metadata are consistent.
- *
- * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
- * header contains a number of buckets that point to an inode, and each inode
- * record has a pointer to the next inode in the hash chain. This
- * singly-linked list causes scaling problems in the iunlink remove function
- * because we must walk that list to find the inode that points to the inode
- * being removed from the unlinked hash bucket list.
- *
- * Hence we keep an in-memory double linked list to link each inode on an
- * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
- * based lists would require having 64 list heads in the perag, one for each
- * list. This is expensive in terms of memory (think millions of AGs) and cache
- * misses on lookups. Instead, use the fact that inodes on the unlinked list
- * must be referenced at the VFS level to keep them on the list and hence we
- * have an existence guarantee for inodes on the unlinked list.
- *
- * Given we have an existence guarantee, we can use lockless inode cache lookups
- * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
- * for the double linked unlinked list, and we don't need any extra locking to
- * keep the list safe as all manipulations are done under the AGI buffer lock.
- * Keeping the list up to date does not require memory allocation, just finding
- * the XFS inode and updating the next/prev unlinked list aginos.
- */
-
-/*
* Find an inode on the unlinked list. This does not take references to the
* inode as we have existence guarantees by holding the AGI buffer lock and that
* only unlinked, referenced inodes can be on the unlinked inode list. If we
@@ -1806,75 +1773,11 @@ xfs_iunlink_lookup(
}
/*
- * Update the prev pointer of the next agino. Returns -ENOLINK if the inode
- * is not in cache.
- */
-static int
-xfs_iunlink_update_backref(
- struct xfs_perag *pag,
- xfs_agino_t prev_agino,
- xfs_agino_t next_agino)
-{
- struct xfs_inode *ip;
-
- /* No update necessary if we are at the end of the list. */
- if (next_agino == NULLAGINO)
- return 0;
-
- ip = xfs_iunlink_lookup(pag, next_agino);
- if (!ip)
- return -ENOLINK;
-
- ip->i_prev_unlinked = prev_agino;
- return 0;
-}
-
-/*
- * Point the AGI unlinked bucket at an inode and log the results. The caller
- * is responsible for validating the old value.
- */
-STATIC int
-xfs_iunlink_update_bucket(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_buf *agibp,
- unsigned int bucket_index,
- xfs_agino_t new_agino)
-{
- struct xfs_agi *agi = agibp->b_addr;
- xfs_agino_t old_value;
- int offset;
-
- ASSERT(xfs_verify_agino_or_null(pag, new_agino));
-
- old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
- old_value, new_agino);
-
- /*
- * We should never find the head of the list already set to the value
- * passed in because either we're adding or removing ourselves from the
- * head of the list.
- */
- if (old_value == new_agino) {
- xfs_buf_mark_corrupt(agibp);
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
- return -EFSCORRUPTED;
- }
-
- agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
- offset = offsetof(struct xfs_agi, agi_unlinked) +
- (sizeof(xfs_agino_t) * bucket_index);
- xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
- return 0;
-}
-
-/*
* Load the inode @next_agino into the cache and set its prev_unlinked pointer
* to @prev_agino. Caller must hold the AGI to synchronize with other changes
* to the unlinked list.
*/
-STATIC int
+int
xfs_iunlink_reload_next(
struct xfs_trans *tp,
struct xfs_buf *agibp,
@@ -1930,187 +1833,6 @@ rele:
return error;
}
-static int
-xfs_iunlink_insert_inode(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_buf *agibp,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_agi *agi = agibp->b_addr;
- xfs_agino_t next_agino;
- xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
- short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
- int error;
-
- /*
- * Get the index into the agi hash table for the list this inode will
- * go on. Make sure the pointer isn't garbage and that this inode
- * isn't already on the list.
- */
- next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- if (next_agino == agino ||
- !xfs_verify_agino_or_null(pag, next_agino)) {
- xfs_buf_mark_corrupt(agibp);
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
- return -EFSCORRUPTED;
- }
-
- /*
- * Update the prev pointer in the next inode to point back to this
- * inode.
- */
- error = xfs_iunlink_update_backref(pag, agino, next_agino);
- if (error == -ENOLINK)
- error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
- if (error)
- return error;
-
- if (next_agino != NULLAGINO) {
- /*
- * There is already another inode in the bucket, so point this
- * inode to the current head of the list.
- */
- error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
- if (error)
- return error;
- ip->i_next_unlinked = next_agino;
- }
-
- /* Point the head of the list to point to this inode. */
- ip->i_prev_unlinked = NULLAGINO;
- return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
-}
-
-/*
- * This is called when the inode's link count has gone to 0 or we are creating
- * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
- *
- * We place the on-disk inode on a list in the AGI. It will be pulled from this
- * list when the inode is freed.
- */
-int
-xfs_iunlink(
- struct xfs_trans *tp,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_perag *pag;
- struct xfs_buf *agibp;
- int error;
-
- ASSERT(VFS_I(ip)->i_nlink == 0);
- ASSERT(VFS_I(ip)->i_mode != 0);
- trace_xfs_iunlink(ip);
-
- pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-
- /* Get the agi buffer first. It ensures lock ordering on the list. */
- error = xfs_read_agi(pag, tp, 0, &agibp);
- if (error)
- goto out;
-
- error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
-out:
- xfs_perag_put(pag);
- return error;
-}
-
-static int
-xfs_iunlink_remove_inode(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_buf *agibp,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_agi *agi = agibp->b_addr;
- xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
- xfs_agino_t head_agino;
- short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
- int error;
-
- trace_xfs_iunlink_remove(ip);
-
- /*
- * Get the index into the agi hash table for the list this inode will
- * go on. Make sure the head pointer isn't garbage.
- */
- head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- if (!xfs_verify_agino(pag, head_agino)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- agi, sizeof(*agi));
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
- return -EFSCORRUPTED;
- }
-
- /*
- * Set our inode's next_unlinked pointer to NULL and then return
- * the old pointer value so that we can update whatever was previous
- * to us in the list to point to whatever was next in the list.
- */
- error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
- if (error)
- return error;
-
- /*
- * Update the prev pointer in the next inode to point back to previous
- * inode in the chain.
- */
- error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
- ip->i_next_unlinked);
- if (error == -ENOLINK)
- error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
- ip->i_next_unlinked);
- if (error)
- return error;
-
- if (head_agino != agino) {
- struct xfs_inode *prev_ip;
-
- prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
- if (!prev_ip) {
- xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
- return -EFSCORRUPTED;
- }
-
- error = xfs_iunlink_log_inode(tp, prev_ip, pag,
- ip->i_next_unlinked);
- prev_ip->i_next_unlinked = ip->i_next_unlinked;
- } else {
- /* Point the head of the list to the next unlinked inode. */
- error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
- ip->i_next_unlinked);
- }
-
- ip->i_next_unlinked = NULLAGINO;
- ip->i_prev_unlinked = 0;
- return error;
-}
-
-/*
- * Pull the on-disk inode from the AGI unlinked list.
- */
-int
-xfs_iunlink_remove(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_inode *ip)
-{
- struct xfs_buf *agibp;
- int error;
-
- trace_xfs_iunlink_remove(ip);
-
- /* Get the agi buffer first. It ensures lock ordering on the list. */
- error = xfs_read_agi(pag, tp, 0, &agibp);
- if (error)
- return error;
-
- return xfs_iunlink_remove_inode(tp, pag, agibp, ip);
-}
-
/*
* Look up the inode number specified and if it is not already marked XFS_ISTALE
* mark it stale. We should only find clean inodes in this lookup that aren't