summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_alloc.c')
-rw-r--r--fs/xfs/xfs_alloc.c605
1 files changed, 21 insertions, 584 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 0f0df2759b09..4f33c32affe3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -20,7 +20,6 @@
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
-#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
@@ -32,6 +31,7 @@
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -47,8 +47,6 @@ STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
-STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
- xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
/*
* Lookup the record equal to [bno, len] in the btree given by cur.
@@ -152,7 +150,7 @@ xfs_alloc_compute_aligned(
xfs_extlen_t len;
/* Trim busy sections out of found extent */
- xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
+ xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
if (args->alignment > 1 && len >= args->minlen) {
xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
@@ -536,7 +534,7 @@ xfs_alloc_ag_vextent(
if (error)
return error;
- ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
+ ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
args->agbno, args->len));
}
@@ -603,7 +601,7 @@ xfs_alloc_ag_vextent_exact(
/*
* Check for overlapping busy extents.
*/
- xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
+ xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
/*
* Give up if the start of the extent is busy, or the freespace isn't
@@ -1076,12 +1074,13 @@ restart:
* If we couldn't get anything, give up.
*/
if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+
if (!forced++) {
trace_xfs_alloc_near_busy(args);
xfs_log_force(args->mp, XFS_LOG_SYNC);
goto restart;
}
-
trace_xfs_alloc_size_neither(args);
args->agbno = NULLAGBLOCK;
return 0;
@@ -1391,7 +1390,7 @@ xfs_alloc_ag_vextent_small(
if (error)
goto error0;
if (fbno != NULLAGBLOCK) {
- xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
+ xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
args->userdata);
if (args->userdata) {
@@ -2435,15 +2434,24 @@ xfs_alloc_vextent_worker(
current_restore_flags_nested(&pflags, PF_FSTRANS);
}
-
-int /* error */
+/*
+ * Data allocation requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Metadata
+ * requests, OTOH, are generally from low stack usage paths, so avoid the
+ * context switch overhead here.
+ */
+int
xfs_alloc_vextent(
- xfs_alloc_arg_t *args) /* allocation argument structure */
+ struct xfs_alloc_arg *args)
{
DECLARE_COMPLETION_ONSTACK(done);
+ if (!args->userdata)
+ return __xfs_alloc_vextent(args);
+
+
args->done = &done;
- INIT_WORK(&args->work, xfs_alloc_vextent_worker);
+ INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
queue_work(xfs_alloc_wq, &args->work);
wait_for_completion(&done);
return args->result;
@@ -2496,579 +2504,8 @@ xfs_free_extent(
error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
if (!error)
- xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0);
+ xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
error0:
xfs_perag_put(args.pag);
return error;
}
-
-void
-xfs_alloc_busy_insert(
- struct xfs_trans *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- unsigned int flags)
-{
- struct xfs_busy_extent *new;
- struct xfs_busy_extent *busyp;
- struct xfs_perag *pag;
- struct rb_node **rbp;
- struct rb_node *parent = NULL;
-
- new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
- if (!new) {
- /*
- * No Memory! Since it is now not possible to track the free
- * block, make this a synchronous transaction to insure that
- * the block is not reused before this transaction commits.
- */
- trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
- xfs_trans_set_sync(tp);
- return;
- }
-
- new->agno = agno;
- new->bno = bno;
- new->length = len;
- INIT_LIST_HEAD(&new->list);
- new->flags = flags;
-
- /* trace before insert to be able to see failed inserts */
- trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
-
- pag = xfs_perag_get(tp->t_mountp, new->agno);
- spin_lock(&pag->pagb_lock);
- rbp = &pag->pagb_tree.rb_node;
- while (*rbp) {
- parent = *rbp;
- busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
-
- if (new->bno < busyp->bno) {
- rbp = &(*rbp)->rb_left;
- ASSERT(new->bno + new->length <= busyp->bno);
- } else if (new->bno > busyp->bno) {
- rbp = &(*rbp)->rb_right;
- ASSERT(bno >= busyp->bno + busyp->length);
- } else {
- ASSERT(0);
- }
- }
-
- rb_link_node(&new->rb_node, parent, rbp);
- rb_insert_color(&new->rb_node, &pag->pagb_tree);
-
- list_add(&new->list, &tp->t_busy);
- spin_unlock(&pag->pagb_lock);
- xfs_perag_put(pag);
-}
-
-/*
- * Search for a busy extent within the range of the extent we are about to
- * allocate. You need to be holding the busy extent tree lock when calling
- * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy
- * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
- * match. This is done so that a non-zero return indicates an overlap that
- * will require a synchronous transaction, but it can still be
- * used to distinguish between a partial or exact match.
- */
-int
-xfs_alloc_busy_search(
- struct xfs_mount *mp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len)
-{
- struct xfs_perag *pag;
- struct rb_node *rbp;
- struct xfs_busy_extent *busyp;
- int match = 0;
-
- pag = xfs_perag_get(mp, agno);
- spin_lock(&pag->pagb_lock);
-
- rbp = pag->pagb_tree.rb_node;
-
- /* find closest start bno overlap */
- while (rbp) {
- busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node);
- if (bno < busyp->bno) {
- /* may overlap, but exact start block is lower */
- if (bno + len > busyp->bno)
- match = -1;
- rbp = rbp->rb_left;
- } else if (bno > busyp->bno) {
- /* may overlap, but exact start block is higher */
- if (bno < busyp->bno + busyp->length)
- match = -1;
- rbp = rbp->rb_right;
- } else {
- /* bno matches busyp, length determines exact match */
- match = (busyp->length == len) ? 1 : -1;
- break;
- }
- }
- spin_unlock(&pag->pagb_lock);
- xfs_perag_put(pag);
- return match;
-}
-
-/*
- * The found free extent [fbno, fend] overlaps part or all of the given busy
- * extent. If the overlap covers the beginning, the end, or all of the busy
- * extent, the overlapping portion can be made unbusy and used for the
- * allocation. We can't split a busy extent because we can't modify a
- * transaction/CIL context busy list, but we can update an entries block
- * number or length.
- *
- * Returns true if the extent can safely be reused, or false if the search
- * needs to be restarted.
- */
-STATIC bool
-xfs_alloc_busy_update_extent(
- struct xfs_mount *mp,
- struct xfs_perag *pag,
- struct xfs_busy_extent *busyp,
- xfs_agblock_t fbno,
- xfs_extlen_t flen,
- bool userdata)
-{
- xfs_agblock_t fend = fbno + flen;
- xfs_agblock_t bbno = busyp->bno;
- xfs_agblock_t bend = bbno + busyp->length;
-
- /*
- * This extent is currently being discarded. Give the thread
- * performing the discard a chance to mark the extent unbusy
- * and retry.
- */
- if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) {
- spin_unlock(&pag->pagb_lock);
- delay(1);
- spin_lock(&pag->pagb_lock);
- return false;
- }
-
- /*
- * If there is a busy extent overlapping a user allocation, we have
- * no choice but to force the log and retry the search.
- *
- * Fortunately this does not happen during normal operation, but
- * only if the filesystem is very low on space and has to dip into
- * the AGFL for normal allocations.
- */
- if (userdata)
- goto out_force_log;
-
- if (bbno < fbno && bend > fend) {
- /*
- * Case 1:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +---------+
- * fbno fend
- */
-
- /*
- * We would have to split the busy extent to be able to track
- * it correct, which we cannot do because we would have to
- * modify the list of busy extents attached to the transaction
- * or CIL context, which is immutable.
- *
- * Force out the log to clear the busy extent and retry the
- * search.
- */
- goto out_force_log;
- } else if (bbno >= fbno && bend <= fend) {
- /*
- * Case 2:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +-----------------+
- * fbno fend
- *
- * Case 3:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +--------------------------+
- * fbno fend
- *
- * Case 4:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +--------------------------+
- * fbno fend
- *
- * Case 5:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +-----------------------------------+
- * fbno fend
- *
- */
-
- /*
- * The busy extent is fully covered by the extent we are
- * allocating, and can simply be removed from the rbtree.
- * However we cannot remove it from the immutable list
- * tracking busy extents in the transaction or CIL context,
- * so set the length to zero to mark it invalid.
- *
- * We also need to restart the busy extent search from the
- * tree root, because erasing the node can rearrange the
- * tree topology.
- */
- rb_erase(&busyp->rb_node, &pag->pagb_tree);
- busyp->length = 0;
- return false;
- } else if (fend < bend) {
- /*
- * Case 6:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +---------+
- * fbno fend
- *
- * Case 7:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +------------------+
- * fbno fend
- *
- */
- busyp->bno = fend;
- } else if (bbno < fbno) {
- /*
- * Case 8:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +-------------+
- * fbno fend
- *
- * Case 9:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +----------------------+
- * fbno fend
- */
- busyp->length = fbno - busyp->bno;
- } else {
- ASSERT(0);
- }
-
- trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
- return true;
-
-out_force_log:
- spin_unlock(&pag->pagb_lock);
- xfs_log_force(mp, XFS_LOG_SYNC);
- trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
- spin_lock(&pag->pagb_lock);
- return false;
-}
-
-
-/*
- * For a given extent [fbno, flen], make sure we can reuse it safely.
- */
-void
-xfs_alloc_busy_reuse(
- struct xfs_mount *mp,
- xfs_agnumber_t agno,
- xfs_agblock_t fbno,
- xfs_extlen_t flen,
- bool userdata)
-{
- struct xfs_perag *pag;
- struct rb_node *rbp;
-
- ASSERT(flen > 0);
-
- pag = xfs_perag_get(mp, agno);
- spin_lock(&pag->pagb_lock);
-restart:
- rbp = pag->pagb_tree.rb_node;
- while (rbp) {
- struct xfs_busy_extent *busyp =
- rb_entry(rbp, struct xfs_busy_extent, rb_node);
- xfs_agblock_t bbno = busyp->bno;
- xfs_agblock_t bend = bbno + busyp->length;
-
- if (fbno + flen <= bbno) {
- rbp = rbp->rb_left;
- continue;
- } else if (fbno >= bend) {
- rbp = rbp->rb_right;
- continue;
- }
-
- if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
- userdata))
- goto restart;
- }
- spin_unlock(&pag->pagb_lock);
- xfs_perag_put(pag);
-}
-
-/*
- * For a given extent [fbno, flen], search the busy extent list to find a
- * subset of the extent that is not busy. If *rlen is smaller than
- * args->minlen no suitable extent could be found, and the higher level
- * code needs to force out the log and retry the allocation.
- */
-STATIC void
-xfs_alloc_busy_trim(
- struct xfs_alloc_arg *args,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- xfs_agblock_t *rbno,
- xfs_extlen_t *rlen)
-{
- xfs_agblock_t fbno;
- xfs_extlen_t flen;
- struct rb_node *rbp;
-
- ASSERT(len > 0);
-
- spin_lock(&args->pag->pagb_lock);
-restart:
- fbno = bno;
- flen = len;
- rbp = args->pag->pagb_tree.rb_node;
- while (rbp && flen >= args->minlen) {
- struct xfs_busy_extent *busyp =
- rb_entry(rbp, struct xfs_busy_extent, rb_node);
- xfs_agblock_t fend = fbno + flen;
- xfs_agblock_t bbno = busyp->bno;
- xfs_agblock_t bend = bbno + busyp->length;
-
- if (fend <= bbno) {
- rbp = rbp->rb_left;
- continue;
- } else if (fbno >= bend) {
- rbp = rbp->rb_right;
- continue;
- }
-
- /*
- * If this is a metadata allocation, try to reuse the busy
- * extent instead of trimming the allocation.
- */
- if (!args->userdata &&
- !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) {
- if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
- busyp, fbno, flen,
- false))
- goto restart;
- continue;
- }
-
- if (bbno <= fbno) {
- /* start overlap */
-
- /*
- * Case 1:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +---------+
- * fbno fend
- *
- * Case 2:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +-------------+
- * fbno fend
- *
- * Case 3:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +-------------+
- * fbno fend
- *
- * Case 4:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +-----------------+
- * fbno fend
- *
- * No unbusy region in extent, return failure.
- */
- if (fend <= bend)
- goto fail;
-
- /*
- * Case 5:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +----------------------+
- * fbno fend
- *
- * Case 6:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +--------------------------+
- * fbno fend
- *
- * Needs to be trimmed to:
- * +-------+
- * fbno fend
- */
- fbno = bend;
- } else if (bend >= fend) {
- /* end overlap */
-
- /*
- * Case 7:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +------------------+
- * fbno fend
- *
- * Case 8:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +--------------------------+
- * fbno fend
- *
- * Needs to be trimmed to:
- * +-------+
- * fbno fend
- */
- fend = bbno;
- } else {
- /* middle overlap */
-
- /*
- * Case 9:
- * bbno bend
- * +BBBBBBBBBBBBBBBBB+
- * +-----------------------------------+
- * fbno fend
- *
- * Can be trimmed to:
- * +-------+ OR +-------+
- * fbno fend fbno fend
- *
- * Backward allocation leads to significant
- * fragmentation of directories, which degrades
- * directory performance, therefore we always want to
- * choose the option that produces forward allocation
- * patterns.
- * Preferring the lower bno extent will make the next
- * request use "fend" as the start of the next
- * allocation; if the segment is no longer busy at
- * that point, we'll get a contiguous allocation, but
- * even if it is still busy, we will get a forward
- * allocation.
- * We try to avoid choosing the segment at "bend",
- * because that can lead to the next allocation
- * taking the segment at "fbno", which would be a
- * backward allocation. We only use the segment at
- * "fbno" if it is much larger than the current
- * requested size, because in that case there's a
- * good chance subsequent allocations will be
- * contiguous.
- */
- if (bbno - fbno >= args->maxlen) {
- /* left candidate fits perfect */
- fend = bbno;
- } else if (fend - bend >= args->maxlen * 4) {
- /* right candidate has enough free space */
- fbno = bend;
- } else if (bbno - fbno >= args->minlen) {
- /* left candidate fits minimum requirement */
- fend = bbno;
- } else {
- goto fail;
- }
- }
-
- flen = fend - fbno;
- }
- spin_unlock(&args->pag->pagb_lock);
-
- if (fbno != bno || flen != len) {
- trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
- fbno, flen);
- }
- *rbno = fbno;
- *rlen = flen;
- return;
-fail:
- /*
- * Return a zero extent length as failure indications. All callers
- * re-check if the trimmed extent satisfies the minlen requirement.
- */
- spin_unlock(&args->pag->pagb_lock);
- trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
- *rbno = fbno;
- *rlen = 0;
-}
-
-static void
-xfs_alloc_busy_clear_one(
- struct xfs_mount *mp,
- struct xfs_perag *pag,
- struct xfs_busy_extent *busyp)
-{
- if (busyp->length) {
- trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
- busyp->length);
- rb_erase(&busyp->rb_node, &pag->pagb_tree);
- }
-
- list_del_init(&busyp->list);
- kmem_free(busyp);
-}
-
-/*
- * Remove all extents on the passed in list from the busy extents tree.
- * If do_discard is set skip extents that need to be discarded, and mark
- * these as undergoing a discard operation instead.
- */
-void
-xfs_alloc_busy_clear(
- struct xfs_mount *mp,
- struct list_head *list,
- bool do_discard)
-{
- struct xfs_busy_extent *busyp, *n;
- struct xfs_perag *pag = NULL;
- xfs_agnumber_t agno = NULLAGNUMBER;
-
- list_for_each_entry_safe(busyp, n, list, list) {
- if (busyp->agno != agno) {
- if (pag) {
- spin_unlock(&pag->pagb_lock);
- xfs_perag_put(pag);
- }
- pag = xfs_perag_get(mp, busyp->agno);
- spin_lock(&pag->pagb_lock);
- agno = busyp->agno;
- }
-
- if (do_discard && busyp->length &&
- !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD))
- busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
- else
- xfs_alloc_busy_clear_one(mp, pag, busyp);
- }
-
- if (pag) {
- spin_unlock(&pag->pagb_lock);
- xfs_perag_put(pag);
- }
-}
-
-/*
- * Callback for list_sort to sort busy extents by the AG they reside in.
- */
-int
-xfs_busy_extent_ag_cmp(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- return container_of(a, struct xfs_busy_extent, list)->agno -
- container_of(b, struct xfs_busy_extent, list)->agno;
-}