summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_trans_rmap.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-08-06 16:50:36 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2016-08-06 16:50:36 +0300
commit0cbbc422d56668528f6efd1234fe908010284082 (patch)
treed4bebf90c29044b4a6180053fc18f9e927361012 /fs/xfs/xfs_trans_rmap.c
parent835c92d43b29eb354abdbd5475308a474d7efdfa (diff)
parent3481b68285238054be519ad0c8cad5cc2425e26c (diff)
downloadlinux-0cbbc422d56668528f6efd1234fe908010284082.tar.xz
Merge tag 'xfs-rmap-for-linus-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull more xfs updates from Dave Chinner: "This is the second part of the XFS updates for this merge cycle, and contains the new reverse block mapping feature for XFS. Reverse mapping allows us to track the owner of a specific block on disk precisely. It is implemented as a set of btrees (one per allocation group) that track the owners of allocated extents. Effectively it is a "used space tree" that is updated when we allocate or free extents. i.e. it is coherent with the free space btrees we already maintain and never overlaps with them. This reverse mapping infrastructure is the building block of several upcoming features - reflink, copy-on-write data, dedupe, online metadata and data scrubbing, highly accurate bad sector/data loss reporting to users, and significantly improved reconstruction of damaged and corrupted filesystems. There's a lot of new stuff coming along in the next couple of cycles,a nd it all builds in the rmap infrastructure. As such, it's a huge chunk of new code with new on-disk format features and internal infrastructure. It warns at mount time as an experimental feature and that it may eat data (as we do with all new on-disk features until they stabilise). We have not released userspace suport for it yet - userspace support currently requires download from Darrick's xfsprogs repo and build from source, so the access to this feature is really developer/tester only at this point. Initial userspace support will be released at the same time kernel with this code in it is released. The new rmap enabled code regresses 3 xfstests - all are ENOSPC related corner cases, one of which Darrick posted a fix for a few hours ago. The other two are fixed by infrastructure that is part of the upcoming reflink patchset. This new ENOSPC infrastructure requires a on-disk format tweak required to keep mount times in check - we need to keep an on-disk count of allocated rmapbt blocks so we don't have to scan the entire btrees at mount time to count them. This is currently being tested and will be part of the fixes sent in the next week or two so users will not be exposed to this change" * tag 'xfs-rmap-for-linus-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (52 commits) xfs: move (and rename) the deferred bmap-free tracepoints xfs: collapse single use static functions xfs: remove unnecessary parentheses from log redo item recovery functions xfs: remove the extents array from the rmap update done log item xfs: in btree_lshift, only allocate temporary cursor when needed xfs: remove unnecesary lshift/rshift key initialization xfs: remove the get*keys and update_keys btree ops pointers xfs: enable the rmap btree functionality xfs: don't update rmapbt when fixing agfl xfs: disable XFS_IOC_SWAPEXT when rmap btree is enabled xfs: add rmap btree block detection to log recovery xfs: add rmap btree geometry feature flag xfs: propagate bmap updates to rmapbt xfs: enable the xfs_defer mechanism to process rmaps to update xfs: log rmap intent items xfs: create rmap update intent log items xfs: add rmap btree insert and delete helpers xfs: convert unwritten status of reverse mappings xfs: remove an extent from the rmap btree xfs: add an extent to the rmap btree ...
Diffstat (limited to 'fs/xfs/xfs_trans_rmap.c')
-rw-r--r--fs/xfs/xfs_trans_rmap.c271
1 files changed, 271 insertions, 0 deletions
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
new file mode 100644
index 000000000000..5a50ef881568
--- /dev/null
+++ b/fs/xfs/xfs_trans_rmap.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (C) 2016 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_rmap_item.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap.h"
+
+/* Set the map extent flags for this reverse mapping. */
+static void
+xfs_trans_set_rmap_flags(
+ struct xfs_map_extent *rmap,
+ enum xfs_rmap_intent_type type,
+ int whichfork,
+ xfs_exntst_t state)
+{
+ rmap->me_flags = 0;
+ if (state == XFS_EXT_UNWRITTEN)
+ rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
+ if (whichfork == XFS_ATTR_FORK)
+ rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
+ switch (type) {
+ case XFS_RMAP_MAP:
+ rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
+ break;
+ case XFS_RMAP_UNMAP:
+ rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
+ break;
+ case XFS_RMAP_CONVERT:
+ rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
+ break;
+ case XFS_RMAP_ALLOC:
+ rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
+ break;
+ case XFS_RMAP_FREE:
+ rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
+ break;
+ default:
+ ASSERT(0);
+ }
+}
+
+struct xfs_rud_log_item *
+xfs_trans_get_rud(
+ struct xfs_trans *tp,
+ struct xfs_rui_log_item *ruip)
+{
+ struct xfs_rud_log_item *rudp;
+
+ rudp = xfs_rud_init(tp->t_mountp, ruip);
+ xfs_trans_add_item(tp, &rudp->rud_item);
+ return rudp;
+}
+
+/*
+ * Finish an rmap update and log it to the RUD. Note that the transaction is
+ * marked dirty regardless of whether the rmap update succeeds or fails to
+ * support the RUI/RUD lifecycle rules.
+ */
+int
+xfs_trans_log_finish_rmap_update(
+ struct xfs_trans *tp,
+ struct xfs_rud_log_item *rudp,
+ enum xfs_rmap_intent_type type,
+ __uint64_t owner,
+ int whichfork,
+ xfs_fileoff_t startoff,
+ xfs_fsblock_t startblock,
+ xfs_filblks_t blockcount,
+ xfs_exntst_t state,
+ struct xfs_btree_cur **pcur)
+{
+ int error;
+
+ error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
+ startblock, blockcount, state, pcur);
+
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the RUI and frees the RUD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ rudp->rud_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+
+ return error;
+}
+
+/* Sort rmap intents by AG. */
+static int
+xfs_rmap_update_diff_items(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
+{
+ struct xfs_mount *mp = priv;
+ struct xfs_rmap_intent *ra;
+ struct xfs_rmap_intent *rb;
+
+ ra = container_of(a, struct xfs_rmap_intent, ri_list);
+ rb = container_of(b, struct xfs_rmap_intent, ri_list);
+ return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
+ XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
+}
+
+/* Get an RUI. */
+STATIC void *
+xfs_rmap_update_create_intent(
+ struct xfs_trans *tp,
+ unsigned int count)
+{
+ struct xfs_rui_log_item *ruip;
+
+ ASSERT(tp != NULL);
+ ASSERT(count > 0);
+
+ ruip = xfs_rui_init(tp->t_mountp, count);
+ ASSERT(ruip != NULL);
+
+ /*
+ * Get a log_item_desc to point at the new item.
+ */
+ xfs_trans_add_item(tp, &ruip->rui_item);
+ return ruip;
+}
+
+/* Log rmap updates in the intent item. */
+STATIC void
+xfs_rmap_update_log_item(
+ struct xfs_trans *tp,
+ void *intent,
+ struct list_head *item)
+{
+ struct xfs_rui_log_item *ruip = intent;
+ struct xfs_rmap_intent *rmap;
+ uint next_extent;
+ struct xfs_map_extent *map;
+
+ rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ ruip->rui_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+
+ /*
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
+ */
+ next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
+ ASSERT(next_extent < ruip->rui_format.rui_nextents);
+ map = &ruip->rui_format.rui_extents[next_extent];
+ map->me_owner = rmap->ri_owner;
+ map->me_startblock = rmap->ri_bmap.br_startblock;
+ map->me_startoff = rmap->ri_bmap.br_startoff;
+ map->me_len = rmap->ri_bmap.br_blockcount;
+ xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
+ rmap->ri_bmap.br_state);
+}
+
+/* Get an RUD so we can process all the deferred rmap updates. */
+STATIC void *
+xfs_rmap_update_create_done(
+ struct xfs_trans *tp,
+ void *intent,
+ unsigned int count)
+{
+ return xfs_trans_get_rud(tp, intent);
+}
+
+/* Process a deferred rmap update. */
+STATIC int
+xfs_rmap_update_finish_item(
+ struct xfs_trans *tp,
+ struct xfs_defer_ops *dop,
+ struct list_head *item,
+ void *done_item,
+ void **state)
+{
+ struct xfs_rmap_intent *rmap;
+ int error;
+
+ rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+ error = xfs_trans_log_finish_rmap_update(tp, done_item,
+ rmap->ri_type,
+ rmap->ri_owner, rmap->ri_whichfork,
+ rmap->ri_bmap.br_startoff,
+ rmap->ri_bmap.br_startblock,
+ rmap->ri_bmap.br_blockcount,
+ rmap->ri_bmap.br_state,
+ (struct xfs_btree_cur **)state);
+ kmem_free(rmap);
+ return error;
+}
+
+/* Clean up after processing deferred rmaps. */
+STATIC void
+xfs_rmap_update_finish_cleanup(
+ struct xfs_trans *tp,
+ void *state,
+ int error)
+{
+ struct xfs_btree_cur *rcur = state;
+
+ xfs_rmap_finish_one_cleanup(tp, rcur, error);
+}
+
+/* Abort all pending RUIs. */
+STATIC void
+xfs_rmap_update_abort_intent(
+ void *intent)
+{
+ xfs_rui_release(intent);
+}
+
+/* Cancel a deferred rmap update. */
+STATIC void
+xfs_rmap_update_cancel_item(
+ struct list_head *item)
+{
+ struct xfs_rmap_intent *rmap;
+
+ rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+ kmem_free(rmap);
+}
+
+static const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
+ .type = XFS_DEFER_OPS_TYPE_RMAP,
+ .max_items = XFS_RUI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_rmap_update_diff_items,
+ .create_intent = xfs_rmap_update_create_intent,
+ .abort_intent = xfs_rmap_update_abort_intent,
+ .log_item = xfs_rmap_update_log_item,
+ .create_done = xfs_rmap_update_create_done,
+ .finish_item = xfs_rmap_update_finish_item,
+ .finish_cleanup = xfs_rmap_update_finish_cleanup,
+ .cancel_item = xfs_rmap_update_cancel_item,
+};
+
+/* Register the deferred op type. */
+void
+xfs_rmap_update_init_defer_op(void)
+{
+ xfs_defer_init_op_type(&xfs_rmap_update_defer_type);
+}