summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_log_recover.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--fs/xfs/xfs_log_recover.c463
1 files changed, 447 insertions, 16 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 96fcbb85ff83..6fcc910a50b9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -29,6 +29,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
@@ -44,6 +45,15 @@
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
+#include "xfs_icreate_item.h"
+
+/* Need all the magic numbers and buffer ops structures from these headers */
+#include "xfs_symlink.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_attr_remote.h"
STATIC int
xlog_find_zeroed(
@@ -1442,9 +1452,8 @@ xlog_recover_find_tid(
xlog_tid_t tid)
{
xlog_recover_t *trans;
- struct hlist_node *n;
- hlist_for_each_entry(trans, n, head, r_list) {
+ hlist_for_each_entry(trans, head, r_list) {
if (trans->r_log_tid == tid)
return trans;
}
@@ -1591,10 +1600,53 @@ xlog_recover_add_to_trans(
}
/*
- * Sort the log items in the transaction. Cancelled buffers need
- * to be put first so they are processed before any items that might
- * modify the buffers. If they are cancelled, then the modifications
- * don't need to be replayed.
+ * Sort the log items in the transaction.
+ *
+ * The ordering constraints are defined by the inode allocation and unlink
+ * behaviour. The rules are:
+ *
+ * 1. Every item is only logged once in a given transaction. Hence it
+ * represents the last logged state of the item. Hence ordering is
+ * dependent on the order in which operations need to be performed so
+ * required initial conditions are always met.
+ *
+ * 2. Cancelled buffers are recorded in pass 1 in a separate table and
+ * there's nothing to replay from them so we can simply cull them
+ * from the transaction. However, we can't do that until after we've
+ * replayed all the other items because they may be dependent on the
+ * cancelled buffer and replaying the cancelled buffer can remove it
+ * from the cancelled buffer table. Hence they have to be done last.
+ *
+ * 3. Inode allocation buffers must be replayed before inode items that
+ * read the buffer and replay changes into it. For filesystems using the
+ * ICREATE transactions, this means XFS_LI_ICREATE objects need to get
+ * treated the same as inode allocation buffers as they create and
+ * initialise the buffers directly.
+ *
+ * 4. Inode unlink buffers must be replayed after inode items are replayed.
+ * This ensures that inodes are completely flushed to the inode buffer
+ * in a "free" state before we remove the unlinked inode list pointer.
+ *
+ * Hence the ordering needs to be inode allocation buffers first, inode items
+ * second, inode unlink buffers third and cancelled buffers last.
+ *
+ * But there's a problem with that - we can't tell an inode allocation buffer
+ * apart from a regular buffer, so we can't separate them. We can, however,
+ * tell an inode unlink buffer from the others, and so we can separate them out
+ * from all the other buffers and move them to last.
+ *
+ * Hence, 4 lists, in order from head to tail:
+ * - buffer_list for all buffers except cancelled/inode unlink buffers
+ * - inode_list for all non-buffer items
+ * - inode_buffer_list for inode unlink buffers
+ * - cancel_list for the cancelled buffers
+ *
+ * Note that we add objects to the tail of the lists so that first-to-last
+ * ordering is preserved within the lists. Adding objects to the head of the
+ * list means when we traverse from the head we walk them in last-to-first
+ * order. For cancelled buffers and inode unlink buffers this doesn't matter,
+ * but for all other items there may be specific ordering that we need to
+ * preserve.
*/
STATIC int
xlog_recover_reorder_trans(
@@ -1604,19 +1656,32 @@ xlog_recover_reorder_trans(
{
xlog_recover_item_t *item, *n;
LIST_HEAD(sort_list);
+ LIST_HEAD(cancel_list);
+ LIST_HEAD(buffer_list);
+ LIST_HEAD(inode_buffer_list);
+ LIST_HEAD(inode_list);
list_splice_init(&trans->r_itemq, &sort_list);
list_for_each_entry_safe(item, n, &sort_list, ri_list) {
xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
switch (ITEM_TYPE(item)) {
+ case XFS_LI_ICREATE:
+ list_move_tail(&item->ri_list, &buffer_list);
+ break;
case XFS_LI_BUF:
- if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
+ if (buf_f->blf_flags & XFS_BLF_CANCEL) {
trace_xfs_log_recover_item_reorder_head(log,
trans, item, pass);
- list_move(&item->ri_list, &trans->r_itemq);
+ list_move(&item->ri_list, &cancel_list);
+ break;
+ }
+ if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
+ list_move(&item->ri_list, &inode_buffer_list);
break;
}
+ list_move_tail(&item->ri_list, &buffer_list);
+ break;
case XFS_LI_INODE:
case XFS_LI_DQUOT:
case XFS_LI_QUOTAOFF:
@@ -1624,7 +1689,7 @@ xlog_recover_reorder_trans(
case XFS_LI_EFI:
trace_xfs_log_recover_item_reorder_tail(log,
trans, item, pass);
- list_move_tail(&item->ri_list, &trans->r_itemq);
+ list_move_tail(&item->ri_list, &inode_list);
break;
default:
xfs_warn(log->l_mp,
@@ -1635,6 +1700,14 @@ xlog_recover_reorder_trans(
}
}
ASSERT(list_empty(&sort_list));
+ if (!list_empty(&buffer_list))
+ list_splice(&buffer_list, &trans->r_itemq);
+ if (!list_empty(&inode_list))
+ list_splice_tail(&inode_list, &trans->r_itemq);
+ if (!list_empty(&inode_buffer_list))
+ list_splice_tail(&inode_buffer_list, &trans->r_itemq);
+ if (!list_empty(&cancel_list))
+ list_splice_tail(&cancel_list, &trans->r_itemq);
return 0;
}
@@ -1787,6 +1860,13 @@ xlog_recover_do_inode_buffer(
trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
+ /*
+ * Post recovery validation only works properly on CRC enabled
+ * filesystems.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ bp->b_ops = &xfs_inode_buf_ops;
+
inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
for (i = 0; i < inodes_per_buf; i++) {
next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
@@ -1852,12 +1932,216 @@ xlog_recover_do_inode_buffer(
buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
next_unlinked_offset);
*buffer_nextp = *logged_nextp;
+
+ /*
+ * If necessary, recalculate the CRC in the on-disk inode. We
+ * have to leave the inode in a consistent state for whoever
+ * reads it next....
+ */
+ xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
+ xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
+
}
return 0;
}
/*
+ * Validate the recovered buffer is of the correct type and attach the
+ * appropriate buffer operations to them for writeback. Magic numbers are in a
+ * few places:
+ * the first 16 bits of the buffer (inode buffer, dquot buffer),
+ * the first 32 bits of the buffer (most blocks),
+ * inside a struct xfs_da_blkinfo at the start of the buffer.
+ */
+static void
+xlog_recovery_validate_buf_type(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ xfs_buf_log_format_t *buf_f)
+{ /*
+ * Choose the buffer ops (bp->b_ops) that correspond to the buffer type
+ * recorded in buf_f, but only once the magic number read from the
+ * buffer itself agrees with that type. On a magic mismatch the code
+ * warns via xfs_warn(), hits ASSERT(0) and leaves bp->b_ops as it was.
+ * Nothing is returned; the only output is the bp->b_ops assignment.
+ */
+ struct xfs_da_blkinfo *info = bp->b_addr; /* buffer start viewed as a da block header */
+ __uint32_t magic32;
+ __uint16_t magic16;
+ __uint16_t magicda;
+
+ magic32 = be32_to_cpu(*(__be32 *)bp->b_addr); /* first 32 bits of the buffer, CPU order */
+ magic16 = be16_to_cpu(*(__be16*)bp->b_addr); /* first 16 bits of the buffer, CPU order */
+ magicda = be16_to_cpu(info->magic); /* magic field of the da blkinfo header */
+ switch (xfs_blft_from_flags(buf_f)) { /* dispatch on the type xfs_blft_from_flags() yields */
+ case XFS_BLFT_BTREE_BUF:
+ switch (magic32) { /* the btree flavour is picked from the magic itself */
+ case XFS_ABTB_CRC_MAGIC:
+ case XFS_ABTC_CRC_MAGIC:
+ case XFS_ABTB_MAGIC:
+ case XFS_ABTC_MAGIC:
+ bp->b_ops = &xfs_allocbt_buf_ops;
+ break;
+ case XFS_IBT_CRC_MAGIC:
+ case XFS_IBT_MAGIC:
+ bp->b_ops = &xfs_inobt_buf_ops;
+ break;
+ case XFS_BMAP_CRC_MAGIC:
+ case XFS_BMAP_MAGIC:
+ bp->b_ops = &xfs_bmbt_buf_ops;
+ break;
+ default:
+ xfs_warn(mp, "Bad btree block magic!");
+ ASSERT(0);
+ break; /* b_ops is not assigned on this path */
+ }
+ break;
+ case XFS_BLFT_AGF_BUF:
+ if (magic32 != XFS_AGF_MAGIC) {
+ xfs_warn(mp, "Bad AGF block magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_agf_buf_ops;
+ break;
+ case XFS_BLFT_AGFL_BUF:
+ if (!xfs_sb_version_hascrc(&mp->m_sb)) /* without CRCs: no magic check, no ops attached */
+ break;
+ if (magic32 != XFS_AGFL_MAGIC) {
+ xfs_warn(mp, "Bad AGFL block magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_agfl_buf_ops;
+ break;
+ case XFS_BLFT_AGI_BUF:
+ if (magic32 != XFS_AGI_MAGIC) {
+ xfs_warn(mp, "Bad AGI block magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_agi_buf_ops;
+ break;
+ case XFS_BLFT_UDQUOT_BUF:
+ case XFS_BLFT_PDQUOT_BUF:
+ case XFS_BLFT_GDQUOT_BUF: /* all three dquot buffer types share one check and one ops struct */
+#ifdef CONFIG_XFS_QUOTA
+ if (magic16 != XFS_DQUOT_MAGIC) {
+ xfs_warn(mp, "Bad DQUOT block magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_dquot_buf_ops;
+#else
+ xfs_alert(mp,
+ "Trying to recover dquots without QUOTA support built in!");
+ ASSERT(0);
+#endif
+ break;
+ case XFS_BLFT_DINO_BUF:
+ /*
+ * we get here with inode allocation buffers, not buffers that
+ * track unlinked list changes.
+ */
+ if (magic16 != XFS_DINODE_MAGIC) {
+ xfs_warn(mp, "Bad INODE block magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_inode_buf_ops;
+ break;
+ case XFS_BLFT_SYMLINK_BUF:
+ if (magic32 != XFS_SYMLINK_MAGIC) {
+ xfs_warn(mp, "Bad symlink block magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_symlink_buf_ops;
+ break;
+ case XFS_BLFT_DIR_BLOCK_BUF:
+ if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
+ magic32 != XFS_DIR3_BLOCK_MAGIC) {
+ xfs_warn(mp, "Bad dir block magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_dir3_block_buf_ops; /* same ops for either the DIR2 or DIR3 magic */
+ break;
+ case XFS_BLFT_DIR_DATA_BUF:
+ if (magic32 != XFS_DIR2_DATA_MAGIC &&
+ magic32 != XFS_DIR3_DATA_MAGIC) {
+ xfs_warn(mp, "Bad dir data magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_dir3_data_buf_ops;
+ break;
+ case XFS_BLFT_DIR_FREE_BUF:
+ if (magic32 != XFS_DIR2_FREE_MAGIC &&
+ magic32 != XFS_DIR3_FREE_MAGIC) {
+ xfs_warn(mp, "Bad dir3 free magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_dir3_free_buf_ops;
+ break;
+ case XFS_BLFT_DIR_LEAF1_BUF:
+ if (magicda != XFS_DIR2_LEAF1_MAGIC && /* leaf/node types are checked via the blkinfo magic */
+ magicda != XFS_DIR3_LEAF1_MAGIC) {
+ xfs_warn(mp, "Bad dir leaf1 magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+ break;
+ case XFS_BLFT_DIR_LEAFN_BUF:
+ if (magicda != XFS_DIR2_LEAFN_MAGIC &&
+ magicda != XFS_DIR3_LEAFN_MAGIC) {
+ xfs_warn(mp, "Bad dir leafn magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_dir3_leafn_buf_ops;
+ break;
+ case XFS_BLFT_DA_NODE_BUF:
+ if (magicda != XFS_DA_NODE_MAGIC &&
+ magicda != XFS_DA3_NODE_MAGIC) {
+ xfs_warn(mp, "Bad da node magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_da3_node_buf_ops;
+ break;
+ case XFS_BLFT_ATTR_LEAF_BUF:
+ if (magicda != XFS_ATTR_LEAF_MAGIC &&
+ magicda != XFS_ATTR3_LEAF_MAGIC) {
+ xfs_warn(mp, "Bad attr leaf magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_attr3_leaf_buf_ops;
+ break;
+ case XFS_BLFT_ATTR_RMT_BUF:
+ if (!xfs_sb_version_hascrc(&mp->m_sb)) /* without CRCs: no magic check, no ops attached */
+ break;
+ if (magic32 != XFS_ATTR3_RMT_MAGIC) {
+ xfs_warn(mp, "Bad attr remote magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_attr3_rmt_buf_ops;
+ break;
+ case XFS_BLFT_SB_BUF:
+ if (magic32 != XFS_SB_MAGIC) {
+ xfs_warn(mp, "Bad SB block magic!");
+ ASSERT(0);
+ break;
+ }
+ bp->b_ops = &xfs_sb_buf_ops;
+ break;
+ default: /* unrecognised type: warn only, no ASSERT, b_ops untouched */
+ xfs_warn(mp, "Unknown buffer type %d!",
+ xfs_blft_from_flags(buf_f));
+ break;
+ }
+}
+
+/*
* Perform a 'normal' buffer recovery. Each logged region of the
* buffer should be copied over the corresponding region in the
* given buffer. The bitmap in the buf log format structure indicates
@@ -1893,6 +2177,17 @@ xlog_recover_do_reg_buffer(
((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
/*
+ * The dirty regions logged in the buffer, even though
+ * contiguous, may span multiple chunks. This is because the
+ * dirty region may span a physical page boundary in a buffer
+ * and hence be split into two separate vectors for writing into
+ * the log. Hence we need to trim nbits back to the length of
+ * the current region being copied out of the log.
+ */
+ if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
+ nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
+
+ /*
* Do a sanity check if this is a dquot buffer. Just checking
* the first dquot in the buffer should do. XXXThis is
* probably a good thing to do for other buf types also.
@@ -1929,6 +2224,17 @@ xlog_recover_do_reg_buffer(
/* Shouldn't be any more regions */
ASSERT(i == item->ri_total);
+
+ /*
+ * We can only do post recovery validation on items on CRC enabled
+ * filesystems as we need to know when the buffer was written to be able
+ * to determine if we should have replayed the item. If we replay old
+ * metadata over a newer buffer, then it will enter a temporarily
+ * inconsistent state resulting in verification failures. Hence for now
+ * just avoid the verification stage for non-crc filesystems
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xlog_recovery_validate_buf_type(mp, bp, buf_f);
}
/*
@@ -2049,6 +2355,12 @@ xfs_qm_dqcheck(
d->dd_diskdq.d_flags = type;
d->dd_diskdq.d_id = cpu_to_be32(id);
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+ xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
+
return errs;
}
@@ -2214,6 +2526,7 @@ xlog_recover_inode_pass2(
int attr_index;
uint fields;
xfs_icdinode_t *dicp;
+ uint isize;
int need_free = 0;
if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
@@ -2239,7 +2552,7 @@ xlog_recover_inode_pass2(
trace_xfs_log_recover_inode_recover(log, in_f);
bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
- NULL);
+ &xfs_inode_buf_ops);
if (!bp) {
error = ENOMEM;
goto error;
@@ -2350,7 +2663,8 @@ xlog_recover_inode_pass2(
error = EFSCORRUPTED;
goto error;
}
- if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
+ isize = xfs_icdinode_size(dicp->di_version);
+ if (unlikely(item->ri_buf[1].i_len > isize)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
@@ -2362,13 +2676,13 @@ xlog_recover_inode_pass2(
}
/* The core is in in-core format */
- xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr);
+ xfs_dinode_to_disk(dip, dicp);
/* the rest is in on-disk format */
- if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
- memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode),
- item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode),
- item->ri_buf[1].i_len - sizeof(struct xfs_icdinode));
+ if (item->ri_buf[1].i_len > isize) {
+ memcpy((char *)dip + isize,
+ item->ri_buf[1].i_addr + isize,
+ item->ri_buf[1].i_len - isize);
}
fields = in_f->ilf_fields;
@@ -2452,6 +2766,9 @@ xlog_recover_inode_pass2(
}
write_inode_buffer:
+ /* re-generate the checksum. */
+ xfs_dinode_calc_crc(log->l_mp, dip);
+
ASSERT(bp->b_target->bt_mount == mp);
bp->b_iodone = xlog_recover_iodone;
xfs_buf_delwri_queue(bp, buffer_list);
@@ -2571,6 +2888,10 @@ xlog_recover_dquot_pass2(
}
memcpy(ddq, recddq, item->ri_buf[1].i_len);
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
ASSERT(dq_f->qlf_size == 2);
ASSERT(bp->b_target->bt_mount == mp);
@@ -2675,6 +2996,93 @@ xlog_recover_efd_pass2(
}
/*
+ * This routine is called when an inode create format structure is found in a
+ * committed transaction in the log. Its purpose is to initialise the inodes
+ * being allocated on disk. This requires us to get inode cluster buffers that
+ * match the range to be initialised, stamped with inode templates and written
+ * by delayed write so that subsequent modifications will hit the cached buffer
+ * and only need writing out at the end of recovery.
+ */
+STATIC int
+xlog_recover_do_icreate_pass2(
+	struct xlog		*log,
+	struct list_head	*buffer_list,
+	xlog_recover_item_t	*item)
+{
+	struct xfs_mount	*mp = log->l_mp;
+	struct xfs_icreate_log	*icl;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	unsigned int		count;
+	unsigned int		isize;
+	xfs_agblock_t		length;
+
+	/*
+	 * Pass 2 replay of an XFS_LI_ICREATE item.
+	 *
+	 * @log:         log being recovered; supplies the mount (log->l_mp).
+	 * @buffer_list: delayed write list handed on to
+	 *               xfs_ialloc_inode_init().
+	 * @item:        recovered item whose first region (ri_buf[0]) holds
+	 *               the struct xfs_icreate_log.
+	 *
+	 * Every field of the logged structure is validated against the
+	 * superblock geometry before it is used. Returns EINVAL if any field
+	 * is out of range, 0 if the extent is found in the cancelled buffer
+	 * table (nothing is replayed), and otherwise whatever
+	 * xfs_ialloc_inode_init() returns.
+	 */
+	icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
+	if (icl->icl_type != XFS_LI_ICREATE) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
+		return EINVAL;
+	}
+
+	if (icl->icl_size != 1) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
+		return EINVAL;
+	}
+
+	/* The remaining fields are stored big-endian in the log. */
+	agno = be32_to_cpu(icl->icl_ag);
+	if (agno >= mp->m_sb.sb_agcount) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
+		return EINVAL;
+	}
+	agbno = be32_to_cpu(icl->icl_agbno);
+	if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
+		return EINVAL;
+	}
+	isize = be32_to_cpu(icl->icl_isize);
+	if (isize != mp->m_sb.sb_inodesize) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
+		return EINVAL;
+	}
+	count = be32_to_cpu(icl->icl_count);
+	if (!count) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
+		return EINVAL;
+	}
+	length = be32_to_cpu(icl->icl_length);
+	if (!length || length >= mp->m_sb.sb_agblocks) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
+		return EINVAL;
+	}
+
+	/* existing allocation is fixed value */
+	ASSERT(count == XFS_IALLOC_INODES(mp));
+	ASSERT(length == XFS_IALLOC_BLOCKS(mp));
+	if (count != XFS_IALLOC_INODES(mp) ||
+	     length != XFS_IALLOC_BLOCKS(mp)) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
+		return EINVAL;
+	}
+
+	/*
+	 * Inode buffers can be freed. Do not replay the inode initialisation as
+	 * we could be overwriting something written after this inode buffer was
+	 * cancelled.
+	 *
+	 * XXX: we need to iterate all buffers and only init those that are not
+	 * cancelled. I think that a more fine grained factoring of
+	 * xfs_ialloc_inode_init may be appropriate here to enable this to be
+	 * done easily.
+	 */
+	if (xlog_check_buffer_cancelled(log,
+			XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
+		return 0;
+
+	/*
+	 * Hand back the initialisation result instead of discarding it: if the
+	 * inode chunk could not be initialised, recovery must not carry on as
+	 * though it had been, because later inode items in this transaction
+	 * are replayed into these buffers.
+	 */
+	return xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length,
+					be32_to_cpu(icl->icl_gen));
+}
+
+/*
* Free up any resources allocated by the transaction
*
* Remember that EFIs, EFDs, and IUNLINKs are handled later.
@@ -2716,6 +3124,7 @@ xlog_recover_commit_pass1(
case XFS_LI_EFI:
case XFS_LI_EFD:
case XFS_LI_DQUOT:
+ case XFS_LI_ICREATE:
/* nothing to do in pass 1 */
return 0;
default:
@@ -2746,6 +3155,8 @@ xlog_recover_commit_pass2(
return xlog_recover_efd_pass2(log, item);
case XFS_LI_DQUOT:
return xlog_recover_dquot_pass2(log, buffer_list, item);
+ case XFS_LI_ICREATE:
+ return xlog_recover_do_icreate_pass2(log, buffer_list, item);
case XFS_LI_QUOTAOFF:
/* nothing to do in pass2 */
return 0;
@@ -2949,6 +3360,7 @@ xlog_recover_process_efi(
* This will pull the EFI from the AIL and
* free the memory associated with it.
*/
+ set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
xfs_efi_release(efip, efip->efi_format.efi_nextents);
return XFS_ERROR(EIO);
}
@@ -3752,6 +4164,25 @@ xlog_recover(
return error;
}
+ /*
+ * Version 5 superblock log feature mask validation. We know the
+ * log is dirty so check if there are any unknown log features
+ * in what we need to recover. If there are unknown features
+ * (e.g. unsupported transactions), then simply reject the
+ * attempt at recovery before touching anything.
+ */
+ if (XFS_SB_VERSION_NUM(&log->l_mp->m_sb) == XFS_SB_VERSION_5 &&
+ xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb,
+ XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) {
+ xfs_warn(log->l_mp,
+"Superblock has unknown incompatible log features (0x%x) enabled.\n"
+"The log can not be fully and/or safely recovered by this kernel.\n"
+"Please recover the log on a kernel that supports the unknown features.",
+ (log->l_mp->m_sb.sb_features_log_incompat &
+ XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
+ return EINVAL;
+ }
+
xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
log->l_mp->m_logname ? log->l_mp->m_logname
: "internal");