summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/scrub/common.h5
-rw-r--r--fs/xfs/scrub/repair.h11
-rw-r--r--fs/xfs/scrub/scrub.c12
-rw-r--r--fs/xfs/scrub/trace.h2
-rw-r--r--fs/xfs/xfs_aops.c41
-rw-r--r--fs/xfs/xfs_buf.c3
-rw-r--r--fs/xfs/xfs_buf_item_recover.c11
-rw-r--r--fs/xfs/xfs_discard.c17
-rw-r--r--fs/xfs/xfs_dquot.c199
-rw-r--r--fs/xfs/xfs_dquot.h6
-rw-r--r--fs/xfs/xfs_dquot_item.c51
-rw-r--r--fs/xfs/xfs_dquot_item.h7
-rw-r--r--fs/xfs/xfs_exchrange.c71
-rw-r--r--fs/xfs/xfs_inode.c7
-rw-r--r--fs/xfs/xfs_iomap.c6
-rw-r--r--fs/xfs/xfs_notify_failure.c121
-rw-r--r--fs/xfs/xfs_qm.c48
-rw-r--r--fs/xfs/xfs_qm_bhv.c90
-rw-r--r--fs/xfs/xfs_quota.h5
-rw-r--r--fs/xfs/xfs_super.c19
-rw-r--r--fs/xfs/xfs_trans.c39
-rw-r--r--fs/xfs/xfs_trans_ail.c2
-rw-r--r--fs/xfs/xfs_trans_dquot.c31
23 files changed, 585 insertions, 219 deletions
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 47148cc4a833..eb00d48590f2 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -179,7 +179,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
bool xchk_dir_looks_zapped(struct xfs_inode *dp);
bool xchk_pptr_looks_zapped(struct xfs_inode *ip);
-#ifdef CONFIG_XFS_ONLINE_REPAIR
/* Decide if a repair is required. */
static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm)
{
@@ -199,10 +198,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc)
return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
!(sc->flags & XREP_ALREADY_FIXED);
}
-#else
-# define xchk_needs_repair(sc) (false)
-# define xchk_could_repair(sc) (false)
-#endif /* CONFIG_XFS_ONLINE_REPAIR */
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 0e0dc2bf985c..96180176c582 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -163,7 +163,16 @@ bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
#else
#define xrep_ino_dqattach(sc) (0)
-#define xrep_will_attempt(sc) (false)
+
+/*
+ * When online repair is not built into the kernel, we still want to attempt
+ * the repair so that the stub xrep_attempt below will return EOPNOTSUPP.
+ */
+static inline bool xrep_will_attempt(const struct xfs_scrub *sc)
+{
+ return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
+ xchk_needs_repair(sc->sm);
+}
static inline int
xrep_attempt(
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 4cbcf7a86dbe..5c266d2842db 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -149,6 +149,18 @@ xchk_probe(
if (xchk_should_terminate(sc, &error))
return error;
+ /*
+ * If the caller is probing to see if repair works but repair isn't
+ * built into the kernel, return EOPNOTSUPP because that's the signal
+ * that userspace expects. If online repair is built in, set the
+ * CORRUPT flag (without any of the usual tracing/logging) to force us
+ * into xrep_probe.
+ */
+ if (xchk_could_repair(sc)) {
+ if (!IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR))
+ return -EOPNOTSUPP;
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ }
return 0;
}
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index da773fee8638..2fbc8508ccdf 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -467,7 +467,7 @@ DECLARE_EVENT_CLASS(xchk_dqiter_class,
__field(xfs_exntst_t, state)
),
TP_fast_assign(
- __entry->dev = cursor->sc->ip->i_mount->m_super->s_dev;
+ __entry->dev = cursor->sc->mp->m_super->s_dev;
__entry->dqtype = cursor->dqtype;
__entry->ino = cursor->quota_ip->i_ino;
__entry->cur_id = cursor->id;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 559a3a577097..ba6b4a180e80 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -19,6 +19,7 @@
#include "xfs_reflink.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
+#include "xfs_icache.h"
struct xfs_writepage_ctx {
struct iomap_writepage_ctx ctx;
@@ -528,12 +529,44 @@ xfs_vm_readahead(
}
static int
-xfs_iomap_swapfile_activate(
+xfs_vm_swap_activate(
struct swap_info_struct *sis,
struct file *swap_file,
sector_t *span)
{
- sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
+ struct xfs_inode *ip = XFS_I(file_inode(swap_file));
+
+ /*
+ * Swap file activation can race against concurrent shared extent
+ * removal in files that have been cloned. If this happens,
+ * iomap_swapfile_iter() can fail because it encountered a shared
+ * extent even though an operation is in progress to remove those
+ * shared extents.
+ *
+ * This race becomes problematic when we defer extent removal
+ * operations beyond the end of a syscall (i.e. use async background
+ * processing algorithms). Users think the extents are no longer
+ * shared, but iomap_swapfile_iter() still sees them as shared
+ * because the refcountbt entries for the extents being removed have
+ * not yet been updated. Hence the swapon call fails unexpectedly.
+ *
+ * The race condition is currently most obvious from the unlink()
+ * operation as extent removal is deferred until after the last
+ * reference to the inode goes away. We then process the extent
+ * removal asynchronously, hence triggers the "syscall completed but
+ * work not done" condition mentioned above. To close this race
+ * window, we need to flush any pending inodegc operations to ensure
+ * they have updated the refcountbt records before we try to map the
+ * swapfile.
+ */
+ xfs_inodegc_flush(ip->i_mount);
+
+ /*
+ * Direct the swap code to the correct block device when this file
+ * sits on the RT device.
+ */
+ sis->bdev = xfs_inode_buftarg(ip)->bt_bdev;
+
return iomap_swapfile_activate(sis, swap_file, span,
&xfs_read_iomap_ops);
}
@@ -549,11 +582,11 @@ const struct address_space_operations xfs_address_space_operations = {
.migrate_folio = filemap_migrate_folio,
.is_partially_uptodate = iomap_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
- .swap_activate = xfs_iomap_swapfile_activate,
+ .swap_activate = xfs_vm_swap_activate,
};
const struct address_space_operations xfs_dax_aops = {
.writepages = xfs_dax_writepages,
.dirty_folio = noop_dirty_folio,
- .swap_activate = xfs_iomap_swapfile_activate,
+ .swap_activate = xfs_vm_swap_activate,
};
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index aa4dbda7b536..6bcbdc8bf186 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -663,9 +663,8 @@ xfs_buf_find_insert(
spin_unlock(&bch->bc_lock);
goto out_free_buf;
}
- if (bp) {
+ if (bp && atomic_inc_not_zero(&bp->b_hold)) {
/* found an existing buffer */
- atomic_inc(&bp->b_hold);
spin_unlock(&bch->bc_lock);
error = xfs_buf_find_lock(bp, flags);
if (error)
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 5180cbf5a90b..0185c92df8c2 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -1036,12 +1036,21 @@ xlog_recover_buf_commit_pass2(
error = xlog_recover_do_primary_sb_buffer(mp, item, bp, buf_f,
current_lsn);
if (error)
- goto out_release;
+ goto out_writebuf;
} else {
xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
}
/*
+ * Buffer held by buf log item during 'normal' buffer recovery must
+ * be committed through buffer I/O submission path to ensure proper
+ * release. When error occurs during sb buffer recovery, log shutdown
+ * will be done before submitting buffer list so that buffers can be
+ * released correctly through ioend failure path.
+ */
+out_writebuf:
+
+ /*
* Perform delayed write on the buffer. Asynchronous writes will be
* slower when taking into account all the buffers to be flushed.
*
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index d8c4a5dcca7a..0b343776da8c 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -146,6 +146,14 @@ xfs_discard_extents(
return error;
}
+/*
+ * Care must be taken setting up the trim cursor as the perags may not have been
+ * initialised when the cursor is initialised. e.g. a clean mount which hasn't
+ * read in AGFs and the first operation run on the mounted fs is a trim. This
+ * can result in perag fields that aren't initialised until
+ * xfs_trim_gather_extents() calls xfs_alloc_read_agf() to lock down the AG for
+ * the free space search.
+ */
struct xfs_trim_cur {
xfs_agblock_t start;
xfs_extlen_t count;
@@ -183,6 +191,14 @@ xfs_trim_gather_extents(
if (error)
goto out_trans_cancel;
+ /*
+ * First time through tcur->count will not have been initialised as
+ * pag->pagf_longest is not guaranteed to be valid before we read
+ * the AGF buffer above.
+ */
+ if (!tcur->count)
+ tcur->count = pag->pagf_longest;
+
if (tcur->by_bno) {
/* sub-AG discard request always starts at tcur->start */
cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag);
@@ -329,7 +345,6 @@ xfs_trim_perag_extents(
{
struct xfs_trim_cur tcur = {
.start = start,
- .count = pag->pagf_longest,
.end = end,
.minlen = minlen,
};
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index c1b211c260a9..0d73b59f1c9e 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -69,6 +69,31 @@ xfs_dquot_mark_sick(
}
/*
+ * Detach the dquot buffer if it's still attached, because we can get called
+ * through dqpurge after a log shutdown. Caller must hold the dqflock or have
+ * otherwise isolated the dquot.
+ */
+void
+xfs_dquot_detach_buf(
+ struct xfs_dquot *dqp)
+{
+ struct xfs_dq_logitem *qlip = &dqp->q_logitem;
+ struct xfs_buf *bp = NULL;
+
+ spin_lock(&qlip->qli_lock);
+ if (qlip->qli_item.li_buf) {
+ bp = qlip->qli_item.li_buf;
+ qlip->qli_item.li_buf = NULL;
+ }
+ spin_unlock(&qlip->qli_lock);
+ if (bp) {
+ xfs_buf_lock(bp);
+ list_del_init(&qlip->qli_item.li_bio_list);
+ xfs_buf_relse(bp);
+ }
+}
+
+/*
* This is called to free all the memory associated with a dquot
*/
void
@@ -76,6 +101,7 @@ xfs_qm_dqdestroy(
struct xfs_dquot *dqp)
{
ASSERT(list_empty(&dqp->q_lru));
+ ASSERT(dqp->q_logitem.qli_item.li_buf == NULL);
kvfree(dqp->q_logitem.qli_item.li_lv_shadow);
mutex_destroy(&dqp->q_qlock);
@@ -1136,9 +1162,11 @@ static void
xfs_qm_dqflush_done(
struct xfs_log_item *lip)
{
- struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip;
- struct xfs_dquot *dqp = qip->qli_dquot;
+ struct xfs_dq_logitem *qlip =
+ container_of(lip, struct xfs_dq_logitem, qli_item);
+ struct xfs_dquot *dqp = qlip->qli_dquot;
struct xfs_ail *ailp = lip->li_ailp;
+ struct xfs_buf *bp = NULL;
xfs_lsn_t tail_lsn;
/*
@@ -1150,12 +1178,12 @@ xfs_qm_dqflush_done(
* holding the lock before removing the dquot from the AIL.
*/
if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
- ((lip->li_lsn == qip->qli_flush_lsn) ||
+ ((lip->li_lsn == qlip->qli_flush_lsn) ||
test_bit(XFS_LI_FAILED, &lip->li_flags))) {
spin_lock(&ailp->ail_lock);
xfs_clear_li_failed(lip);
- if (lip->li_lsn == qip->qli_flush_lsn) {
+ if (lip->li_lsn == qlip->qli_flush_lsn) {
/* xfs_ail_update_finish() drops the AIL lock */
tail_lsn = xfs_ail_delete_one(ailp, lip);
xfs_ail_update_finish(ailp, tail_lsn);
@@ -1168,6 +1196,19 @@ xfs_qm_dqflush_done(
* Release the dq's flush lock since we're done with it.
*/
xfs_dqfunlock(dqp);
+
+ /*
+ * If this dquot hasn't been dirtied since initiating the last dqflush,
+ * release the buffer reference.
+ */
+ spin_lock(&qlip->qli_lock);
+ if (!qlip->qli_dirty) {
+ bp = lip->li_buf;
+ lip->li_buf = NULL;
+ }
+ spin_unlock(&qlip->qli_lock);
+ if (bp)
+ xfs_buf_rele(bp);
}
void
@@ -1190,7 +1231,7 @@ xfs_buf_dquot_io_fail(
spin_lock(&bp->b_mount->m_ail->ail_lock);
list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
- xfs_set_li_failed(lip, bp);
+ set_bit(XFS_LI_FAILED, &lip->li_flags);
spin_unlock(&bp->b_mount->m_ail->ail_lock);
}
@@ -1233,6 +1274,115 @@ xfs_qm_dqflush_check(
}
/*
+ * Get the buffer containing the on-disk dquot.
+ *
+ * Requires dquot flush lock, will clear the dirty flag, delete the quota log
+ * item from the AIL, and shut down the system if something goes wrong.
+ */
+static int
+xfs_dquot_read_buf(
+ struct xfs_trans *tp,
+ struct xfs_dquot *dqp,
+ struct xfs_buf **bpp)
+{
+ struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_buf *bp = NULL;
+ int error;
+
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
+ mp->m_quotainfo->qi_dqchunklen, 0,
+ &bp, &xfs_dquot_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_dquot_mark_sick(dqp);
+ if (error)
+ goto out_abort;
+
+ *bpp = bp;
+ return 0;
+
+out_abort:
+ dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
+ xfs_trans_ail_delete(&dqp->q_logitem.qli_item, 0);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ return error;
+}
+
+/*
+ * Attach a dquot buffer to this dquot to avoid allocating a buffer during a
+ * dqflush, since dqflush can be called from reclaim context. Caller must hold
+ * the dqlock.
+ */
+int
+xfs_dquot_attach_buf(
+ struct xfs_trans *tp,
+ struct xfs_dquot *dqp)
+{
+ struct xfs_dq_logitem *qlip = &dqp->q_logitem;
+ struct xfs_log_item *lip = &qlip->qli_item;
+ int error;
+
+ spin_lock(&qlip->qli_lock);
+ if (!lip->li_buf) {
+ struct xfs_buf *bp = NULL;
+
+ spin_unlock(&qlip->qli_lock);
+ error = xfs_dquot_read_buf(tp, dqp, &bp);
+ if (error)
+ return error;
+
+ /*
+ * Hold the dquot buffer so that we retain our ref to it after
+ * detaching it from the transaction, then give that ref to the
+ * dquot log item so that the AIL does not have to read the
+ * dquot buffer to push this item.
+ */
+ xfs_buf_hold(bp);
+ xfs_trans_brelse(tp, bp);
+
+ spin_lock(&qlip->qli_lock);
+ lip->li_buf = bp;
+ }
+ qlip->qli_dirty = true;
+ spin_unlock(&qlip->qli_lock);
+
+ return 0;
+}
+
+/*
+ * Get a new reference the dquot buffer attached to this dquot for a dqflush
+ * operation.
+ *
+ * Returns 0 and a NULL bp if none was attached to the dquot; 0 and a locked
+ * bp; or -EAGAIN if the buffer could not be locked.
+ */
+int
+xfs_dquot_use_attached_buf(
+ struct xfs_dquot *dqp,
+ struct xfs_buf **bpp)
+{
+ struct xfs_buf *bp = dqp->q_logitem.qli_item.li_buf;
+
+ /*
+ * A NULL buffer can happen if the dquot dirty flag was set but the
+ * filesystem shut down before transaction commit happened. In that
+ * case we're not going to flush anyway.
+ */
+ if (!bp) {
+ ASSERT(xfs_is_shutdown(dqp->q_mount));
+
+ *bpp = NULL;
+ return 0;
+ }
+
+ if (!xfs_buf_trylock(bp))
+ return -EAGAIN;
+
+ xfs_buf_hold(bp);
+ *bpp = bp;
+ return 0;
+}
+
+/*
* Write a modified dquot to disk.
* The dquot must be locked and the flush lock too taken by caller.
* The flush lock will not be unlocked until the dquot reaches the disk,
@@ -1243,11 +1393,11 @@ xfs_qm_dqflush_check(
int
xfs_qm_dqflush(
struct xfs_dquot *dqp,
- struct xfs_buf **bpp)
+ struct xfs_buf *bp)
{
struct xfs_mount *mp = dqp->q_mount;
- struct xfs_log_item *lip = &dqp->q_logitem.qli_item;
- struct xfs_buf *bp;
+ struct xfs_dq_logitem *qlip = &dqp->q_logitem;
+ struct xfs_log_item *lip = &qlip->qli_item;
struct xfs_dqblk *dqblk;
xfs_failaddr_t fa;
int error;
@@ -1257,28 +1407,12 @@ xfs_qm_dqflush(
trace_xfs_dqflush(dqp);
- *bpp = NULL;
-
xfs_qm_dqunpin_wait(dqp);
- /*
- * Get the buffer containing the on-disk dquot
- */
- error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
- &bp, &xfs_dquot_buf_ops);
- if (error == -EAGAIN)
- goto out_unlock;
- if (xfs_metadata_is_sick(error))
- xfs_dquot_mark_sick(dqp);
- if (error)
- goto out_abort;
-
fa = xfs_qm_dqflush_check(dqp);
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
dqp->q_id, fa);
- xfs_buf_relse(bp);
xfs_dquot_mark_sick(dqp);
error = -EFSCORRUPTED;
goto out_abort;
@@ -1293,8 +1427,15 @@ xfs_qm_dqflush(
*/
dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
- xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
- &dqp->q_logitem.qli_item.li_lsn);
+ /*
+ * We hold the dquot lock, so nobody can dirty it while we're
+ * scheduling the write out. Clear the dirty-since-flush flag.
+ */
+ spin_lock(&qlip->qli_lock);
+ qlip->qli_dirty = false;
+ spin_unlock(&qlip->qli_lock);
+
+ xfs_trans_ail_copy_lsn(mp->m_ail, &qlip->qli_flush_lsn, &lip->li_lsn);
/*
* copy the lsn into the on-disk dquot now while we have the in memory
@@ -1306,7 +1447,7 @@ xfs_qm_dqflush(
* of a dquot without an up-to-date CRC getting to disk.
*/
if (xfs_has_crc(mp)) {
- dqblk->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
+ dqblk->dd_lsn = cpu_to_be64(lip->li_lsn);
xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk),
XFS_DQUOT_CRC_OFF);
}
@@ -1316,7 +1457,7 @@ xfs_qm_dqflush(
* the AIL and release the flush lock once the dquot is synced to disk.
*/
bp->b_flags |= _XBF_DQUOTS;
- list_add_tail(&dqp->q_logitem.qli_item.li_bio_list, &bp->b_li_list);
+ list_add_tail(&lip->li_bio_list, &bp->b_li_list);
/*
* If the buffer is pinned then push on the log so we won't
@@ -1328,14 +1469,12 @@ xfs_qm_dqflush(
}
trace_xfs_dqflush_done(dqp);
- *bpp = bp;
return 0;
out_abort:
dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
xfs_trans_ail_delete(lip, 0);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-out_unlock:
xfs_dqfunlock(dqp);
return error;
}
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 677bb2dc9ac9..bd7bfd9e402e 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -204,7 +204,7 @@ void xfs_dquot_to_disk(struct xfs_disk_dquot *ddqp, struct xfs_dquot *dqp);
#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->q_flags & XFS_DQFLAG_DIRTY)
void xfs_qm_dqdestroy(struct xfs_dquot *dqp);
-int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp);
+int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf *bp);
void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp);
void xfs_qm_adjust_dqtimers(struct xfs_dquot *d);
void xfs_qm_adjust_dqlimits(struct xfs_dquot *d);
@@ -227,6 +227,10 @@ void xfs_dqlockn(struct xfs_dqtrx *q);
void xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
+int xfs_dquot_attach_buf(struct xfs_trans *tp, struct xfs_dquot *dqp);
+int xfs_dquot_use_attached_buf(struct xfs_dquot *dqp, struct xfs_buf **bpp);
+void xfs_dquot_detach_buf(struct xfs_dquot *dqp);
+
static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
{
xfs_dqlock(dqp);
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 7d19091215b0..271b195ebb93 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -123,8 +123,9 @@ xfs_qm_dquot_logitem_push(
__releases(&lip->li_ailp->ail_lock)
__acquires(&lip->li_ailp->ail_lock)
{
- struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
- struct xfs_buf *bp = lip->li_buf;
+ struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
+ struct xfs_dquot *dqp = qlip->qli_dquot;
+ struct xfs_buf *bp;
uint rval = XFS_ITEM_SUCCESS;
int error;
@@ -155,14 +156,25 @@ xfs_qm_dquot_logitem_push(
spin_unlock(&lip->li_ailp->ail_lock);
- error = xfs_qm_dqflush(dqp, &bp);
+ error = xfs_dquot_use_attached_buf(dqp, &bp);
+ if (error == -EAGAIN) {
+ xfs_dqfunlock(dqp);
+ rval = XFS_ITEM_LOCKED;
+ goto out_relock_ail;
+ }
+
+ /*
+ * dqflush completes dqflock on error, and the delwri ioend does it on
+ * success.
+ */
+ error = xfs_qm_dqflush(dqp, bp);
if (!error) {
if (!xfs_buf_delwri_queue(bp, buffer_list))
rval = XFS_ITEM_FLUSHING;
- xfs_buf_relse(bp);
- } else if (error == -EAGAIN)
- rval = XFS_ITEM_LOCKED;
+ }
+ xfs_buf_relse(bp);
+out_relock_ail:
spin_lock(&lip->li_ailp->ail_lock);
out_unlock:
xfs_dqunlock(dqp);
@@ -195,12 +207,10 @@ xfs_qm_dquot_logitem_committing(
}
#ifdef DEBUG_EXPENSIVE
-static int
-xfs_qm_dquot_logitem_precommit(
- struct xfs_trans *tp,
- struct xfs_log_item *lip)
+static void
+xfs_qm_dquot_logitem_precommit_check(
+ struct xfs_dquot *dqp)
{
- struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
struct xfs_mount *mp = dqp->q_mount;
struct xfs_disk_dquot ddq = { };
xfs_failaddr_t fa;
@@ -216,13 +226,24 @@ xfs_qm_dquot_logitem_precommit(
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
ASSERT(fa == NULL);
}
-
- return 0;
}
#else
-# define xfs_qm_dquot_logitem_precommit NULL
+# define xfs_qm_dquot_logitem_precommit_check(...) ((void)0)
#endif
+static int
+xfs_qm_dquot_logitem_precommit(
+ struct xfs_trans *tp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
+ struct xfs_dquot *dqp = qlip->qli_dquot;
+
+ xfs_qm_dquot_logitem_precommit_check(dqp);
+
+ return xfs_dquot_attach_buf(tp, dqp);
+}
+
static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_size = xfs_qm_dquot_logitem_size,
.iop_precommit = xfs_qm_dquot_logitem_precommit,
@@ -247,5 +268,7 @@ xfs_qm_dquot_logitem_init(
xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
&xfs_dquot_item_ops);
+ spin_lock_init(&lp->qli_lock);
lp->qli_dquot = dqp;
+ lp->qli_dirty = false;
}
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
index 794710c24474..d66e52807d76 100644
--- a/fs/xfs/xfs_dquot_item.h
+++ b/fs/xfs/xfs_dquot_item.h
@@ -14,6 +14,13 @@ struct xfs_dq_logitem {
struct xfs_log_item qli_item; /* common portion */
struct xfs_dquot *qli_dquot; /* dquot ptr */
xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
+
+ /*
+ * We use this spinlock to coordinate access to the li_buf pointer in
+ * the log item and the qli_dirty flag.
+ */
+ spinlock_t qli_lock;
+ bool qli_dirty; /* dirtied since last flush? */
};
void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp);
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c
index 75cb53f090d1..7c8195895a73 100644
--- a/fs/xfs/xfs_exchrange.c
+++ b/fs/xfs/xfs_exchrange.c
@@ -326,22 +326,6 @@ out_trans_cancel:
* successfully but before locks are dropped.
*/
-/* Verify that we have security clearance to perform this operation. */
-static int
-xfs_exchange_range_verify_area(
- struct xfs_exchrange *fxr)
-{
- int ret;
-
- ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length,
- true);
- if (ret)
- return ret;
-
- return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length,
- true);
-}
-
/*
* Performs necessary checks before doing a range exchange, having stabilized
* mutable inode attributes via i_rwsem.
@@ -352,11 +336,13 @@ xfs_exchange_range_checks(
unsigned int alloc_unit)
{
struct inode *inode1 = file_inode(fxr->file1);
+ loff_t size1 = i_size_read(inode1);
struct inode *inode2 = file_inode(fxr->file2);
+ loff_t size2 = i_size_read(inode2);
uint64_t allocmask = alloc_unit - 1;
int64_t test_len;
uint64_t blen;
- loff_t size1, size2, tmp;
+ loff_t tmp;
int error;
/* Don't touch certain kinds of inodes */
@@ -365,24 +351,25 @@ xfs_exchange_range_checks(
if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
return -ETXTBSY;
- size1 = i_size_read(inode1);
- size2 = i_size_read(inode2);
-
/* Ranges cannot start after EOF. */
if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
return -EINVAL;
- /*
- * If the caller said to exchange to EOF, we set the length of the
- * request large enough to cover everything to the end of both files.
- */
if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
+ /*
+ * If the caller said to exchange to EOF, we set the length of
+ * the request large enough to cover everything to the end of
+ * both files.
+ */
fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
size2 - fxr->file2_offset);
-
- error = xfs_exchange_range_verify_area(fxr);
- if (error)
- return error;
+ } else {
+ /*
+ * Otherwise we require both ranges to end within EOF.
+ */
+ if (fxr->file1_offset + fxr->length > size1 ||
+ fxr->file2_offset + fxr->length > size2)
+ return -EINVAL;
}
/*
@@ -399,15 +386,6 @@ xfs_exchange_range_checks(
return -EINVAL;
/*
- * We require both ranges to end within EOF, unless we're exchanging
- * to EOF.
- */
- if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) &&
- (fxr->file1_offset + fxr->length > size1 ||
- fxr->file2_offset + fxr->length > size2))
- return -EINVAL;
-
- /*
* Make sure we don't hit any file size limits. If we hit any size
* limits such that test_length was adjusted, we abort the whole
* operation.
@@ -744,6 +722,7 @@ xfs_exchange_range(
{
struct inode *inode1 = file_inode(fxr->file1);
struct inode *inode2 = file_inode(fxr->file2);
+ loff_t check_len = fxr->length;
int ret;
BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
@@ -776,14 +755,18 @@ xfs_exchange_range(
return -EBADF;
/*
- * If we're not exchanging to EOF, we can check the areas before
- * stabilizing both files' i_size.
+ * If we're exchanging to EOF we can't calculate the length until taking
+ * the iolock. Pass a 0 length to remap_verify_area similar to the
+ * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well.
*/
- if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) {
- ret = xfs_exchange_range_verify_area(fxr);
- if (ret)
- return ret;
- }
+ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
+ check_len = 0;
+ ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true);
+ if (ret)
+ return ret;
+ ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true);
+ if (ret)
+ return ret;
/* Update cmtime if the fd/inode don't forbid it. */
if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 19dcb569a3e7..ed09b4a3084e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1392,8 +1392,11 @@ xfs_inactive(
goto out;
/* Try to clean out the cow blocks if there are any. */
- if (xfs_inode_has_cow_data(ip))
- xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+ if (xfs_inode_has_cow_data(ip)) {
+ error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+ if (error)
+ goto out;
+ }
if (VFS_I(ip)->i_nlink != 0) {
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 86da16f54be9..6335b122486f 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -942,10 +942,8 @@ xfs_dax_write_iomap_end(
if (!xfs_is_cow_inode(ip))
return 0;
- if (!written) {
- xfs_reflink_cancel_cow_range(ip, pos, length, true);
- return 0;
- }
+ if (!written)
+ return xfs_reflink_cancel_cow_range(ip, pos, length, true);
return xfs_reflink_end_cow(ip, pos, written);
}
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index fa50e5308292..0b0b0f31aca2 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -154,6 +154,79 @@ xfs_dax_notify_failure_thaw(
}
static int
+xfs_dax_translate_range(
+ struct xfs_buftarg *btp,
+ u64 offset,
+ u64 len,
+ xfs_daddr_t *daddr,
+ uint64_t *bblen)
+{
+ u64 dev_start = btp->bt_dax_part_off;
+ u64 dev_len = bdev_nr_bytes(btp->bt_bdev);
+ u64 dev_end = dev_start + dev_len - 1;
+
+ /* Notify failure on the whole device. */
+ if (offset == 0 && len == U64_MAX) {
+ offset = dev_start;
+ len = dev_len;
+ }
+
+ /* Ignore the range out of filesystem area */
+ if (offset + len - 1 < dev_start)
+ return -ENXIO;
+ if (offset > dev_end)
+ return -ENXIO;
+
+ /* Calculate the real range when it touches the boundary */
+ if (offset > dev_start)
+ offset -= dev_start;
+ else {
+ len -= dev_start - offset;
+ offset = 0;
+ }
+ if (offset + len - 1 > dev_end)
+ len = dev_end - offset + 1;
+
+ *daddr = BTOBB(offset);
+ *bblen = BTOBB(len);
+ return 0;
+}
+
+static int
+xfs_dax_notify_logdev_failure(
+ struct xfs_mount *mp,
+ u64 offset,
+ u64 len,
+ int mf_flags)
+{
+ xfs_daddr_t daddr;
+ uint64_t bblen;
+ int error;
+
+ /*
+ * Return ENXIO instead of shutting down the filesystem if the failed
+ * region is beyond the end of the log.
+ */
+ error = xfs_dax_translate_range(mp->m_logdev_targp,
+ offset, len, &daddr, &bblen);
+ if (error)
+ return error;
+
+ /*
+ * In the pre-remove case the failure notification is attempting to
+ * trigger a force unmount. The expectation is that the device is
+ * still present, but its removal is in progress and can not be
+ * cancelled, proceed with accessing the log device.
+ */
+ if (mf_flags & MF_MEM_PRE_REMOVE)
+ return 0;
+
+ xfs_err(mp, "ondisk log corrupt, shutting down fs!");
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
+ return -EFSCORRUPTED;
+}
+
+static int
xfs_dax_notify_ddev_failure(
struct xfs_mount *mp,
xfs_daddr_t daddr,
@@ -263,8 +336,9 @@ xfs_dax_notify_failure(
int mf_flags)
{
struct xfs_mount *mp = dax_holder(dax_dev);
- u64 ddev_start;
- u64 ddev_end;
+ xfs_daddr_t daddr;
+ uint64_t bblen;
+ int error;
if (!(mp->m_super->s_flags & SB_BORN)) {
xfs_warn(mp, "filesystem is not ready for notify_failure()!");
@@ -279,17 +353,7 @@ xfs_dax_notify_failure(
if (mp->m_logdev_targp && mp->m_logdev_targp->bt_daxdev == dax_dev &&
mp->m_logdev_targp != mp->m_ddev_targp) {
- /*
- * In the pre-remove case the failure notification is attempting
- * to trigger a force unmount. The expectation is that the
- * device is still present, but its removal is in progress and
- * can not be cancelled, proceed with accessing the log device.
- */
- if (mf_flags & MF_MEM_PRE_REMOVE)
- return 0;
- xfs_err(mp, "ondisk log corrupt, shutting down fs!");
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
- return -EFSCORRUPTED;
+ return xfs_dax_notify_logdev_failure(mp, offset, len, mf_flags);
}
if (!xfs_has_rmapbt(mp)) {
@@ -297,33 +361,12 @@ xfs_dax_notify_failure(
return -EOPNOTSUPP;
}
- ddev_start = mp->m_ddev_targp->bt_dax_part_off;
- ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;
-
- /* Notify failure on the whole device. */
- if (offset == 0 && len == U64_MAX) {
- offset = ddev_start;
- len = bdev_nr_bytes(mp->m_ddev_targp->bt_bdev);
- }
-
- /* Ignore the range out of filesystem area */
- if (offset + len - 1 < ddev_start)
- return -ENXIO;
- if (offset > ddev_end)
- return -ENXIO;
-
- /* Calculate the real range when it touches the boundary */
- if (offset > ddev_start)
- offset -= ddev_start;
- else {
- len -= ddev_start - offset;
- offset = 0;
- }
- if (offset + len - 1 > ddev_end)
- len = ddev_end - offset + 1;
+ error = xfs_dax_translate_range(mp->m_ddev_targp, offset, len, &daddr,
+ &bblen);
+ if (error)
+ return error;
- return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),
- mf_flags);
+ return xfs_dax_notify_ddev_failure(mp, daddr, bblen, mf_flags);
}
const struct dax_holder_operations xfs_dax_holder_operations = {
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 7e2307921deb..3212b5bf3fb3 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -146,17 +146,29 @@ xfs_qm_dqpurge(
* We don't care about getting disk errors here. We need
* to purge this dquot anyway, so we go ahead regardless.
*/
- error = xfs_qm_dqflush(dqp, &bp);
+ error = xfs_dquot_use_attached_buf(dqp, &bp);
+ if (error == -EAGAIN) {
+ xfs_dqfunlock(dqp);
+ dqp->q_flags &= ~XFS_DQFLAG_FREEING;
+ goto out_unlock;
+ }
+ if (!bp)
+ goto out_funlock;
+
+ /*
+ * dqflush completes dqflock on error, and the bwrite ioend
+ * does it on success.
+ */
+ error = xfs_qm_dqflush(dqp, bp);
if (!error) {
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
- } else if (error == -EAGAIN) {
- dqp->q_flags &= ~XFS_DQFLAG_FREEING;
- goto out_unlock;
}
xfs_dqflock(dqp);
}
+ xfs_dquot_detach_buf(dqp);
+out_funlock:
ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(xlog_is_shutdown(dqp->q_logitem.qli_item.li_log) ||
!test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));
@@ -462,7 +474,17 @@ xfs_qm_dquot_isolate(
/* we have to drop the LRU lock to flush the dquot */
spin_unlock(lru_lock);
- error = xfs_qm_dqflush(dqp, &bp);
+ error = xfs_dquot_use_attached_buf(dqp, &bp);
+ if (!bp || error == -EAGAIN) {
+ xfs_dqfunlock(dqp);
+ goto out_unlock_dirty;
+ }
+
+ /*
+ * dqflush completes dqflock on error, and the delwri ioend
+ * does it on success.
+ */
+ error = xfs_qm_dqflush(dqp, bp);
if (error)
goto out_unlock_dirty;
@@ -470,6 +492,8 @@ xfs_qm_dquot_isolate(
xfs_buf_relse(bp);
goto out_unlock_dirty;
}
+
+ xfs_dquot_detach_buf(dqp);
xfs_dqfunlock(dqp);
/*
@@ -1108,6 +1132,10 @@ xfs_qm_quotacheck_dqadjust(
return error;
}
+ error = xfs_dquot_attach_buf(NULL, dqp);
+ if (error)
+ return error;
+
trace_xfs_dqadjust(dqp);
/*
@@ -1287,11 +1315,17 @@ xfs_qm_flush_one(
goto out_unlock;
}
- error = xfs_qm_dqflush(dqp, &bp);
+ error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error)
goto out_unlock;
+ if (!bp) {
+ error = -EFSCORRUPTED;
+ goto out_unlock;
+ }
- xfs_buf_delwri_queue(bp, buffer_list);
+ error = xfs_qm_dqflush(dqp, bp);
+ if (!error)
+ xfs_buf_delwri_queue(bp, buffer_list);
xfs_buf_relse(bp);
out_unlock:
xfs_dqunlock(dqp);
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index a11436579877..dabb1d6d7e46 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -19,28 +19,41 @@
STATIC void
xfs_fill_statvfs_from_dquot(
struct kstatfs *statp,
+ struct xfs_inode *ip,
struct xfs_dquot *dqp)
{
+ struct xfs_dquot_res *blkres = &dqp->q_blk;
uint64_t limit;
- limit = dqp->q_blk.softlimit ?
- dqp->q_blk.softlimit :
- dqp->q_blk.hardlimit;
- if (limit && statp->f_blocks > limit) {
- statp->f_blocks = limit;
- statp->f_bfree = statp->f_bavail =
- (statp->f_blocks > dqp->q_blk.reserved) ?
- (statp->f_blocks - dqp->q_blk.reserved) : 0;
+ if (XFS_IS_REALTIME_MOUNT(ip->i_mount) &&
+ (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME)))
+ blkres = &dqp->q_rtb;
+
+ limit = blkres->softlimit ?
+ blkres->softlimit :
+ blkres->hardlimit;
+ if (limit) {
+ uint64_t remaining = 0;
+
+ if (limit > blkres->reserved)
+ remaining = limit - blkres->reserved;
+
+ statp->f_blocks = min(statp->f_blocks, limit);
+ statp->f_bfree = min(statp->f_bfree, remaining);
+ statp->f_bavail = min(statp->f_bavail, remaining);
}
limit = dqp->q_ino.softlimit ?
dqp->q_ino.softlimit :
dqp->q_ino.hardlimit;
- if (limit && statp->f_files > limit) {
- statp->f_files = limit;
- statp->f_ffree =
- (statp->f_files > dqp->q_ino.reserved) ?
- (statp->f_files - dqp->q_ino.reserved) : 0;
+ if (limit) {
+ uint64_t remaining = 0;
+
+ if (limit > dqp->q_ino.reserved)
+ remaining = limit - dqp->q_ino.reserved;
+
+ statp->f_files = min(statp->f_files, limit);
+ statp->f_ffree = min(statp->f_ffree, remaining);
}
}
@@ -61,11 +74,33 @@ xfs_qm_statvfs(
struct xfs_dquot *dqp;
if (!xfs_qm_dqget(mp, ip->i_projid, XFS_DQTYPE_PROJ, false, &dqp)) {
- xfs_fill_statvfs_from_dquot(statp, dqp);
+ xfs_fill_statvfs_from_dquot(statp, ip, dqp);
xfs_qm_dqput(dqp);
}
}
+STATIC int
+xfs_qm_validate_state_change(
+ struct xfs_mount *mp,
+ uint uqd,
+ uint gqd,
+ uint pqd)
+{
+ int state;
+
+ /* Is quota state changing? */
+ state = ((uqd && !XFS_IS_UQUOTA_ON(mp)) ||
+ (!uqd && XFS_IS_UQUOTA_ON(mp)) ||
+ (gqd && !XFS_IS_GQUOTA_ON(mp)) ||
+ (!gqd && XFS_IS_GQUOTA_ON(mp)) ||
+ (pqd && !XFS_IS_PQUOTA_ON(mp)) ||
+ (!pqd && XFS_IS_PQUOTA_ON(mp)));
+
+ return state &&
+ (xfs_dev_is_read_only(mp, "changing quota state") ||
+ xfs_has_norecovery(mp));
+}
+
int
xfs_qm_newmount(
xfs_mount_t *mp,
@@ -85,24 +120,21 @@ xfs_qm_newmount(
}
/*
- * If the device itself is read-only, we can't allow
- * the user to change the state of quota on the mount -
- * this would generate a transaction on the ro device,
- * which would lead to an I/O error and shutdown
+ * If the device itself is read-only and/or in norecovery
+ * mode, we can't allow the user to change the state of
+ * quota on the mount - this would generate a transaction
+ * on the ro device, which would lead to an I/O error and
+ * shutdown.
*/
- if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
- (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) ||
- (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
- (!gquotaondisk && XFS_IS_GQUOTA_ON(mp)) ||
- (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
- (!pquotaondisk && XFS_IS_PQUOTA_ON(mp))) &&
- xfs_dev_is_read_only(mp, "changing quota state")) {
+ if (xfs_qm_validate_state_change(mp, uquotaondisk,
+ gquotaondisk, pquotaondisk)) {
+
xfs_warn(mp, "please mount with%s%s%s%s.",
- (!quotaondisk ? "out quota" : ""),
- (uquotaondisk ? " usrquota" : ""),
- (gquotaondisk ? " grpquota" : ""),
- (pquotaondisk ? " prjquota" : ""));
+ (!quotaondisk ? "out quota" : ""),
+ (uquotaondisk ? " usrquota" : ""),
+ (gquotaondisk ? " grpquota" : ""),
+ (pquotaondisk ? " prjquota" : ""));
return -EPERM;
}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 23d71a55bbc0..032f3a70f21d 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -96,7 +96,8 @@ extern void xfs_trans_free_dqinfo(struct xfs_trans *);
extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *,
uint, int64_t);
extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *);
-extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
+void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *tp,
+ bool already_locked);
int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, struct xfs_inode *ip,
int64_t dblocks, int64_t rblocks, bool force);
extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
@@ -166,7 +167,7 @@ static inline void xfs_trans_mod_dquot_byino(struct xfs_trans *tp,
{
}
#define xfs_trans_apply_dquot_deltas(tp)
-#define xfs_trans_unreserve_and_mod_dquots(tp)
+#define xfs_trans_unreserve_and_mod_dquots(tp, a)
static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
struct xfs_inode *ip, int64_t dblocks, int64_t rblocks,
bool force)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index fbb3a1594c0d..201a86b3574d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -873,12 +873,6 @@ xfs_fs_statfs(
ffree = statp->f_files - (icount - ifree);
statp->f_ffree = max_t(int64_t, ffree, 0);
-
- if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
- ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
- (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
- xfs_qm_statvfs(ip, statp);
-
if (XFS_IS_REALTIME_MOUNT(mp) &&
(ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
s64 freertx;
@@ -888,6 +882,11 @@ xfs_fs_statfs(
statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx);
}
+ if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
+ ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
+ (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
+ xfs_qm_statvfs(ip, statp);
+
return 0;
}
@@ -1620,8 +1619,12 @@ xfs_fs_fill_super(
#endif
}
- /* Filesystem claims it needs repair, so refuse the mount. */
- if (xfs_has_needsrepair(mp)) {
+ /*
+ * Filesystem claims it needs repair, so refuse the mount unless
+ * norecovery is also specified, in which case the filesystem can
+ * be mounted with no risk of further damage.
+ */
+ if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) {
xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair.");
error = -EFSCORRUPTED;
goto out_free_sb;
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 30e03342287a..39cd11cbe21f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -835,16 +835,12 @@ __xfs_trans_commit(
trace_xfs_trans_commit(tp, _RET_IP_);
/*
- * Finish deferred items on final commit. Only permanent transactions
- * should ever have deferred ops.
+ * Commit per-transaction changes that are not already tracked through
+ * log items. This can add dirty log items to the transaction.
*/
- WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
- !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
- if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
- error = xfs_defer_finish_noroll(&tp);
- if (error)
- goto out_unreserve;
- }
+ if (tp->t_flags & XFS_TRANS_SB_DIRTY)
+ xfs_trans_apply_sb_deltas(tp);
+ xfs_trans_apply_dquot_deltas(tp);
error = xfs_trans_run_precommits(tp);
if (error)
@@ -873,13 +869,6 @@ __xfs_trans_commit(
ASSERT(tp->t_ticket != NULL);
- /*
- * If we need to update the superblock, then do it now.
- */
- if (tp->t_flags & XFS_TRANS_SB_DIRTY)
- xfs_trans_apply_sb_deltas(tp);
- xfs_trans_apply_dquot_deltas(tp);
-
xlog_cil_commit(log, tp, &commit_seq, regrant);
xfs_trans_free(tp);
@@ -905,7 +894,7 @@ out_unreserve:
* the dqinfo portion to be. All that means is that we have some
* (non-persistent) quota reservations that need to be unreserved.
*/
- xfs_trans_unreserve_and_mod_dquots(tp);
+ xfs_trans_unreserve_and_mod_dquots(tp, true);
if (tp->t_ticket) {
if (regrant && !xlog_is_shutdown(log))
xfs_log_ticket_regrant(log, tp->t_ticket);
@@ -924,6 +913,20 @@ int
xfs_trans_commit(
struct xfs_trans *tp)
{
+ /*
+ * Finish deferred items on final commit. Only permanent transactions
+ * should ever have deferred ops.
+ */
+ WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
+ !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
+ if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) {
+ int error = xfs_defer_finish_noroll(&tp);
+ if (error) {
+ xfs_trans_cancel(tp);
+ return error;
+ }
+ }
+
return __xfs_trans_commit(tp, false);
}
@@ -985,7 +988,7 @@ xfs_trans_cancel(
}
#endif
xfs_trans_unreserve_and_mod_sb(tp);
- xfs_trans_unreserve_and_mod_dquots(tp);
+ xfs_trans_unreserve_and_mod_dquots(tp, false);
if (tp->t_ticket) {
xfs_log_ticket_ungrant(log, tp->t_ticket);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 8ede9d099d1f..f56d62dced97 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -360,7 +360,7 @@ xfsaild_resubmit_item(
/* protected by ail_lock */
list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
- if (bp->b_flags & _XBF_INODES)
+ if (bp->b_flags & (_XBF_INODES | _XBF_DQUOTS))
clear_bit(XFS_LI_FAILED, &lip->li_flags);
else
xfs_clear_li_failed(lip);
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index b368e13424c4..b92eeaa1a2a9 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -602,6 +602,24 @@ xfs_trans_apply_dquot_deltas(
ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count);
ASSERT(dqp->q_ino.reserved >= dqp->q_ino.count);
ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count);
+
+ /*
+ * We've applied the count changes and given back
+ * whatever reservation we didn't use. Zero out the
+ * dqtrx fields.
+ */
+ qtrx->qt_blk_res = 0;
+ qtrx->qt_bcount_delta = 0;
+ qtrx->qt_delbcnt_delta = 0;
+
+ qtrx->qt_rtblk_res = 0;
+ qtrx->qt_rtblk_res_used = 0;
+ qtrx->qt_rtbcount_delta = 0;
+ qtrx->qt_delrtb_delta = 0;
+
+ qtrx->qt_ino_res = 0;
+ qtrx->qt_ino_res_used = 0;
+ qtrx->qt_icount_delta = 0;
}
}
}
@@ -638,7 +656,8 @@ xfs_trans_unreserve_and_mod_dquots_hook(
*/
void
xfs_trans_unreserve_and_mod_dquots(
- struct xfs_trans *tp)
+ struct xfs_trans *tp,
+ bool already_locked)
{
int i, j;
struct xfs_dquot *dqp;
@@ -667,10 +686,12 @@ xfs_trans_unreserve_and_mod_dquots(
* about the number of blocks used field, or deltas.
* Also we don't bother to zero the fields.
*/
- locked = false;
+ locked = already_locked;
if (qtrx->qt_blk_res) {
- xfs_dqlock(dqp);
- locked = true;
+ if (!locked) {
+ xfs_dqlock(dqp);
+ locked = true;
+ }
dqp->q_blk.reserved -=
(xfs_qcnt_t)qtrx->qt_blk_res;
}
@@ -691,7 +712,7 @@ xfs_trans_unreserve_and_mod_dquots(
dqp->q_rtb.reserved -=
(xfs_qcnt_t)qtrx->qt_rtblk_res;
}
- if (locked)
+ if (locked && !already_locked)
xfs_dqunlock(dqp);
}