Diffstat (limited to 'fs/xfs')
-rw-r--r--  fs/xfs/scrub/common.h         |   5
-rw-r--r--  fs/xfs/scrub/repair.h         |  11
-rw-r--r--  fs/xfs/scrub/scrub.c          |  12
-rw-r--r--  fs/xfs/scrub/trace.h          |   2
-rw-r--r--  fs/xfs/xfs_aops.c             |  41
-rw-r--r--  fs/xfs/xfs_buf.c              |   3
-rw-r--r--  fs/xfs/xfs_buf_item_recover.c |  11
-rw-r--r--  fs/xfs/xfs_discard.c          |  17
-rw-r--r--  fs/xfs/xfs_dquot.c            | 199
-rw-r--r--  fs/xfs/xfs_dquot.h            |   6
-rw-r--r--  fs/xfs/xfs_dquot_item.c       |  51
-rw-r--r--  fs/xfs/xfs_dquot_item.h       |   7
-rw-r--r--  fs/xfs/xfs_exchrange.c        |  71
-rw-r--r--  fs/xfs/xfs_inode.c            |   7
-rw-r--r--  fs/xfs/xfs_iomap.c            |   6
-rw-r--r--  fs/xfs/xfs_notify_failure.c   | 121
-rw-r--r--  fs/xfs/xfs_qm.c               |  48
-rw-r--r--  fs/xfs/xfs_qm_bhv.c           |  90
-rw-r--r--  fs/xfs/xfs_quota.h            |   5
-rw-r--r--  fs/xfs/xfs_super.c            |  19
-rw-r--r--  fs/xfs/xfs_trans.c            |  39
-rw-r--r--  fs/xfs/xfs_trans_ail.c        |   2
-rw-r--r--  fs/xfs/xfs_trans_dquot.c      |  31
23 files changed, 585 insertions, 219 deletions
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 47148cc4a833..eb00d48590f2 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -179,7 +179,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm) bool xchk_dir_looks_zapped(struct xfs_inode *dp); bool xchk_pptr_looks_zapped(struct xfs_inode *ip); -#ifdef CONFIG_XFS_ONLINE_REPAIR /* Decide if a repair is required. */ static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm) { @@ -199,10 +198,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc) return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && !(sc->flags & XREP_ALREADY_FIXED); } -#else -# define xchk_needs_repair(sc) (false) -# define xchk_could_repair(sc) (false) -#endif /* CONFIG_XFS_ONLINE_REPAIR */ int xchk_metadata_inode_forks(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 0e0dc2bf985c..96180176c582 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -163,7 +163,16 @@ bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops); #else #define xrep_ino_dqattach(sc) (0) -#define xrep_will_attempt(sc) (false) + +/* + * When online repair is not built into the kernel, we still want to attempt + * the repair so that the stub xrep_attempt below will return EOPNOTSUPP. + */ +static inline bool xrep_will_attempt(const struct xfs_scrub *sc) +{ + return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) || + xchk_needs_repair(sc->sm); +} static inline int xrep_attempt( diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 4cbcf7a86dbe..5c266d2842db 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -149,6 +149,18 @@ xchk_probe( if (xchk_should_terminate(sc, &error)) return error; + /* + * If the caller is probing to see if repair works but repair isn't + * built into the kernel, return EOPNOTSUPP because that's the signal + * that userspace expects. If online repair is built in, set the + * CORRUPT flag (without any of the usual tracing/logging) to force us + * into xrep_probe. + */ + if (xchk_could_repair(sc)) { + if (!IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)) + return -EOPNOTSUPP; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + } return 0; } diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index da773fee8638..2fbc8508ccdf 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -467,7 +467,7 @@ DECLARE_EVENT_CLASS(xchk_dqiter_class, __field(xfs_exntst_t, state) ), TP_fast_assign( - __entry->dev = cursor->sc->ip->i_mount->m_super->s_dev; + __entry->dev = cursor->sc->mp->m_super->s_dev; __entry->dqtype = cursor->dqtype; __entry->ino = cursor->quota_ip->i_ino; __entry->cur_id = cursor->id; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 559a3a577097..ba6b4a180e80 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -19,6 +19,7 @@ #include "xfs_reflink.h" #include "xfs_errortag.h" #include "xfs_error.h" +#include "xfs_icache.h" struct xfs_writepage_ctx { struct iomap_writepage_ctx ctx; @@ -528,12 +529,44 @@ xfs_vm_readahead( } static int -xfs_iomap_swapfile_activate( +xfs_vm_swap_activate( struct swap_info_struct *sis, struct file *swap_file, sector_t *span) { - sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; + struct xfs_inode *ip = XFS_I(file_inode(swap_file)); + + /* + * Swap file activation can race against concurrent shared extent + * removal in files that have been cloned. 
If this happens, + * iomap_swapfile_iter() can fail because it encountered a shared + * extent even though an operation is in progress to remove those + * shared extents. + * + * This race becomes problematic when we defer extent removal + * operations beyond the end of a syscall (i.e. use async background + * processing algorithms). Users think the extents are no longer + * shared, but iomap_swapfile_iter() still sees them as shared + * because the refcountbt entries for the extents being removed have + * not yet been updated. Hence the swapon call fails unexpectedly. + * + * The race condition is currently most obvious from the unlink() + * operation as extent removal is deferred until after the last + * reference to the inode goes away. We then process the extent + * removal asynchronously, hence triggers the "syscall completed but + * work not done" condition mentioned above. To close this race + * window, we need to flush any pending inodegc operations to ensure + * they have updated the refcountbt records before we try to map the + * swapfile. + */ + xfs_inodegc_flush(ip->i_mount); + + /* + * Direct the swap code to the correct block device when this file + * sits on the RT device. + */ + sis->bdev = xfs_inode_buftarg(ip)->bt_bdev; + return iomap_swapfile_activate(sis, swap_file, span, &xfs_read_iomap_ops); } @@ -549,11 +582,11 @@ const struct address_space_operations xfs_address_space_operations = { .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_folio = generic_error_remove_folio, - .swap_activate = xfs_iomap_swapfile_activate, + .swap_activate = xfs_vm_swap_activate, }; const struct address_space_operations xfs_dax_aops = { .writepages = xfs_dax_writepages, .dirty_folio = noop_dirty_folio, - .swap_activate = xfs_iomap_swapfile_activate, + .swap_activate = xfs_vm_swap_activate, }; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index aa4dbda7b536..6bcbdc8bf186 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -663,9 +663,8 @@ xfs_buf_find_insert( spin_unlock(&bch->bc_lock); goto out_free_buf; } - if (bp) { + if (bp && atomic_inc_not_zero(&bp->b_hold)) { /* found an existing buffer */ - atomic_inc(&bp->b_hold); spin_unlock(&bch->bc_lock); error = xfs_buf_find_lock(bp, flags); if (error) diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index 5180cbf5a90b..0185c92df8c2 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -1036,12 +1036,21 @@ xlog_recover_buf_commit_pass2( error = xlog_recover_do_primary_sb_buffer(mp, item, bp, buf_f, current_lsn); if (error) - goto out_release; + goto out_writebuf; } else { xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn); } /* + * Buffer held by buf log item during 'normal' buffer recovery must + * be committed through buffer I/O submission path to ensure proper + * release. When error occurs during sb buffer recovery, log shutdown + * will be done before submitting buffer list so that buffers can be + * released correctly through ioend failure path. + */ +out_writebuf: + + /* * Perform delayed write on the buffer. Asynchronous writes will be * slower when taking into account all the buffers to be flushed. 
* diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index d8c4a5dcca7a..0b343776da8c 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -146,6 +146,14 @@ xfs_discard_extents( return error; } +/* + * Care must be taken setting up the trim cursor as the perags may not have been + * initialised when the cursor is initialised. e.g. a clean mount which hasn't + * read in AGFs and the first operation run on the mounted fs is a trim. This + * can result in perag fields that aren't initialised until + * xfs_trim_gather_extents() calls xfs_alloc_read_agf() to lock down the AG for + * the free space search. + */ struct xfs_trim_cur { xfs_agblock_t start; xfs_extlen_t count; @@ -183,6 +191,14 @@ xfs_trim_gather_extents( if (error) goto out_trans_cancel; + /* + * First time through tcur->count will not have been initialised as + * pag->pagf_longest is not guaranteed to be valid before we read + * the AGF buffer above. + */ + if (!tcur->count) + tcur->count = pag->pagf_longest; + if (tcur->by_bno) { /* sub-AG discard request always starts at tcur->start */ cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag); @@ -329,7 +345,6 @@ xfs_trim_perag_extents( { struct xfs_trim_cur tcur = { .start = start, - .count = pag->pagf_longest, .end = end, .minlen = minlen, }; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index c1b211c260a9..0d73b59f1c9e 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -69,6 +69,31 @@ xfs_dquot_mark_sick( } /* + * Detach the dquot buffer if it's still attached, because we can get called + * through dqpurge after a log shutdown. Caller must hold the dqflock or have + * otherwise isolated the dquot. + */ +void +xfs_dquot_detach_buf( + struct xfs_dquot *dqp) +{ + struct xfs_dq_logitem *qlip = &dqp->q_logitem; + struct xfs_buf *bp = NULL; + + spin_lock(&qlip->qli_lock); + if (qlip->qli_item.li_buf) { + bp = qlip->qli_item.li_buf; + qlip->qli_item.li_buf = NULL; + } + spin_unlock(&qlip->qli_lock); + if (bp) { + xfs_buf_lock(bp); + list_del_init(&qlip->qli_item.li_bio_list); + xfs_buf_relse(bp); + } +} + +/* * This is called to free all the memory associated with a dquot */ void @@ -76,6 +101,7 @@ xfs_qm_dqdestroy( struct xfs_dquot *dqp) { ASSERT(list_empty(&dqp->q_lru)); + ASSERT(dqp->q_logitem.qli_item.li_buf == NULL); kvfree(dqp->q_logitem.qli_item.li_lv_shadow); mutex_destroy(&dqp->q_qlock); @@ -1136,9 +1162,11 @@ static void xfs_qm_dqflush_done( struct xfs_log_item *lip) { - struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip; - struct xfs_dquot *dqp = qip->qli_dquot; + struct xfs_dq_logitem *qlip = + container_of(lip, struct xfs_dq_logitem, qli_item); + struct xfs_dquot *dqp = qlip->qli_dquot; struct xfs_ail *ailp = lip->li_ailp; + struct xfs_buf *bp = NULL; xfs_lsn_t tail_lsn; /* @@ -1150,12 +1178,12 @@ xfs_qm_dqflush_done( * holding the lock before removing the dquot from the AIL. */ if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) && - ((lip->li_lsn == qip->qli_flush_lsn) || + ((lip->li_lsn == qlip->qli_flush_lsn) || test_bit(XFS_LI_FAILED, &lip->li_flags))) { spin_lock(&ailp->ail_lock); xfs_clear_li_failed(lip); - if (lip->li_lsn == qip->qli_flush_lsn) { + if (lip->li_lsn == qlip->qli_flush_lsn) { /* xfs_ail_update_finish() drops the AIL lock */ tail_lsn = xfs_ail_delete_one(ailp, lip); xfs_ail_update_finish(ailp, tail_lsn); @@ -1168,6 +1196,19 @@ xfs_qm_dqflush_done( * Release the dq's flush lock since we're done with it. 
*/ xfs_dqfunlock(dqp); + + /* + * If this dquot hasn't been dirtied since initiating the last dqflush, + * release the buffer reference. + */ + spin_lock(&qlip->qli_lock); + if (!qlip->qli_dirty) { + bp = lip->li_buf; + lip->li_buf = NULL; + } + spin_unlock(&qlip->qli_lock); + if (bp) + xfs_buf_rele(bp); } void @@ -1190,7 +1231,7 @@ xfs_buf_dquot_io_fail( spin_lock(&bp->b_mount->m_ail->ail_lock); list_for_each_entry(lip, &bp->b_li_list, li_bio_list) - xfs_set_li_failed(lip, bp); + set_bit(XFS_LI_FAILED, &lip->li_flags); spin_unlock(&bp->b_mount->m_ail->ail_lock); } @@ -1233,6 +1274,115 @@ xfs_qm_dqflush_check( } /* + * Get the buffer containing the on-disk dquot. + * + * Requires dquot flush lock, will clear the dirty flag, delete the quota log + * item from the AIL, and shut down the system if something goes wrong. + */ +static int +xfs_dquot_read_buf( + struct xfs_trans *tp, + struct xfs_dquot *dqp, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = dqp->q_mount; + struct xfs_buf *bp = NULL; + int error; + + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, 0, + &bp, &xfs_dquot_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_dquot_mark_sick(dqp); + if (error) + goto out_abort; + + *bpp = bp; + return 0; + +out_abort: + dqp->q_flags &= ~XFS_DQFLAG_DIRTY; + xfs_trans_ail_delete(&dqp->q_logitem.qli_item, 0); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; +} + +/* + * Attach a dquot buffer to this dquot to avoid allocating a buffer during a + * dqflush, since dqflush can be called from reclaim context. Caller must hold + * the dqlock. + */ +int +xfs_dquot_attach_buf( + struct xfs_trans *tp, + struct xfs_dquot *dqp) +{ + struct xfs_dq_logitem *qlip = &dqp->q_logitem; + struct xfs_log_item *lip = &qlip->qli_item; + int error; + + spin_lock(&qlip->qli_lock); + if (!lip->li_buf) { + struct xfs_buf *bp = NULL; + + spin_unlock(&qlip->qli_lock); + error = xfs_dquot_read_buf(tp, dqp, &bp); + if (error) + return error; + + /* + * Hold the dquot buffer so that we retain our ref to it after + * detaching it from the transaction, then give that ref to the + * dquot log item so that the AIL does not have to read the + * dquot buffer to push this item. + */ + xfs_buf_hold(bp); + xfs_trans_brelse(tp, bp); + + spin_lock(&qlip->qli_lock); + lip->li_buf = bp; + } + qlip->qli_dirty = true; + spin_unlock(&qlip->qli_lock); + + return 0; +} + +/* + * Get a new reference the dquot buffer attached to this dquot for a dqflush + * operation. + * + * Returns 0 and a NULL bp if none was attached to the dquot; 0 and a locked + * bp; or -EAGAIN if the buffer could not be locked. + */ +int +xfs_dquot_use_attached_buf( + struct xfs_dquot *dqp, + struct xfs_buf **bpp) +{ + struct xfs_buf *bp = dqp->q_logitem.qli_item.li_buf; + + /* + * A NULL buffer can happen if the dquot dirty flag was set but the + * filesystem shut down before transaction commit happened. In that + * case we're not going to flush anyway. + */ + if (!bp) { + ASSERT(xfs_is_shutdown(dqp->q_mount)); + + *bpp = NULL; + return 0; + } + + if (!xfs_buf_trylock(bp)) + return -EAGAIN; + + xfs_buf_hold(bp); + *bpp = bp; + return 0; +} + +/* * Write a modified dquot to disk. * The dquot must be locked and the flush lock too taken by caller. 
* The flush lock will not be unlocked until the dquot reaches the disk, @@ -1243,11 +1393,11 @@ xfs_qm_dqflush_check( int xfs_qm_dqflush( struct xfs_dquot *dqp, - struct xfs_buf **bpp) + struct xfs_buf *bp) { struct xfs_mount *mp = dqp->q_mount; - struct xfs_log_item *lip = &dqp->q_logitem.qli_item; - struct xfs_buf *bp; + struct xfs_dq_logitem *qlip = &dqp->q_logitem; + struct xfs_log_item *lip = &qlip->qli_item; struct xfs_dqblk *dqblk; xfs_failaddr_t fa; int error; @@ -1257,28 +1407,12 @@ xfs_qm_dqflush( trace_xfs_dqflush(dqp); - *bpp = NULL; - xfs_qm_dqunpin_wait(dqp); - /* - * Get the buffer containing the on-disk dquot - */ - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK, - &bp, &xfs_dquot_buf_ops); - if (error == -EAGAIN) - goto out_unlock; - if (xfs_metadata_is_sick(error)) - xfs_dquot_mark_sick(dqp); - if (error) - goto out_abort; - fa = xfs_qm_dqflush_check(dqp); if (fa) { xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", dqp->q_id, fa); - xfs_buf_relse(bp); xfs_dquot_mark_sick(dqp); error = -EFSCORRUPTED; goto out_abort; @@ -1293,8 +1427,15 @@ xfs_qm_dqflush( */ dqp->q_flags &= ~XFS_DQFLAG_DIRTY; - xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, - &dqp->q_logitem.qli_item.li_lsn); + /* + * We hold the dquot lock, so nobody can dirty it while we're + * scheduling the write out. Clear the dirty-since-flush flag. + */ + spin_lock(&qlip->qli_lock); + qlip->qli_dirty = false; + spin_unlock(&qlip->qli_lock); + + xfs_trans_ail_copy_lsn(mp->m_ail, &qlip->qli_flush_lsn, &lip->li_lsn); /* * copy the lsn into the on-disk dquot now while we have the in memory @@ -1306,7 +1447,7 @@ xfs_qm_dqflush( * of a dquot without an up-to-date CRC getting to disk. */ if (xfs_has_crc(mp)) { - dqblk->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn); + dqblk->dd_lsn = cpu_to_be64(lip->li_lsn); xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } @@ -1316,7 +1457,7 @@ xfs_qm_dqflush( * the AIL and release the flush lock once the dquot is synced to disk. 
*/ bp->b_flags |= _XBF_DQUOTS; - list_add_tail(&dqp->q_logitem.qli_item.li_bio_list, &bp->b_li_list); + list_add_tail(&lip->li_bio_list, &bp->b_li_list); /* * If the buffer is pinned then push on the log so we won't @@ -1328,14 +1469,12 @@ xfs_qm_dqflush( } trace_xfs_dqflush_done(dqp); - *bpp = bp; return 0; out_abort: dqp->q_flags &= ~XFS_DQFLAG_DIRTY; xfs_trans_ail_delete(lip, 0); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); -out_unlock: xfs_dqfunlock(dqp); return error; } diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 677bb2dc9ac9..bd7bfd9e402e 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -204,7 +204,7 @@ void xfs_dquot_to_disk(struct xfs_disk_dquot *ddqp, struct xfs_dquot *dqp); #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->q_flags & XFS_DQFLAG_DIRTY) void xfs_qm_dqdestroy(struct xfs_dquot *dqp); -int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp); +int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf *bp); void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp); void xfs_qm_adjust_dqtimers(struct xfs_dquot *d); void xfs_qm_adjust_dqlimits(struct xfs_dquot *d); @@ -227,6 +227,10 @@ void xfs_dqlockn(struct xfs_dqtrx *q); void xfs_dquot_set_prealloc_limits(struct xfs_dquot *); +int xfs_dquot_attach_buf(struct xfs_trans *tp, struct xfs_dquot *dqp); +int xfs_dquot_use_attached_buf(struct xfs_dquot *dqp, struct xfs_buf **bpp); +void xfs_dquot_detach_buf(struct xfs_dquot *dqp); + static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) { xfs_dqlock(dqp); diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 7d19091215b0..271b195ebb93 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -123,8 +123,9 @@ xfs_qm_dquot_logitem_push( __releases(&lip->li_ailp->ail_lock) __acquires(&lip->li_ailp->ail_lock) { - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - struct xfs_buf *bp = lip->li_buf; + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); + struct xfs_dquot *dqp = qlip->qli_dquot; + struct xfs_buf *bp; uint rval = XFS_ITEM_SUCCESS; int error; @@ -155,14 +156,25 @@ xfs_qm_dquot_logitem_push( spin_unlock(&lip->li_ailp->ail_lock); - error = xfs_qm_dqflush(dqp, &bp); + error = xfs_dquot_use_attached_buf(dqp, &bp); + if (error == -EAGAIN) { + xfs_dqfunlock(dqp); + rval = XFS_ITEM_LOCKED; + goto out_relock_ail; + } + + /* + * dqflush completes dqflock on error, and the delwri ioend does it on + * success. + */ + error = xfs_qm_dqflush(dqp, bp); if (!error) { if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; - xfs_buf_relse(bp); - } else if (error == -EAGAIN) - rval = XFS_ITEM_LOCKED; + } + xfs_buf_relse(bp); +out_relock_ail: spin_lock(&lip->li_ailp->ail_lock); out_unlock: xfs_dqunlock(dqp); @@ -195,12 +207,10 @@ xfs_qm_dquot_logitem_committing( } #ifdef DEBUG_EXPENSIVE -static int -xfs_qm_dquot_logitem_precommit( - struct xfs_trans *tp, - struct xfs_log_item *lip) +static void +xfs_qm_dquot_logitem_precommit_check( + struct xfs_dquot *dqp) { - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; struct xfs_mount *mp = dqp->q_mount; struct xfs_disk_dquot ddq = { }; xfs_failaddr_t fa; @@ -216,13 +226,24 @@ xfs_qm_dquot_logitem_precommit( xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); ASSERT(fa == NULL); } - - return 0; } #else -# define xfs_qm_dquot_logitem_precommit NULL +# define xfs_qm_dquot_logitem_precommit_check(...) 
((void)0) #endif +static int +xfs_qm_dquot_logitem_precommit( + struct xfs_trans *tp, + struct xfs_log_item *lip) +{ + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); + struct xfs_dquot *dqp = qlip->qli_dquot; + + xfs_qm_dquot_logitem_precommit_check(dqp); + + return xfs_dquot_attach_buf(tp, dqp); +} + static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_size = xfs_qm_dquot_logitem_size, .iop_precommit = xfs_qm_dquot_logitem_precommit, @@ -247,5 +268,7 @@ xfs_qm_dquot_logitem_init( xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, &xfs_dquot_item_ops); + spin_lock_init(&lp->qli_lock); lp->qli_dquot = dqp; + lp->qli_dirty = false; } diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 794710c24474..d66e52807d76 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -14,6 +14,13 @@ struct xfs_dq_logitem { struct xfs_log_item qli_item; /* common portion */ struct xfs_dquot *qli_dquot; /* dquot ptr */ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ + + /* + * We use this spinlock to coordinate access to the li_buf pointer in + * the log item and the qli_dirty flag. + */ + spinlock_t qli_lock; + bool qli_dirty; /* dirtied since last flush? */ }; void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp); diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index 75cb53f090d1..7c8195895a73 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -326,22 +326,6 @@ out_trans_cancel: * successfully but before locks are dropped. */ -/* Verify that we have security clearance to perform this operation. */ -static int -xfs_exchange_range_verify_area( - struct xfs_exchrange *fxr) -{ - int ret; - - ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, - true); - if (ret) - return ret; - - return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, - true); -} - /* * Performs necessary checks before doing a range exchange, having stabilized * mutable inode attributes via i_rwsem. @@ -352,11 +336,13 @@ xfs_exchange_range_checks( unsigned int alloc_unit) { struct inode *inode1 = file_inode(fxr->file1); + loff_t size1 = i_size_read(inode1); struct inode *inode2 = file_inode(fxr->file2); + loff_t size2 = i_size_read(inode2); uint64_t allocmask = alloc_unit - 1; int64_t test_len; uint64_t blen; - loff_t size1, size2, tmp; + loff_t tmp; int error; /* Don't touch certain kinds of inodes */ @@ -365,24 +351,25 @@ xfs_exchange_range_checks( if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) return -ETXTBSY; - size1 = i_size_read(inode1); - size2 = i_size_read(inode2); - /* Ranges cannot start after EOF. */ if (fxr->file1_offset > size1 || fxr->file2_offset > size2) return -EINVAL; - /* - * If the caller said to exchange to EOF, we set the length of the - * request large enough to cover everything to the end of both files. - */ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { + /* + * If the caller said to exchange to EOF, we set the length of + * the request large enough to cover everything to the end of + * both files. + */ fxr->length = max_t(int64_t, size1 - fxr->file1_offset, size2 - fxr->file2_offset); - - error = xfs_exchange_range_verify_area(fxr); - if (error) - return error; + } else { + /* + * Otherwise we require both ranges to end within EOF. + */ + if (fxr->file1_offset + fxr->length > size1 || + fxr->file2_offset + fxr->length > size2) + return -EINVAL; } /* @@ -399,15 +386,6 @@ xfs_exchange_range_checks( return -EINVAL; /* - * We require both ranges to end within EOF, unless we're exchanging - * to EOF. 
- */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && - (fxr->file1_offset + fxr->length > size1 || - fxr->file2_offset + fxr->length > size2)) - return -EINVAL; - - /* * Make sure we don't hit any file size limits. If we hit any size * limits such that test_length was adjusted, we abort the whole * operation. @@ -744,6 +722,7 @@ xfs_exchange_range( { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); + loff_t check_len = fxr->length; int ret; BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & @@ -776,14 +755,18 @@ xfs_exchange_range( return -EBADF; /* - * If we're not exchanging to EOF, we can check the areas before - * stabilizing both files' i_size. + * If we're exchanging to EOF we can't calculate the length until taking + * the iolock. Pass a 0 length to remap_verify_area similar to the + * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well. */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { - ret = xfs_exchange_range_verify_area(fxr); - if (ret) - return ret; - } + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) + check_len = 0; + ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true); + if (ret) + return ret; + ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true); + if (ret) + return ret; /* Update cmtime if the fd/inode don't forbid it. */ if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 19dcb569a3e7..ed09b4a3084e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1392,8 +1392,11 @@ xfs_inactive( goto out; /* Try to clean out the cow blocks if there are any. */ - if (xfs_inode_has_cow_data(ip)) - xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (xfs_inode_has_cow_data(ip)) { + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (error) + goto out; + } if (VFS_I(ip)->i_nlink != 0) { /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 86da16f54be9..6335b122486f 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -942,10 +942,8 @@ xfs_dax_write_iomap_end( if (!xfs_is_cow_inode(ip)) return 0; - if (!written) { - xfs_reflink_cancel_cow_range(ip, pos, length, true); - return 0; - } + if (!written) + return xfs_reflink_cancel_cow_range(ip, pos, length, true); return xfs_reflink_end_cow(ip, pos, written); } diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index fa50e5308292..0b0b0f31aca2 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -154,6 +154,79 @@ xfs_dax_notify_failure_thaw( } static int +xfs_dax_translate_range( + struct xfs_buftarg *btp, + u64 offset, + u64 len, + xfs_daddr_t *daddr, + uint64_t *bblen) +{ + u64 dev_start = btp->bt_dax_part_off; + u64 dev_len = bdev_nr_bytes(btp->bt_bdev); + u64 dev_end = dev_start + dev_len - 1; + + /* Notify failure on the whole device. 
*/ + if (offset == 0 && len == U64_MAX) { + offset = dev_start; + len = dev_len; + } + + /* Ignore the range out of filesystem area */ + if (offset + len - 1 < dev_start) + return -ENXIO; + if (offset > dev_end) + return -ENXIO; + + /* Calculate the real range when it touches the boundary */ + if (offset > dev_start) + offset -= dev_start; + else { + len -= dev_start - offset; + offset = 0; + } + if (offset + len - 1 > dev_end) + len = dev_end - offset + 1; + + *daddr = BTOBB(offset); + *bblen = BTOBB(len); + return 0; +} + +static int +xfs_dax_notify_logdev_failure( + struct xfs_mount *mp, + u64 offset, + u64 len, + int mf_flags) +{ + xfs_daddr_t daddr; + uint64_t bblen; + int error; + + /* + * Return ENXIO instead of shutting down the filesystem if the failed + * region is beyond the end of the log. + */ + error = xfs_dax_translate_range(mp->m_logdev_targp, + offset, len, &daddr, &bblen); + if (error) + return error; + + /* + * In the pre-remove case the failure notification is attempting to + * trigger a force unmount. The expectation is that the device is + * still present, but its removal is in progress and can not be + * cancelled, proceed with accessing the log device. + */ + if (mf_flags & MF_MEM_PRE_REMOVE) + return 0; + + xfs_err(mp, "ondisk log corrupt, shutting down fs!"); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; +} + +static int xfs_dax_notify_ddev_failure( struct xfs_mount *mp, xfs_daddr_t daddr, @@ -263,8 +336,9 @@ xfs_dax_notify_failure( int mf_flags) { struct xfs_mount *mp = dax_holder(dax_dev); - u64 ddev_start; - u64 ddev_end; + xfs_daddr_t daddr; + uint64_t bblen; + int error; if (!(mp->m_super->s_flags & SB_BORN)) { xfs_warn(mp, "filesystem is not ready for notify_failure()!"); @@ -279,17 +353,7 @@ xfs_dax_notify_failure( if (mp->m_logdev_targp && mp->m_logdev_targp->bt_daxdev == dax_dev && mp->m_logdev_targp != mp->m_ddev_targp) { - /* - * In the pre-remove case the failure notification is attempting - * to trigger a force unmount. The expectation is that the - * device is still present, but its removal is in progress and - * can not be cancelled, proceed with accessing the log device. - */ - if (mf_flags & MF_MEM_PRE_REMOVE) - return 0; - xfs_err(mp, "ondisk log corrupt, shutting down fs!"); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK); - return -EFSCORRUPTED; + return xfs_dax_notify_logdev_failure(mp, offset, len, mf_flags); } if (!xfs_has_rmapbt(mp)) { @@ -297,33 +361,12 @@ xfs_dax_notify_failure( return -EOPNOTSUPP; } - ddev_start = mp->m_ddev_targp->bt_dax_part_off; - ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1; - - /* Notify failure on the whole device. 
*/ - if (offset == 0 && len == U64_MAX) { - offset = ddev_start; - len = bdev_nr_bytes(mp->m_ddev_targp->bt_bdev); - } - - /* Ignore the range out of filesystem area */ - if (offset + len - 1 < ddev_start) - return -ENXIO; - if (offset > ddev_end) - return -ENXIO; - - /* Calculate the real range when it touches the boundary */ - if (offset > ddev_start) - offset -= ddev_start; - else { - len -= ddev_start - offset; - offset = 0; - } - if (offset + len - 1 > ddev_end) - len = ddev_end - offset + 1; + error = xfs_dax_translate_range(mp->m_ddev_targp, offset, len, &daddr, + &bblen); + if (error) + return error; - return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len), - mf_flags); + return xfs_dax_notify_ddev_failure(mp, daddr, bblen, mf_flags); } const struct dax_holder_operations xfs_dax_holder_operations = { diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 7e2307921deb..3212b5bf3fb3 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -146,17 +146,29 @@ xfs_qm_dqpurge( * We don't care about getting disk errors here. We need * to purge this dquot anyway, so we go ahead regardless. */ - error = xfs_qm_dqflush(dqp, &bp); + error = xfs_dquot_use_attached_buf(dqp, &bp); + if (error == -EAGAIN) { + xfs_dqfunlock(dqp); + dqp->q_flags &= ~XFS_DQFLAG_FREEING; + goto out_unlock; + } + if (!bp) + goto out_funlock; + + /* + * dqflush completes dqflock on error, and the bwrite ioend + * does it on success. + */ + error = xfs_qm_dqflush(dqp, bp); if (!error) { error = xfs_bwrite(bp); xfs_buf_relse(bp); - } else if (error == -EAGAIN) { - dqp->q_flags &= ~XFS_DQFLAG_FREEING; - goto out_unlock; } xfs_dqflock(dqp); } + xfs_dquot_detach_buf(dqp); +out_funlock: ASSERT(atomic_read(&dqp->q_pincount) == 0); ASSERT(xlog_is_shutdown(dqp->q_logitem.qli_item.li_log) || !test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags)); @@ -462,7 +474,17 @@ xfs_qm_dquot_isolate( /* we have to drop the LRU lock to flush the dquot */ spin_unlock(lru_lock); - error = xfs_qm_dqflush(dqp, &bp); + error = xfs_dquot_use_attached_buf(dqp, &bp); + if (!bp || error == -EAGAIN) { + xfs_dqfunlock(dqp); + goto out_unlock_dirty; + } + + /* + * dqflush completes dqflock on error, and the delwri ioend + * does it on success. + */ + error = xfs_qm_dqflush(dqp, bp); if (error) goto out_unlock_dirty; @@ -470,6 +492,8 @@ xfs_qm_dquot_isolate( xfs_buf_relse(bp); goto out_unlock_dirty; } + + xfs_dquot_detach_buf(dqp); xfs_dqfunlock(dqp); /* @@ -1108,6 +1132,10 @@ xfs_qm_quotacheck_dqadjust( return error; } + error = xfs_dquot_attach_buf(NULL, dqp); + if (error) + return error; + trace_xfs_dqadjust(dqp); /* @@ -1287,11 +1315,17 @@ xfs_qm_flush_one( goto out_unlock; } - error = xfs_qm_dqflush(dqp, &bp); + error = xfs_dquot_use_attached_buf(dqp, &bp); if (error) goto out_unlock; + if (!bp) { + error = -EFSCORRUPTED; + goto out_unlock; + } - xfs_buf_delwri_queue(bp, buffer_list); + error = xfs_qm_dqflush(dqp, bp); + if (!error) + xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); out_unlock: xfs_dqunlock(dqp); diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index a11436579877..dabb1d6d7e46 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -19,28 +19,41 @@ STATIC void xfs_fill_statvfs_from_dquot( struct kstatfs *statp, + struct xfs_inode *ip, struct xfs_dquot *dqp) { + struct xfs_dquot_res *blkres = &dqp->q_blk; uint64_t limit; - limit = dqp->q_blk.softlimit ? 
- dqp->q_blk.softlimit : - dqp->q_blk.hardlimit; - if (limit && statp->f_blocks > limit) { - statp->f_blocks = limit; - statp->f_bfree = statp->f_bavail = - (statp->f_blocks > dqp->q_blk.reserved) ? - (statp->f_blocks - dqp->q_blk.reserved) : 0; + if (XFS_IS_REALTIME_MOUNT(ip->i_mount) && + (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) + blkres = &dqp->q_rtb; + + limit = blkres->softlimit ? + blkres->softlimit : + blkres->hardlimit; + if (limit) { + uint64_t remaining = 0; + + if (limit > blkres->reserved) + remaining = limit - blkres->reserved; + + statp->f_blocks = min(statp->f_blocks, limit); + statp->f_bfree = min(statp->f_bfree, remaining); + statp->f_bavail = min(statp->f_bavail, remaining); } limit = dqp->q_ino.softlimit ? dqp->q_ino.softlimit : dqp->q_ino.hardlimit; - if (limit && statp->f_files > limit) { - statp->f_files = limit; - statp->f_ffree = - (statp->f_files > dqp->q_ino.reserved) ? - (statp->f_files - dqp->q_ino.reserved) : 0; + if (limit) { + uint64_t remaining = 0; + + if (limit > dqp->q_ino.reserved) + remaining = limit - dqp->q_ino.reserved; + + statp->f_files = min(statp->f_files, limit); + statp->f_ffree = min(statp->f_ffree, remaining); } } @@ -61,11 +74,33 @@ xfs_qm_statvfs( struct xfs_dquot *dqp; if (!xfs_qm_dqget(mp, ip->i_projid, XFS_DQTYPE_PROJ, false, &dqp)) { - xfs_fill_statvfs_from_dquot(statp, dqp); + xfs_fill_statvfs_from_dquot(statp, ip, dqp); xfs_qm_dqput(dqp); } } +STATIC int +xfs_qm_validate_state_change( + struct xfs_mount *mp, + uint uqd, + uint gqd, + uint pqd) +{ + int state; + + /* Is quota state changing? */ + state = ((uqd && !XFS_IS_UQUOTA_ON(mp)) || + (!uqd && XFS_IS_UQUOTA_ON(mp)) || + (gqd && !XFS_IS_GQUOTA_ON(mp)) || + (!gqd && XFS_IS_GQUOTA_ON(mp)) || + (pqd && !XFS_IS_PQUOTA_ON(mp)) || + (!pqd && XFS_IS_PQUOTA_ON(mp))); + + return state && + (xfs_dev_is_read_only(mp, "changing quota state") || + xfs_has_norecovery(mp)); +} + int xfs_qm_newmount( xfs_mount_t *mp, @@ -85,24 +120,21 @@ xfs_qm_newmount( } /* - * If the device itself is read-only, we can't allow - * the user to change the state of quota on the mount - - * this would generate a transaction on the ro device, - * which would lead to an I/O error and shutdown + * If the device itself is read-only and/or in norecovery + * mode, we can't allow the user to change the state of + * quota on the mount - this would generate a transaction + * on the ro device, which would lead to an I/O error and + * shutdown. */ - if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || - (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || - (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || - (!gquotaondisk && XFS_IS_GQUOTA_ON(mp)) || - (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || - (!pquotaondisk && XFS_IS_PQUOTA_ON(mp))) && - xfs_dev_is_read_only(mp, "changing quota state")) { + if (xfs_qm_validate_state_change(mp, uquotaondisk, + gquotaondisk, pquotaondisk)) { + xfs_warn(mp, "please mount with%s%s%s%s.", - (!quotaondisk ? "out quota" : ""), - (uquotaondisk ? " usrquota" : ""), - (gquotaondisk ? " grpquota" : ""), - (pquotaondisk ? " prjquota" : "")); + (!quotaondisk ? "out quota" : ""), + (uquotaondisk ? " usrquota" : ""), + (gquotaondisk ? " grpquota" : ""), + (pquotaondisk ? 
" prjquota" : "")); return -EPERM; } diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 23d71a55bbc0..032f3a70f21d 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -96,7 +96,8 @@ extern void xfs_trans_free_dqinfo(struct xfs_trans *); extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *, uint, int64_t); extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *); -extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); +void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *tp, + bool already_locked); int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, struct xfs_inode *ip, int64_t dblocks, int64_t rblocks, bool force); extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, @@ -166,7 +167,7 @@ static inline void xfs_trans_mod_dquot_byino(struct xfs_trans *tp, { } #define xfs_trans_apply_dquot_deltas(tp) -#define xfs_trans_unreserve_and_mod_dquots(tp) +#define xfs_trans_unreserve_and_mod_dquots(tp, a) static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, struct xfs_inode *ip, int64_t dblocks, int64_t rblocks, bool force) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index fbb3a1594c0d..201a86b3574d 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -873,12 +873,6 @@ xfs_fs_statfs( ffree = statp->f_files - (icount - ifree); statp->f_ffree = max_t(int64_t, ffree, 0); - - if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) && - ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == - (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) - xfs_qm_statvfs(ip, statp); - if (XFS_IS_REALTIME_MOUNT(mp) && (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) { s64 freertx; @@ -888,6 +882,11 @@ xfs_fs_statfs( statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx); } + if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) && + ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == + (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) + xfs_qm_statvfs(ip, statp); + return 0; } @@ -1620,8 +1619,12 @@ xfs_fs_fill_super( #endif } - /* Filesystem claims it needs repair, so refuse the mount. */ - if (xfs_has_needsrepair(mp)) { + /* + * Filesystem claims it needs repair, so refuse the mount unless + * norecovery is also specified, in which case the filesystem can + * be mounted with no risk of further damage. + */ + if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) { xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); error = -EFSCORRUPTED; goto out_free_sb; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 30e03342287a..39cd11cbe21f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -835,16 +835,12 @@ __xfs_trans_commit( trace_xfs_trans_commit(tp, _RET_IP_); /* - * Finish deferred items on final commit. Only permanent transactions - * should ever have deferred ops. + * Commit per-transaction changes that are not already tracked through + * log items. This can add dirty log items to the transaction. */ - WARN_ON_ONCE(!list_empty(&tp->t_dfops) && - !(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); - if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) { - error = xfs_defer_finish_noroll(&tp); - if (error) - goto out_unreserve; - } + if (tp->t_flags & XFS_TRANS_SB_DIRTY) + xfs_trans_apply_sb_deltas(tp); + xfs_trans_apply_dquot_deltas(tp); error = xfs_trans_run_precommits(tp); if (error) @@ -873,13 +869,6 @@ __xfs_trans_commit( ASSERT(tp->t_ticket != NULL); - /* - * If we need to update the superblock, then do it now. 
- */ - if (tp->t_flags & XFS_TRANS_SB_DIRTY) - xfs_trans_apply_sb_deltas(tp); - xfs_trans_apply_dquot_deltas(tp); - xlog_cil_commit(log, tp, &commit_seq, regrant); xfs_trans_free(tp); @@ -905,7 +894,7 @@ out_unreserve: * the dqinfo portion to be. All that means is that we have some * (non-persistent) quota reservations that need to be unreserved. */ - xfs_trans_unreserve_and_mod_dquots(tp); + xfs_trans_unreserve_and_mod_dquots(tp, true); if (tp->t_ticket) { if (regrant && !xlog_is_shutdown(log)) xfs_log_ticket_regrant(log, tp->t_ticket); @@ -924,6 +913,20 @@ int xfs_trans_commit( struct xfs_trans *tp) { + /* + * Finish deferred items on final commit. Only permanent transactions + * should ever have deferred ops. + */ + WARN_ON_ONCE(!list_empty(&tp->t_dfops) && + !(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); + if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) { + int error = xfs_defer_finish_noroll(&tp); + if (error) { + xfs_trans_cancel(tp); + return error; + } + } + return __xfs_trans_commit(tp, false); } @@ -985,7 +988,7 @@ xfs_trans_cancel( } #endif xfs_trans_unreserve_and_mod_sb(tp); - xfs_trans_unreserve_and_mod_dquots(tp); + xfs_trans_unreserve_and_mod_dquots(tp, false); if (tp->t_ticket) { xfs_log_ticket_ungrant(log, tp->t_ticket); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 8ede9d099d1f..f56d62dced97 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -360,7 +360,7 @@ xfsaild_resubmit_item( /* protected by ail_lock */ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { - if (bp->b_flags & _XBF_INODES) + if (bp->b_flags & (_XBF_INODES | _XBF_DQUOTS)) clear_bit(XFS_LI_FAILED, &lip->li_flags); else xfs_clear_li_failed(lip); diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index b368e13424c4..b92eeaa1a2a9 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -602,6 +602,24 @@ xfs_trans_apply_dquot_deltas( ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count); ASSERT(dqp->q_ino.reserved >= dqp->q_ino.count); ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count); + + /* + * We've applied the count changes and given back + * whatever reservation we didn't use. Zero out the + * dqtrx fields. + */ + qtrx->qt_blk_res = 0; + qtrx->qt_bcount_delta = 0; + qtrx->qt_delbcnt_delta = 0; + + qtrx->qt_rtblk_res = 0; + qtrx->qt_rtblk_res_used = 0; + qtrx->qt_rtbcount_delta = 0; + qtrx->qt_delrtb_delta = 0; + + qtrx->qt_ino_res = 0; + qtrx->qt_ino_res_used = 0; + qtrx->qt_icount_delta = 0; } } } @@ -638,7 +656,8 @@ xfs_trans_unreserve_and_mod_dquots_hook( */ void xfs_trans_unreserve_and_mod_dquots( - struct xfs_trans *tp) + struct xfs_trans *tp, + bool already_locked) { int i, j; struct xfs_dquot *dqp; @@ -667,10 +686,12 @@ xfs_trans_unreserve_and_mod_dquots( * about the number of blocks used field, or deltas. * Also we don't bother to zero the fields. */ - locked = false; + locked = already_locked; if (qtrx->qt_blk_res) { - xfs_dqlock(dqp); - locked = true; + if (!locked) { + xfs_dqlock(dqp); + locked = true; + } dqp->q_blk.reserved -= (xfs_qcnt_t)qtrx->qt_blk_res; } @@ -691,7 +712,7 @@ xfs_trans_unreserve_and_mod_dquots( dqp->q_rtb.reserved -= (xfs_qcnt_t)qtrx->qt_rtblk_res; } - if (locked) + if (locked && !already_locked) xfs_dqunlock(dqp); } |
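For readers skimming the xfs_qm_bhv.c hunk above: the reworked xfs_fill_statvfs_from_dquot() no longer overwrites the statfs counters only when they exceed the quota limit; each counter is clamped with min() against the limit and against the space still unreserved under it, and realtime inodes use the dquot's realtime block counters. The fragment below is a minimal, standalone sketch of that clamping arithmetic only; the struct and function names are illustrative, not the kernel's.

```c
/*
 * Minimal sketch (not kernel code) of the clamping done by the reworked
 * xfs_fill_statvfs_from_dquot(): clamp the statfs block counters with min()
 * against the quota limit and the space still unreserved under it.
 * Struct and function names here are illustrative.
 */
#include <stdint.h>

struct quota_res {
	uint64_t softlimit;	/* 0 means "not set" */
	uint64_t hardlimit;	/* 0 means "not set" */
	uint64_t reserved;	/* blocks already reserved against the limit */
};

static uint64_t min_u64(uint64_t a, uint64_t b)
{
	return a < b ? a : b;
}

static void clamp_statfs_blocks(uint64_t *f_blocks, uint64_t *f_bfree,
				uint64_t *f_bavail,
				const struct quota_res *res)
{
	/* The soft limit governs when set; otherwise fall back to the hard limit. */
	uint64_t limit = res->softlimit ? res->softlimit : res->hardlimit;
	uint64_t remaining = 0;

	if (!limit)
		return;		/* no limit configured: report fs-wide numbers */

	if (limit > res->reserved)
		remaining = limit - res->reserved;

	/* Never report more total or free space than the quota allows. */
	*f_blocks = min_u64(*f_blocks, limit);
	*f_bfree  = min_u64(*f_bfree, remaining);
	*f_bavail = min_u64(*f_bavail, remaining);
}
```

Using min() keeps whichever figure is smaller, the filesystem-wide counter or the quota-derived one, which is what a caller inside a project-quota directory should see; the inode counters get the same treatment in the hunk.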
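Similarly, the xfs_notify_failure.c changes pull the offset/length translation that used to live inline in xfs_dax_notify_failure() into a helper, xfs_dax_translate_range(), so the log-device path can reuse it. Below is a self-contained sketch of that arithmetic; the function and parameter names are illustrative, and the kernel helper additionally converts the byte results to 512-byte basic blocks with BTOBB().

```c
/*
 * Standalone sketch of the range translation factored out as
 * xfs_dax_translate_range(): rebase a byte range given relative to the whole
 * DAX device onto the filesystem's partition, rejecting ranges entirely
 * outside it and clipping ranges that straddle its edges.
 */
#include <errno.h>
#include <stdint.h>

static int translate_range(uint64_t part_start, uint64_t part_len,
			   uint64_t offset, uint64_t len,
			   uint64_t *out_off, uint64_t *out_len)
{
	uint64_t part_end = part_start + part_len - 1;

	/* (0, UINT64_MAX) is the convention for "the whole device". */
	if (offset == 0 && len == UINT64_MAX) {
		offset = part_start;
		len = part_len;
	}

	/* Range lies entirely before or after the partition. */
	if (offset + len - 1 < part_start || offset > part_end)
		return -ENXIO;

	/* Rebase the start of the range onto the partition... */
	if (offset > part_start) {
		offset -= part_start;
	} else {
		len -= part_start - offset;
		offset = 0;
	}
	/* ...and clip the tail, mirroring the check in the hunk above. */
	if (offset + len - 1 > part_end)
		len = part_end - offset + 1;

	*out_off = offset;
	*out_len = len;
	return 0;
}
```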