From 4aaf15d1aa9673dd2cc45c48957c946cb4aa2694 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 8 Mar 2010 11:24:07 +1100 Subject: xfs: Add inode pin counts to traces We don't record pin counts in inode events right now, and this makes it difficult to track down problems related to pinning inodes. Add the pin count to the inode trace class and add trace events for pinning and unpinning inodes. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_trace.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/xfs/linux-2.6/xfs_trace.h') diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index fcaa62f0799e..65371859c753 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -562,18 +562,21 @@ DECLARE_EVENT_CLASS(xfs_inode_class, __field(dev_t, dev) __field(xfs_ino_t, ino) __field(int, count) + __field(int, pincount) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->count = atomic_read(&VFS_I(ip)->i_count); + __entry->pincount = atomic_read(&ip->i_pincount); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, + __entry->pincount, (char *)__entry->caller_ip) ) @@ -583,6 +586,10 @@ DEFINE_EVENT(xfs_inode_class, name, \ TP_ARGS(ip, caller_ip)) DEFINE_INODE_EVENT(xfs_ihold); DEFINE_INODE_EVENT(xfs_irele); +DEFINE_INODE_EVENT(xfs_inode_pin); +DEFINE_INODE_EVENT(xfs_inode_unpin); +DEFINE_INODE_EVENT(xfs_inode_unpin_nowait); + /* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ DEFINE_INODE_EVENT(xfs_inode); #define xfs_itrace_entry(ip) \ -- cgit v1.2.3 From 9abbc539bf7f299819ad0a235064a1b643ab6407 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 13 Apr 2010 15:06:46 +1000 Subject: xfs: add log item recovery tracing Currently there is no tracing in log recovery, so it is difficult to determine what is going on when something goes wrong. Add tracing for log item recovery to provide visibility into the log recovery process. The tracing added shows regions being extracted from the log transactions and added to the transaction hash forming recovery items, followed by the reordering, cancelling and finally recovery of the items. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_trace.c | 3 + fs/xfs/linux-2.6/xfs_trace.h | 138 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_buf_item.h | 2 +- fs/xfs/xfs_log_recover.c | 44 +++++++++++--- fs/xfs/xfs_trans.h | 9 +++ 5 files changed, 187 insertions(+), 9 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_trace.h') diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c index 5a107601e969..2a460581308f 100644 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ b/fs/xfs/linux-2.6/xfs_trace.c @@ -50,6 +50,9 @@ #include "xfs_aops.h" #include "quota/xfs_dquot_item.h" #include "quota/xfs_dquot.h" +#include "xfs_log_recover.h" +#include "xfs_buf_item.h" +#include "xfs_inode_item.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 65371859c753..33f7d2b7afeb 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -32,6 +32,10 @@ struct xfs_da_node_entry; struct xfs_dquot; struct xlog_ticket; struct log; +struct xlog_recover; +struct xlog_recover_item; +struct xfs_buf_log_format; +struct xfs_inode_log_format; DECLARE_EVENT_CLASS(xfs_attr_list_class, TP_PROTO(struct xfs_attr_list_context *ctx), @@ -1502,6 +1506,140 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \ DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); +DECLARE_EVENT_CLASS(xfs_log_recover_item_class, + TP_PROTO(struct log *log, struct xlog_recover *trans, + struct xlog_recover_item *item, int pass), + TP_ARGS(log, trans, item, pass), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long, item) + __field(xlog_tid_t, tid) + __field(int, type) + __field(int, pass) + __field(int, count) + __field(int, total) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->item = (unsigned long)item; + __entry->tid = trans->r_log_tid; + __entry->type = ITEM_TYPE(item); + __entry->pass = pass; + __entry->count = item->ri_cnt; + __entry->total = item->ri_total; + ), + TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s " + "item region count/total %d/%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tid, + __entry->pass, + (void *)__entry->item, + __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), + __entry->count, + __entry->total) +) + +#define DEFINE_LOG_RECOVER_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_item_class, name, \ + TP_PROTO(struct log *log, struct xlog_recover *trans, \ + struct xlog_recover_item *item, int pass), \ + TP_ARGS(log, trans, item, pass)) + +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover); + +DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, + TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), + TP_ARGS(log, buf_f), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(__int64_t, blkno) + __field(unsigned short, len) + __field(unsigned short, flags) + __field(unsigned short, size) + __field(unsigned int, map_size) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->blkno = buf_f->blf_blkno; + __entry->len = buf_f->blf_len; + __entry->flags = buf_f->blf_flags; + __entry->size = buf_f->blf_size; + __entry->map_size = buf_f->blf_map_size; + ), + TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, " + "map_size %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->blkno, + __entry->len, + __entry->flags, + __entry->size, + __entry->map_size) +) + +#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ + TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \ + TP_ARGS(log, buf_f)) + +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); + +DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, + TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), + TP_ARGS(log, in_f), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned short, size) + __field(int, fields) + __field(unsigned short, asize) + __field(unsigned short, dsize) + __field(__int64_t, blkno) + __field(int, len) + __field(int, boffset) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->ino = in_f->ilf_ino; + __entry->size = in_f->ilf_size; + __entry->fields = in_f->ilf_fields; + __entry->asize = in_f->ilf_asize; + __entry->dsize = in_f->ilf_dsize; + __entry->blkno = in_f->ilf_blkno; + __entry->len = in_f->ilf_len; + __entry->boffset = in_f->ilf_boffset; + ), + TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, " + "dsize %d, blkno 0x%llx, len %d, boffset %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->fields, + __entry->asize, + __entry->dsize, + __entry->blkno, + __entry->len, + __entry->boffset) +) +#define DEFINE_LOG_RECOVER_INO_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ + TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \ + TP_ARGS(log, in_f)) + +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 217f34af00cb..df4454511f73 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -26,7 +26,7 @@ extern kmem_zone_t *xfs_buf_item_zone; * have been logged. * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything. */ -typedef struct xfs_buf_log_format_t { +typedef struct xfs_buf_log_format { unsigned short blf_type; /* buf log item type indicator */ unsigned short blf_size; /* size of this item */ ushort blf_flags; /* misc state */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 22e6efdc17ea..f21eb8ad2d97 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1408,6 +1408,7 @@ xlog_recover_add_item( STATIC int xlog_recover_add_to_cont_trans( + struct log *log, xlog_recover_t *trans, xfs_caddr_t dp, int len) @@ -1434,6 +1435,7 @@ xlog_recover_add_to_cont_trans( memcpy(&ptr[old_len], dp, len); /* d, s, l */ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; + trace_xfs_log_recover_item_add_cont(log, trans, item, 0); return 0; } @@ -1452,6 +1454,7 @@ xlog_recover_add_to_cont_trans( */ STATIC int xlog_recover_add_to_trans( + struct log *log, xlog_recover_t *trans, xfs_caddr_t dp, int len) @@ -1510,6 +1513,7 @@ xlog_recover_add_to_trans( item->ri_buf[item->ri_cnt].i_addr = ptr; item->ri_buf[item->ri_cnt].i_len = len; item->ri_cnt++; + trace_xfs_log_recover_item_add(log, trans, item, 0); return 0; } @@ -1521,7 +1525,9 @@ xlog_recover_add_to_trans( */ STATIC int xlog_recover_reorder_trans( - xlog_recover_t *trans) + struct log *log, + xlog_recover_t *trans, + int pass) { xlog_recover_item_t *item, *n; LIST_HEAD(sort_list); @@ -1535,6 +1541,8 @@ xlog_recover_reorder_trans( switch (ITEM_TYPE(item)) { case XFS_LI_BUF: if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) { + trace_xfs_log_recover_item_reorder_head(log, + trans, item, pass); list_move(&item->ri_list, &trans->r_itemq); break; } @@ -1543,6 +1551,8 @@ xlog_recover_reorder_trans( case XFS_LI_QUOTAOFF: case XFS_LI_EFD: case XFS_LI_EFI: + trace_xfs_log_recover_item_reorder_tail(log, + trans, item, pass); list_move_tail(&item->ri_list, &trans->r_itemq); break; default: @@ -1592,8 +1602,10 @@ xlog_recover_do_buffer_pass1( /* * If this isn't a cancel buffer item, then just return. */ - if (!(flags & XFS_BLI_CANCEL)) + if (!(flags & XFS_BLI_CANCEL)) { + trace_xfs_log_recover_buf_not_cancel(log, buf_f); return; + } /* * Insert an xfs_buf_cancel record into the hash table of @@ -1627,6 +1639,7 @@ xlog_recover_do_buffer_pass1( while (nextp != NULL) { if (nextp->bc_blkno == blkno && nextp->bc_len == len) { nextp->bc_refcount++; + trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); return; } prevp = nextp; @@ -1640,6 +1653,7 @@ xlog_recover_do_buffer_pass1( bcp->bc_refcount = 1; bcp->bc_next = NULL; prevp->bc_next = bcp; + trace_xfs_log_recover_buf_cancel_add(log, buf_f); } /* @@ -1779,6 +1793,8 @@ xlog_recover_do_inode_buffer( unsigned int *data_map = NULL; unsigned int map_size = 0; + trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); + switch (buf_f->blf_type) { case XFS_LI_BUF: data_map = buf_f->blf_data_map; @@ -1874,6 +1890,7 @@ xlog_recover_do_inode_buffer( /*ARGSUSED*/ STATIC void xlog_recover_do_reg_buffer( + struct xfs_mount *mp, xlog_recover_item_t *item, xfs_buf_t *bp, xfs_buf_log_format_t *buf_f) @@ -1885,6 +1902,8 @@ xlog_recover_do_reg_buffer( unsigned int map_size = 0; int error; + trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); + switch (buf_f->blf_type) { case XFS_LI_BUF: data_map = buf_f->blf_data_map; @@ -2083,6 +2102,8 @@ xlog_recover_do_dquot_buffer( { uint type; + trace_xfs_log_recover_buf_dquot_buf(log, buf_f); + /* * Filesystems are required to send in quota flags at mount time. */ @@ -2103,7 +2124,7 @@ xlog_recover_do_dquot_buffer( if (log->l_quotaoffs_flag & type) return; - xlog_recover_do_reg_buffer(item, bp, buf_f); + xlog_recover_do_reg_buffer(mp, item, bp, buf_f); } /* @@ -2164,9 +2185,11 @@ xlog_recover_do_buffer_trans( */ cancel = xlog_recover_do_buffer_pass2(log, buf_f); if (cancel) { + trace_xfs_log_recover_buf_cancel(log, buf_f); return 0; } } + trace_xfs_log_recover_buf_recover(log, buf_f); switch (buf_f->blf_type) { case XFS_LI_BUF: blkno = buf_f->blf_blkno; @@ -2204,7 +2227,7 @@ xlog_recover_do_buffer_trans( (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); } else { - xlog_recover_do_reg_buffer(item, bp, buf_f); + xlog_recover_do_reg_buffer(mp, item, bp, buf_f); } if (error) return XFS_ERROR(error); @@ -2284,8 +2307,10 @@ xlog_recover_do_inode_trans( if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len, 0)) { error = 0; + trace_xfs_log_recover_inode_cancel(log, in_f); goto error; } + trace_xfs_log_recover_inode_recover(log, in_f); bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, XBF_LOCK); @@ -2337,6 +2362,7 @@ xlog_recover_do_inode_trans( /* do nothing */ } else { xfs_buf_relse(bp); + trace_xfs_log_recover_inode_skip(log, in_f); error = 0; goto error; } @@ -2758,11 +2784,12 @@ xlog_recover_do_trans( int error = 0; xlog_recover_item_t *item; - error = xlog_recover_reorder_trans(trans); + error = xlog_recover_reorder_trans(log, trans, pass); if (error) return error; list_for_each_entry(item, &trans->r_itemq, ri_list) { + trace_xfs_log_recover_item_recover(log, trans, item, pass); switch (ITEM_TYPE(item)) { case XFS_LI_BUF: error = xlog_recover_do_buffer_trans(log, item, pass); @@ -2919,8 +2946,9 @@ xlog_recover_process_data( error = xlog_recover_unmount_trans(trans); break; case XLOG_WAS_CONT_TRANS: - error = xlog_recover_add_to_cont_trans(trans, - dp, be32_to_cpu(ohead->oh_len)); + error = xlog_recover_add_to_cont_trans(log, + trans, dp, + be32_to_cpu(ohead->oh_len)); break; case XLOG_START_TRANS: xlog_warn( @@ -2930,7 +2958,7 @@ xlog_recover_process_data( break; case 0: case XLOG_CONTINUE_TRANS: - error = xlog_recover_add_to_trans(trans, + error = xlog_recover_add_to_trans(log, trans, dp, be32_to_cpu(ohead->oh_len)); break; default: diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 82574ef36580..c62beee0921e 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -49,6 +49,15 @@ typedef struct xfs_trans_header { #define XFS_LI_DQUOT 0x123d #define XFS_LI_QUOTAOFF 0x123e +#define XFS_LI_TYPE_DESC \ + { XFS_LI_EFI, "XFS_LI_EFI" }, \ + { XFS_LI_EFD, "XFS_LI_EFD" }, \ + { XFS_LI_IUNLINK, "XFS_LI_IUNLINK" }, \ + { XFS_LI_INODE, "XFS_LI_INODE" }, \ + { XFS_LI_BUF, "XFS_LI_BUF" }, \ + { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \ + { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" } + /* * Transaction types. Used to distinguish types of buffers. */ -- cgit v1.2.3 From 368e136174344c417bad6ff0380b7b3f574bf120 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 13 Apr 2010 15:06:50 +1000 Subject: xfs: remove duplicate code from dquot reclaim The dquot shaker and the free-list reclaim code use exactly the same algorithm but the code is duplicated and slightly different in each case. Make the shaker code use the single dquot reclaim code to remove the code duplication. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_trace.h | 2 - fs/xfs/quota/xfs_qm.c | 264 ++++++++++++------------------------------- 2 files changed, 73 insertions(+), 193 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_trace.h') diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 33f7d2b7afeb..ce6a968a8f40 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -653,8 +653,6 @@ DEFINE_EVENT(xfs_dquot_class, name, \ TP_PROTO(struct xfs_dquot *dqp), \ TP_ARGS(dqp)) DEFINE_DQUOT_EVENT(xfs_dqadjust); -DEFINE_DQUOT_EVENT(xfs_dqshake_dirty); -DEFINE_DQUOT_EVENT(xfs_dqshake_unlink); DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 855827320ff6..5ca65c834bbd 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1926,53 +1926,46 @@ xfs_qm_init_quotainos( } + /* - * Traverse the freelist of dquots and attempt to reclaim a maximum of - * 'howmany' dquots. This operation races with dqlookup(), and attempts to - * favor the lookup function ... - * XXXsup merge this with qm_reclaim_one(). + * Just pop the least recently used dquot off the freelist and + * recycle it. The returned dquot is locked. */ -STATIC int -xfs_qm_shake_freelist( - int howmany) +STATIC xfs_dquot_t * +xfs_qm_dqreclaim_one(void) { - int nreclaimed; - xfs_dqhash_t *hash; - xfs_dquot_t *dqp, *nextdqp; + xfs_dquot_t *dqpout; + xfs_dquot_t *dqp; int restarts; - int nflushes; - if (howmany <= 0) - return 0; - - nreclaimed = 0; restarts = 0; - nflushes = 0; + dqpout = NULL; -#ifdef QUOTADEBUG - cmn_err(CE_DEBUG, "Shake free 0x%x", howmany); -#endif - /* lock order is : hashchainlock, freelistlock, mplistlock */ - tryagain: + /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ +startagain: xfs_qm_freelist_lock(xfs_Gqm); - for (dqp = xfs_Gqm->qm_dqfreelist.qh_next; - ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) && - nreclaimed < howmany); ) { + FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) { struct xfs_mount *mp = dqp->q_mount; xfs_dqlock(dqp); /* * We are racing with dqlookup here. Naturally we don't - * want to reclaim a dquot that lookup wants. + * want to reclaim a dquot that lookup wants. We release the + * freelist lock and start over, so that lookup will grab + * both the dquot and the freelistlock. */ if (dqp->dq_flags & XFS_DQ_WANT) { + ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); + + trace_xfs_dqreclaim_want(dqp); + xfs_dqunlock(dqp); xfs_qm_freelist_unlock(xfs_Gqm); if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - return nreclaimed; + return NULL; XQM_STATS_INC(xqmstats.xs_qm_dqwants); - goto tryagain; + goto startagain; } /* @@ -1985,19 +1978,22 @@ xfs_qm_shake_freelist( ASSERT(! XFS_DQ_IS_DIRTY(dqp)); ASSERT(dqp->HL_PREVP == NULL); ASSERT(list_empty(&dqp->q_mplist)); + XQM_FREELIST_REMOVE(dqp); + xfs_dqunlock(dqp); + dqpout = dqp; XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); - nextdqp = dqp->dq_flnext; - goto off_freelist; + break; } + ASSERT(dqp->q_hash); ASSERT(!list_empty(&dqp->q_mplist)); + /* * Try to grab the flush lock. If this dquot is in the process of * getting flushed to disk, we don't want to reclaim it. */ if (!xfs_dqflock_nowait(dqp)) { xfs_dqunlock(dqp); - dqp = dqp->dq_flnext; continue; } @@ -2010,21 +2006,21 @@ xfs_qm_shake_freelist( if (XFS_DQ_IS_DIRTY(dqp)) { int error; - trace_xfs_dqshake_dirty(dqp); + trace_xfs_dqreclaim_dirty(dqp); /* * We flush it delayed write, so don't bother - * releasing the mplock. + * releasing the freelist lock. */ error = xfs_qm_dqflush(dqp, 0); if (error) { xfs_fs_cmn_err(CE_WARN, mp, - "xfs_qm_dqflush_all: dquot %p flush failed", dqp); + "xfs_qm_dqreclaim: dquot %p flush failed", dqp); } xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ - dqp = dqp->dq_flnext; continue; } + /* * We're trying to get the hashlock out of order. This races * with dqlookup; so, we giveup and goto the next dquot if @@ -2033,57 +2029,71 @@ xfs_qm_shake_freelist( * waiting for the freelist lock. */ if (!mutex_trylock(&dqp->q_hash->qh_lock)) { - xfs_dqfunlock(dqp); - xfs_dqunlock(dqp); - dqp = dqp->dq_flnext; - continue; + restarts++; + goto dqfunlock; } + /* * This races with dquot allocation code as well as dqflush_all * and reclaim code. So, if we failed to grab the mplist lock, * giveup everything and start over. */ - hash = dqp->q_hash; - ASSERT(hash); if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { - /* XXX put a sentinel so that we can come back here */ + restarts++; + mutex_unlock(&dqp->q_hash->qh_lock); xfs_dqfunlock(dqp); xfs_dqunlock(dqp); - mutex_unlock(&hash->qh_lock); xfs_qm_freelist_unlock(xfs_Gqm); - if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - return nreclaimed; - goto tryagain; + if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS) + return NULL; + goto startagain; } - trace_xfs_dqshake_unlink(dqp); - -#ifdef QUOTADEBUG - cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n", - dqp, be32_to_cpu(dqp->q_core.d_id)); -#endif ASSERT(dqp->q_nrefs == 0); - nextdqp = dqp->dq_flnext; - XQM_HASHLIST_REMOVE(hash, dqp); list_del_init(&dqp->q_mplist); mp->m_quotainfo->qi_dquots--; mp->m_quotainfo->qi_dqreclaims++; - xfs_dqfunlock(dqp); - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); - mutex_unlock(&hash->qh_lock); - - off_freelist: + XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); XQM_FREELIST_REMOVE(dqp); + dqpout = dqp; + mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + mutex_unlock(&dqp->q_hash->qh_lock); +dqfunlock: + xfs_dqfunlock(dqp); xfs_dqunlock(dqp); - nreclaimed++; - XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims); - xfs_qm_dqdestroy(dqp); - dqp = nextdqp; + if (dqpout) + break; + if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) + return NULL; } xfs_qm_freelist_unlock(xfs_Gqm); - return nreclaimed; + return dqpout; } +/* + * Traverse the freelist of dquots and attempt to reclaim a maximum of + * 'howmany' dquots. This operation races with dqlookup(), and attempts to + * favor the lookup function ... + */ +STATIC int +xfs_qm_shake_freelist( + int howmany) +{ + int nreclaimed = 0; + xfs_dquot_t *dqp; + + if (howmany <= 0) + return 0; + + while (nreclaimed < howmany) { + dqp = xfs_qm_dqreclaim_one(); + if (!dqp) + return nreclaimed; + xfs_qm_dqdestroy(dqp); + nreclaimed++; + } + return nreclaimed; +} /* * The kmem_shake interface is invoked when memory is running low. @@ -2115,134 +2125,6 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) } -/* - * Just pop the least recently used dquot off the freelist and - * recycle it. The returned dquot is locked. - */ -STATIC xfs_dquot_t * -xfs_qm_dqreclaim_one(void) -{ - xfs_dquot_t *dqpout; - xfs_dquot_t *dqp; - int restarts; - int nflushes; - - restarts = 0; - dqpout = NULL; - nflushes = 0; - - /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ - startagain: - xfs_qm_freelist_lock(xfs_Gqm); - - FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) { - struct xfs_mount *mp = dqp->q_mount; - xfs_dqlock(dqp); - - /* - * We are racing with dqlookup here. Naturally we don't - * want to reclaim a dquot that lookup wants. We release the - * freelist lock and start over, so that lookup will grab - * both the dquot and the freelistlock. - */ - if (dqp->dq_flags & XFS_DQ_WANT) { - ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); - - trace_xfs_dqreclaim_want(dqp); - - xfs_dqunlock(dqp); - xfs_qm_freelist_unlock(xfs_Gqm); - if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - return NULL; - XQM_STATS_INC(xqmstats.xs_qm_dqwants); - goto startagain; - } - - /* - * If the dquot is inactive, we are assured that it is - * not on the mplist or the hashlist, and that makes our - * life easier. - */ - if (dqp->dq_flags & XFS_DQ_INACTIVE) { - ASSERT(mp == NULL); - ASSERT(! XFS_DQ_IS_DIRTY(dqp)); - ASSERT(dqp->HL_PREVP == NULL); - ASSERT(list_empty(&dqp->q_mplist)); - XQM_FREELIST_REMOVE(dqp); - xfs_dqunlock(dqp); - dqpout = dqp; - XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); - break; - } - - ASSERT(dqp->q_hash); - ASSERT(!list_empty(&dqp->q_mplist)); - - /* - * Try to grab the flush lock. If this dquot is in the process of - * getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) { - xfs_dqunlock(dqp); - continue; - } - - /* - * We have the flush lock so we know that this is not in the - * process of being flushed. So, if this is dirty, flush it - * DELWRI so that we don't get a freelist infested with - * dirty dquots. - */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; - - trace_xfs_dqreclaim_dirty(dqp); - - /* - * We flush it delayed write, so don't bother - * releasing the freelist lock. - */ - error = xfs_qm_dqflush(dqp, 0); - if (error) { - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_qm_dqreclaim: dquot %p flush failed", dqp); - } - xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ - continue; - } - - if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { - xfs_dqfunlock(dqp); - xfs_dqunlock(dqp); - continue; - } - - if (!mutex_trylock(&dqp->q_hash->qh_lock)) - goto mplistunlock; - - trace_xfs_dqreclaim_unlink(dqp); - - ASSERT(dqp->q_nrefs == 0); - list_del_init(&dqp->q_mplist); - mp->m_quotainfo->qi_dquots--; - mp->m_quotainfo->qi_dqreclaims++; - XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); - XQM_FREELIST_REMOVE(dqp); - dqpout = dqp; - mutex_unlock(&dqp->q_hash->qh_lock); - mplistunlock: - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); - xfs_dqfunlock(dqp); - xfs_dqunlock(dqp); - if (dqpout) - break; - } - - xfs_qm_freelist_unlock(xfs_Gqm); - return dqpout; -} - - /*------------------------------------------------------------------*/ /* -- cgit v1.2.3 From e6a81f13aa9aa20ef03174210aed24791865b05e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 13 Apr 2010 15:06:51 +1000 Subject: xfs: convert the dquot hash list to use list heads Convert the dquot hash list on the filesystem to use listhead infrastructure rather than the roll-your-own in the quota code. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_trace.h | 1 - fs/xfs/quota/xfs_dquot.c | 32 ++++++++++---------------------- fs/xfs/quota/xfs_dquot.h | 9 ++------- fs/xfs/quota/xfs_qm.c | 9 +++++---- fs/xfs/quota/xfs_qm_syscalls.c | 34 +++++++++++++--------------------- fs/xfs/quota/xfs_quota_priv.h | 33 --------------------------------- 6 files changed, 30 insertions(+), 88 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_trace.h') diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index ce6a968a8f40..8a319cfd2901 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -667,7 +667,6 @@ DEFINE_DQUOT_EVENT(xfs_dqread_fail); DEFINE_DQUOT_EVENT(xfs_dqlookup_found); DEFINE_DQUOT_EVENT(xfs_dqlookup_want); DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist); -DEFINE_DQUOT_EVENT(xfs_dqlookup_move); DEFINE_DQUOT_EVENT(xfs_dqlookup_done); DEFINE_DQUOT_EVENT(xfs_dqget_hit); DEFINE_DQUOT_EVENT(xfs_dqget_miss); diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 838289b92bb9..ad64ab62d9c5 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -122,8 +122,7 @@ xfs_qm_dqinit( dqp->q_nrefs = 0; dqp->q_blkno = 0; INIT_LIST_HEAD(&dqp->q_mplist); - dqp->HL_NEXT = NULL; - dqp->HL_PREVP = NULL; + INIT_LIST_HEAD(&dqp->q_hashlist); dqp->q_bufoffset = 0; dqp->q_fileoffset = 0; dqp->q_transp = NULL; @@ -752,7 +751,6 @@ xfs_qm_dqlookup( { xfs_dquot_t *dqp; uint flist_locked; - xfs_dquot_t *d; ASSERT(mutex_is_locked(&qh->qh_lock)); @@ -761,7 +759,7 @@ xfs_qm_dqlookup( /* * Traverse the hashchain looking for a match */ - for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) { + list_for_each_entry(dqp, &qh->qh_list, q_hashlist) { /* * We already have the hashlock. We don't need the * dqlock to look at the id field of the dquot, since the @@ -828,21 +826,10 @@ xfs_qm_dqlookup( * move the dquot to the front of the hashchain */ ASSERT(mutex_is_locked(&qh->qh_lock)); - if (dqp->HL_PREVP != &qh->qh_next) { - trace_xfs_dqlookup_move(dqp); - if ((d = dqp->HL_NEXT)) - d->HL_PREVP = dqp->HL_PREVP; - *(dqp->HL_PREVP) = d; - d = qh->qh_next; - d->HL_PREVP = &dqp->HL_NEXT; - dqp->HL_NEXT = d; - dqp->HL_PREVP = &qh->qh_next; - qh->qh_next = dqp; - } + list_move(&dqp->q_hashlist, &qh->qh_list); trace_xfs_dqlookup_done(dqp); *O_dqpp = dqp; - ASSERT(mutex_is_locked(&qh->qh_lock)); - return (0); + return 0; } } @@ -1034,7 +1021,8 @@ xfs_qm_dqget( */ ASSERT(mutex_is_locked(&h->qh_lock)); dqp->q_hash = h; - XQM_HASHLIST_INSERT(h, dqp); + list_add(&dqp->q_hashlist, &h->qh_list); + h->qh_version++; /* * Attach this dquot to this filesystem's list of all dquots, @@ -1387,7 +1375,7 @@ int xfs_qm_dqpurge( xfs_dquot_t *dqp) { - xfs_dqhash_t *thishash; + xfs_dqhash_t *qh = dqp->q_hash; xfs_mount_t *mp = dqp->q_mount; ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock)); @@ -1453,8 +1441,8 @@ xfs_qm_dqpurge( ASSERT(XFS_FORCED_SHUTDOWN(mp) || !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); - thishash = dqp->q_hash; - XQM_HASHLIST_REMOVE(thishash, dqp); + list_del_init(&dqp->q_hashlist); + qh->qh_version++; list_del_init(&dqp->q_mplist); mp->m_quotainfo->qi_dqreclaims++; mp->m_quotainfo->qi_dquots--; @@ -1470,7 +1458,7 @@ xfs_qm_dqpurge( memset(&dqp->q_core, 0, sizeof(dqp->q_core)); xfs_dqfunlock(dqp); xfs_dqunlock(dqp); - mutex_unlock(&thishash->qh_lock); + mutex_unlock(&qh->qh_lock); return (0); } diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 6992a67c165a..169b3c24af79 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -33,17 +33,12 @@ * The hash chain headers (hash buckets) */ typedef struct xfs_dqhash { - struct xfs_dquot *qh_next; + struct list_head qh_list; struct mutex qh_lock; uint qh_version; /* ever increasing version */ uint qh_nelems; /* number of dquots on the list */ } xfs_dqhash_t; -typedef struct xfs_dqlink { - struct xfs_dquot *ql_next; /* forward link */ - struct xfs_dquot **ql_prevp; /* pointer to prev ql_next */ -} xfs_dqlink_t; - struct xfs_mount; struct xfs_trans; @@ -57,7 +52,6 @@ struct xfs_trans; typedef struct xfs_dqmarker { struct xfs_dquot*dqm_flnext; /* link to freelist: must be first */ struct xfs_dquot*dqm_flprev; - xfs_dqlink_t dqm_hashlist; /* link to the hash chain */ uint dqm_flags; /* various flags (XFS_DQ_*) */ } xfs_dqmarker_t; @@ -67,6 +61,7 @@ typedef struct xfs_dqmarker { typedef struct xfs_dquot { xfs_dqmarker_t q_lists; /* list ptrs, q_flags (marker) */ struct list_head q_mplist; /* mount's list of dquots */ + struct list_head q_hashlist; /* mount's list of dquots */ xfs_dqhash_t *q_hash; /* the hashchain header */ struct xfs_mount*q_mount; /* filesystem this relates to */ struct xfs_trans*q_transp; /* trans this belongs to currently */ diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 5ca65c834bbd..08e97f1ef653 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -277,7 +277,7 @@ xfs_qm_rele_quotafs_ref( if (dqp->dq_flags & XFS_DQ_INACTIVE) { ASSERT(dqp->q_mount == NULL); ASSERT(! XFS_DQ_IS_DIRTY(dqp)); - ASSERT(dqp->HL_PREVP == NULL); + ASSERT(list_empty(&dqp->q_hashlist)); ASSERT(list_empty(&dqp->q_mplist)); XQM_FREELIST_REMOVE(dqp); xfs_dqunlock(dqp); @@ -1176,7 +1176,7 @@ xfs_qm_list_init( int n) { mutex_init(&list->qh_lock); - list->qh_next = NULL; + INIT_LIST_HEAD(&list->qh_list); list->qh_version = 0; list->qh_nelems = 0; } @@ -1976,7 +1976,7 @@ startagain: if (dqp->dq_flags & XFS_DQ_INACTIVE) { ASSERT(mp == NULL); ASSERT(! XFS_DQ_IS_DIRTY(dqp)); - ASSERT(dqp->HL_PREVP == NULL); + ASSERT(list_empty(&dqp->q_hashlist)); ASSERT(list_empty(&dqp->q_mplist)); XQM_FREELIST_REMOVE(dqp); xfs_dqunlock(dqp); @@ -2053,7 +2053,8 @@ startagain: list_del_init(&dqp->q_mplist); mp->m_quotainfo->qi_dquots--; mp->m_quotainfo->qi_dqreclaims++; - XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); + list_del_init(&dqp->q_hashlist); + dqp->q_hash->qh_version++; XQM_FREELIST_REMOVE(dqp); dqpout = dqp; mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index c54fa7790bd8..c82e319f9df4 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -932,6 +932,7 @@ struct mutex qcheck_lock; typedef struct dqtest { xfs_dqmarker_t q_lists; + struct list_head q_hashlist; xfs_dqhash_t *q_hash; /* the hashchain header */ xfs_mount_t *q_mount; /* filesystem this relates to */ xfs_dqid_t d_id; /* user id or group id */ @@ -942,14 +943,9 @@ typedef struct dqtest { STATIC void xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) { - xfs_dquot_t *d; - if (((d) = (h)->qh_next)) - (d)->HL_PREVP = &((dqp)->HL_NEXT); - (dqp)->HL_NEXT = d; - (dqp)->HL_PREVP = &((h)->qh_next); - (h)->qh_next = (xfs_dquot_t *)dqp; - (h)->qh_version++; - (h)->qh_nelems++; + list_add(&dqp->q_hashlist, &h->qh_list); + h->qh_version++; + h->qh_nelems++; } STATIC void xfs_qm_dqtest_print( @@ -1061,9 +1057,7 @@ xfs_qm_internalqcheck_dqget( xfs_dqhash_t *h; h = DQTEST_HASH(mp, id, type); - for (d = (xfs_dqtest_t *) h->qh_next; d != NULL; - d = (xfs_dqtest_t *) d->HL_NEXT) { - /* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */ + list_for_each_entry(d, &h->qh_list, q_hashlist) { if (d->d_id == id && mp == d->q_mount) { *O_dq = d; return (0); @@ -1074,6 +1068,7 @@ xfs_qm_internalqcheck_dqget( d->d_id = id; d->q_mount = mp; d->q_hash = h; + INIT_LIST_HEAD(&d->q_hashlist); xfs_qm_hashinsert(h, d); *O_dq = d; return (0); @@ -1180,8 +1175,6 @@ xfs_qm_internalqcheck( xfs_ino_t lastino; int done, count; int i; - xfs_dqtest_t *d, *e; - xfs_dqhash_t *h1; int error; lastino = 0; @@ -1221,19 +1214,18 @@ xfs_qm_internalqcheck( } cmn_err(CE_DEBUG, "Checking results against system dquots"); for (i = 0; i < qmtest_hashmask; i++) { - h1 = &qmtest_udqtab[i]; - for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { + xfs_dqtest_t *d, *n; + xfs_dqhash_t *h; + + h = &qmtest_udqtab[i]; + list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { xfs_dqtest_cmp(d); - e = (xfs_dqtest_t *) d->HL_NEXT; kmem_free(d); - d = e; } - h1 = &qmtest_gdqtab[i]; - for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { + h = &qmtest_gdqtab[i]; + list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { xfs_dqtest_cmp(d); - e = (xfs_dqtest_t *) d->HL_NEXT; kmem_free(d); - d = e; } } diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h index 6f4bbae51aca..3a1b9aa763fc 100644 --- a/fs/xfs/quota/xfs_quota_priv.h +++ b/fs/xfs/quota/xfs_quota_priv.h @@ -72,46 +72,13 @@ !dqp->q_core.d_rtbcount && \ !dqp->q_core.d_icount) -#define HL_PREVP dq_hashlist.ql_prevp -#define HL_NEXT dq_hashlist.ql_next - - -#define _LIST_REMOVE(h, dqp, PVP, NXT) \ - { \ - xfs_dquot_t *d; \ - if (((d) = (dqp)->NXT)) \ - (d)->PVP = (dqp)->PVP; \ - *((dqp)->PVP) = d; \ - (dqp)->NXT = NULL; \ - (dqp)->PVP = NULL; \ - (h)->qh_version++; \ - (h)->qh_nelems--; \ - } - -#define _LIST_INSERT(h, dqp, PVP, NXT) \ - { \ - xfs_dquot_t *d; \ - if (((d) = (h)->qh_next)) \ - (d)->PVP = &((dqp)->NXT); \ - (dqp)->NXT = d; \ - (dqp)->PVP = &((h)->qh_next); \ - (h)->qh_next = dqp; \ - (h)->qh_version++; \ - (h)->qh_nelems++; \ - } - #define FOREACH_DQUOT_IN_FREELIST(dqp, qlist) \ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \ (dqp) = (dqp)->dq_flnext) -#define XQM_HASHLIST_INSERT(h, dqp) \ - _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT) - #define XQM_FREELIST_INSERT(h, dqp) \ xfs_qm_freelist_append(h, dqp) -#define XQM_HASHLIST_REMOVE(h, dqp) \ - _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT) #define XQM_FREELIST_REMOVE(dqp) \ xfs_qm_freelist_unlink(dqp) -- cgit v1.2.3