Diffstat (limited to 'fs/xfs')
-rw-r--r--   fs/xfs/Makefile            |   1
-rw-r--r--   fs/xfs/libxfs/xfs_bmap.c   |   7
-rw-r--r--   fs/xfs/libxfs/xfs_bmap.h   |   4
-rw-r--r--   fs/xfs/libxfs/xfs_btree.c  |  82
-rw-r--r--   fs/xfs/libxfs/xfs_sb.c     |  25
-rw-r--r--   fs/xfs/xfs_bmap_util.c     |  53
-rw-r--r--   fs/xfs/xfs_bmap_util.h     |   4
-rw-r--r--   fs/xfs/xfs_dquot.h         |  15
-rw-r--r--   fs/xfs/xfs_file.c          |  23
-rw-r--r--   fs/xfs/xfs_fs.h            |   3
-rw-r--r--   fs/xfs/xfs_icache.c        |  96
-rw-r--r--   fs/xfs/xfs_icache.h        |   3
-rw-r--r--   fs/xfs/xfs_iomap.c         |  23
-rw-r--r--   fs/xfs/xfs_linux.h         |  11
-rw-r--r--   fs/xfs/xfs_log.c           |   9
-rw-r--r--   fs/xfs/xfs_log_priv.h      |   2
-rw-r--r--   fs/xfs/xfs_mount.c         |  18
-rw-r--r--   fs/xfs/xfs_mount.h         |   1
-rw-r--r--   fs/xfs/xfs_super.c         |  12
-rw-r--r--   fs/xfs/xfs_sysfs.c         | 165
-rw-r--r--   fs/xfs/xfs_sysfs.h         |  59

21 files changed, 528 insertions, 88 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 0dfa26d626f5..d61799949580 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -86,6 +86,7 @@ xfs-y				+= xfs_aops.o \
 				   xfs_mru_cache.o \
 				   xfs_super.o \
 				   xfs_symlink.o \
+				   xfs_sysfs.o \
 				   xfs_trans.o \
 				   xfs_xattr.o \
 				   kmem.o \
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 224963b1efc0..de2d26d32844 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4294,8 +4294,8 @@ xfs_bmapi_delay(
 }
 
-int
-__xfs_bmapi_allocate(
+static int
+xfs_bmapi_allocate(
 	struct xfs_bmalloca	*bma)
 {
 	struct xfs_mount	*mp = bma->ip->i_mount;
@@ -4574,9 +4574,6 @@ xfs_bmapi_write(
 	bma.flist = flist;
 	bma.firstblock = firstblock;
 
-	if (flags & XFS_BMAPI_STACK_SWITCH)
-		bma.stack_switch = 1;
-
 	while (bno < end && n < *nmap) {
 		inhole = eof || bma.got.br_startoff > bno;
 		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 38ba36e9b2f0..b879ca56a64c 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -77,7 +77,6 @@ typedef	struct xfs_bmap_free
  * from written to unwritten, otherwise convert from unwritten to written.
  */
 #define XFS_BMAPI_CONVERT	0x040
-#define XFS_BMAPI_STACK_SWITCH	0x080
 
 #define XFS_BMAPI_FLAGS \
 	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
@@ -86,8 +85,7 @@ typedef	struct xfs_bmap_free
 	{ XFS_BMAPI_PREALLOC,	"PREALLOC" }, \
 	{ XFS_BMAPI_IGSTATE,	"IGSTATE" }, \
 	{ XFS_BMAPI_CONTIG,	"CONTIG" }, \
-	{ XFS_BMAPI_CONVERT,	"CONVERT" }, \
-	{ XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
+	{ XFS_BMAPI_CONVERT,	"CONVERT" }
 
 static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2435928b0765..8fe6a93ff473 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -33,6 +33,7 @@
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_cksum.h"
+#include "xfs_alloc.h"
 
 /*
  * Cursor allocation zone.
@@ -2323,7 +2324,7 @@ error1:
  * record (to be inserted into parent).
  */
 STATIC int					/* error */
-xfs_btree_split(
+__xfs_btree_split(
 	struct xfs_btree_cur	*cur,
 	int			level,
 	union xfs_btree_ptr	*ptrp,
@@ -2503,6 +2504,85 @@ error0:
 	return error;
 }
 
+struct xfs_btree_split_args {
+	struct xfs_btree_cur	*cur;
+	int			level;
+	union xfs_btree_ptr	*ptrp;
+	union xfs_btree_key	*key;
+	struct xfs_btree_cur	**curp;
+	int			*stat;		/* success/failure */
+	int			result;
+	bool			kswapd;	/* allocation in kswapd context */
+	struct completion	*done;
+	struct work_struct	work;
+};
+
+/*
+ * Stack switching interfaces for allocation
+ */
+static void
+xfs_btree_split_worker(
+	struct work_struct	*work)
+{
+	struct xfs_btree_split_args	*args = container_of(work,
+						struct xfs_btree_split_args, work);
+	unsigned long		pflags;
+	unsigned long		new_pflags = PF_FSTRANS;
+
+	/*
+	 * we are in a transaction context here, but may also be doing work
+	 * in kswapd context, and hence we may need to inherit that state
+	 * temporarily to ensure that we don't block waiting for memory reclaim
+	 * in any way.
+	 */
+	if (args->kswapd)
+		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+
+	current_set_flags_nested(&pflags, new_pflags);
+
+	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
+					 args->key, args->curp, args->stat);
+	complete(args->done);
+
+	current_restore_flags_nested(&pflags, new_pflags);
+}
+
+/*
+ * BMBT split requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. For the other
+ * btree types, just call directly to avoid the context switch overhead here.
+ */
+STATIC int					/* error */
+xfs_btree_split(
+	struct xfs_btree_cur	*cur,
+	int			level,
+	union xfs_btree_ptr	*ptrp,
+	union xfs_btree_key	*key,
+	struct xfs_btree_cur	**curp,
+	int			*stat)		/* success/failure */
+{
+	struct xfs_btree_split_args	args;
+	DECLARE_COMPLETION_ONSTACK(done);
+
+	if (cur->bc_btnum != XFS_BTNUM_BMAP)
+		return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
+
+	args.cur = cur;
+	args.level = level;
+	args.ptrp = ptrp;
+	args.key = key;
+	args.curp = curp;
+	args.stat = stat;
+	args.done = &done;
+	args.kswapd = current_is_kswapd();
+	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
+	queue_work(xfs_alloc_wq, &args.work);
+	wait_for_completion(&done);
+	destroy_work_on_stack(&args.work);
+	return args.result;
+}
+
+
 /*
  * Copy the old inode root contents into a real block and make the
  * broot point to it.
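The xfs_btree_split() wrapper above is an instance of a general kernel idiom: marshal the arguments into a struct that embeds a work_struct and a completion, queue it on a workqueue so the deep call runs on the worker's fresh stack, and sleep until the worker signals completion. A minimal sketch of the same idiom follows; it is kernel-context code, and the names deep_stack_args, deep_stack_worker, run_with_fresh_stack and my_wq are hypothetical, with the workqueue assumed to be allocated elsewhere.

#include <linux/workqueue.h>
#include <linux/completion.h>

/* Hypothetical argument block: one work item plus a completion to wait on. */
struct deep_stack_args {
	int			input;
	int			result;
	struct completion	*done;
	struct work_struct	work;
};

static void deep_stack_worker(struct work_struct *work)
{
	struct deep_stack_args *args =
		container_of(work, struct deep_stack_args, work);

	/* Runs on the worker's fresh stack, not the caller's deep one. */
	args->result = args->input * 2;		/* stand-in for the deep call */
	complete(args->done);
}

/* Hypothetical workqueue created elsewhere with alloc_workqueue(). */
extern struct workqueue_struct *my_wq;

static int run_with_fresh_stack(int input)
{
	struct deep_stack_args args;
	DECLARE_COMPLETION_ONSTACK(done);

	args.input = input;
	args.done = &done;
	INIT_WORK_ONSTACK(&args.work, deep_stack_worker);
	queue_work(my_wq, &args.work);
	wait_for_completion(&done);	/* caller sleeps; on-stack args stay live */
	destroy_work_on_stack(&args.work);
	return args.result;
}

Because the caller blocks on the completion, the on-stack args and done are guaranteed to outlive the worker, which is what makes the ONSTACK variants safe here.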
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index f5ca0286a0af..6e93b5ef0a6b 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -483,10 +483,16 @@ xfs_sb_quota_to_disk(
 	}
 
 	/*
-	 * GQUOTINO and PQUOTINO cannot be used together in versions
-	 * of superblock that do not have pquotino. from->sb_flags
-	 * tells us which quota is active and should be copied to
-	 * disk.
+	 * GQUOTINO and PQUOTINO cannot be used together in versions of
+	 * superblock that do not have pquotino. from->sb_flags tells us which
+	 * quota is active and should be copied to disk. If neither are active,
+	 * make sure we write NULLFSINO to the sb_gquotino field as a quota
+	 * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
+	 * bit is set.
+	 *
+	 * Note that we don't need to handle the sb_uquotino or sb_pquotino here
+	 * as they do not require any translation. Hence the main sb field loop
+	 * will write them appropriately from the in-core superblock.
 	 */
 	if ((*fields & XFS_SB_GQUOTINO) &&
 	    (from->sb_qflags & XFS_GQUOTA_ACCT))
@@ -494,6 +500,17 @@ xfs_sb_quota_to_disk(
 	else if ((*fields & XFS_SB_PQUOTINO) &&
 		 (from->sb_qflags & XFS_PQUOTA_ACCT))
 		to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
+	else {
+		/*
+		 * We can't rely on just the fields being logged to tell us
+		 * that it is safe to write NULLFSINO - we should only do that
+		 * if quotas are not actually enabled. Hence only write
+		 * NULLFSINO if both in-core quota inodes are NULL.
+		 */
+		if (from->sb_gquotino == NULLFSINO &&
+		    from->sb_pquotino == NULLFSINO)
+			to->sb_gquotino = cpu_to_be64(NULLFSINO);
+	}
 
 	*fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
 }
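On a pre-pquotino superblock there is only one shared on-disk slot, sb_gquotino, so the in-core qflags decide what lands in it. The following is an illustrative restatement of that three-way decision as a standalone helper; the function and parameter names are invented, and the final fallthrough approximates "leave the value the main copy loop already wrote" by returning the in-core gquotino.

#include <stdint.h>
#include <stdbool.h>

#define NULLFSINO	((uint64_t)-1)

/* Hypothetical restatement of the slot-selection logic above. */
static uint64_t shared_gquotino_slot(bool gquota_acct, bool pquota_acct,
				     uint64_t gquotino, uint64_t pquotino)
{
	if (gquota_acct)
		return gquotino;	/* group quota owns the shared slot */
	if (pquota_acct)
		return pquotino;	/* project quota owns the shared slot */

	/* neither active: only safe to write NULLFSINO if both are NULL */
	if (gquotino == NULLFSINO && pquotino == NULLFSINO)
		return NULLFSINO;

	return gquotino;	/* otherwise keep the previously copied value */
}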
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 48fd7ea65b1e..d32889af1777 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -249,59 +249,6 @@ xfs_bmap_rtalloc(
 }
 
 /*
- * Stack switching interfaces for allocation
- */
-static void
-xfs_bmapi_allocate_worker(
-	struct work_struct	*work)
-{
-	struct xfs_bmalloca	*args = container_of(work,
-						struct xfs_bmalloca, work);
-	unsigned long		pflags;
-	unsigned long		new_pflags = PF_FSTRANS;
-
-	/*
-	 * we are in a transaction context here, but may also be doing work
-	 * in kswapd context, and hence we may need to inherit that state
-	 * temporarily to ensure that we don't block waiting for memory reclaim
-	 * in any way.
-	 */
-	if (args->kswapd)
-		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
-
-	current_set_flags_nested(&pflags, new_pflags);
-
-	args->result = __xfs_bmapi_allocate(args);
-	complete(args->done);
-
-	current_restore_flags_nested(&pflags, new_pflags);
-}
-
-/*
- * Some allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Otherwise just
- * call directly to avoid the context switch overhead here.
- */
-int
-xfs_bmapi_allocate(
-	struct xfs_bmalloca	*args)
-{
-	DECLARE_COMPLETION_ONSTACK(done);
-
-	if (!args->stack_switch)
-		return __xfs_bmapi_allocate(args);
-
-
-	args->done = &done;
-	args->kswapd = current_is_kswapd();
-	INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
-	queue_work(xfs_alloc_wq, &args->work);
-	wait_for_completion(&done);
-	destroy_work_on_stack(&args->work);
-	return args->result;
-}
-
-/*
  * Check if the endoff is outside the last extent. If so the caller will grow
  * the allocation to a stripe unit boundary. All offsets are considered outside
  * the end of file for an empty fork, so 1 is returned in *eof in that case.
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 075f72232a64..2fdb72d2c908 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -55,8 +55,6 @@ struct xfs_bmalloca {
 	bool			userdata;/* set if is user data */
 	bool			aeof;	/* allocated space at eof */
 	bool			conv;	/* overwriting unwritten extents */
-	bool			stack_switch;
-	bool			kswapd;	/* allocation in kswapd context */
 	int			flags;
 	struct completion	*done;
 	struct work_struct	work;
@@ -66,8 +64,6 @@ struct xfs_bmalloca {
 int	xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
 			int *committed);
 int	xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
-int	xfs_bmapi_allocate(struct xfs_bmalloca *args);
-int	__xfs_bmapi_allocate(struct xfs_bmalloca *args);
 int	xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
 		     int whichfork, int *eof);
 int	xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 68a68f704837..c24c67e22a2a 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -139,6 +139,21 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
 	}
 }
 
+/*
+ * Check whether a dquot is under low free space conditions. We assume the
+ * quota is enabled and enforced.
+ */
+static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
+{
+	int64_t freesp;
+
+	freesp = be64_to_cpu(dqp->q_core.d_blk_hardlimit) - dqp->q_res_bcount;
+	if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT])
+		return true;
+
+	return false;
+}
+
 #define XFS_DQ_IS_LOCKED(dqp)	(mutex_is_locked(&((dqp)->q_qlock)))
 #define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
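xfs_dquot_lowsp() reports low space once the free blocks remaining under the hard limit drop below a precomputed 1% threshold (q_low_space[XFS_QLOWSP_1_PCNT]). A standalone arithmetic sketch of the same check follows; the struct and names are hypothetical, and the 1% threshold is computed inline here, whereas the kernel caches it on the dquot at initialization time.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical flattening of the dquot fields involved in the check. */
struct quota_usage {
	int64_t	blk_hardlimit;	/* hard block limit */
	int64_t	res_bcount;	/* blocks reserved/used so far */
};

static bool quota_low_space(const struct quota_usage *q)
{
	int64_t freesp = q->blk_hardlimit - q->res_bcount;
	int64_t one_pct = q->blk_hardlimit / 100;	/* 1% low-space mark */

	return freesp < one_pct;
}

int main(void)
{
	struct quota_usage q = { .blk_hardlimit = 1000000, .res_bcount = 995000 };

	/* 5000 blocks free against a 10000-block threshold: low space. */
	printf("low space: %d\n", quota_low_space(&q));
	return 0;
}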
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 181605da08e4..fcf91a22f5d8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,6 +38,7 @@
 #include "xfs_trace.h"
 #include "xfs_log.h"
 #include "xfs_dinode.h"
+#include "xfs_icache.h"
 
 #include <linux/aio.h>
 #include <linux/dcache.h>
@@ -689,14 +690,28 @@ write_retry:
 	ret = generic_perform_write(file, from, pos);
 	if (likely(ret >= 0))
 		iocb->ki_pos = pos + ret;
+
 	/*
-	 * If we just got an ENOSPC, try to write back all dirty inodes to
-	 * convert delalloc space to free up some of the excess reserved
-	 * metadata space.
+	 * If we hit a space limit, try to free up some lingering preallocated
+	 * space before returning an error. In the case of ENOSPC, first try to
+	 * write back all dirty inodes to free up some of the excess reserved
+	 * metadata space. This reduces the chances that the eofblocks scan
+	 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
+	 * also behaves as a filter to prevent too many eofblocks scans from
+	 * running at the same time.
 	 */
-	if (ret == -ENOSPC && !enospc) {
+	if (ret == -EDQUOT && !enospc) {
+		enospc = xfs_inode_free_quota_eofblocks(ip);
+		if (enospc)
+			goto write_retry;
+	} else if (ret == -ENOSPC && !enospc) {
+		struct xfs_eofblocks eofb = {0};
+
 		enospc = 1;
 		xfs_flush_inodes(ip->i_mount);
+		eofb.eof_scan_owner = ip->i_ino; /* for locking */
+		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
+		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
 		goto write_retry;
 	}
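The write path above retries at most once per failure: a quota failure (EDQUOT) triggers a quota-scoped eofblocks scan and retries only if a scan actually ran, while a global ENOSPC flushes dirty inodes, runs a filesystem-wide scan, and retries unconditionally; the enospc flag stops the loop from repeating. A condensed sketch of that control flow, using the kernel convention of negative errno return values; do_write and the two reclaim hooks are hypothetical stand-ins for generic_perform_write() and the XFS scans.

#include <errno.h>
#include <stdbool.h>

extern long do_write(const char *buf, long len);	/* hypothetical */
extern bool reclaim_quota_prealloc(void);	/* true if a scan was run */
extern void reclaim_global_prealloc(void);

static long buffered_write(const char *buf, long len)
{
	bool retried = false;
	long ret;

retry:
	ret = do_write(buf, len);

	if (ret == -EDQUOT && !retried) {
		/* Quota limit hit: retry only if a quota scan actually ran. */
		retried = reclaim_quota_prealloc();
		if (retried)
			goto retry;
	} else if (ret == -ENOSPC && !retried) {
		/* Global ENOSPC: flush and scan unconditionally, retry once. */
		retried = true;
		reclaim_global_prealloc();
		goto retry;
	}
	return ret;
}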
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index dffafc6bf7b0..18dc721ca19f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -375,6 +375,9 @@ struct xfs_fs_eofblocks {
 #define XFS_EOF_FLAGS_GID		(1 << 2) /* filter by gid */
 #define XFS_EOF_FLAGS_PRID		(1 << 3) /* filter by project id */
 #define XFS_EOF_FLAGS_MINFILESIZE	(1 << 4) /* filter by min file size */
+#define XFS_EOF_FLAGS_UNION		(1 << 5) /* union filter algorithm;
+						  * kernel only, not included in
+						  * valid mask */
 #define XFS_EOF_FLAGS_VALID	\
 	(XFS_EOF_FLAGS_SYNC |	\
 	 XFS_EOF_FLAGS_UID |	\
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 08ba4c6e1359..981b2cf51985 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,6 +33,9 @@
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_bmap_util.h"
+#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -1203,6 +1206,30 @@ xfs_inode_match_id(
 	return 1;
 }
 
+/*
+ * A union-based inode filtering algorithm. Process the inode if any of the
+ * criteria match. This is for global/internal scans only.
+ */
+STATIC int
+xfs_inode_match_id_union(
+	struct xfs_inode	*ip,
+	struct xfs_eofblocks	*eofb)
+{
+	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
+	    uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
+		return 1;
+
+	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
+	    gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
+		return 1;
+
+	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
+	    xfs_get_projid(ip) == eofb->eof_prid)
+		return 1;
+
+	return 0;
+}
+
 STATIC int
 xfs_inode_free_eofblocks(
 	struct xfs_inode	*ip,
@@ -1211,6 +1238,10 @@ xfs_inode_free_eofblocks(
 {
 	int ret;
 	struct xfs_eofblocks *eofb = args;
+	bool need_iolock = true;
+	int match;
+
+	ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
 
 	if (!xfs_can_free_eofblocks(ip, false)) {
 		/* inode could be preallocated or append-only */
@@ -1228,16 +1259,28 @@ xfs_inode_free_eofblocks(
 		return 0;
 
 	if (eofb) {
-		if (!xfs_inode_match_id(ip, eofb))
+		if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
+			match = xfs_inode_match_id_union(ip, eofb);
+		else
+			match = xfs_inode_match_id(ip, eofb);
+		if (!match)
 			return 0;
 
 		/* skip the inode if the file size is too small */
 		if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
 		    XFS_ISIZE(ip) < eofb->eof_min_file_size)
 			return 0;
+
+		/*
+		 * A scan owner implies we already hold the iolock. Skip it in
+		 * xfs_free_eofblocks() to avoid deadlock. This also eliminates
+		 * the possibility of EAGAIN being returned.
+		 */
+		if (eofb->eof_scan_owner == ip->i_ino)
+			need_iolock = false;
 	}
 
-	ret = xfs_free_eofblocks(ip->i_mount, ip, true);
+	ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
 
 	/* don't revisit the inode if we're not waiting */
 	if (ret == -EAGAIN && !(flags & SYNC_WAIT))
@@ -1260,6 +1303,55 @@ xfs_icache_free_eofblocks(
 			eofb, XFS_ICI_EOFBLOCKS_TAG);
 }
 
+/*
+ * Run eofblocks scans on the quotas applicable to the inode. For inodes with
+ * multiple quotas, we don't know exactly which quota caused an allocation
+ * failure. We make a best effort by including each quota under low free space
+ * conditions (less than 1% free space) in the scan.
+ */
+int
+xfs_inode_free_quota_eofblocks(
+	struct xfs_inode *ip)
+{
+	int scan = 0;
+	struct xfs_eofblocks eofb = {0};
+	struct xfs_dquot *dq;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+	/*
+	 * Set the scan owner to avoid a potential livelock. Otherwise, the
+	 * scan can repeatedly trylock on the inode we're currently processing.
+	 * We run a sync scan to increase effectiveness and use the union
+	 * filter to cover all applicable quotas in a single scan.
+	 */
+	eofb.eof_scan_owner = ip->i_ino;
+	eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
+
+	if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
+		dq = xfs_inode_dquot(ip, XFS_DQ_USER);
+		if (dq && xfs_dquot_lowsp(dq)) {
+			eofb.eof_uid = VFS_I(ip)->i_uid;
+			eofb.eof_flags |= XFS_EOF_FLAGS_UID;
+			scan = 1;
+		}
+	}
+
+	if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
+		dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
+		if (dq && xfs_dquot_lowsp(dq)) {
+			eofb.eof_gid = VFS_I(ip)->i_gid;
+			eofb.eof_flags |= XFS_EOF_FLAGS_GID;
+			scan = 1;
+		}
+	}
+
+	if (scan)
+		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+
+	return scan;
+}
+
 void
 xfs_inode_set_eofblocks_tag(
 	xfs_inode_t	*ip)
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 6250430d609c..46748b86b12f 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,6 +27,7 @@ struct xfs_eofblocks {
 	kgid_t		eof_gid;
 	prid_t		eof_prid;
 	__u64		eof_min_file_size;
+	xfs_ino_t	eof_scan_owner;
 };
 
 #define SYNC_WAIT		0x0001	/* wait for i/o to complete */
@@ -57,6 +58,7 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
 void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
 void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
 int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
+int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip);
 void xfs_eofblocks_worker(struct work_struct *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
@@ -84,6 +86,7 @@ xfs_fs_eofblocks_from_user(
 	dst->eof_flags = src->eof_flags;
 	dst->eof_prid = src->eof_prid;
 	dst->eof_min_file_size = src->eof_min_file_size;
+	dst->eof_scan_owner = NULLFSINO;
 
 	dst->eof_uid = INVALID_UID;
 	if (src->eof_flags & XFS_EOF_FLAGS_UID) {
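xfs_inode_match_id() requires every requested criterion to match (an AND filter), while the new union variant processes an inode if any criterion matches (an OR filter), which is what the quota scan needs when both an inode's user and group quotas are near their limits. A minimal illustration of the two policies; the filter struct, flag values, and names are hypothetical.

#include <stdbool.h>
#include <stdint.h>

#define F_UID	(1 << 1)
#define F_GID	(1 << 2)

struct filter {
	unsigned int	flags;
	uint32_t	uid;
	uint32_t	gid;
};

/* AND semantics: reject on the first requested criterion that fails. */
static bool match_all(const struct filter *f, uint32_t uid, uint32_t gid)
{
	if ((f->flags & F_UID) && uid != f->uid)
		return false;
	if ((f->flags & F_GID) && gid != f->gid)
		return false;
	return true;
}

/* OR semantics: accept on the first requested criterion that matches. */
static bool match_any(const struct filter *f, uint32_t uid, uint32_t gid)
{
	if ((f->flags & F_UID) && uid == f->uid)
		return true;
	if ((f->flags & F_GID) && gid == f->gid)
		return true;
	return false;
}

With a filter requesting both UID 100 and GID 200, an inode owned by UID 100 but GID 300 fails match_all() yet passes match_any(), mirroring XFS_EOF_FLAGS_UNION behavior.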
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 923c044bd26f..e9c47b6f5e5a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -397,7 +397,8 @@ xfs_quota_calc_throttle(
 	struct xfs_inode *ip,
 	int type,
 	xfs_fsblock_t *qblocks,
-	int *qshift)
+	int *qshift,
+	int64_t	*qfreesp)
 {
 	int64_t freesp;
 	int shift = 0;
@@ -406,6 +407,7 @@ xfs_quota_calc_throttle(
 	/* over hi wmark, squash the prealloc completely */
 	if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
 		*qblocks = 0;
+		*qfreesp = 0;
 		return;
 	}
 
@@ -418,6 +420,9 @@ xfs_quota_calc_throttle(
 			shift += 2;
 	}
 
+	if (freesp < *qfreesp)
+		*qfreesp = freesp;
+
 	/* only overwrite the throttle values if we are more aggressive */
 	if ((freesp >> shift) < (*qblocks >> *qshift)) {
 		*qblocks = freesp;
@@ -476,15 +481,18 @@ xfs_iomap_prealloc_size(
 	}
 
 	/*
-	 * Check each quota to cap the prealloc size and provide a shift
-	 * value to throttle with.
+	 * Check each quota to cap the prealloc size, provide a shift value to
+	 * throttle with and adjust amount of available space.
 	 */
 	if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
-		xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift);
+		xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift,
+					&freesp);
 	if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
-		xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift);
+		xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift,
+					&freesp);
 	if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
-		xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift);
+		xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift,
+					&freesp);
 
 	/*
 	 * The final prealloc size is set to the minimum of free space available
@@ -749,8 +757,7 @@ xfs_iomap_write_allocate(
 		 * pointer that the caller gave to us.
 		 */
 		error = xfs_bmapi_write(tp, ip, map_start_fsb,
-					count_fsb, 0,
-					XFS_BMAPI_STACK_SWITCH,
+					count_fsb, 0,
 					&first_block, 1,
 					imap, &nimaps, &free_list);
 		if (error)
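The throttle produces two outputs: qblocks/qshift track the most aggressive quota (the one with the smallest freesp >> shift), and the new qfreesp parameter clamps the caller's free-space figure down to the tightest quota. A loose standalone analogue of the shift computation follows; the watermark values and names are invented for illustration, and the real code derives its levels from per-dquot low-space thresholds rather than a flat array.

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch: each watermark crossed adds 2 to the shift, so every level
 * quarters the preallocation size (freesp >> shift).
 */
static int prealloc_shift(int64_t used, const int64_t wmarks[5])
{
	int shift = 0;
	int i;

	for (i = 0; i < 5; i++) {
		if (used < wmarks[i])
			break;
		shift += 2;
	}
	return shift;
}

int main(void)
{
	const int64_t wmarks[5] = { 1000, 2000, 3000, 4000, 5000 };
	int64_t freesp = 4096;
	int shift = prealloc_shift(3500, wmarks);	/* crosses 3 marks */

	/* throttled prealloc: 4096 >> 6 = 64 blocks */
	printf("shift=%d prealloc=%lld\n", shift, (long long)(freesp >> shift));
	return 0;
}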
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 69aae8affd41..d3ef6de475bb 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -179,6 +179,17 @@ typedef __uint64_t __psunsigned_t;
 #define MAX(a,b)	(max(a,b))
 #define howmany(x, y)	(((x)+((y)-1))/(y))
 
+/*
+ * XFS wrapper structure for sysfs support. It depends on external data
+ * structures and is embedded in various internal data structures to implement
+ * the XFS sysfs object hierarchy. Define it here for broad access throughout
+ * the codebase.
+ */
+struct xfs_kobj {
+	struct kobject		kobject;
+	struct completion	complete;
+};
+
 /* Kernel uid/gid conversion. These are used to convert to/from the on disk
  * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally.
  * The conversion here is type only, the value will remain the same since we
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7647818b8c8a..149a4a575a09 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -34,6 +34,7 @@
 #include "xfs_trace.h"
 #include "xfs_fsops.h"
 #include "xfs_cksum.h"
+#include "xfs_sysfs.h"
 
 kmem_zone_t	*xfs_log_ticket_zone;
 
@@ -707,6 +708,11 @@ xfs_log_mount(
 		}
 	}
 
+	error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
+			       "log");
+	if (error)
+		goto out_destroy_ail;
+
 	/* Normal transactions can now occur */
 	mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
 
@@ -947,6 +953,9 @@ xfs_log_unmount(
 	xfs_log_quiesce(mp);
 
 	xfs_trans_ail_destroy(mp);
+
+	xfs_sysfs_del(&mp->m_log->l_kobj);
+
 	xlog_dealloc_log(mp->m_log);
 }
 
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 9bc403a9e54f..db7cbdeb2b42 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -405,6 +405,8 @@ struct xlog {
 	struct xlog_grant_head	l_reserve_head;
 	struct xlog_grant_head	l_write_head;
 
+	struct xfs_kobj		l_kobj;
+
 	/* The following field are used for debugging; need to hold icloglock */
 #ifdef DEBUG
 	char			*l_iclog_bak[XLOG_MAX_ICLOGS];
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index e0e232efe9af..5b639df0413e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -42,6 +42,7 @@
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_dinode.h"
+#include "xfs_sysfs.h"
 
 #ifdef HAVE_PERCPU_SB
@@ -60,6 +61,8 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
 static uuid_t *xfs_uuid_table;
 
+extern struct kset *xfs_kset;
+
 /*
  * See if the UUID is unique among mounted XFS filesystems.
  * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -727,10 +730,15 @@ xfs_mountfs(
 
 	xfs_set_maxicount(mp);
 
-	error = xfs_uuid_mount(mp);
+	mp->m_kobj.kobject.kset = xfs_kset;
+	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
 	if (error)
 		goto out;
 
+	error = xfs_uuid_mount(mp);
+	if (error)
+		goto out_remove_sysfs;
+
 	/*
 	 * Set the minimum read and write sizes
 	 */
@@ -851,7 +859,7 @@ xfs_mountfs(
 	    !mp->m_sb.sb_inprogress) {
 		error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
 		if (error)
-			goto out_fail_wait;
+			goto out_log_dealloc;
 	}
 
 	/*
@@ -923,7 +931,7 @@ xfs_mountfs(
 			xfs_notice(mp, "resetting quota flags");
 			error = xfs_mount_reset_sbqflags(mp);
 			if (error)
-				return error;
+				goto out_rtunmount;
 		}
 	}
 
@@ -985,6 +993,8 @@ xfs_mountfs(
 	xfs_da_unmount(mp);
  out_remove_uuid:
 	xfs_uuid_unmount(mp);
+ out_remove_sysfs:
+	xfs_sysfs_del(&mp->m_kobj);
  out:
 	return error;
 }
@@ -1067,6 +1077,8 @@ xfs_unmountfs(
 	xfs_errortag_clearall(mp, 0);
 #endif
 	xfs_free_perag(mp);
+
+	xfs_sysfs_del(&mp->m_kobj);
 }
 
 int
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7295a0b7c343..b0447c86e7e2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -166,6 +166,7 @@ typedef struct xfs_mount {
 						   on the next remount,rw */
 	int64_t			m_low_space[XFS_LOWSP_MAX];
 						/* low free space thresholds */
+	struct xfs_kobj		m_kobj;
 
 	struct workqueue_struct	*m_data_workqueue;
 	struct workqueue_struct	*m_unwritten_workqueue;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 3fbedbb88083..b194652033cd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -61,6 +61,7 @@ static const struct super_operations xfs_super_operations;
 
 static kmem_zone_t *xfs_ioend_zone;
 mempool_t *xfs_ioend_pool;
+struct kset *xfs_kset;
 
 #define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
 #define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
@@ -1761,9 +1762,15 @@ init_xfs_fs(void)
 	if (error)
 		goto out_cleanup_procfs;
 
+	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
+	if (!xfs_kset) {
+		error = -ENOMEM;
+		goto out_sysctl_unregister;
+	}
+
 	error = xfs_qm_init();
 	if (error)
-		goto out_sysctl_unregister;
+		goto out_kset_unregister;
 
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
@@ -1772,6 +1779,8 @@ init_xfs_fs(void)
 
  out_qm_exit:
 	xfs_qm_exit();
+ out_kset_unregister:
+	kset_unregister(xfs_kset);
  out_sysctl_unregister:
 	xfs_sysctl_unregister();
  out_cleanup_procfs:
@@ -1793,6 +1802,7 @@ exit_xfs_fs(void)
 {
 	xfs_qm_exit();
 	unregister_filesystem(&xfs_fs_type);
+	kset_unregister(xfs_kset);
 	xfs_sysctl_unregister();
 	xfs_cleanup_procfs();
 	xfs_buf_terminate();
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
new file mode 100644
index 000000000000..9835139ce1ec
--- /dev/null
+++ b/fs/xfs/xfs_sysfs.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_sysfs.h"
+#include "xfs_log_format.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
+
+struct xfs_sysfs_attr {
+	struct attribute	attr;
+	ssize_t			(*show)(char *buf, void *data);
+	ssize_t			(*store)(const char *buf, size_t count,
+					 void *data);
+};
+
+static inline struct xfs_sysfs_attr *
+to_attr(struct attribute *attr)
+{
+	return container_of(attr, struct xfs_sysfs_attr, attr);
+}
+
+#define XFS_SYSFS_ATTR_RW(name) \
+	static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name)
+#define XFS_SYSFS_ATTR_RO(name) \
+	static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name)
+
+#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
+
+/*
+ * xfs_mount kobject. This currently has no attributes and thus no need for
+ * show and store helpers. The mp kobject serves as the per-mount parent object
+ * that is identified by the fsname under sysfs.
+ */
+
+struct kobj_type xfs_mp_ktype = {
+	.release = xfs_sysfs_release,
+};
+
+/* xlog */
+
+STATIC ssize_t
+log_head_lsn_show(
+	char	*buf,
+	void	*data)
+{
+	struct xlog *log = data;
+	int cycle;
+	int block;
+
+	spin_lock(&log->l_icloglock);
+	cycle = log->l_curr_cycle;
+	block = log->l_curr_block;
+	spin_unlock(&log->l_icloglock);
+
+	return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
+}
+XFS_SYSFS_ATTR_RO(log_head_lsn);
+
+STATIC ssize_t
+log_tail_lsn_show(
+	char	*buf,
+	void	*data)
+{
+	struct xlog *log = data;
+	int cycle;
+	int block;
+
+	xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
+	return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
+}
+XFS_SYSFS_ATTR_RO(log_tail_lsn);
+
+STATIC ssize_t
+reserve_grant_head_show(
+	char	*buf,
+	void	*data)
+{
+	struct xlog *log = data;
+	int cycle;
+	int bytes;
+
+	xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes);
+	return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
+}
+XFS_SYSFS_ATTR_RO(reserve_grant_head);
+
+STATIC ssize_t
+write_grant_head_show(
+	char	*buf,
+	void	*data)
+{
+	struct xlog *log = data;
+	int cycle;
+	int bytes;
+
+	xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes);
+	return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
+}
+XFS_SYSFS_ATTR_RO(write_grant_head);
+
+static struct attribute *xfs_log_attrs[] = {
+	ATTR_LIST(log_head_lsn),
+	ATTR_LIST(log_tail_lsn),
+	ATTR_LIST(reserve_grant_head),
+	ATTR_LIST(write_grant_head),
+	NULL,
+};
+
+static inline struct xlog *
+to_xlog(struct kobject *kobject)
+{
+	struct xfs_kobj *kobj = to_kobj(kobject);
+	return container_of(kobj, struct xlog, l_kobj);
+}
+
+STATIC ssize_t
+xfs_log_show(
+	struct kobject		*kobject,
+	struct attribute	*attr,
+	char			*buf)
+{
+	struct xlog *log = to_xlog(kobject);
+	struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+	return xfs_attr->show ? xfs_attr->show(buf, log) : 0;
+}
+
+STATIC ssize_t
+xfs_log_store(
+	struct kobject		*kobject,
+	struct attribute	*attr,
+	const char		*buf,
+	size_t			count)
+{
+	struct xlog *log = to_xlog(kobject);
+	struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+	return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0;
+}
+
+static struct sysfs_ops xfs_log_ops = {
+	.show = xfs_log_show,
+	.store = xfs_log_store,
+};
+
+struct kobj_type xfs_log_ktype = {
+	.release = xfs_sysfs_release,
+	.sysfs_ops = &xfs_log_ops,
+	.default_attrs = xfs_log_attrs,
+};
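With the kset registered at /sys/fs/xfs and the log kobject parented to the per-mount object, the four read-only attributes should appear at paths like /sys/fs/xfs/<devname>/log/log_head_lsn, each exporting a "cycle:block" (or "cycle:bytes") pair. A hedged userspace sketch that reads one of them follows; the device name sda1 stands in for whatever fsname the mount registered.

#include <stdio.h>

/* Read one "cycle:block" attribute exported by this patch. */
static int read_lsn(const char *path, int *cycle, int *block)
{
	FILE *f = fopen(path, "r");
	int ret = -1;

	if (!f)
		return -1;
	if (fscanf(f, "%d:%d", cycle, block) == 2)
		ret = 0;
	fclose(f);
	return ret;
}

int main(void)
{
	int cycle, block;

	/* "sda1" is illustrative; use the mounted device's fsname. */
	if (!read_lsn("/sys/fs/xfs/sda1/log/log_head_lsn", &cycle, &block))
		printf("log head: cycle %d, block %d\n", cycle, block);
	return 0;
}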
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
new file mode 100644
index 000000000000..54a2091183c0
--- /dev/null
+++ b/fs/xfs/xfs_sysfs.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __XFS_SYSFS_H__
+#define __XFS_SYSFS_H__
+
+extern struct kobj_type xfs_mp_ktype;	/* xfs_mount */
+extern struct kobj_type xfs_log_ktype;	/* xlog */
+
+static inline struct xfs_kobj *
+to_kobj(struct kobject *kobject)
+{
+	return container_of(kobject, struct xfs_kobj, kobject);
+}
+
+static inline void
+xfs_sysfs_release(struct kobject *kobject)
+{
+	struct xfs_kobj *kobj = to_kobj(kobject);
+	complete(&kobj->complete);
+}
+
+static inline int
+xfs_sysfs_init(
+	struct xfs_kobj		*kobj,
+	struct kobj_type	*ktype,
+	struct xfs_kobj		*parent_kobj,
+	const char		*name)
+{
+	init_completion(&kobj->complete);
+	return kobject_init_and_add(&kobj->kobject, ktype,
+				    &parent_kobj->kobject, "%s", name);
+}
+
+static inline void
+xfs_sysfs_del(
+	struct xfs_kobj	*kobj)
+{
+	kobject_del(&kobj->kobject);
+	kobject_put(&kobj->kobject);
+	wait_for_completion(&kobj->complete);
+}
+
+#endif	/* __XFS_SYSFS_H__ */
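The completion embedded in struct xfs_kobj is what makes teardown safe: the ktype's release handler fires complete() when the last kobject reference is dropped, and xfs_sysfs_del() sleeps on that completion after kobject_put(), so the enclosing structure cannot be freed while sysfs readers still hold references. A kernel-context sketch of embedding the wrapper, built entirely on the helpers above; struct my_object and its functions are hypothetical, and xfs_mp_ktype is reused here only because its .release already points at xfs_sysfs_release().

/* Hypothetical object embedding the patch's struct xfs_kobj. */
struct my_object {
	struct xfs_kobj	kobj;
	int		payload;
};

static int my_object_register(struct my_object *obj, struct xfs_kobj *parent)
{
	/* Appears as <parent>/myobj in sysfs; refcounted from here on. */
	return xfs_sysfs_init(&obj->kobj, &xfs_mp_ktype, parent, "myobj");
}

static void my_object_unregister(struct my_object *obj)
{
	/*
	 * Removes the sysfs entry, drops our reference, then sleeps until
	 * xfs_sysfs_release() signals that the final reference is gone,
	 * after which obj can be freed safely.
	 */
	xfs_sysfs_del(&obj->kobj);
}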