summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c7
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h4
-rw-r--r--fs/xfs/libxfs/xfs_btree.c82
-rw-r--r--fs/xfs/libxfs/xfs_sb.c25
-rw-r--r--fs/xfs/xfs_bmap_util.c53
-rw-r--r--fs/xfs/xfs_bmap_util.h4
-rw-r--r--fs/xfs/xfs_dquot.h15
-rw-r--r--fs/xfs/xfs_file.c23
-rw-r--r--fs/xfs/xfs_fs.h3
-rw-r--r--fs/xfs/xfs_icache.c96
-rw-r--r--fs/xfs/xfs_icache.h3
-rw-r--r--fs/xfs/xfs_iomap.c23
-rw-r--r--fs/xfs/xfs_linux.h11
-rw-r--r--fs/xfs/xfs_log.c9
-rw-r--r--fs/xfs/xfs_log_priv.h2
-rw-r--r--fs/xfs/xfs_mount.c18
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_super.c12
-rw-r--r--fs/xfs/xfs_sysfs.c165
-rw-r--r--fs/xfs/xfs_sysfs.h59
21 files changed, 528 insertions, 88 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 0dfa26d626f5..d61799949580 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -86,6 +86,7 @@ xfs-y += xfs_aops.o \
xfs_mru_cache.o \
xfs_super.o \
xfs_symlink.o \
+ xfs_sysfs.o \
xfs_trans.o \
xfs_xattr.o \
kmem.o \
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 224963b1efc0..de2d26d32844 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4294,8 +4294,8 @@ xfs_bmapi_delay(
}
-int
-__xfs_bmapi_allocate(
+static int
+xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
{
struct xfs_mount *mp = bma->ip->i_mount;
@@ -4574,9 +4574,6 @@ xfs_bmapi_write(
bma.flist = flist;
bma.firstblock = firstblock;
- if (flags & XFS_BMAPI_STACK_SWITCH)
- bma.stack_switch = 1;
-
while (bno < end && n < *nmap) {
inhole = eof || bma.got.br_startoff > bno;
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 38ba36e9b2f0..b879ca56a64c 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -77,7 +77,6 @@ typedef struct xfs_bmap_free
* from written to unwritten, otherwise convert from unwritten to written.
*/
#define XFS_BMAPI_CONVERT 0x040
-#define XFS_BMAPI_STACK_SWITCH 0x080
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@@ -86,8 +85,7 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
- { XFS_BMAPI_CONVERT, "CONVERT" }, \
- { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
+ { XFS_BMAPI_CONVERT, "CONVERT" }
static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2435928b0765..8fe6a93ff473 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -33,6 +33,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
+#include "xfs_alloc.h"
/*
* Cursor allocation zone.
@@ -2323,7 +2324,7 @@ error1:
* record (to be inserted into parent).
*/
STATIC int /* error */
-xfs_btree_split(
+__xfs_btree_split(
struct xfs_btree_cur *cur,
int level,
union xfs_btree_ptr *ptrp,
@@ -2503,6 +2504,85 @@ error0:
return error;
}
+struct xfs_btree_split_args {
+ struct xfs_btree_cur *cur;
+ int level;
+ union xfs_btree_ptr *ptrp;
+ union xfs_btree_key *key;
+ struct xfs_btree_cur **curp;
+ int *stat; /* success/failure */
+ int result;
+ bool kswapd; /* allocation in kswapd context */
+ struct completion *done;
+ struct work_struct work;
+};
+
+/*
+ * Stack switching interfaces for allocation
+ */
+static void
+xfs_btree_split_worker(
+ struct work_struct *work)
+{
+ struct xfs_btree_split_args *args = container_of(work,
+ struct xfs_btree_split_args, work);
+ unsigned long pflags;
+ unsigned long new_pflags = PF_FSTRANS;
+
+ /*
+ * we are in a transaction context here, but may also be doing work
+ * in kswapd context, and hence we may need to inherit that state
+ * temporarily to ensure that we don't block waiting for memory reclaim
+ * in any way.
+ */
+ if (args->kswapd)
+ new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+
+ current_set_flags_nested(&pflags, new_pflags);
+
+ args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
+ args->key, args->curp, args->stat);
+ complete(args->done);
+
+ current_restore_flags_nested(&pflags, new_pflags);
+}
+
+/*
+ * BMBT split requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. For the other
+ * btree types, just call directly to avoid the context switch overhead here.
+ */
+STATIC int /* error */
+xfs_btree_split(
+ struct xfs_btree_cur *cur,
+ int level,
+ union xfs_btree_ptr *ptrp,
+ union xfs_btree_key *key,
+ struct xfs_btree_cur **curp,
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_split_args args;
+ DECLARE_COMPLETION_ONSTACK(done);
+
+ if (cur->bc_btnum != XFS_BTNUM_BMAP)
+ return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
+
+ args.cur = cur;
+ args.level = level;
+ args.ptrp = ptrp;
+ args.key = key;
+ args.curp = curp;
+ args.stat = stat;
+ args.done = &done;
+ args.kswapd = current_is_kswapd();
+ INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
+ queue_work(xfs_alloc_wq, &args.work);
+ wait_for_completion(&done);
+ destroy_work_on_stack(&args.work);
+ return args.result;
+}
+
+
/*
* Copy the old inode root contents into a real block and make the
* broot point to it.
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index f5ca0286a0af..6e93b5ef0a6b 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -483,10 +483,16 @@ xfs_sb_quota_to_disk(
}
/*
- * GQUOTINO and PQUOTINO cannot be used together in versions
- * of superblock that do not have pquotino. from->sb_flags
- * tells us which quota is active and should be copied to
- * disk.
+ * GQUOTINO and PQUOTINO cannot be used together in versions of
+ * superblock that do not have pquotino. from->sb_flags tells us which
+ * quota is active and should be copied to disk. If neither are active,
+ * make sure we write NULLFSINO to the sb_gquotino field as a quota
+ * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
+ * bit is set.
+ *
+ * Note that we don't need to handle the sb_uquotino or sb_pquotino here
+ * as they do not require any translation. Hence the main sb field loop
+ * will write them appropriately from the in-core superblock.
*/
if ((*fields & XFS_SB_GQUOTINO) &&
(from->sb_qflags & XFS_GQUOTA_ACCT))
@@ -494,6 +500,17 @@ xfs_sb_quota_to_disk(
else if ((*fields & XFS_SB_PQUOTINO) &&
(from->sb_qflags & XFS_PQUOTA_ACCT))
to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
+ else {
+ /*
+ * We can't rely on just the fields being logged to tell us
+ * that it is safe to write NULLFSINO - we should only do that
+ * if quotas are not actually enabled. Hence only write
+ * NULLFSINO if both in-core quota inodes are NULL.
+ */
+ if (from->sb_gquotino == NULLFSINO &&
+ from->sb_pquotino == NULLFSINO)
+ to->sb_gquotino = cpu_to_be64(NULLFSINO);
+ }
*fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 48fd7ea65b1e..d32889af1777 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -249,59 +249,6 @@ xfs_bmap_rtalloc(
}
/*
- * Stack switching interfaces for allocation
- */
-static void
-xfs_bmapi_allocate_worker(
- struct work_struct *work)
-{
- struct xfs_bmalloca *args = container_of(work,
- struct xfs_bmalloca, work);
- unsigned long pflags;
- unsigned long new_pflags = PF_FSTRANS;
-
- /*
- * we are in a transaction context here, but may also be doing work
- * in kswapd context, and hence we may need to inherit that state
- * temporarily to ensure that we don't block waiting for memory reclaim
- * in any way.
- */
- if (args->kswapd)
- new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
-
- current_set_flags_nested(&pflags, new_pflags);
-
- args->result = __xfs_bmapi_allocate(args);
- complete(args->done);
-
- current_restore_flags_nested(&pflags, new_pflags);
-}
-
-/*
- * Some allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Otherwise just
- * call directly to avoid the context switch overhead here.
- */
-int
-xfs_bmapi_allocate(
- struct xfs_bmalloca *args)
-{
- DECLARE_COMPLETION_ONSTACK(done);
-
- if (!args->stack_switch)
- return __xfs_bmapi_allocate(args);
-
-
- args->done = &done;
- args->kswapd = current_is_kswapd();
- INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
- queue_work(xfs_alloc_wq, &args->work);
- wait_for_completion(&done);
- destroy_work_on_stack(&args->work);
- return args->result;
-}
-
-/*
* Check if the endoff is outside the last extent. If so the caller will grow
* the allocation to a stripe unit boundary. All offsets are considered outside
* the end of file for an empty fork, so 1 is returned in *eof in that case.
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 075f72232a64..2fdb72d2c908 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -55,8 +55,6 @@ struct xfs_bmalloca {
bool userdata;/* set if is user data */
bool aeof; /* allocated space at eof */
bool conv; /* overwriting unwritten extents */
- bool stack_switch;
- bool kswapd; /* allocation in kswapd context */
int flags;
struct completion *done;
struct work_struct work;
@@ -66,8 +64,6 @@ struct xfs_bmalloca {
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
int *committed);
int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
-int xfs_bmapi_allocate(struct xfs_bmalloca *args);
-int __xfs_bmapi_allocate(struct xfs_bmalloca *args);
int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
int whichfork, int *eof);
int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 68a68f704837..c24c67e22a2a 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -139,6 +139,21 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
}
}
+/*
+ * Check whether a dquot is under low free space conditions. We assume the quota
+ * is enabled and enforced.
+ */
+static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
+{
+ int64_t freesp;
+
+ freesp = be64_to_cpu(dqp->q_core.d_blk_hardlimit) - dqp->q_res_bcount;
+ if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT])
+ return true;
+
+ return false;
+}
+
#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 181605da08e4..fcf91a22f5d8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,6 +38,7 @@
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_dinode.h"
+#include "xfs_icache.h"
#include <linux/aio.h>
#include <linux/dcache.h>
@@ -689,14 +690,28 @@ write_retry:
ret = generic_perform_write(file, from, pos);
if (likely(ret >= 0))
iocb->ki_pos = pos + ret;
+
/*
- * If we just got an ENOSPC, try to write back all dirty inodes to
- * convert delalloc space to free up some of the excess reserved
- * metadata space.
+ * If we hit a space limit, try to free up some lingering preallocated
+ * space before returning an error. In the case of ENOSPC, first try to
+ * write back all dirty inodes to free up some of the excess reserved
+ * metadata space. This reduces the chances that the eofblocks scan
+ * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
+ * also behaves as a filter to prevent too many eofblocks scans from
+ * running at the same time.
*/
- if (ret == -ENOSPC && !enospc) {
+ if (ret == -EDQUOT && !enospc) {
+ enospc = xfs_inode_free_quota_eofblocks(ip);
+ if (enospc)
+ goto write_retry;
+ } else if (ret == -ENOSPC && !enospc) {
+ struct xfs_eofblocks eofb = {0};
+
enospc = 1;
xfs_flush_inodes(ip->i_mount);
+ eofb.eof_scan_owner = ip->i_ino; /* for locking */
+ eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
+ xfs_icache_free_eofblocks(ip->i_mount, &eofb);
goto write_retry;
}
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index dffafc6bf7b0..18dc721ca19f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -375,6 +375,9 @@ struct xfs_fs_eofblocks {
#define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */
#define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */
#define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */
+#define XFS_EOF_FLAGS_UNION (1 << 5) /* union filter algorithm;
+ * kernel only, not included in
+ * valid mask */
#define XFS_EOF_FLAGS_VALID \
(XFS_EOF_FLAGS_SYNC | \
XFS_EOF_FLAGS_UID | \
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 08ba4c6e1359..981b2cf51985 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,6 +33,9 @@
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_bmap_util.h"
+#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -1203,6 +1206,30 @@ xfs_inode_match_id(
return 1;
}
+/*
+ * A union-based inode filtering algorithm. Process the inode if any of the
+ * criteria match. This is for global/internal scans only.
+ */
+STATIC int
+xfs_inode_match_id_union(
+ struct xfs_inode *ip,
+ struct xfs_eofblocks *eofb)
+{
+ if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
+ uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
+ return 1;
+
+ if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
+ gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
+ return 1;
+
+ if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
+ xfs_get_projid(ip) == eofb->eof_prid)
+ return 1;
+
+ return 0;
+}
+
STATIC int
xfs_inode_free_eofblocks(
struct xfs_inode *ip,
@@ -1211,6 +1238,10 @@ xfs_inode_free_eofblocks(
{
int ret;
struct xfs_eofblocks *eofb = args;
+ bool need_iolock = true;
+ int match;
+
+ ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
if (!xfs_can_free_eofblocks(ip, false)) {
/* inode could be preallocated or append-only */
@@ -1228,16 +1259,28 @@ xfs_inode_free_eofblocks(
return 0;
if (eofb) {
- if (!xfs_inode_match_id(ip, eofb))
+ if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
+ match = xfs_inode_match_id_union(ip, eofb);
+ else
+ match = xfs_inode_match_id(ip, eofb);
+ if (!match)
return 0;
/* skip the inode if the file size is too small */
if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
XFS_ISIZE(ip) < eofb->eof_min_file_size)
return 0;
+
+ /*
+ * A scan owner implies we already hold the iolock. Skip it in
+ * xfs_free_eofblocks() to avoid deadlock. This also eliminates
+ * the possibility of EAGAIN being returned.
+ */
+ if (eofb->eof_scan_owner == ip->i_ino)
+ need_iolock = false;
}
- ret = xfs_free_eofblocks(ip->i_mount, ip, true);
+ ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
/* don't revisit the inode if we're not waiting */
if (ret == -EAGAIN && !(flags & SYNC_WAIT))
@@ -1260,6 +1303,55 @@ xfs_icache_free_eofblocks(
eofb, XFS_ICI_EOFBLOCKS_TAG);
}
+/*
+ * Run eofblocks scans on the quotas applicable to the inode. For inodes with
+ * multiple quotas, we don't know exactly which quota caused an allocation
+ * failure. We make a best effort by including each quota under low free space
+ * conditions (less than 1% free space) in the scan.
+ */
+int
+xfs_inode_free_quota_eofblocks(
+ struct xfs_inode *ip)
+{
+ int scan = 0;
+ struct xfs_eofblocks eofb = {0};
+ struct xfs_dquot *dq;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+ /*
+ * Set the scan owner to avoid a potential livelock. Otherwise, the scan
+ * can repeatedly trylock on the inode we're currently processing. We
+ * run a sync scan to increase effectiveness and use the union filter to
+ * cover all applicable quotas in a single scan.
+ */
+ eofb.eof_scan_owner = ip->i_ino;
+ eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
+
+ if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
+ dq = xfs_inode_dquot(ip, XFS_DQ_USER);
+ if (dq && xfs_dquot_lowsp(dq)) {
+ eofb.eof_uid = VFS_I(ip)->i_uid;
+ eofb.eof_flags |= XFS_EOF_FLAGS_UID;
+ scan = 1;
+ }
+ }
+
+ if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
+ dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
+ if (dq && xfs_dquot_lowsp(dq)) {
+ eofb.eof_gid = VFS_I(ip)->i_gid;
+ eofb.eof_flags |= XFS_EOF_FLAGS_GID;
+ scan = 1;
+ }
+ }
+
+ if (scan)
+ xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+
+ return scan;
+}
+
void
xfs_inode_set_eofblocks_tag(
xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 6250430d609c..46748b86b12f 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,6 +27,7 @@ struct xfs_eofblocks {
kgid_t eof_gid;
prid_t eof_prid;
__u64 eof_min_file_size;
+ xfs_ino_t eof_scan_owner;
};
#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
@@ -57,6 +58,7 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
+int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip);
void xfs_eofblocks_worker(struct work_struct *);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
@@ -84,6 +86,7 @@ xfs_fs_eofblocks_from_user(
dst->eof_flags = src->eof_flags;
dst->eof_prid = src->eof_prid;
dst->eof_min_file_size = src->eof_min_file_size;
+ dst->eof_scan_owner = NULLFSINO;
dst->eof_uid = INVALID_UID;
if (src->eof_flags & XFS_EOF_FLAGS_UID) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 923c044bd26f..e9c47b6f5e5a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -397,7 +397,8 @@ xfs_quota_calc_throttle(
struct xfs_inode *ip,
int type,
xfs_fsblock_t *qblocks,
- int *qshift)
+ int *qshift,
+ int64_t *qfreesp)
{
int64_t freesp;
int shift = 0;
@@ -406,6 +407,7 @@ xfs_quota_calc_throttle(
/* over hi wmark, squash the prealloc completely */
if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
*qblocks = 0;
+ *qfreesp = 0;
return;
}
@@ -418,6 +420,9 @@ xfs_quota_calc_throttle(
shift += 2;
}
+ if (freesp < *qfreesp)
+ *qfreesp = freesp;
+
/* only overwrite the throttle values if we are more aggressive */
if ((freesp >> shift) < (*qblocks >> *qshift)) {
*qblocks = freesp;
@@ -476,15 +481,18 @@ xfs_iomap_prealloc_size(
}
/*
- * Check each quota to cap the prealloc size and provide a shift
- * value to throttle with.
+ * Check each quota to cap the prealloc size, provide a shift value to
+ * throttle with and adjust amount of available space.
*/
if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
- xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift);
+ xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift,
+ &freesp);
if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
- xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift);
+ xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift,
+ &freesp);
if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
- xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift);
+ xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift,
+ &freesp);
/*
* The final prealloc size is set to the minimum of free space available
@@ -749,8 +757,7 @@ xfs_iomap_write_allocate(
* pointer that the caller gave to us.
*/
error = xfs_bmapi_write(tp, ip, map_start_fsb,
- count_fsb,
- XFS_BMAPI_STACK_SWITCH,
+ count_fsb, 0,
&first_block, 1,
imap, &nimaps, &free_list);
if (error)
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 69aae8affd41..d3ef6de475bb 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -179,6 +179,17 @@ typedef __uint64_t __psunsigned_t;
#define MAX(a,b) (max(a,b))
#define howmany(x, y) (((x)+((y)-1))/(y))
+/*
+ * XFS wrapper structure for sysfs support. It depends on external data
+ * structures and is embedded in various internal data structures to implement
+ * the XFS sysfs object heirarchy. Define it here for broad access throughout
+ * the codebase.
+ */
+struct xfs_kobj {
+ struct kobject kobject;
+ struct completion complete;
+};
+
/* Kernel uid/gid conversion. These are used to convert to/from the on disk
* uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally.
* The conversion here is type only, the value will remain the same since we
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7647818b8c8a..149a4a575a09 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -34,6 +34,7 @@
#include "xfs_trace.h"
#include "xfs_fsops.h"
#include "xfs_cksum.h"
+#include "xfs_sysfs.h"
kmem_zone_t *xfs_log_ticket_zone;
@@ -707,6 +708,11 @@ xfs_log_mount(
}
}
+ error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
+ "log");
+ if (error)
+ goto out_destroy_ail;
+
/* Normal transactions can now occur */
mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
@@ -947,6 +953,9 @@ xfs_log_unmount(
xfs_log_quiesce(mp);
xfs_trans_ail_destroy(mp);
+
+ xfs_sysfs_del(&mp->m_log->l_kobj);
+
xlog_dealloc_log(mp->m_log);
}
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 9bc403a9e54f..db7cbdeb2b42 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -405,6 +405,8 @@ struct xlog {
struct xlog_grant_head l_reserve_head;
struct xlog_grant_head l_write_head;
+ struct xfs_kobj l_kobj;
+
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
char *l_iclog_bak[XLOG_MAX_ICLOGS];
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index e0e232efe9af..5b639df0413e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -42,6 +42,7 @@
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_dinode.h"
+#include "xfs_sysfs.h"
#ifdef HAVE_PERCPU_SB
@@ -60,6 +61,8 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;
+extern struct kset *xfs_kset;
+
/*
* See if the UUID is unique among mounted XFS filesystems.
* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -727,10 +730,15 @@ xfs_mountfs(
xfs_set_maxicount(mp);
- error = xfs_uuid_mount(mp);
+ mp->m_kobj.kobject.kset = xfs_kset;
+ error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
if (error)
goto out;
+ error = xfs_uuid_mount(mp);
+ if (error)
+ goto out_remove_sysfs;
+
/*
* Set the minimum read and write sizes
*/
@@ -851,7 +859,7 @@ xfs_mountfs(
!mp->m_sb.sb_inprogress) {
error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
if (error)
- goto out_fail_wait;
+ goto out_log_dealloc;
}
/*
@@ -923,7 +931,7 @@ xfs_mountfs(
xfs_notice(mp, "resetting quota flags");
error = xfs_mount_reset_sbqflags(mp);
if (error)
- return error;
+ goto out_rtunmount;
}
}
@@ -985,6 +993,8 @@ xfs_mountfs(
xfs_da_unmount(mp);
out_remove_uuid:
xfs_uuid_unmount(mp);
+ out_remove_sysfs:
+ xfs_sysfs_del(&mp->m_kobj);
out:
return error;
}
@@ -1067,6 +1077,8 @@ xfs_unmountfs(
xfs_errortag_clearall(mp, 0);
#endif
xfs_free_perag(mp);
+
+ xfs_sysfs_del(&mp->m_kobj);
}
int
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7295a0b7c343..b0447c86e7e2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -166,6 +166,7 @@ typedef struct xfs_mount {
on the next remount,rw */
int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */
+ struct xfs_kobj m_kobj;
struct workqueue_struct *m_data_workqueue;
struct workqueue_struct *m_unwritten_workqueue;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 3fbedbb88083..b194652033cd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -61,6 +61,7 @@
static const struct super_operations xfs_super_operations;
static kmem_zone_t *xfs_ioend_zone;
mempool_t *xfs_ioend_pool;
+struct kset *xfs_kset;
#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
@@ -1761,9 +1762,15 @@ init_xfs_fs(void)
if (error)
goto out_cleanup_procfs;
+ xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
+ if (!xfs_kset) {
+ error = -ENOMEM;
+ goto out_sysctl_unregister;;
+ }
+
error = xfs_qm_init();
if (error)
- goto out_sysctl_unregister;
+ goto out_kset_unregister;
error = register_filesystem(&xfs_fs_type);
if (error)
@@ -1772,6 +1779,8 @@ init_xfs_fs(void)
out_qm_exit:
xfs_qm_exit();
+ out_kset_unregister:
+ kset_unregister(xfs_kset);
out_sysctl_unregister:
xfs_sysctl_unregister();
out_cleanup_procfs:
@@ -1793,6 +1802,7 @@ exit_xfs_fs(void)
{
xfs_qm_exit();
unregister_filesystem(&xfs_fs_type);
+ kset_unregister(xfs_kset);
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
new file mode 100644
index 000000000000..9835139ce1ec
--- /dev/null
+++ b/fs/xfs/xfs_sysfs.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "xfs.h"
+#include "xfs_sysfs.h"
+#include "xfs_log_format.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
+
+struct xfs_sysfs_attr {
+ struct attribute attr;
+ ssize_t (*show)(char *buf, void *data);
+ ssize_t (*store)(const char *buf, size_t count, void *data);
+};
+
+static inline struct xfs_sysfs_attr *
+to_attr(struct attribute *attr)
+{
+ return container_of(attr, struct xfs_sysfs_attr, attr);
+}
+
+#define XFS_SYSFS_ATTR_RW(name) \
+ static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name)
+#define XFS_SYSFS_ATTR_RO(name) \
+ static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name)
+
+#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
+
+/*
+ * xfs_mount kobject. This currently has no attributes and thus no need for show
+ * and store helpers. The mp kobject serves as the per-mount parent object that
+ * is identified by the fsname under sysfs.
+ */
+
+struct kobj_type xfs_mp_ktype = {
+ .release = xfs_sysfs_release,
+};
+
+/* xlog */
+
+STATIC ssize_t
+log_head_lsn_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int block;
+
+ spin_lock(&log->l_icloglock);
+ cycle = log->l_curr_cycle;
+ block = log->l_curr_block;
+ spin_unlock(&log->l_icloglock);
+
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
+}
+XFS_SYSFS_ATTR_RO(log_head_lsn);
+
+STATIC ssize_t
+log_tail_lsn_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int block;
+
+ xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
+}
+XFS_SYSFS_ATTR_RO(log_tail_lsn);
+
+STATIC ssize_t
+reserve_grant_head_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int bytes;
+
+ xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes);
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
+}
+XFS_SYSFS_ATTR_RO(reserve_grant_head);
+
+STATIC ssize_t
+write_grant_head_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int bytes;
+
+ xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes);
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
+}
+XFS_SYSFS_ATTR_RO(write_grant_head);
+
+static struct attribute *xfs_log_attrs[] = {
+ ATTR_LIST(log_head_lsn),
+ ATTR_LIST(log_tail_lsn),
+ ATTR_LIST(reserve_grant_head),
+ ATTR_LIST(write_grant_head),
+ NULL,
+};
+
+static inline struct xlog *
+to_xlog(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
+ return container_of(kobj, struct xlog, l_kobj);
+}
+
+STATIC ssize_t
+xfs_log_show(
+ struct kobject *kobject,
+ struct attribute *attr,
+ char *buf)
+{
+ struct xlog *log = to_xlog(kobject);
+ struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+ return xfs_attr->show ? xfs_attr->show(buf, log) : 0;
+}
+
+STATIC ssize_t
+xfs_log_store(
+ struct kobject *kobject,
+ struct attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct xlog *log = to_xlog(kobject);
+ struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+ return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0;
+}
+
+static struct sysfs_ops xfs_log_ops = {
+ .show = xfs_log_show,
+ .store = xfs_log_store,
+};
+
+struct kobj_type xfs_log_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_log_ops,
+ .default_attrs = xfs_log_attrs,
+};
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
new file mode 100644
index 000000000000..54a2091183c0
--- /dev/null
+++ b/fs/xfs/xfs_sysfs.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef __XFS_SYSFS_H__
+#define __XFS_SYSFS_H__
+
+extern struct kobj_type xfs_mp_ktype; /* xfs_mount */
+extern struct kobj_type xfs_log_ktype; /* xlog */
+
+static inline struct xfs_kobj *
+to_kobj(struct kobject *kobject)
+{
+ return container_of(kobject, struct xfs_kobj, kobject);
+}
+
+static inline void
+xfs_sysfs_release(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
+ complete(&kobj->complete);
+}
+
+static inline int
+xfs_sysfs_init(
+ struct xfs_kobj *kobj,
+ struct kobj_type *ktype,
+ struct xfs_kobj *parent_kobj,
+ const char *name)
+{
+ init_completion(&kobj->complete);
+ return kobject_init_and_add(&kobj->kobject, ktype,
+ &parent_kobj->kobject, "%s", name);
+}
+
+static inline void
+xfs_sysfs_del(
+ struct xfs_kobj *kobj)
+{
+ kobject_del(&kobj->kobject);
+ kobject_put(&kobj->kobject);
+ wait_for_completion(&kobj->complete);
+}
+
+#endif /* __XFS_SYSFS_H__ */