diff options
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/xfs_acl.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap.c | 193 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap.h | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 97 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 19 | ||||
-rw-r--r-- | fs/xfs/xfs_dir2.c | 342 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 26 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 14 | ||||
-rw-r--r-- | fs/xfs/xfs_log.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 55 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_sb.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.c | 12 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_resv.c | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_resv.h | 1 |
20 files changed, 609 insertions, 196 deletions
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 0ecec1896f25..6888ad886ff6 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -281,7 +281,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (!acl) goto set_acl; - error = -EINVAL; + error = -E2BIG; if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) return error; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index db2cfb067d0b..ef62c6b6130a 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1441,7 +1441,8 @@ xfs_vm_direct_IO( ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, nr_segs, xfs_get_blocks_direct, - xfs_end_io_direct_write, NULL, 0); + xfs_end_io_direct_write, NULL, + DIO_ASYNC_EXTEND); if (ret != -EIOCBQUEUED && iocb->private) goto out_destroy_ioend; } else { diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 152543c4ca70..5b6092ef51ef 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5378,3 +5378,196 @@ error0: } return error; } + +/* + * Shift extent records to the left to cover a hole. + * + * The maximum number of extents to be shifted in a single operation + * is @num_exts, and @current_ext keeps track of the current extent + * index we have shifted. @offset_shift_fsb is the length by which each + * extent is shifted. If there is no hole to shift the extents + * into, this will be considered invalid operation and we abort immediately. + */ +int +xfs_bmap_shift_extents( + struct xfs_trans *tp, + struct xfs_inode *ip, + int *done, + xfs_fileoff_t start_fsb, + xfs_fileoff_t offset_shift_fsb, + xfs_extnum_t *current_ext, + xfs_fsblock_t *firstblock, + struct xfs_bmap_free *flist, + int num_exts) +{ + struct xfs_btree_cur *cur; + struct xfs_bmbt_rec_host *gotp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec left; + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp; + xfs_extnum_t nexts = 0; + xfs_fileoff_t startoff; + int error = 0; + int i; + int whichfork = XFS_DATA_FORK; + int logflags; + xfs_filblks_t blockcount = 0; + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmap_shift_extents", + XFS_ERRLEVEL_LOW, mp); + return XFS_ERROR(EFSCORRUPTED); + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + ASSERT(current_ext != NULL); + + ifp = XFS_IFORK_PTR(ip, whichfork); + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + /* Read in all the extents */ + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; + } + + /* + * If *current_ext is 0, we would need to lookup the extent + * from where we would start shifting and store it in gotp. + */ + if (!*current_ext) { + gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); + /* + * gotp can be null in 2 cases: 1) if there are no extents + * or 2) start_fsb lies in a hole beyond which there are + * no extents. Either way, we are done. + */ + if (!gotp) { + *done = 1; + return 0; + } + } + + /* We are going to change core inode */ + logflags = XFS_ILOG_CORE; + + if (ifp->if_flags & XFS_IFBROOT) { + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); + cur->bc_private.b.firstblock = *firstblock; + cur->bc_private.b.flist = flist; + cur->bc_private.b.flags = 0; + } else { + cur = NULL; + logflags |= XFS_ILOG_DEXT; + } + + while (nexts++ < num_exts && + *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) { + + gotp = xfs_iext_get_ext(ifp, *current_ext); + xfs_bmbt_get_all(gotp, &got); + startoff = got.br_startoff - offset_shift_fsb; + + /* + * Before shifting extent into hole, make sure that the hole + * is large enough to accomodate the shift. + */ + if (*current_ext) { + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, + *current_ext - 1), &left); + + if (startoff < left.br_startoff + left.br_blockcount) + error = XFS_ERROR(EINVAL); + } else if (offset_shift_fsb > got.br_startoff) { + /* + * When first extent is shifted, offset_shift_fsb + * should be less than the stating offset of + * the first extent. + */ + error = XFS_ERROR(EINVAL); + } + + if (error) + goto del_cursor; + + if (cur) { + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount, + &i); + if (error) + goto del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } + + /* Check if we can merge 2 adjacent extents */ + if (*current_ext && + left.br_startoff + left.br_blockcount == startoff && + left.br_startblock + left.br_blockcount == + got.br_startblock && + left.br_state == got.br_state && + left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { + blockcount = left.br_blockcount + + got.br_blockcount; + xfs_iext_remove(ip, *current_ext, 1, 0); + if (cur) { + error = xfs_btree_delete(cur, &i); + if (error) + goto del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + gotp = xfs_iext_get_ext(ifp, --*current_ext); + xfs_bmbt_get_all(gotp, &got); + + /* Make cursor point to the extent we will update */ + if (cur) { + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount, + &i); + if (error) + goto del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } + + xfs_bmbt_set_blockcount(gotp, blockcount); + got.br_blockcount = blockcount; + } else { + /* We have to update the startoff */ + xfs_bmbt_set_startoff(gotp, startoff); + got.br_startoff = startoff; + } + + if (cur) { + error = xfs_bmbt_update(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount, + got.br_state); + if (error) + goto del_cursor; + } + + (*current_ext)++; + } + + /* Check if we are done */ + if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork)) + *done = 1; + +del_cursor: + if (cur) + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + + xfs_trans_log_inode(tp, ip, logflags); + + return error; +} diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 33b41f351225..f84bd7af43be 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp) { BMAP_RIGHT_FILLING, "RF" }, \ { BMAP_ATTRFORK, "ATTR" } + +/* + * This macro is used to determine how many extents will be shifted + * in one write transaction. We could require two splits, + * an extent move on the first and an extent merge on the second, + * So it is proper that one extent is shifted inside write transaction + * at a time. + */ +#define XFS_BMAP_MAX_SHIFT_EXTENTS 1 + #ifdef DEBUG void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, int whichfork, unsigned long caller_ip); @@ -169,5 +179,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); +int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, + int *done, xfs_fileoff_t start_fsb, + xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext, + xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, + int num_exts); #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index f264616080ca..01f6a646caa1 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1349,7 +1349,6 @@ xfs_free_file_space( * the freeing of the space succeeds at ENOSPC. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); /* @@ -1468,6 +1467,102 @@ out: } /* + * xfs_collapse_file_space() + * This routine frees disk space and shift extent for the given file. + * The first thing we do is to free data blocks in the specified range + * by calling xfs_free_file_space(). It would also sync dirty data + * and invalidate page cache over the region on which collapse range + * is working. And Shift extent records to the left to cover a hole. + * RETURNS: + * 0 on success + * errno on error + * + */ +int +xfs_collapse_file_space( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t len) +{ + int done = 0; + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + xfs_extnum_t current_ext = 0; + struct xfs_bmap_free free_list; + xfs_fsblock_t first_block; + int committed; + xfs_fileoff_t start_fsb; + xfs_fileoff_t shift_fsb; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + + trace_xfs_collapse_file_space(ip); + + start_fsb = XFS_B_TO_FSB(mp, offset + len); + shift_fsb = XFS_B_TO_FSB(mp, len); + + error = xfs_free_file_space(ip, offset, len); + if (error) + return error; + + while (!error && !done) { + tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); + tp->t_flags |= XFS_TRANS_RESERVE; + /* + * We would need to reserve permanent block for transaction. + * This will come into picture when after shifting extent into + * hole we found that adjacent extents can be merged which + * may lead to freeing of a block during record update. + */ + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); + if (error) { + ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); + xfs_trans_cancel(tp, 0); + break; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, + ip->i_gdquot, ip->i_pdquot, + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + goto out; + + xfs_trans_ijoin(tp, ip, 0); + + xfs_bmap_init(&free_list, &first_block); + + /* + * We are using the write transaction in which max 2 bmbt + * updates are allowed + */ + error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb, + shift_fsb, ¤t_ext, + &first_block, &free_list, + XFS_BMAP_MAX_SHIFT_EXTENTS); + if (error) + goto out; + + error = xfs_bmap_finish(&tp, &free_list, &committed); + if (error) + goto out; + + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + + return error; + +out: + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +} + +/* * We need to check that the format of the data fork in the temporary inode is * valid for the target inode before doing the swap. This is not a problem with * attr1 because of the fixed fork offset, but attr2 has a dynamically sized diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 900747b25772..935ed2b24edf 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -99,6 +99,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); +int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, + xfs_off_t len); /* EOF block manipulation functions */ bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 33149113e333..8752821443be 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -796,20 +796,6 @@ xfs_buf_item_init( bip->bli_formats[i].blf_map_size = map_size; } -#ifdef XFS_TRANS_DEBUG - /* - * Allocate the arrays for tracking what needs to be logged - * and what our callers request to be logged. bli_orig - * holds a copy of the original, clean buffer for comparison - * against, and bli_logged keeps a 1 bit flag per byte in - * the buffer to indicate which bytes the callers have asked - * to have logged. - */ - bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP); - memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length)); - bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP); -#endif - /* * Put the buf item into the list of items attached to the * buffer at the front. @@ -957,11 +943,6 @@ STATIC void xfs_buf_item_free( xfs_buf_log_item_t *bip) { -#ifdef XFS_TRANS_DEBUG - kmem_free(bip->bli_orig); - kmem_free(bip->bli_logged); -#endif /* XFS_TRANS_DEBUG */ - xfs_buf_item_free_format(bip); kmem_zone_free(xfs_buf_item_zone, bip); } diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index ce16ef02997a..fda46253966a 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -180,16 +180,23 @@ xfs_dir_init( xfs_inode_t *dp, xfs_inode_t *pdp) { - xfs_da_args_t args; + struct xfs_da_args *args; int error; - memset((char *)&args, 0, sizeof(args)); - args.dp = dp; - args.trans = tp; ASSERT(S_ISDIR(dp->i_d.di_mode)); - if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) + error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino); + if (error) return error; - return xfs_dir2_sf_create(&args, pdp->i_ino); + + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return ENOMEM; + + args->dp = dp; + args->trans = tp; + error = xfs_dir2_sf_create(args, pdp->i_ino); + kmem_free(args); + return error; } /* @@ -205,41 +212,56 @@ xfs_dir_createname( xfs_bmap_free_t *flist, /* bmap's freeblock list */ xfs_extlen_t total) /* bmap's total block count */ { - xfs_da_args_t args; + struct xfs_da_args *args; int rval; int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); - if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) + rval = xfs_dir_ino_validate(tp->t_mountp, inum); + if (rval) return rval; XFS_STATS_INC(xs_dir_create); - memset(&args, 0, sizeof(xfs_da_args_t)); - args.name = name->name; - args.namelen = name->len; - args.filetype = name->type; - args.hashval = dp->i_mount->m_dirnameops->hashname(name); - args.inumber = inum; - args.dp = dp; - args.firstblock = first; - args.flist = flist; - args.total = total; - args.whichfork = XFS_DATA_FORK; - args.trans = tp; - args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_addname(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_block_addname(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_leaf_addname(&args); + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return ENOMEM; + + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->inumber = inum; + args->dp = dp; + args->firstblock = first; + args->flist = flist; + args->total = total; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_addname(args); + goto out_free; + } + + rval = xfs_dir2_isblock(tp, dp, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_addname(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(tp, dp, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_addname(args); else - rval = xfs_dir2_node_addname(&args); + rval = xfs_dir2_node_addname(args); + +out_free: + kmem_free(args); return rval; } @@ -282,46 +304,66 @@ xfs_dir_lookup( xfs_ino_t *inum, /* out: inode number */ struct xfs_name *ci_name) /* out: actual name if CI match */ { - xfs_da_args_t args; + struct xfs_da_args *args; int rval; int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_lookup); - memset(&args, 0, sizeof(xfs_da_args_t)); - args.name = name->name; - args.namelen = name->len; - args.filetype = name->type; - args.hashval = dp->i_mount->m_dirnameops->hashname(name); - args.dp = dp; - args.whichfork = XFS_DATA_FORK; - args.trans = tp; - args.op_flags = XFS_DA_OP_OKNOENT; + /* + * We need to use KM_NOFS here so that lockdep will not throw false + * positive deadlock warnings on a non-transactional lookup path. It is + * safe to recurse into inode recalim in that case, but lockdep can't + * easily be taught about it. Hence KM_NOFS avoids having to add more + * lockdep Doing this avoids having to add a bunch of lockdep class + * annotations into the reclaim path for the ilock. + */ + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->dp = dp; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + args->op_flags = XFS_DA_OP_OKNOENT; if (ci_name) - args.op_flags |= XFS_DA_OP_CILOOKUP; + args->op_flags |= XFS_DA_OP_CILOOKUP; - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_lookup(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_block_lookup(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_leaf_lookup(&args); + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_lookup(args); + goto out_check_rval; + } + + rval = xfs_dir2_isblock(tp, dp, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_lookup(args); + goto out_check_rval; + } + + rval = xfs_dir2_isleaf(tp, dp, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_lookup(args); else - rval = xfs_dir2_node_lookup(&args); + rval = xfs_dir2_node_lookup(args); + +out_check_rval: if (rval == EEXIST) rval = 0; if (!rval) { - *inum = args.inumber; + *inum = args->inumber; if (ci_name) { - ci_name->name = args.value; - ci_name->len = args.valuelen; + ci_name->name = args->value; + ci_name->len = args->valuelen; } } +out_free: + kmem_free(args); return rval; } @@ -338,38 +380,51 @@ xfs_dir_removename( xfs_bmap_free_t *flist, /* bmap's freeblock list */ xfs_extlen_t total) /* bmap's total block count */ { - xfs_da_args_t args; + struct xfs_da_args *args; int rval; int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_remove); - memset(&args, 0, sizeof(xfs_da_args_t)); - args.name = name->name; - args.namelen = name->len; - args.filetype = name->type; - args.hashval = dp->i_mount->m_dirnameops->hashname(name); - args.inumber = ino; - args.dp = dp; - args.firstblock = first; - args.flist = flist; - args.total = total; - args.whichfork = XFS_DATA_FORK; - args.trans = tp; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_removename(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_block_removename(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_leaf_removename(&args); + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return ENOMEM; + + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->inumber = ino; + args->dp = dp; + args->firstblock = first; + args->flist = flist; + args->total = total; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_removename(args); + goto out_free; + } + + rval = xfs_dir2_isblock(tp, dp, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_removename(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(tp, dp, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_removename(args); else - rval = xfs_dir2_node_removename(&args); + rval = xfs_dir2_node_removename(args); +out_free: + kmem_free(args); return rval; } @@ -386,40 +441,54 @@ xfs_dir_replace( xfs_bmap_free_t *flist, /* bmap's freeblock list */ xfs_extlen_t total) /* bmap's total block count */ { - xfs_da_args_t args; + struct xfs_da_args *args; int rval; int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); - if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) + rval = xfs_dir_ino_validate(tp->t_mountp, inum); + if (rval) return rval; - memset(&args, 0, sizeof(xfs_da_args_t)); - args.name = name->name; - args.namelen = name->len; - args.filetype = name->type; - args.hashval = dp->i_mount->m_dirnameops->hashname(name); - args.inumber = inum; - args.dp = dp; - args.firstblock = first; - args.flist = flist; - args.total = total; - args.whichfork = XFS_DATA_FORK; - args.trans = tp; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_replace(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_block_replace(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_leaf_replace(&args); + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return ENOMEM; + + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->inumber = inum; + args->dp = dp; + args->firstblock = first; + args->flist = flist; + args->total = total; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_replace(args); + goto out_free; + } + + rval = xfs_dir2_isblock(tp, dp, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_replace(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(tp, dp, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_replace(args); else - rval = xfs_dir2_node_replace(&args); + rval = xfs_dir2_node_replace(args); +out_free: + kmem_free(args); return rval; } @@ -434,7 +503,7 @@ xfs_dir_canenter( struct xfs_name *name, /* name of entry to add */ uint resblks) { - xfs_da_args_t args; + struct xfs_da_args *args; int rval; int v; /* type-checking value */ @@ -443,29 +512,42 @@ xfs_dir_canenter( ASSERT(S_ISDIR(dp->i_d.di_mode)); - memset(&args, 0, sizeof(xfs_da_args_t)); - args.name = name->name; - args.namelen = name->len; - args.filetype = name->type; - args.hashval = dp->i_mount->m_dirnameops->hashname(name); - args.dp = dp; - args.whichfork = XFS_DATA_FORK; - args.trans = tp; - args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return ENOMEM; + + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->dp = dp; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_addname(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_block_addname(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_leaf_addname(&args); + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_addname(args); + goto out_free; + } + + rval = xfs_dir2_isblock(tp, dp, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_addname(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(tp, dp, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_addname(args); else - rval = xfs_dir2_node_addname(&args); + rval = xfs_dir2_node_addname(args); +out_free: + kmem_free(args); return rval; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 7aeb4c895b32..868b19f096bf 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -615,7 +615,7 @@ xfs_qm_dqread( if (flags & XFS_QMOPT_DQALLOC) { tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm, + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc, XFS_QM_DQALLOC_SPACE_RES(mp), 0); if (error) goto error1; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 2e7989e3a2d6..8fb97a65286e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -823,7 +823,8 @@ xfs_file_fallocate( if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; xfs_ilock(ip, XFS_IOLOCK_EXCL); @@ -831,6 +832,20 @@ xfs_file_fallocate( error = xfs_free_file_space(ip, offset, len); if (error) goto out_unlock; + } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { + unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + + if (offset & blksize_mask || len & blksize_mask) { + error = -EINVAL; + goto out_unlock; + } + + ASSERT(offset + len < i_size_read(inode)); + new_size = i_size_read(inode) - len; + + error = xfs_collapse_file_space(ip, offset, len); + if (error) + goto out_unlock; } else { if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { @@ -840,8 +855,11 @@ xfs_file_fallocate( goto out_unlock; } - error = xfs_alloc_file_space(ip, offset, len, - XFS_BMAPI_PREALLOC); + if (mode & FALLOC_FL_ZERO_RANGE) + error = xfs_zero_file_space(ip, offset, len); + else + error = xfs_alloc_file_space(ip, offset, len, + XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } @@ -859,7 +877,7 @@ xfs_file_fallocate( if (ip->i_d.di_mode & S_IXGRP) ip->i_d.di_mode &= ~S_ISGID; - if (!(mode & FALLOC_FL_PUNCH_HOLE)) + if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 22d1cbea283d..3b80ebae05f5 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -128,7 +128,6 @@ xfs_iomap_write_direct( xfs_fsblock_t firstfsb; xfs_extlen_t extsz, temp; int nimaps; - int bmapi_flag; int quota_flag; int rt; xfs_trans_t *tp; @@ -200,18 +199,15 @@ xfs_iomap_write_direct( xfs_trans_ijoin(tp, ip, 0); - bmapi_flag = 0; - if (offset < XFS_ISIZE(ip) || extsz) - bmapi_flag |= XFS_BMAPI_PREALLOC; - /* * From this point onwards we overwrite the imap pointer that the * caller gave to us. */ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, - &firstfsb, 0, imap, &nimaps, &free_list); + error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, + XFS_BMAPI_PREALLOC, &firstfsb, 0, + imap, &nimaps, &free_list); if (error) goto out_bmap_cancel; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 9ddfb8190ca1..bb3bb658e39c 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -48,6 +48,18 @@ #include <linux/fiemap.h> #include <linux/slab.h> +/* + * Directories have different lock order w.r.t. mmap_sem compared to regular + * files. This is due to readdir potentially triggering page faults on a user + * buffer inside filldir(), and this happens with the ilock on the directory + * held. For regular files, the lock order is the other way around - the + * mmap_sem is taken during the page fault, and then we lock the ilock to do + * block mapping. Hence we need a different class for the directory ilock so + * that lockdep can tell them apart. + */ +static struct lock_class_key xfs_nondir_ilock_class; +static struct lock_class_key xfs_dir_ilock_class; + static int xfs_initxattrs( struct inode *inode, @@ -1191,6 +1203,7 @@ xfs_setup_inode( xfs_diflags_to_iflags(inode, ip); ip->d_ops = ip->i_mount->m_nondir_inode_ops; + lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &xfs_inode_operations; @@ -1198,6 +1211,7 @@ xfs_setup_inode( inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: + lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) inode->i_op = &xfs_dir_ci_inode_operations; else diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index b0f4ef77fa70..2c4004475e71 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -175,7 +175,7 @@ void xlog_iodone(struct xfs_buf *); struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); void xfs_log_ticket_put(struct xlog_ticket *ticket); -int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, +void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, xfs_lsn_t *commit_lsn, int flags); bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 4ef6fdbced78..7e5455391176 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -499,13 +499,6 @@ xlog_cil_push( cil->xc_ctx = new_ctx; /* - * mirror the new sequence into the cil structure so that we can do - * unlocked checks against the current sequence in log forces without - * risking deferencing a freed context pointer. - */ - cil->xc_current_sequence = new_ctx->sequence; - - /* * The switch is now done, so we can drop the context lock and move out * of a shared context. We can't just go straight to the commit record, * though - we need to synchronise with previous and future commits so @@ -523,8 +516,15 @@ xlog_cil_push( * Hence we need to add this context to the committing context list so * that higher sequences will wait for us to write out a commit record * before they do. + * + * xfs_log_force_lsn requires us to mirror the new sequence into the cil + * structure atomically with the addition of this sequence to the + * committing list. This also ensures that we can do unlocked checks + * against the current sequence in log forces without risking + * deferencing a freed context pointer. */ spin_lock(&cil->xc_push_lock); + cil->xc_current_sequence = new_ctx->sequence; list_add(&ctx->committing, &cil->xc_committing); spin_unlock(&cil->xc_push_lock); up_write(&cil->xc_ctx_lock); @@ -662,8 +662,14 @@ xlog_cil_push_background( } +/* + * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence + * number that is passed. When it returns, the work will be queued for + * @push_seq, but it won't be completed. The caller is expected to do any + * waiting for push_seq to complete if it is required. + */ static void -xlog_cil_push_foreground( +xlog_cil_push_now( struct xlog *log, xfs_lsn_t push_seq) { @@ -688,10 +694,8 @@ xlog_cil_push_foreground( } cil->xc_push_seq = push_seq; + queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); spin_unlock(&cil->xc_push_lock); - - /* do the push now */ - xlog_cil_push(log); } bool @@ -721,7 +725,7 @@ xlog_cil_empty( * background commit, returns without it held once background commits are * allowed again. */ -int +void xfs_log_commit_cil( struct xfs_mount *mp, struct xfs_trans *tp, @@ -767,7 +771,6 @@ xfs_log_commit_cil( xlog_cil_push_background(log); up_read(&cil->xc_ctx_lock); - return 0; } /* @@ -796,7 +799,8 @@ xlog_cil_force_lsn( * xlog_cil_push() handles racing pushes for the same sequence, * so no need to deal with it here. */ - xlog_cil_push_foreground(log, sequence); +restart: + xlog_cil_push_now(log, sequence); /* * See if we can find a previous sequence still committing. @@ -804,7 +808,6 @@ xlog_cil_force_lsn( * before allowing the force of push_seq to go ahead. Hence block * on commits for those as well. */ -restart: spin_lock(&cil->xc_push_lock); list_for_each_entry(ctx, &cil->xc_committing, committing) { if (ctx->sequence > sequence) @@ -822,6 +825,28 @@ restart: /* found it! */ commit_lsn = ctx->commit_lsn; } + + /* + * The call to xlog_cil_push_now() executes the push in the background. + * Hence by the time we have got here it our sequence may not have been + * pushed yet. This is true if the current sequence still matches the + * push sequence after the above wait loop and the CIL still contains + * dirty objects. + * + * When the push occurs, it will empty the CIL and + * atomically increment the currect sequence past the push sequence and + * move it into the committing list. Of course, if the CIL is clean at + * the time of the push, it won't have pushed the CIL at all, so in that + * case we should try the push for this sequence again from the start + * just in case. + */ + + if (sequence == cil->xc_current_sequence && + !list_empty(&cil->xc_cil)) { + spin_unlock(&cil->xc_push_lock); + goto restart; + } + spin_unlock(&cil->xc_push_lock); return commit_lsn; } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a6a76b2b6a85..ec5ca65c6211 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -842,7 +842,7 @@ xfs_growfs_rt_alloc( /* * Reserve space & log for one extent added to the file. */ - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc, resblks, 0); if (error) goto error_cancel; diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index b134aa88297d..0c0e41bbe4e3 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -288,6 +288,7 @@ xfs_mount_validate_sb( sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG || sbp->sb_inodesize != (1 << sbp->sb_inodelog) || + sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 425dfa45b9a0..a4ae41c179a8 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink); DEFINE_INODE_EVENT(xfs_inactive_symlink); DEFINE_INODE_EVENT(xfs_alloc_file_space); DEFINE_INODE_EVENT(xfs_free_file_space); +DEFINE_INODE_EVENT(xfs_collapse_file_space); DEFINE_INODE_EVENT(xfs_readdir); #ifdef CONFIG_XFS_POSIX_ACL DEFINE_INODE_EVENT(xfs_get_acl); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c812c5c060de..54a57326d85b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -887,12 +887,7 @@ xfs_trans_commit( xfs_trans_apply_sb_deltas(tp); xfs_trans_apply_dquot_deltas(tp); - error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags); - if (error == ENOMEM) { - xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); - error = XFS_ERROR(EIO); - goto out_unreserve; - } + xfs_log_commit_cil(mp, tp, &commit_lsn, flags); current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_free(tp); @@ -902,10 +897,7 @@ xfs_trans_commit( * log out now and wait for it. */ if (sync) { - if (!error) { - error = _xfs_log_force_lsn(mp, commit_lsn, - XFS_LOG_SYNC, NULL); - } + error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); XFS_STATS_INC(xs_trans_sync); } else { XFS_STATS_INC(xs_trans_async); diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index 2ffd3e331b49..8515b0449dc8 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c @@ -644,15 +644,14 @@ xfs_calc_qm_setqlim_reservation( /* * Allocating quota on disk if needed. - * the write transaction log space: M_RES(mp)->tr_write.tr_logres + * the write transaction log space for quota file extent allocation * the unit of quota allocation: one system block size */ STATIC uint xfs_calc_qm_dqalloc_reservation( struct xfs_mount *mp) { - ASSERT(M_RES(mp)->tr_write.tr_logres); - return M_RES(mp)->tr_write.tr_logres + + return xfs_calc_write_reservation(mp) + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); } @@ -784,7 +783,6 @@ xfs_trans_resv_calc( /* The following transaction are logged in logical format */ resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp); resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp); - resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp); resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp); resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp); resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp); diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/xfs_trans_resv.h index de7de9aaad8a..f76c1297b83f 100644 --- a/fs/xfs/xfs_trans_resv.h +++ b/fs/xfs/xfs_trans_resv.h @@ -42,7 +42,6 @@ struct xfs_trans_resv { struct xfs_trans_res tr_ifree; /* inode free trans */ struct xfs_trans_res tr_ichange; /* inode update trans */ struct xfs_trans_res tr_growdata; /* fs data section grow trans */ - struct xfs_trans_res tr_swrite; /* sync write inode trans */ struct xfs_trans_res tr_addafork; /* add inode attr fork trans */ struct xfs_trans_res tr_writeid; /* write setuid/setgid file */ struct xfs_trans_res tr_attrinval; /* attr fork buffer |